/*
 * Copyright 1991 Steven Smith at the Harvard Genome Lab.
 * All rights reserved.
 */
|
2023-04-12 03:39:54 +08:00
|
|
|
#include "Flatio.c"
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
int main(ac, av)
|
2023-04-12 03:39:54 +08:00
|
|
|
int ac;
|
|
|
|
char **av;
|
|
|
|
{
|
|
|
|
struct data_format data[10000];
|
2023-04-12 03:41:11 +08:00
|
|
|
int Match = 2, Mismatch = 8;
|
|
|
|
int i, j, k, l, numseqs, mis, Case = 32;
|
|
|
|
int slen, pcnt, pos;
|
2023-04-12 03:39:54 +08:00
|
|
|
int UT = FALSE;
|
|
|
|
char c;
|
2023-04-12 03:41:11 +08:00
|
|
|
if (ac < 3) {
|
2023-04-12 03:39:54 +08:00
|
|
|
fprintf(stderr,
|
2023-04-12 03:41:11 +08:00
|
|
|
"usage: %s search_string %%mismatch [-case] [-match "
|
|
|
|
"color] [-mismatch color]\n",
|
|
|
|
av[0]);
|
|
|
|
fprintf(stderr, " [-u=t]\n");
|
2023-04-12 03:39:54 +08:00
|
|
|
exit(0);
|
|
|
|
}
|
2023-04-12 03:41:11 +08:00
|
|
|
for (j = 3; j < ac; j++) {
|
|
|
|
if (strcmp("-case", av[j]) == 0) Case = 0;
|
|
|
|
if (strcmp("-match", av[j]) == 0)
|
|
|
|
sscanf(av[j + 1], "%d", &Match);
|
|
|
|
if (strcmp("-u=t", av[j]) == 0) UT = TRUE;
|
|
|
|
if (strcmp("-mismatch", av[j]) == 0)
|
|
|
|
sscanf(av[j + 1], "%d", &Mismatch);
|
2023-04-12 03:39:54 +08:00
|
|
|
}
|
2023-04-12 03:41:11 +08:00
|
|
|
numseqs = ReadFlat(stdin, data, 10000);
|
2023-04-12 03:39:54 +08:00
|
|
|
|
|
|
|
slen = strlen(av[1]);
|
2023-04-12 03:41:11 +08:00
|
|
|
sscanf(av[2], "%d", &pcnt);
|
2023-04-12 03:39:54 +08:00
|
|
|
pcnt *= slen;
|
|
|
|
pcnt /= 100;
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
if (UT)
|
|
|
|
for (j = 0; j <= strlen(av[1]); j++) {
|
|
|
|
if (av[1][j] == 't') av[1][j] = 'u';
|
|
|
|
if (av[1][j] == 'T') av[1][j] = 'U';
|
2023-04-12 03:39:54 +08:00
|
|
|
}
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
for (i = 0; i < numseqs; i++) {
|
|
|
|
if (UT)
|
|
|
|
for (j = 0; data[i].nuc[j] != '\0'; j++) {
|
|
|
|
if (data[i].nuc[j] == 't')
|
2023-04-12 03:39:54 +08:00
|
|
|
data[i].nuc[j] = 'u';
|
2023-04-12 03:41:11 +08:00
|
|
|
else if (data[i].nuc[j] == 'T')
|
2023-04-12 03:39:54 +08:00
|
|
|
data[i].nuc[j] = 'U';
|
|
|
|
}
|
2023-04-12 03:41:11 +08:00
|
|
|
printf("name:%s\n", data[i].name);
|
|
|
|
printf("length:%d\n", strlen(data[i].nuc));
|
2023-04-12 03:39:54 +08:00
|
|
|
printf("start:\n");
|
2023-04-12 03:41:11 +08:00
|
|
|
for (j = 0; j < data[i].length; j++) {
|
2023-04-12 03:39:54 +08:00
|
|
|
mis = 0;
|
2023-04-12 03:41:11 +08:00
|
|
|
for (k = 0, pos = j; k < slen && pos < data[i].length;
|
|
|
|
k++, pos++) {
|
2023-04-12 03:39:54 +08:00
|
|
|
c = data[i].nuc[pos];
|
2023-04-12 03:41:11 +08:00
|
|
|
for (; (c == ' ' || c == '-' || c == '~') &&
|
|
|
|
pos < data[i].length;)
|
2023-04-12 03:39:54 +08:00
|
|
|
c = data[i].nuc[++pos];
|
|
|
|
c |= Case;
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
if (data[i].type == '#') {
|
|
|
|
if (CompIUP(c, (av[1][k] | Case)) ==
|
|
|
|
FALSE)
|
|
|
|
mis++;
|
2023-04-12 03:39:54 +08:00
|
|
|
}
|
2023-04-12 03:41:11 +08:00
|
|
|
else {
|
|
|
|
if (c != (av[1][k] | Case)) mis++;
|
2023-04-12 03:39:54 +08:00
|
|
|
}
|
|
|
|
}
|
2023-04-12 03:41:11 +08:00
|
|
|
if (k == slen && mis <= pcnt) {
|
|
|
|
for (k = j; k < pos; k++) printf("%d\n", Match);
|
|
|
|
j = pos - 1;
|
2023-04-12 03:39:54 +08:00
|
|
|
}
|
|
|
|
else
|
2023-04-12 03:41:11 +08:00
|
|
|
printf("%d\n", Mismatch);
|
2023-04-12 03:39:54 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * CompIUP: report whether two single-letter codes are compatible.
 *
 * Each letter is looked up in a 128-entry table that maps it to a 4-bit
 * set: a=1, c=2, g=4, t/u=8, and the remaining letters are unions of those
 * bits (m=a|c, r=a|g, s=c|g, v=a|c|g, w=a|t, y=c|t, h=a|c|t, k=g|t,
 * d=a|g|t, b=c|g|t, n=all four).  Upper- and lower-case letters carry the
 * same sets; every other character maps to the empty set.
 *
 * Parameters:
 *   a, b  character codes to compare.  They are taken as int so that the
 *         function is callable both with and without a prototype in scope.
 *
 * Returns:
 *   0 when the two characters differ in bit 32 (i.e. in ASCII letter
 *   case), when either value lies outside 0..127, or when their sets do
 *   not intersect; otherwise the non-zero intersection of the two sets.
 */
int CompIUP(int a, int b)
{
    static const int matr[128] = {
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0x00, 0,
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0x01,
        0xe,  0x02, 0x0d, 0,    0,    0x04, 0x0b, 0,    0,    0x0c, 0,
        0x03, 0x0f, 0,    0,    0,    0x05, 0x06, 0x08, 0x08, 0x07, 0x09,
        0x00, 0xa,  0,    0,    0,    0,    0,    0,    0,    0x01, 0x0e,
        0x02, 0x0d, 0,    0,    0x04, 0x0b, 0,    0,    0x0c, 0,    0x03,
        0x0f, 0,    0,    0,    0x05, 0x06, 0x08, 0x08, 0x07, 0x09, 0x00,
        0x0a, 0,    0,    0,    0,    0x00, 0};

    /* Values outside the table cannot match anything. */
    if (a < 0 || a > 127 || b < 0 || b > 127) {
        return 0;
    }

    /* Parenthesised on purpose: `!=` binds tighter than `&`, so without
     * the parentheses this test is always false. */
    if ((a & 32) != (b & 32)) {
        return 0;
    }

    return matr[a] & matr[b];
}
|