diff --git a/SUPPORT/CAP2.c b/SUPPORT/CAP2.c index 4778db8..7c9733c 100644 --- a/SUPPORT/CAP2.c +++ b/SUPPORT/CAP2.c @@ -12,7 +12,7 @@ Department of Computer Science Michigan Technological University Houghton, MI 49931 - E-mail: huang@cs.mtu.edu + E-mail: huang@cs.mtu.edu The CAP program uses a dynamic programming algorithm to compute the maximal-scoring overlapping alignment between two fragments. @@ -99,7 +99,7 @@ GAAATGCTTTTTAAAAGAAAATATTAAAGTTAAACTCCCC To run the program, type a command of form - cap file_of_fragments + cap file_of_fragments The output goes to the terminal screen. So redirection of the output into a file is necessary. The output consists of three parts: @@ -156,140 +156,139 @@ Mod by S.S. */ -#include +#include +#include +#include -int OVERLEN; /* Minimum length of any overlap */ -float PERCENT; /* Minimum identity percentage of any overlap */ -#define CUTOFF 50 /* cutoff score for overlap or containment */ -#define DELTA 9.0 /* Jump increment in check for overlap */ -#define MATCH 2 /* score of a match */ -#define MISMAT -6 /* score of a mismatch */ -#define LTMISM -3 /* light score of a mismatch */ -#define OPEN 0 /* gap open penalty */ -#define EXTEND 4 /* gap extension penalty */ -#define LTEXTEN 2 /* light gap extension penalty */ -#define POS5 30 /* Sequencing errors often occur before base POS5 */ -#define POS3 475 /* Sequencing errors often occur after base POS3 */ -#define LINELEN 60 /* length of one printed line */ -#define NAMELEN 20 /* length of printed fragment name */ -#define TUPLELEN 4 /* Maximum length of words for lookup table */ -#define SEQLEN 2000 /* initial size of an array for an output fragment */ +int OVERLEN; /* Minimum length of any overlap */ +float PERCENT; /* Minimum identity percentage of any overlap */ +#define CUTOFF 50 /* cutoff score for overlap or containment */ +#define DELTA 9.0 /* Jump increment in check for overlap */ +#define MATCH 2 /* score of a match */ +#define MISMAT -6 /* score of a mismatch */ +#define LTMISM -3 /* light score of a mismatch */ +#define OPEN 0 /* gap open penalty */ +#define EXTEND 4 /* gap extension penalty */ +#define LTEXTEN 2 /* light gap extension penalty */ +#define POS5 30 /* Sequencing errors often occur before base POS5 */ +#define POS3 475 /* Sequencing errors often occur after base POS3 */ +#define LINELEN 60 /* length of one printed line */ +#define NAMELEN 20 /* length of printed fragment name */ +#define TUPLELEN 4 /* Maximum length of words for lookup table */ +#define SEQLEN 2000 /* initial size of an array for an output fragment */ static int over_len; static float per_cent; -typedef struct OVERLAP /* of 5' and 3' segments */ -{ - int number; /* index of 3' segment */ - int host; /* index of 5' segment */ - int ind; /* used in reassembly */ - int stari; /* start position of 5' suffix */ - int endi; /* end position of 5' suffix */ - int starj; /* start position of 3' prefix */ - int endj; /* end position of 3' prefix */ - short orienti; /* orientation of 5' segment: 0= rev. */ - short orientj; /* orientation of 3' segment: 0= rev. */ - int score; /* score of overlap alignment */ - int length; /* length of alignment */ - int match; /* number of matches in alignment */ - short kind; /* 0 = containment; 1 = overlap */ - int *script; /* script for constructing alignment */ - struct OVERLAP *next; +typedef struct OVERLAP /* of 5' and 3' segments */ +{ + int number; /* index of 3' segment */ + int host; /* index of 5' segment */ + int ind; /* used in reassembly */ + int stari; /* start position of 5' suffix */ + int endi; /* end position of 5' suffix */ + int starj; /* start position of 3' prefix */ + int endj; /* end position of 3' prefix */ + short orienti; /* orientation of 5' segment: 0= rev. */ + short orientj; /* orientation of 3' segment: 0= rev. */ + int score; /* score of overlap alignment */ + int length; /* length of alignment */ + int match; /* number of matches in alignment */ + short kind; /* 0 = containment; 1 = overlap */ + int *script; /* script for constructing alignment */ + struct OVERLAP *next; } over, *overptr; -struct SEG -{ - char *name; /* name string */ - int len; /* length of segment name */ - char *seq; /* segment sequence */ - char *rev; /* reverse complement */ - int length; /* length of sequence */ - short kind; /* 0 = contain; 1 = overlap */ - int *lookup; /* lookup table */ - int *pos; /* location list */ - overptr list; /* list of overlapping edges */ -} *segment; /* array of segment records */ -int seg_num; /* The number of segments */ -overptr *edge; /* set of overlapping edges */ -int edge_num; /* The number of overlaps */ -struct CONS /* 1 = itself; 0 = reverse complement */ -{ - short isfive[2]; /* is 5' end free? */ - short isthree[2]; /* is 3' end free? */ - short orient[2]; /* orientation of 3' segment */ - int group; /* contig serial number */ - int next[2]; /* pointer to 3' adjacent segment */ - int other; /* the other end of the contig */ - int child; /* for the containment tree */ - int brother; - int father; - overptr node[2]; /* pointers to overlapping edges */ -} *contigs; /* list of contigs */ -struct TTREE /* multiple alignment tree */ -{ - short head; /* head = 1 means the head of a contig */ - short orient; /* orientation */ - int begin; /* start position of previous segment */ - int *script; /* alignment script */ - int size; /* length of script */ - int next; /* list of overlap segments */ - int child; /* list of child segments */ - int brother; /* list of sibling segments */ -} *mtree; -int vert[128]; /* converted digits for 'A','C','G','T' */ -int vertc[128]; /* for reverse complement */ -int tuple; /* tuple = TUPLELEN - 1 */ -int base[TUPLELEN]; /* locations of a lookup table */ -int power[TUPLELEN]; /* powers of 4 */ -typedef struct OUT -{ - char *line; /* one print line */ - char *a; /* pointer to slot in line */ - char c; /* current char */ - char *seq; /* pointer to sequence */ - int length; /* length of segment */ - int id; /* index of segment */ - int *s; /* pointer to script vector */ - int size; /* size of script vector */ - int op; /* current operation */ - char name[NAMELEN+2];/* name of segment */ +struct SEG { + char *name; /* name string */ + int len; /* length of segment name */ + char *seq; /* segment sequence */ + char *rev; /* reverse complement */ + int length; /* length of sequence */ + short kind; /* 0 = contain; 1 = overlap */ + int *lookup; /* lookup table */ + int *pos; /* location list */ + overptr list; /* list of overlapping edges */ +} *segment; /* array of segment records */ +int seg_num; /* The number of segments */ +overptr *edge; /* set of overlapping edges */ +int edge_num; /* The number of overlaps */ +struct CONS /* 1 = itself; 0 = reverse complement */ +{ + short isfive[2]; /* is 5' end free? */ + short isthree[2]; /* is 3' end free? */ + short orient[2]; /* orientation of 3' segment */ + int group; /* contig serial number */ + int next[2]; /* pointer to 3' adjacent segment */ + int other; /* the other end of the contig */ + int child; /* for the containment tree */ + int brother; + int father; + overptr node[2]; /* pointers to overlapping edges */ +} *contigs; /* list of contigs */ +struct TTREE /* multiple alignment tree */ +{ + short head; /* head = 1 means the head of a contig */ + short orient; /* orientation */ + int begin; /* start position of previous segment */ + int *script; /* alignment script */ + int size; /* length of script */ + int next; /* list of overlap segments */ + int child; /* list of child segments */ + int brother; /* list of sibling segments */ +} *mtree; +int vert[128]; /* converted digits for 'A','C','G','T' */ +int vertc[128]; /* for reverse complement */ +int tuple; /* tuple = TUPLELEN - 1 */ +int base[TUPLELEN]; /* locations of a lookup table */ +int power[TUPLELEN]; /* powers of 4 */ +typedef struct OUT { + char *line; /* one print line */ + char *a; /* pointer to slot in line */ + char c; /* current char */ + char *seq; /* pointer to sequence */ + int length; /* length of segment */ + int id; /* index of segment */ + int *s; /* pointer to script vector */ + int size; /* size of script vector */ + int op; /* current operation */ + char name[NAMELEN + 2]; /* name of segment */ short done; /* indicates if segment is done */ - int loc; /* position of next segment symbol */ - char kind; /* type of next symbol of segment */ - char up; /* type of upper symbol of operation */ - char dw; /* type of lower symbol of operation */ - int offset; /* relative to consensus sequence */ - int linesize; /* size of array line */ - struct OUT *child; /* pointer to child subtree */ - struct OUT *brother; /* pointer to brother node */ - struct OUT *link; /* for operation linked list */ - struct OUT *father; /* pointer to father node */ -} row, *rowptr; /* node for segment */ + int loc; /* position of next segment symbol */ + char kind; /* type of next symbol of segment */ + char up; /* type of upper symbol of operation */ + char dw; /* type of lower symbol of operation */ + int offset; /* relative to consensus sequence */ + int linesize; /* size of array line */ + struct OUT *child; /* pointer to child subtree */ + struct OUT *brother; /* pointer to brother node */ + struct OUT *link; /* for operation linked list */ + struct OUT *father; /* pointer to father node */ +} row, *rowptr; /* node for segment */ rowptr *work; /* a set of working segments */ rowptr head, tail; /* first and last nodes of op list */ -struct VX -{ - int id; /* Segment index */ - short kind; /* overlap or containment */ - overptr list; /* list of overlapping edges */ -} *piece; /* array of segment records */ -char *allconsen, *allconpt; /* Storing consensus sequences */ - -main(argc, argv) int argc; +struct VX { + int id; /* Segment index */ + short kind; /* overlap or containment */ + overptr list; /* list of overlapping edges */ +} *piece; /* array of segment records */ +char *allconsen, *allconpt; /* Storing consensus sequences */ +char *ckalloc(size_t size); +main(argc, argv) int argc; char *argv[]; -{ - int M; /* Sequence length */ - int V[128][128], Q,R; /* Weights */ - int V1[128][128], R1; /* Light weights */ - int total; /* Total of segment lengths */ - int number; /* The number of segments */ - char *sequence; /* Storing all segments */ - char *reverse; /* Storing all reverse segments */ - int symbol, prev; /* The next character */ - FILE *Ap, *ckopen(); /* For the sequence file */ - char *ckalloc(); /* space-allocating function */ - register int i, j, k; /* index variables */ +{ + int M; /* Sequence length */ + int V[128][128], Q, R; /* Weights */ + int V1[128][128], R1; /* Light weights */ + int total; /* Total of segment lengths */ + int number; /* The number of segments */ + char *sequence; /* Storing all segments */ + char *reverse; /* Storing all reverse segments */ + int symbol, prev; /* The next character */ + FILE *Ap, *ckopen(); /* For the sequence file */ + char *ckalloc(size_t amount); /* space-allocating function */ + register int i, j, k; /* index variables */ /* Mod by S.S. */ int jj; - short heading; /* 1: heading; 0: sequence */ + short heading; /* 1: heading; 0: sequence */ /* * Mod by S.S. @@ -298,38 +297,34 @@ char *argv[]; fatalf("The proper form of command is: \n%s file_of_fragments", argv[0]); */ - if ( argc != 4 ) + if (argc != 4) fatalf("usage: %s file_of_fragments MIN_OVERLAP PERCENT_MATCH", - argv[0]); - sscanf(argv[2],"%d",&OVERLEN); - sscanf(argv[3],"%d",&jj); - PERCENT = (float)jj/100.0; - if(PERCENT < 0.25) PERCENT = 0.25; - if(PERCENT > 1.0) PERCENT = 1.0; - if(OVERLEN < 1) OVERLEN = 1; - if(OVERLEN > 100) OVERLEN = 100; - - + argv[0]); + sscanf(argv[2], "%d", &OVERLEN); + sscanf(argv[3], "%d", &jj); + PERCENT = (float)jj / 100.0; + if (PERCENT < 0.25) PERCENT = 0.25; + if (PERCENT > 1.0) PERCENT = 1.0; + if (OVERLEN < 1) OVERLEN = 1; + if (OVERLEN > 100) OVERLEN = 100; /* determine number of segments and total lengths */ j = 0; Ap = ckopen(argv[1], "r"); prev = '\n'; - for (total = 3, number = 0; ( symbol = getc(Ap)) != EOF ; total++ ) - { - if ( symbol == '>' && prev == '\n' ) - number++; + for (total = 3, number = 0; (symbol = getc(Ap)) != EOF; total++) { + if (symbol == '>' && prev == '\n') number++; prev = symbol; } - (void) fclose(Ap); + (void)fclose(Ap); total += number * 20; /* allocate space for segments */ - sequence = ( char * ) ckalloc( total * sizeof(char)); - reverse = ( char * ) ckalloc( total * sizeof(char)); - allconpt = allconsen = ( char * ) ckalloc( total * sizeof(char)); - segment = ( struct SEG * ) ckalloc( number * sizeof(struct SEG)); + sequence = (char *)ckalloc(total * sizeof(char)); + reverse = (char *)ckalloc(total * sizeof(char)); + allconpt = allconsen = (char *)ckalloc(total * sizeof(char)); + segment = (struct SEG *)ckalloc(number * sizeof(struct SEG)); /* read the segments into sequence */ M = 0; @@ -337,50 +332,45 @@ char *argv[]; number = -1; heading = 0; prev = '\n'; - for ( i = 0, k = total ; ( symbol = getc(Ap)) != EOF ; ) - { - if ( symbol != '\n' ) - { + for (i = 0, k = total; (symbol = getc(Ap)) != EOF;) { + if (symbol != '\n') { sequence[++i] = symbol; - switch ( symbol ) - { - case 'A' : - reverse[--k] = 'T'; - break; - case 'a' : - reverse[--k] = 't'; - break; - case 'T' : - reverse[--k] = 'A'; - break; - case 't' : - reverse[--k] = 'a'; - break; - case 'C' : - reverse[--k] = 'G'; - break; - case 'c' : - reverse[--k] = 'g'; - break; - case 'G' : - reverse[--k] = 'C'; - break; - case 'g' : - reverse[--k] = 'c'; - break; - default : - reverse[--k] = symbol; - break; + switch (symbol) { + case 'A': + reverse[--k] = 'T'; + break; + case 'a': + reverse[--k] = 't'; + break; + case 'T': + reverse[--k] = 'A'; + break; + case 't': + reverse[--k] = 'a'; + break; + case 'C': + reverse[--k] = 'G'; + break; + case 'c': + reverse[--k] = 'g'; + break; + case 'G': + reverse[--k] = 'C'; + break; + case 'g': + reverse[--k] = 'c'; + break; + default: + reverse[--k] = symbol; + break; } } - if ( symbol == '>' && prev == '\n' ) - { + if (symbol == '>' && prev == '\n') { heading = 1; - if ( number >= 0 ) - { + if (number >= 0) { segment[number].length = i - j - 1; segment[number].rev = &(reverse[k]); - if ( i - j - 1 > M ) M = i - j -1; + if (i - j - 1 > M) M = i - j - 1; } number++; j = i; @@ -388,8 +378,7 @@ char *argv[]; segment[number].kind = 1; segment[number].list = 0; } - if ( heading && symbol == '\n' ) - { + if (heading && symbol == '\n') { heading = 0; segment[number].len = i - j; segment[number].seq = &(sequence[i]); @@ -401,25 +390,23 @@ char *argv[]; segment[number].length = i - j; reverse[--k] = '>'; segment[number].rev = &(reverse[k]); - if ( i - j > M ) M = i - j; + if (i - j > M) M = i - j; seg_num = ++number; - (void) fclose(Ap); + (void)fclose(Ap); Q = OPEN; R = EXTEND; R1 = LTEXTEN; /* set match and mismatch weights */ - for ( i = 0; i < 128 ; i++ ) - for ( j = 0; j < 128 ; j++ ) - if ((i|32) == (j|32) ) + for (i = 0; i < 128; i++) + for (j = 0; j < 128; j++) + if ((i | 32) == (j | 32)) V[i][j] = V1[i][j] = MATCH; - else - { + else { V[i][j] = MISMAT; V1[i][j] = LTMISM; } - for ( i = 0; i < 128 ; i++ ) - V['N'][i] = V[i]['N'] = MISMAT + 1; + for (i = 0; i < 128; i++) V['N'][i] = V[i]['N'] = MISMAT + 1; V1['N']['N'] = MISMAT + 1; over_len = OVERLEN; @@ -427,7 +414,7 @@ char *argv[]; edge_num = 0; INIT(M); MAKE(); - PAIR(V,V1,Q,R,R1); + PAIR(V, V1, Q, R, R1); ASSEM(); REPAIR(); FORM_TREE(); @@ -435,92 +422,96 @@ char *argv[]; SHOW(); } -static int (*v)[128]; /* substitution scores */ -static int q, r; /* gap penalties */ -static int qr; /* qr = q + r */ -static int (*v1)[128]; /* light substitution scores */ -static int r1; /* light extension penalty */ -static int qr1; /* qr1 = q + r1 */ +static int (*v)[128]; /* substitution scores */ +static int q, r; /* gap penalties */ +static int qr; /* qr = q + r */ +static int (*v1)[128]; /* light substitution scores */ +static int r1; /* light extension penalty */ +static int qr1; /* qr1 = q + r1 */ -static int SCORE; -static int STARI; -static int STARJ; -static int ENDI; -static int ENDJ; +static int SCORE; +static int STARI; +static int STARJ; +static int ENDI; +static int ENDJ; -static int *CC, *DD; /* saving matrix scores */ -static int *RR, *SS; /* saving start-points */ -static int *S; /* saving operations for diff */ +static int *CC, *DD; /* saving matrix scores */ +static int *RR, *SS; /* saving start-points */ +static int *S; /* saving operations for diff */ /* The following definitions are for function diff() */ -int diff(); -static int zero = 0; /* int type zero */ +int diff(); +static int zero = 0; /* int type zero */ -#define gap(k) ((k) <= 0 ? 0 : q+r*(k)) /* k-symbol indel score */ +#define gap(k) ((k) <= 0 ? 0 : q + r * (k)) /* k-symbol indel score */ -static int *sapp; /* Current script append ptr */ -static int last; /* Last script op appended */ +static int *sapp; /* Current script append ptr */ +static int last; /* Last script op appended */ -static int no_mat; /* number of matches */ +static int no_mat; /* number of matches */ -static int no_mis; /* number of mismatches */ +static int no_mis; /* number of mismatches */ -static int al_len; /* length of alignment */ +static int al_len; /* length of alignment */ /* Append "Delete k" op */ -#define DEL(k) \ -{ al_len += k; \ - if (last < 0) \ - last = sapp[-1] -= (k); \ - else \ - last = *sapp++ = -(k); \ -} +#define DEL(k) \ + { \ + al_len += k; \ + if (last < 0) \ + last = sapp[-1] -= (k); \ + else \ + last = *sapp++ = -(k); \ + } /* Append "Insert k" op */ -#define INS(k) \ -{ al_len += k; \ - if (last < 0) \ - { sapp[-1] = (k); *sapp++ = last; } \ - else \ - last = *sapp++ = (k); \ -} +#define INS(k) \ + { \ + al_len += k; \ + if (last < 0) { \ + sapp[-1] = (k); \ + *sapp++ = last; \ + } \ + else \ + last = *sapp++ = (k); \ + } /* Append "Replace" op */ -#define REP \ -{ last = *sapp++ = 0; \ - al_len += 1; \ -} +#define REP \ + { \ + last = *sapp++ = 0; \ + al_len += 1; \ + } -INIT(M) int M; -{ - register int j; /* row and column indices */ - char *ckalloc(); /* space-allocating function */ +INIT(M) int M; +{ + register int j; /* row and column indices */ + char *ckalloc(); /* space-allocating function */ /* allocate space for all vectors */ - j = (M + 1) * sizeof(int ); - CC = ( int * ) ckalloc(j); - DD = ( int * ) ckalloc(j); - RR = ( int * ) ckalloc(j); - SS = ( int * ) ckalloc(j); - S = ( int * ) ckalloc(2 * j); + j = (M + 1) * sizeof(int); + CC = (int *)ckalloc(j); + DD = (int *)ckalloc(j); + RR = (int *)ckalloc(j); + SS = (int *)ckalloc(j); + S = (int *)ckalloc(2 * j); } /* Make a lookup table for words of lengths up to TUPLELEN in each sequence. The value of a word is used as an index to the table. */ MAKE() -{ - int hash[TUPLELEN]; /* values of words of lengths up to TUPLELEN */ - int *table; /* pointer to a lookup table */ - int *loc; /* pointer to a table of sequence locations */ - int size; /* size of a lookup table */ - int limit, digit, step; /* temporary variables */ - register int i, j, k, p, q; /* index varibles */ - char *ckalloc(); /* space-allocating function */ - char *A; /* pointer to a sequence */ - int M; /* length of a sequence */ +{ + int hash[TUPLELEN]; /* values of words of lengths up to TUPLELEN */ + int *table; /* pointer to a lookup table */ + int *loc; /* pointer to a table of sequence locations */ + int size; /* size of a lookup table */ + int limit, digit, step; /* temporary variables */ + register int i, j, k, p, q; /* index varibles */ + char *ckalloc(); /* space-allocating function */ + char *A; /* pointer to a sequence */ + int M; /* length of a sequence */ tuple = TUPLELEN - 1; - for ( i = 0; i < 128; i++ ) - vert[i] = 4; + for (i = 0; i < 128; i++) vert[i] = 4; vert['A'] = vert['a'] = 0; vert['C'] = vert['c'] = 1; vert['G'] = vert['g'] = 2; @@ -529,36 +520,31 @@ MAKE() vertc['C'] = vertc['c'] = 2; vertc['G'] = vertc['g'] = 1; vertc['T'] = vertc['t'] = 0; - for ( i = 0, j = 1, size = 0; i <= tuple ; i++, j *= 4 ) - { + for (i = 0, j = 1, size = 0; i <= tuple; i++, j *= 4) { base[i] = size; power[i] = j; - size = ( size + 1 ) * 4; + size = (size + 1) * 4; } - for ( j = 0; j <= tuple; j++ ) - hash[j] = 0; + for (j = 0; j <= tuple; j++) hash[j] = 0; /* make a lookup table for each sequence */ - for ( i = 0; i < seg_num ; i++ ) - { + for (i = 0; i < seg_num; i++) { A = segment[i].seq; M = segment[i].length; - table = segment[i].lookup = (int * ) ckalloc(size * sizeof(int )); - loc = segment[i].pos = (int * ) ckalloc((M + 1) * sizeof(int )); - for ( j = 0; j < size; j++ ) - table[j] = 0; - for ( k = 0, j = 1; j <= M; j++ ) - if ( ( digit = vert[A[j]] ) != 4 ) - { - for ( p = tuple; p > 0; p-- ) - hash[p] = 4 * (hash[p-1] + 1) + digit; + table = segment[i].lookup = (int *)ckalloc(size * sizeof(int)); + loc = segment[i].pos = (int *)ckalloc((M + 1) * sizeof(int)); + for (j = 0; j < size; j++) table[j] = 0; + for (k = 0, j = 1; j <= M; j++) + if ((digit = vert[A[j]]) != 4) { + for (p = tuple; p > 0; p--) + hash[p] = 4 * (hash[p - 1] + 1) + digit; hash[0] = digit; step = j - k; limit = tuple <= step ? tuple : step; - for ( p = 0; p < limit; p++ ) - if ( ! table[(q = hash[p])] ) table[q] = 1; - if ( step > tuple ) - { - loc[(p = j - tuple)] = table[(q = hash[tuple])]; + for (p = 0; p < limit; p++) + if (!table[(q = hash[p])]) table[q] = 1; + if (step > tuple) { + loc[(p = j - tuple)] = + table[(q = hash[tuple])]; table[q] = p; } } @@ -571,24 +557,24 @@ MAKE() satisfying the necessary condition for overlap are rejected quickly. Those that do satisfy the condition are verified with a dynamic programming algorithm to see if overlaps exist. */ -PAIR(V,V1,Q,R,R1) int V[][128],V1[][128],Q,R,R1; -{ - int endi, endj, stari, starj; /* endpoint and startpoint */ +PAIR(V, V1, Q, R, R1) int V[][128], V1[][128], Q, R, R1; +{ + int endi, endj, stari, starj; /* endpoint and startpoint */ - short orienti, orientj; /* orientation of segments */ - short iscon; /* containment condition */ - int score; /* the max score */ - int count, limit; /* temporary variables */ + short orienti, orientj; /* orientation of segments */ + short iscon; /* containment condition */ + int score; /* the max score */ + int count, limit; /* temporary variables */ - register int i, j, d; /* row and column indices */ - char *ckalloc(); /* space-allocating function */ - int rl, cl; + register int i, j, d; /* row and column indices */ + char *ckalloc(); /* space-allocating function */ + int rl, cl; char *A, *B; - int M, N; + int M, N; overptr node1; - int total; /* total number of pairs */ - int hit; /* number of pairs satisfying cond. */ - int CHECK(); + int total; /* total number of pairs */ + int hit; /* number of pairs satisfying cond. */ + int CHECK(); v = V; q = Q; @@ -598,26 +584,22 @@ PAIR(V,V1,Q,R,R1) int V[][128],V1[][128],Q,R,R1; r1 = R1; qr1 = q + r1; total = hit = 0; - limit = 2 * ( seg_num - 1 ); - for ( orienti = 0, d = 0; d < limit ; d++ ) - { + limit = 2 * (seg_num - 1); + for (orienti = 0, d = 0; d < limit; d++) { i = d / 2; orienti = 1 - orienti; A = orienti ? segment[i].seq : segment[i].rev; M = segment[i].length; - for ( j = i+1; j < seg_num ; j++ ) - { + for (j = i + 1; j < seg_num; j++) { B = segment[j].seq; orientj = 1; N = segment[j].length; total += 1; - if ( CHECK(orienti, i, j) ) - { + if (CHECK(orienti, i, j)) { hit += 1; SCORE = 0; - big_pass(A,B,M,N,orienti,orientj); - if ( SCORE ) - { + big_pass(A, B, M, N, orienti, orientj); + if (SCORE) { score = SCORE; stari = ++STARI; starj = ++STARJ; @@ -628,48 +610,58 @@ PAIR(V,V1,Q,R,R1) int V[][128],V1[][128],Q,R,R1; sapp = S; last = 0; al_len = no_mat = no_mis = 0; - (void) diff(&A[stari]-1, &B[starj]-1,rl,cl,q,q); - iscon = stari == 1 && endi == M || starj == 1 && endj == N; - if ( no_mat >= al_len * per_cent && - ( al_len >= over_len || iscon ) ) - { - node1 = ( overptr ) ckalloc( (int ) sizeof(over)); - if ( iscon ) - node1->kind = 0; /* containment */ - else - { - node1->kind = 1; - edge_num++; - } /* overlap */ - if ( endi == M && ( endj != N || starj != 1 ) ) /*i is 5'*/ - { + (void)diff(&A[stari] - 1, &B[starj] - 1, + rl, cl, q, q); + iscon = stari == 1 && endi == M || + starj == 1 && endj == N; + if (no_mat >= al_len * per_cent && + (al_len >= over_len || iscon)) { + node1 = (overptr)ckalloc( + (int)sizeof(over)); + if (iscon) + node1->kind = + 0; /* containment */ + else { + node1->kind = 1; + edge_num++; + } /* overlap */ + if (endi == M && + (endj != N || + starj != 1)) /*i is 5'*/ + { node1->number = j; node1->host = i; node1->stari = stari; node1->endi = endi; - node1->orienti = orienti; + node1->orienti = + orienti; node1->starj = starj; node1->endj = endj; - node1->orientj = orientj; + node1->orientj = + orientj; } - else /* j is 5' */ - { + else /* j is 5' */ + { node1->number = i; node1->host = j; node1->stari = starj; node1->endi = endj; - node1->orienti = orientj; + node1->orienti = + orientj; node1->starj = stari; node1->endj = endi; - node1->orientj = orienti; + node1->orientj = + orienti; } node1->score = score; node1->length = al_len; node1->match = no_mat; - count = node1->number == i ? j : i; - node1->next = segment[count].list; + count = + node1->number == i ? j : i; + node1->next = + segment[count].list; segment[count].list = node1; - if ( ! node1->kind ) + if (!node1->kind) segment[count].kind = 0; } } @@ -681,48 +673,48 @@ PAIR(V,V1,Q,R,R1) int V[][128],V1[][128],Q,R,R1; /* Return 1 if two sequences satisfy the necessary condition for overlap, and 0 otherwise. Parameters first and second are the indices of segments, and parameter orient indicates the orientation of segment first. */ -int CHECK(orient,first,second) short orient; -int first, second; -{ - int limit, bound; /* maximum number of jumps */ - int small; /* smaller of limit and bound */ - float delta; /* cutoff factor */ - float cut; /* cutoff score */ - register int i; /* index variable */ - int t, q; /* temporary variables */ - int JUMP(); - int RJUMP(); - int JUMPC(); - int RJUMPC(); +int CHECK(orient, first, second) +short orient; +int first, second; +{ + int limit, bound; /* maximum number of jumps */ + int small; /* smaller of limit and bound */ + float delta; /* cutoff factor */ + float cut; /* cutoff score */ + register int i; /* index variable */ + int t, q; /* temporary variables */ + int JUMP(); + int RJUMP(); + int JUMPC(); + int RJUMPC(); delta = DELTA; - if ( orient ) + if (orient) limit = JUMP(CC, first, second, 1); else limit = JUMPC(CC, first, second); bound = RJUMP(DD, second, first, orient); small = limit <= bound ? limit : bound; cut = 0; - for ( i = 1; i <= small; i++ ) - { - if ( (t = DD[i] - 1) >= over_len && t >= cut - && (q = CC[i] - 1) >= over_len && q >= cut ) + for (i = 1; i <= small; i++) { + if ((t = DD[i] - 1) >= over_len && t >= cut && + (q = CC[i] - 1) >= over_len && q >= cut) return (1); cut += delta; } - if (DD[bound] >= delta*(bound-1) || CC[limit] >= delta*(limit-1)) + if (DD[bound] >= delta * (bound - 1) || + CC[limit] >= delta * (limit - 1)) return (1); limit = JUMP(CC, second, first, orient); - if ( orient ) + if (orient) bound = RJUMP(DD, first, second, 1); else bound = RJUMPC(DD, first, second); small = limit <= bound ? limit : bound; cut = 0; - for ( i = 1; i <= small; i++ ) - { - if ( (t = DD[i] - 1) >= over_len && t >= cut - && (q = CC[i] - 1) >= over_len && q >= cut ) + for (i = 1; i <= small; i++) { + if ((t = DD[i] - 1) >= over_len && t >= cut && + (q = CC[i] - 1) >= over_len && q >= cut) return (1); cut += delta; } @@ -730,20 +722,21 @@ int first, second; } /* Compute a vector of lengths of jumps */ -int JUMP(H,one,two,orient) int H[], one, two; +int JUMP(H, one, two, orient) +int H[], one, two; short orient; -{ - char *A, *B; /* pointers to two sequences */ - int M, N; /* lengths of two sequences */ - int *table; /* pointer to a lookup table */ - int *loc; /* pointer to a location table */ - int value; /* value of a word */ - int maxd; /* maximum length of an identical diagonal */ - int d; /* length of current identical diagonal */ - int s; /* length of jumps */ - int k; /* number of jumps */ +{ + char *A, *B; /* pointers to two sequences */ + int M, N; /* lengths of two sequences */ + int *table; /* pointer to a lookup table */ + int *loc; /* pointer to a location table */ + int value; /* value of a word */ + int maxd; /* maximum length of an identical diagonal */ + int d; /* length of current identical diagonal */ + int s; /* length of jumps */ + int k; /* number of jumps */ - register int i, j, p; /* index variables */ + register int i, j, p; /* index variables */ A = segment[one].seq; M = segment[one].length; @@ -751,28 +744,26 @@ short orient; loc = segment[one].pos; B = orient ? segment[two].seq : segment[two].rev; N = segment[two].length; - for ( s = 1, k = 1; s <= N ; k++ ) - { + for (s = 1, k = 1; s <= N; k++) { maxd = 0; - for ( value = -1, d = 0, j = s; d <= tuple && j <= N; j++, d++) - { - if ( vert[B[j]] == 4 ) break; + for (value = -1, d = 0, j = s; d <= tuple && j <= N; j++, d++) { + if (vert[B[j]] == 4) break; value = 4 * (value + 1) + vert[B[j]]; - if ( ! table[value] ) break; + if (!table[value]) break; } - if ( d > tuple ) - { - for ( p = table[value]; p ; p = loc[p] ) - { + if (d > tuple) { + for (p = table[value]; p; p = loc[p]) { d = tuple + 1; - for ( i = p+d, j = s+d; i <= M && j <= N; i++, j++, d++ ) - if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break; - if ( maxd < d ) - maxd = d; - if ( j > N ) break; + for (i = p + d, j = s + d; i <= M && j <= N; + i++, j++, d++) + if (A[i] != B[j] && vert[A[i]] != 4 && + vert[B[j]] != 4) + break; + if (maxd < d) maxd = d; + if (j > N) break; } } - else + else maxd = d; s += maxd + 1; H[k] = s; @@ -781,19 +772,20 @@ short orient; } /* Compute a vector of lengths of jumps for reverse complement of one */ -int JUMPC(H,one,two) int H[], one, two; -{ - char *A, *B; /* pointers to two sequences */ - int M, N; /* lengths of two sequences */ - int *table; /* pointer to a lookup table */ - int *loc; /* pointer to a location table */ - int value; /* value of a word */ - int maxd; /* maximum length of an identical diagonal */ - int d; /* length of current identical diagonal */ - int s; /* length of jumps */ - int k; /* number of jumps */ +int JUMPC(H, one, two) +int H[], one, two; +{ + char *A, *B; /* pointers to two sequences */ + int M, N; /* lengths of two sequences */ + int *table; /* pointer to a lookup table */ + int *loc; /* pointer to a location table */ + int value; /* value of a word */ + int maxd; /* maximum length of an identical diagonal */ + int d; /* length of current identical diagonal */ + int s; /* length of jumps */ + int k; /* number of jumps */ - register int i, j, p; /* index variables */ + register int i, j, p; /* index variables */ A = segment[one].rev; M = segment[one].length; @@ -801,28 +793,26 @@ int JUMPC(H,one,two) int H[], one, two; loc = segment[one].pos; B = segment[two].seq; N = segment[two].length; - for ( s = 1, k = 1; s <= N ; k++ ) - { + for (s = 1, k = 1; s <= N; k++) { maxd = 0; - for ( value = 0, d = 0, j = s; d <= tuple && j <= N; j++, d++) - { - if ( vert[B[j]] == 4 ) break; + for (value = 0, d = 0, j = s; d <= tuple && j <= N; j++, d++) { + if (vert[B[j]] == 4) break; value += vertc[B[j]] * power[d]; - if ( ! table[value+base[d]] ) break; + if (!table[value + base[d]]) break; } - if ( d > tuple ) - { - for ( p = table[value+base[tuple]]; p ; p = loc[p] ) - { + if (d > tuple) { + for (p = table[value + base[tuple]]; p; p = loc[p]) { d = tuple + 1; - for ( i = M+2-p, j = s+d; i <= M && j <= N; i++, j++, d++ ) - if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break; - if ( maxd < d ) - maxd = d; - if ( j > N ) break; + for (i = M + 2 - p, j = s + d; i <= M && j <= N; + i++, j++, d++) + if (A[i] != B[j] && vert[A[i]] != 4 && + vert[B[j]] != 4) + break; + if (maxd < d) maxd = d; + if (j > N) break; } } - else + else maxd = d; s += maxd + 1; H[k] = s; @@ -831,48 +821,48 @@ int JUMPC(H,one,two) int H[], one, two; } /* Compute a vector of lengths of reverse jumps */ -int RJUMP(H,one,two,orient) int H[], one, two; +int RJUMP(H, one, two, orient) +int H[], one, two; short orient; -{ - char *A, *B; /* pointers to two sequences */ - int N; /* length of a sequence */ - int *table; /* pointer to a lookup table */ - int *loc; /* pointer to a location table */ - int value; /* value of a word */ - int maxd; /* maximum length of an identical diagonal */ - int d; /* length of current identical diagonal */ - int s; /* length of jumps */ - int k; /* number of jumps */ +{ + char *A, *B; /* pointers to two sequences */ + int N; /* length of a sequence */ + int *table; /* pointer to a lookup table */ + int *loc; /* pointer to a location table */ + int value; /* value of a word */ + int maxd; /* maximum length of an identical diagonal */ + int d; /* length of current identical diagonal */ + int s; /* length of jumps */ + int k; /* number of jumps */ - register int i, j, p; /* index variables */ + register int i, j, p; /* index variables */ A = segment[one].seq; table = segment[one].lookup; loc = segment[one].pos; B = orient ? segment[two].seq : segment[two].rev; N = segment[two].length; - for ( s = 1, k = 1; s <= N ; k++ ) - { + for (s = 1, k = 1; s <= N; k++) { maxd = 0; - for (value = 0, d = 0, j = N+1-s; d <= tuple && j >= 1; j--, d++) - { - if ( vert[B[j]] == 4 ) break; + for (value = 0, d = 0, j = N + 1 - s; d <= tuple && j >= 1; + j--, d++) { + if (vert[B[j]] == 4) break; value += vert[B[j]] * power[d]; - if ( ! table[value+base[d]] ) break; + if (!table[value + base[d]]) break; } - if ( d > tuple ) - { - for ( p = table[value+base[tuple]]; p ; p = loc[p] ) - { + if (d > tuple) { + for (p = table[value + base[tuple]]; p; p = loc[p]) { d = tuple + 1; - for ( i = p-1, j = N+1-s-d; i >= 1 && j >= 1; i--, j--, d++ ) - if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break; - if ( maxd < d ) - maxd = d; - if ( j < 1 ) break; + for (i = p - 1, j = N + 1 - s - d; + i >= 1 && j >= 1; i--, j--, d++) + if (A[i] != B[j] && vert[A[i]] != 4 && + vert[B[j]] != 4) + break; + if (maxd < d) maxd = d; + if (j < 1) break; } } - else + else maxd = d; s += maxd + 1; H[k] = s; @@ -881,19 +871,20 @@ short orient; } /* Compute a vector of lengths of reverse jumps for reverse complement */ -int RJUMPC(H,one,two) int H[], one, two; -{ - char *A, *B; /* pointers to two sequences */ - int M, N; /* lengths of two sequences */ - int *table; /* pointer to a lookup table */ - int *loc; /* pointer to a location table */ - int value; /* value of a word */ - int maxd; /* maximum length of an identical diagonal */ - int d; /* length of current identical diagonal */ - int s; /* length of jumps */ - int k; /* number of jumps */ +int RJUMPC(H, one, two) +int H[], one, two; +{ + char *A, *B; /* pointers to two sequences */ + int M, N; /* lengths of two sequences */ + int *table; /* pointer to a lookup table */ + int *loc; /* pointer to a location table */ + int value; /* value of a word */ + int maxd; /* maximum length of an identical diagonal */ + int d; /* length of current identical diagonal */ + int s; /* length of jumps */ + int k; /* number of jumps */ - register int i, j, p; /* index variables */ + register int i, j, p; /* index variables */ A = segment[one].rev; M = segment[one].length; @@ -901,29 +892,28 @@ int RJUMPC(H,one,two) int H[], one, two; loc = segment[one].pos; B = segment[two].seq; N = segment[two].length; - for ( s = 1, k = 1; s <= N ; k++ ) - { + for (s = 1, k = 1; s <= N; k++) { maxd = 0; - for (value = -1, d = 0, j = N+1-s; d <= tuple && j >= 1; j--, d++) - { - if ( vert[B[j]] == 4 ) break; + for (value = -1, d = 0, j = N + 1 - s; d <= tuple && j >= 1; + j--, d++) { + if (vert[B[j]] == 4) break; value = 4 * (value + 1) + vertc[B[j]]; - if ( ! table[value] ) break; + if (!table[value]) break; } - if ( d > tuple ) - { - for ( p = table[value]; p ; p = loc[p] ) - { + if (d > tuple) { + for (p = table[value]; p; p = loc[p]) { d = tuple + 1; i = M - p - tuple; - for ( j = N-s-tuple; i >= 1 && j >= 1; i--, j--, d++ ) - if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break; - if ( maxd < d ) - maxd = d; - if ( j < 1 ) break; + for (j = N - s - tuple; i >= 1 && j >= 1; + i--, j--, d++) + if (A[i] != B[j] && vert[A[i]] != 4 && + vert[B[j]] != 4) + break; + if (maxd < d) maxd = d; + if (j < 1) break; } } - else + else maxd = d; s += maxd + 1; H[k] = s; @@ -933,92 +923,88 @@ int RJUMPC(H,one,two) int H[], one, two; /* Construct contigs */ ASSEM() -{ - char *ckalloc(); /* space-allocating function */ - register int i, j, k; /* index variables */ - overptr node1, x, y; /* temporary pointer */ - int five, three; /* indices of 5' and 3' segments */ - short orienti; /* orientation of 5' segment */ - short orientj; /* orientation of 3' segment */ - short sorted; /* boolean variable */ +{ + char *ckalloc(); /* space-allocating function */ + register int i, j, k; /* index variables */ + overptr node1, x, y; /* temporary pointer */ + int five, three; /* indices of 5' and 3' segments */ + short orienti; /* orientation of 5' segment */ + short orientj; /* orientation of 3' segment */ + short sorted; /* boolean variable */ - contigs = ( struct CONS * ) ckalloc( seg_num * sizeof(struct CONS)); - for ( i = 0; i < seg_num; i++ ) - { + contigs = (struct CONS *)ckalloc(seg_num * sizeof(struct CONS)); + for (i = 0; i < seg_num; i++) { contigs[i].isfive[0] = contigs[i].isthree[0] = 1; contigs[i].isfive[1] = contigs[i].isthree[1] = 1; contigs[i].other = i; contigs[i].group = contigs[i].child = -1; contigs[i].brother = contigs[i].father = -1; } - for ( i = 0; i < seg_num; i++ ) - if ( ! segment[i].kind ) - for ( ; ; ) - { - for ( y = segment[i].list; y->kind; y = y->next ) + for (i = 0; i < seg_num; i++) + if (!segment[i].kind) + for (;;) { + for (y = segment[i].list; y->kind; y = y->next) ; - for ( x = y->next; x != 0; x = x->next ) - if ( ! x->kind && x->score > y->score ) + for (x = y->next; x != 0; x = x->next) + if (!x->kind && x->score > y->score) y = x; - for ( j = y->number; (k = contigs[j].father) != -1; j = k ) + for (j = y->number; + (k = contigs[j].father) != -1; j = k) ; - if ( j != i ) - { + if (j != i) { contigs[i].father = j = y->number; contigs[i].brother = contigs[j].child; contigs[j].child = i; contigs[i].node[1] = y; break; } - else - { - if ( segment[i].list->number == y->number ) + else { + if (segment[i].list->number == + y->number) segment[i].list = y->next; - else - { - for ( x = segment[i].list; x->next->number != y->number ; ) + else { + for (x = segment[i].list; + x->next->number != + y->number;) x = x->next; x->next = y->next; } - for ( x = segment[i].list; x != 0 && x->kind; x = x->next ) + for (x = segment[i].list; + x != 0 && x->kind; x = x->next) ; - if ( x == 0 ) - { + if (x == 0) { segment[i].kind = 1; break; } } } - edge = ( overptr * ) ckalloc( edge_num * sizeof(overptr) ); - for ( j = 0, i = 0; i < seg_num; i++ ) - if ( segment[i].kind ) - for ( node1 = segment[i].list; node1 != 0; node1 = node1->next ) - if ( segment[node1->number].kind ) + edge = (overptr *)ckalloc(edge_num * sizeof(overptr)); + for (j = 0, i = 0; i < seg_num; i++) + if (segment[i].kind) + for (node1 = segment[i].list; node1 != 0; + node1 = node1->next) + if (segment[node1->number].kind) edge[j++] = node1; edge_num = j; - for ( i = edge_num - 1; i > 0; i-- ) - { + for (i = edge_num - 1; i > 0; i--) { sorted = 1; - for ( j = 0; j < i; j++ ) - if ( edge[j]->score < edge[j+1]->score ) - { + for (j = 0; j < i; j++) + if (edge[j]->score < edge[j + 1]->score) { node1 = edge[j]; - edge[j] = edge[j+1]; - edge[j+1] = node1; + edge[j] = edge[j + 1]; + edge[j + 1] = node1; sorted = 0; } - if ( sorted ) - break; + if (sorted) break; } - for ( k = 0; k < edge_num; k++ ) - { + for (k = 0; k < edge_num; k++) { five = edge[k]->host; three = edge[k]->number; orienti = edge[k]->orienti; orientj = edge[k]->orientj; - if ( contigs[five].isthree[orienti] && - contigs[three].isfive[orientj] && contigs[five].other != three ) - { + if (contigs[five].isthree[orienti] && + contigs[three].isfive[orientj] && + contigs[five].other != three) { contigs[five].isthree[orienti] = 0; contigs[three].isfive[orientj] = 0; contigs[five].next[orienti] = three; @@ -1038,51 +1024,48 @@ ASSEM() } REPAIR() -{ - int endi, endj, stari, starj; /* endpoint and startpoint */ +{ + int endi, endj, stari, starj; /* endpoint and startpoint */ - short orienti, orientj; /* orientation of segments */ - short isconi, isconj; /* containment condition */ - int score; /* the max score */ - int i, j, f, d, e; /* row and column indices */ - char *ckalloc(); /* space-allocating function */ + short orienti, orientj; /* orientation of segments */ + short isconi, isconj; /* containment condition */ + int score; /* the max score */ + int i, j, f, d, e; /* row and column indices */ + char *ckalloc(); /* space-allocating function */ char *A, *B; - int M, N; + int M, N; overptr node1; - int piece_num; /* The number of pieces */ - int count, limit; - int number; - int hit; + int piece_num; /* The number of pieces */ + int count, limit; + int number; + int hit; - piece = ( struct VX * ) ckalloc( seg_num * sizeof(struct VX)); - for ( j = 0, i = 0; i < seg_num; i++ ) - if (segment[i].kind && (contigs[i].isfive[1] || contigs[i].isfive[0])) + piece = (struct VX *)ckalloc(seg_num * sizeof(struct VX)); + for (j = 0, i = 0; i < seg_num; i++) + if (segment[i].kind && + (contigs[i].isfive[1] || contigs[i].isfive[0])) piece[j++].id = i; piece_num = j; - for ( i = 0; i < piece_num; i++ ) - { + for (i = 0; i < piece_num; i++) { piece[i].kind = 1; piece[i].list = 0; } - limit = 2 * ( piece_num - 1 ); + limit = 2 * (piece_num - 1); hit = number = 0; - for ( orienti = 0, d = 0; d < limit ; d++ ) - { + for (orienti = 0, d = 0; d < limit; d++) { i = piece[(e = d / 2)].id; orienti = 1 - orienti; A = orienti ? segment[i].seq : segment[i].rev; M = segment[i].length; - for ( f = e+1; f < piece_num ; f++ ) - { + for (f = e + 1; f < piece_num; f++) { j = piece[f].id; B = segment[j].seq; orientj = 1; N = segment[j].length; SCORE = 0; hit++; - big_pass(A,B,M,N,orienti,orientj); - if ( SCORE > CUTOFF ) - { + big_pass(A, B, M, N, orienti, orientj); + if (SCORE > CUTOFF) { score = SCORE; stari = ++STARI; starj = ++STARJ; @@ -1090,16 +1073,15 @@ REPAIR() endj = ENDJ; isconi = stari == 1 && endi == M; isconj = starj == 1 && endj == N; - node1 = ( overptr ) ckalloc( (int ) sizeof(over)); - if ( isconi || isconj ) - node1->kind = 0; /* containment */ - else - { - node1->kind = 1; - number++; - } /* overlap */ - if ( endi == M && ! isconj ) /*i is 5'*/ - { + node1 = (overptr)ckalloc((int)sizeof(over)); + if (isconi || isconj) + node1->kind = 0; /* containment */ + else { + node1->kind = 1; + number++; + } /* overlap */ + if (endi == M && !isconj) /*i is 5'*/ + { node1->number = j; node1->host = i; node1->ind = f; @@ -1110,8 +1092,8 @@ REPAIR() node1->endj = endj; node1->orientj = orientj; } - else /* j is 5' */ - { + else /* j is 5' */ + { node1->number = i; node1->host = j; node1->ind = e; @@ -1126,8 +1108,7 @@ REPAIR() count = node1->number == i ? f : e; node1->next = piece[count].list; piece[count].list = node1; - if ( ! node1->kind ) - piece[count].kind = 0; + if (!node1->kind) piece[count].kind = 0; } } } @@ -1135,29 +1116,29 @@ REPAIR() } /* Construct contigs */ -REASSEM(piece_num, number) int piece_num, number; -{ - char *ckalloc(); /* space-allocating function */ - int i, j, k, d; /* index variables */ - overptr node1, x, y; /* temporary pointer */ - int five, three; /* indices of 5' and 3' segments */ - short orienti; /* orientation of 5' segment */ - short orientj; /* orientation of 3' segment */ - short sorted; /* boolean variable */ +REASSEM(piece_num, number) int piece_num, number; +{ + char *ckalloc(); /* space-allocating function */ + int i, j, k, d; /* index variables */ + overptr node1, x, y; /* temporary pointer */ + int five, three; /* indices of 5' and 3' segments */ + short orienti; /* orientation of 5' segment */ + short orientj; /* orientation of 3' segment */ + short sorted; /* boolean variable */ - for ( d = 0; d < piece_num; d++ ) - if ( ! piece[d].kind ) - for ( i = piece[d].id ; ; ) - { - for ( y = piece[d].list; y->kind; y = y->next ) + for (d = 0; d < piece_num; d++) + if (!piece[d].kind) + for (i = piece[d].id;;) { + for (y = piece[d].list; y->kind; y = y->next) ; - for ( x = y->next; x != 0; x = x->next ) - if ( ! x->kind && x->score > y->score ) + for (x = y->next; x != 0; x = x->next) + if (!x->kind && x->score > y->score) y = x; - for ( j = y->number; (k = contigs[j].father) != -1; j = k ) + for (j = y->number; + (k = contigs[j].father) != -1; j = k) ; - if ( j != i && RECONCILE(y,&piece_num,&number) ) - { + if (j != i && + RECONCILE(y, &piece_num, &number)) { contigs[i].father = j = y->number; contigs[i].brother = contigs[j].child; contigs[j].child = i; @@ -1165,56 +1146,52 @@ REASSEM(piece_num, number) int piece_num, number; segment[i].kind = 0; break; } - else - { - if ( piece[d].list->number == y->number ) + else { + if (piece[d].list->number == y->number) piece[d].list = y->next; - else - { - for ( x = piece[d].list; x->next->number != y->number ; ) + else { + for (x = piece[d].list; + x->next->number != + y->number;) x = x->next; x->next = y->next; } - for ( x = piece[d].list; x != 0 && x->kind; x = x->next ) + for (x = piece[d].list; + x != 0 && x->kind; x = x->next) ; - if ( x == 0 ) - { + if (x == 0) { piece[d].kind = 1; break; } } } - if ( number > edge_num ) - edge = ( overptr * ) ckalloc( number * sizeof(overptr) ); - for ( j = 0, d = 0; d < piece_num; d++ ) - if ( piece[d].kind ) - for ( node1 = piece[d].list; node1 != 0; node1 = node1->next ) - if ( piece[node1->ind].kind ) - edge[j++] = node1; + if (number > edge_num) + edge = (overptr *)ckalloc(number * sizeof(overptr)); + for (j = 0, d = 0; d < piece_num; d++) + if (piece[d].kind) + for (node1 = piece[d].list; node1 != 0; + node1 = node1->next) + if (piece[node1->ind].kind) edge[j++] = node1; edge_num = j; - for ( i = edge_num - 1; i > 0; i-- ) - { + for (i = edge_num - 1; i > 0; i--) { sorted = 1; - for ( j = 0; j < i; j++ ) - if ( edge[j]->score < edge[j+1]->score ) - { + for (j = 0; j < i; j++) + if (edge[j]->score < edge[j + 1]->score) { node1 = edge[j]; - edge[j] = edge[j+1]; - edge[j+1] = node1; + edge[j] = edge[j + 1]; + edge[j + 1] = node1; sorted = 0; } - if ( sorted ) - break; + if (sorted) break; } - for ( k = 0; k < edge_num; k++ ) - { + for (k = 0; k < edge_num; k++) { five = edge[k]->host; three = edge[k]->number; orienti = edge[k]->orienti; orientj = edge[k]->orientj; - if ( contigs[five].isthree[orienti] && - contigs[three].isfive[orientj] && contigs[five].other != three ) - { + if (contigs[five].isthree[orienti] && + contigs[three].isfive[orientj] && + contigs[five].other != three) { contigs[five].isthree[orienti] = 0; contigs[three].isfive[orientj] = 0; contigs[five].next[orienti] = three; @@ -1233,26 +1210,24 @@ REASSEM(piece_num, number) int piece_num, number; } } -RECONCILE(y, pp,nn) overptr y; -int *pp,*nn; -{ - short orienti, orientj; /* orientation of segments */ - short orientk, orientd; /* orientation of segments */ - int i, j, k, d, f; /* row and column indices */ - char *ckalloc(); /* space-allocating function */ +RECONCILE(y, pp, nn) overptr y; +int *pp, *nn; +{ + short orienti, orientj; /* orientation of segments */ + short orientk, orientd; /* orientation of segments */ + int i, j, k, d, f; /* row and column indices */ + char *ckalloc(); /* space-allocating function */ char *A, *B; - int M, N; + int M, N; overptr node1; k = y->host; d = y->number; orientk = y->orienti; orientd = y->orientj; - if ( ! contigs[k].isthree[orientk] ) - { - if ( ! piece[y->ind].kind ) return (0); - if ( contigs[d].isthree[orientd] ) - { + if (!contigs[k].isthree[orientk]) { + if (!piece[y->ind].kind) return (0); + if (contigs[d].isthree[orientd]) { orienti = orientd; i = d; orientj = contigs[k].orient[orientk]; @@ -1261,32 +1236,29 @@ int *pp,*nn; else return (0); } - else - if ( ! contigs[k].isfive[orientk] ) - { - if ( ! piece[y->ind].kind ) return (0); - if ( contigs[d].isfive[orientd] ) - { - orienti = contigs[k].orient[1-orientk]; - orienti = 1 - orienti; - i = contigs[k].next[1-orientk]; - orientj = orientd; - j = d; - } - else - return (0); + else if (!contigs[k].isfive[orientk]) { + if (!piece[y->ind].kind) return (0); + if (contigs[d].isfive[orientd]) { + orienti = contigs[k].orient[1 - orientk]; + orienti = 1 - orienti; + i = contigs[k].next[1 - orientk]; + orientj = orientd; + j = d; } else return (0); + } + else + return (0); A = orienti ? segment[i].seq : segment[i].rev; M = segment[i].length; B = orientj ? segment[j].seq : segment[j].rev; N = segment[j].length; SCORE = 0; - big_pass(A,B,M,N,orienti,orientj); - if ( SCORE > CUTOFF && ENDI - STARI > over_len && ENDI == M && STARJ == 0 ) - { - node1 = ( overptr ) ckalloc( (int ) sizeof(over)); + big_pass(A, B, M, N, orienti, orientj); + if (SCORE > CUTOFF && ENDI - STARI > over_len && ENDI == M && + STARJ == 0) { + node1 = (overptr)ckalloc((int)sizeof(over)); node1->kind = 1; node1->host = i; node1->number = j; @@ -1298,16 +1270,14 @@ int *pp,*nn; node1->orientj = orientj; node1->score = SCORE; piece[*pp].kind = 1; - if ( i == d ) - { + if (i == d) { node1->ind = *pp; node1->next = piece[y->ind].list; piece[y->ind].list = node1; piece[*pp].id = j; piece[*pp].list = 0; } - else - { + else { node1->ind = y->ind; piece[*pp].list = node1; node1->next = 0; @@ -1316,8 +1286,7 @@ int *pp,*nn; (*nn)++; (*pp)++; f = contigs[k].other; - if ( ! contigs[k].isthree[orientk] ) - { + if (!contigs[k].isthree[orientk]) { contigs[j].isfive[orientj] = 1; contigs[j].isthree[1 - orientj] = 1; contigs[k].isthree[orientk] = 1; @@ -1325,8 +1294,7 @@ int *pp,*nn; contigs[f].other = j; contigs[j].other = f; } - else - { + else { contigs[i].isthree[orienti] = 1; contigs[i].isfive[1 - orienti] = 1; contigs[k].isfive[orientk] = 1; @@ -1342,69 +1310,73 @@ int *pp,*nn; /* Construct a tree of overlapping-containment segments */ FORM_TREE() -{ - register int i, j, k; /* index variables */ - char *ckalloc(); /* space-allocating function */ - overptr node1; /* temporary pointer */ - short orient; /* orientation of segment */ - int group; /* serial number of contigs */ - char *A, *B; /* pointers to segment sequences */ - int stari, endi, starj, endj;/* positions where alignment begins */ - int M, N; /* lengths of segment sequences */ - int count; /* temporary variables */ +{ + register int i, j, k; /* index variables */ + char *ckalloc(); /* space-allocating function */ + overptr node1; /* temporary pointer */ + short orient; /* orientation of segment */ + int group; /* serial number of contigs */ + char *A, *B; /* pointers to segment sequences */ + int stari, endi, starj, endj; /* positions where alignment begins */ + int M, N; /* lengths of segment sequences */ + int count; /* temporary variables */ - mtree = ( struct TTREE * ) ckalloc( seg_num * sizeof(struct TTREE)); - for ( i = 0; i < seg_num; i++ ) - { + mtree = (struct TTREE *)ckalloc(seg_num * sizeof(struct TTREE)); + for (i = 0; i < seg_num; i++) { mtree[i].head = 0; mtree[i].next = mtree[i].child = mtree[i].brother = -1; } - for ( group = 0, i = 0; i < seg_num; i++ ) - if ( segment[i].kind && contigs[i].group < 0 && - ( contigs[i].isfive[1] || contigs[i].isfive[0] ) ) - { + for (group = 0, i = 0; i < seg_num; i++) + if (segment[i].kind && contigs[i].group < 0 && + (contigs[i].isfive[1] || contigs[i].isfive[0])) { orient = contigs[i].isfive[1] ? 1 : 0; mtree[i].head = 1; - for ( j = i; ; ) - { + for (j = i;;) { contigs[j].group = group; mtree[j].orient = orient; SORT(j, orient); - if ( contigs[j].isthree[orient] ) + if (contigs[j].isthree[orient]) break; - else - { + else { k = contigs[j].next[orient]; node1 = contigs[j].node[orient]; - if ( j == node1->host ) - { + if (j == node1->host) { stari = node1->stari; - endi = node1->endi; + endi = node1->endi; starj = node1->starj; - endj = node1->endj; - A = node1->orienti ? segment[j].seq : segment[j].rev; - B = node1->orientj ? segment[k].seq : segment[k].rev; + endj = node1->endj; + A = node1->orienti + ? segment[j].seq + : segment[j].rev; + B = node1->orientj + ? segment[k].seq + : segment[k].rev; } - else - { + else { M = segment[j].length; stari = M + 1 - node1->endj; endi = M + 1 - node1->starj; N = segment[k].length; starj = N + 1 - node1->endi; endj = N + 1 - node1->stari; - A = node1->orientj ? segment[j].rev : segment[j].seq; - B = node1->orienti ? segment[k].rev : segment[k].seq; + A = node1->orientj + ? segment[j].rev + : segment[j].seq; + B = node1->orienti + ? segment[k].rev + : segment[k].seq; } M = endi - stari + 1; N = endj - starj + 1; sapp = S; last = 0; al_len = no_mat = no_mis = 0; - (void) diff(&A[stari]-1, &B[starj]-1,M,N,q,q); - count = ( (N = sapp - S) + 1 ) * sizeof(int ); - mtree[k].script = ( int * ) ckalloc( count ); - for ( M = 0; M < N; M++) + (void)diff(&A[stari] - 1, &B[starj] - 1, + M, N, q, q); + count = + ((N = sapp - S) + 1) * sizeof(int); + mtree[k].script = (int *)ckalloc(count); + for (M = 0; M < N; M++) mtree[k].script[M] = S[M]; mtree[k].size = N; mtree[k].begin = stari; @@ -1418,71 +1390,66 @@ FORM_TREE() } /* Sort the children of each node by the `begin' field */ -SORT(seg, ort) int seg; +SORT(seg, ort) int seg; short ort; -{ - register int i, j, k; /* index variables */ - char *ckalloc(); /* space-allocating function */ - overptr node1; /* temporary pointer */ - short orient; /* orientation of segment */ - char *A, *B; /* pointers to segment sequences */ - int stari, endi, starj, endj;/* positions where alignment begins */ - int M, N; /* lengths of segment sequences */ - int count; /* temporary variables */ +{ + register int i, j, k; /* index variables */ + char *ckalloc(); /* space-allocating function */ + overptr node1; /* temporary pointer */ + short orient; /* orientation of segment */ + char *A, *B; /* pointers to segment sequences */ + int stari, endi, starj, endj; /* positions where alignment begins */ + int M, N; /* lengths of segment sequences */ + int count; /* temporary variables */ - for ( j = contigs[seg].child; j != -1; j = contigs[j].brother ) - { + for (j = contigs[seg].child; j != -1; j = contigs[j].brother) { node1 = contigs[j].node[1]; - if ( ort == node1->orientj ) - { + if (ort == node1->orientj) { stari = node1->starj; - endi = node1->endj; + endi = node1->endj; starj = node1->stari; - endj = node1->endi; - A = node1->orientj ? segment[seg].seq : segment[seg].rev; + endj = node1->endi; + A = node1->orientj ? segment[seg].seq + : segment[seg].rev; B = node1->orienti ? segment[j].seq : segment[j].rev; orient = node1->orienti; } - else - { + else { M = segment[seg].length; stari = M + 1 - node1->endj; endi = M + 1 - node1->starj; N = segment[j].length; starj = N + 1 - node1->endi; endj = N + 1 - node1->stari; - A = node1->orientj ? segment[seg].rev : segment[seg].seq; + A = node1->orientj ? segment[seg].rev + : segment[seg].seq; B = node1->orienti ? segment[j].rev : segment[j].seq; - orient = 1 - node1->orienti; + orient = 1 - node1->orienti; } M = endi - stari + 1; N = endj - starj + 1; sapp = S; last = 0; al_len = no_mat = no_mis = 0; - (void) diff(&A[stari]-1, &B[starj]-1,M,N,q,q); - count = ( (M = sapp - S ) + 1 ) * sizeof(int ); - mtree[j].script = ( int * ) ckalloc( count ); - for ( k = 0; k < M; k++) - mtree[j].script[k] = S[k]; + (void)diff(&A[stari] - 1, &B[starj] - 1, M, N, q, q); + count = ((M = sapp - S) + 1) * sizeof(int); + mtree[j].script = (int *)ckalloc(count); + for (k = 0; k < M; k++) mtree[j].script[k] = S[k]; mtree[j].size = M; mtree[j].begin = stari; mtree[j].orient = orient; - if ( mtree[seg].child == -1 ) + if (mtree[seg].child == -1) mtree[seg].child = j; - else - { + else { i = mtree[seg].child; - if ( mtree[i].begin >= stari ) - { + if (mtree[i].begin >= stari) { mtree[j].brother = i; mtree[seg].child = j; } - else - { + else { M = mtree[i].brother; - for ( ; M != -1; i = M, M = mtree[M].brother ) - if ( mtree[M].begin >= stari ) break; + for (; M != -1; i = M, M = mtree[M].brother) + if (mtree[M].begin >= stari) break; mtree[j].brother = M; mtree[i].brother = j; } @@ -1493,34 +1460,31 @@ short ort; /* Display the alignments of segments */ SHOW() -{ - register int i, j, k; /* index variables */ - char *ckalloc(); /* space-allocating function */ - int n; /* number of working segments */ - int limit; /* number of slots in work */ - int col; /* number of output columns prepared */ - short done; /* tells if current group is done */ - rowptr root; /* pointer to root of op tree */ - int sym[6]; /* occurrance counts for six chars */ - char c; /* temp variable */ - rowptr t, w, yy; /* temp pointer */ - int x; /* temp variables */ - int group; /* Contigs number */ - char conlit[20], *a; /* String form of contig number */ - char *spt; /* pointer to the start of consensus */ +{ + register int i, j, k; /* index variables */ + char *ckalloc(); /* space-allocating function */ + int n; /* number of working segments */ + int limit; /* number of slots in work */ + int col; /* number of output columns prepared */ + short done; /* tells if current group is done */ + rowptr root; /* pointer to root of op tree */ + int sym[6]; /* occurrance counts for six chars */ + char c; /* temp variable */ + rowptr t, w, yy; /* temp pointer */ + int x; /* temp variables */ + int group; /* Contigs number */ + char conlit[20], *a; /* String form of contig number */ + char *spt; /* pointer to the start of consensus */ - work = ( rowptr * ) ckalloc( seg_num * sizeof(rowptr)); + work = (rowptr *)ckalloc(seg_num * sizeof(rowptr)); group = 0; yy = 0; - for ( j = 0; j < 6; j++ ) - sym[j] = 0; + for (j = 0; j < 6; j++) sym[j] = 0; n = limit = col = 0; - for ( i = 0; i < seg_num; i++ ) - if ( mtree[i].head ) - { - (void) sprintf(conlit, ">Contig %d\n", group); - for ( a = conlit; *a; ) - *allconpt++ = *a++; + for (i = 0; i < seg_num; i++) + if (mtree[i].head) { + (void)sprintf(conlit, ">Contig %d\n", group); + for (a = conlit; *a;) *allconpt++ = *a++; /* Mod by S.S. (void) printf("\n#Contig %d\n\n", group++); */ @@ -1529,103 +1493,103 @@ SHOW() ENTER(&limit, &n, i, col, yy); root = work[0]; spt = allconpt; - while ( ! done ) - { - for ( j = 0; j < n; j++ ) /* get segments into work */ - { + while (!done) { + for (j = 0; j < n; + j++) /* get segments into work */ + { t = work[j]; k = t->id; - if ((x = mtree[k].next) != -1 && mtree[x].begin == t->loc) - { + if ((x = mtree[k].next) != -1 && + mtree[x].begin == t->loc) { ENTER(&limit, &n, x, col, t); mtree[k].next = -1; } - for ( x = mtree[k].child; x != -1; x = mtree[x].brother ) - if ( mtree[x].begin == t->loc ) - { - ENTER(&limit, &n, x, col, t); - mtree[k].child = mtree[x].brother; + for (x = mtree[k].child; x != -1; + x = mtree[x].brother) + if (mtree[x].begin == t->loc) { + ENTER(&limit, &n, x, + col, t); + mtree[k].child = + mtree[x].brother; } else break; } - COLUMN(root); /* determine next column */ + COLUMN(root); /* determine next column */ root->c = root->kind; - for ( t = head; t != 0; t = t->link ) + for (t = head; t != 0; t = t->link) t->c = t->kind; - for ( j = 0; j < n; j++ ) - { + for (j = 0; j < n; j++) { t = work[j]; - if ( t->done ) + if (t->done) *t->a++ = ' '; - else - { - if ( t->c == 'L' ) - { - if ( t->loc == 1 ) - t->offset = allconpt - spt; - c = *t->a++ = t->seq[t->loc++]; + else { + if (t->c == 'L') { + if (t->loc == 1) + t->offset = + allconpt - + spt; + c = *t->a++ = + t->seq[t->loc++]; } + else if (t->loc > 1) + c = *t->a++ = '-'; else - if ( t->loc > 1 ) - c = *t->a++ = '-'; - else - c = *t->a++ = ' '; - if ( c != ' ' ) - if ( c == '-' ) + c = *t->a++ = ' '; + if (c != ' ') + if (c == '-') sym[5] += 1; else - sym[vert[c]] += 1; + sym[vert[c]] += + 1; t->c = ' '; } } /* determine consensus char */ k = sym[0] + sym[1] + sym[2] + sym[3] + sym[4]; - if ( k < sym[5] ) + if (k < sym[5]) *allconpt++ = '-'; - else - if ( sym[0] == sym[1] && sym[1] == sym[2] && - sym[2] == sym[3] ) - *allconpt++ = 'N'; - else - { - k = sym[0]; - c = 'A'; - if ( k < sym[1] ) { - k = sym[1]; - c = 'C'; - } - if ( k < sym[2] ) { - k = sym[2]; - c = 'G'; - } - if ( k < sym[3] ) c = 'T'; - *allconpt++ = c; + else if (sym[0] == sym[1] && sym[1] == sym[2] && + sym[2] == sym[3]) + *allconpt++ = 'N'; + else { + k = sym[0]; + c = 'A'; + if (k < sym[1]) { + k = sym[1]; + c = 'C'; } - for ( j = 0; j < 6; j++ ) - sym[j] = 0; - for ( t = head; t != 0; t = t->link ) - { + if (k < sym[2]) { + k = sym[2]; + c = 'G'; + } + if (k < sym[3]) c = 'T'; + *allconpt++ = c; + } + for (j = 0; j < 6; j++) sym[j] = 0; + for (t = head; t != 0; t = t->link) { NEXTOP(t); - if ( t->done ) /* delete it from op tree */ - { + if (t->done) /* delete it from op tree + */ + { w = t->father; - if ( w->child->id == t->id ) + if (w->child->id == t->id) w->child = t->brother; - else - { + else { w = w->child; - for ( ; w->brother->id != t->id; w = w->brother ) + for (; w->brother->id != + t->id; + w = w->brother) ; w->brother = t->brother; } } } - if ( root->loc > root->length ) /* check root node */ - { + if (root->loc > + root->length) /* check root node */ + { root->done = 1; - if ( (w = root->child) != 0 ) - { + if ((w = root->child) != 0) { w->father = 0; root = w; } @@ -1633,13 +1597,12 @@ SHOW() done = 1; } col++; - if ( col == LINELEN || done ) /* output */ - { + if (col == LINELEN || done) /* output */ + { col = 0; - for ( j = 0; j < n; j++ ) - { + for (j = 0; j < n; j++) { t = work[j]; - if ( t->done ) + if (t->done) /* Mod by S.S. { (void) printf("#"); @@ -1648,41 +1611,65 @@ SHOW() */ { int jj; - (void) printf("{\nname "); - for(jj=0;jjname)-1;jj++) - (void) printf("%c", t->name[jj]); - printf("\nstrandedness %c\n", - t->name[strlen(t->name)] == '+'? '1':'2'); + (void)printf( + "{\nname "); + for (jj = 0; + jj < + strlen(t->name) - + 1; + jj++) + (void)printf( + "%c", + t->name + [jj]); + printf( + "\nstrandedness" + " %c\n", + t->name[strlen( + t->name)] == '+' + ? '1' + : '2'); - printf("offset %d\ntype DNA\ngroup-ID %d\nsequence \"\n", - t->offset,group); - for ( k = 0, a = t->line ; a != t->a; a++ ) - if ( *a != ' ' ) - { + printf( + "offset " + "%d\ntype " + "DNA\ngroup-" + "ID %d\nsequence \"\n", + t->offset, group); + for (k = 0, a = t->line; + a != t->a; a++) + if (*a != ' ') { k++; - (void) printf("%c", *a); - if ( k == LINELEN ) - { - (void) printf("\n"); + (void)printf( + "%" + "c", + *a); + if (k == + LINELEN) { + (void)printf( + "\n"); k = 0; } } -/* - if ( k ) -*/ - (void) printf("\"\n}\n"); + /* + if ( k ) + */ + (void)printf("\"\n}\n"); } - if ( t->linesize - (t->a - t->line) < LINELEN + 3 ) + if (t->linesize - + (t->a - t->line) < + LINELEN + 3) ALOC_SEQ(t); } - if ( !done ) - { - for ( k = j = n - 1; j >= 0; j-- ) - if ( work[j]->done ) - { + if (!done) { + for (k = j = n - 1; j >= 0; j--) + if (work[j]->done) { t = work[j]; - for ( x = j; x < k; x++ ) - work[x] = work[x+1]; + for (x = j; + x < k; x++) + work[x] = work + [x + + 1]; work[k--] = t; } n = k + 1; @@ -1696,60 +1683,55 @@ SHOW() /* allocate more space for output fragment */ ALOC_SEQ(t) rowptr t; -{ - char *start, *end, *p; +{ + char *start, *end, *p; t->linesize *= 2; start = t->line; end = t->a; - t->line = ( char * ) ckalloc( t->linesize * sizeof(char)); - for ( t->a = t->line, p = start ; p != end; ) - *t->a++ = *p++; + t->line = ckalloc(t->linesize * sizeof(char)); + for (t->a = t->line, p = start; p != end;) *t->a++ = *p++; free(start); } /* enter a segment into working set */ -ENTER(b, d, id, pos, par) int *b, *d, id, pos; +ENTER(b, d, id, pos, par) int *b, *d, id, pos; rowptr par; -{ - int i; - char *ckalloc(); /* space-allocating function */ - rowptr t; +{ + int i; + char *ckalloc(); /* space-allocating function */ + rowptr t; - if ( *b <= *d ) - { - work[*b] = ( rowptr ) ckalloc( (int ) sizeof(row)); - work[*b]->line = ( char * ) ckalloc( SEQLEN * sizeof(char)); + if (*b <= *d) { + work[*b] = (rowptr)ckalloc((int)sizeof(row)); + work[*b]->line = (char *)ckalloc(SEQLEN * sizeof(char)); work[*b]->linesize = SEQLEN; *b += 1; } t = work[*d]; *d += 1; t->a = t->line; - for ( i = 0; i < pos; i++ ) - *t->a++ = ' '; + for (i = 0; i < pos; i++) *t->a++ = ' '; t->c = ' '; t->seq = mtree[id].orient ? segment[id].seq : segment[id].rev; t->length = segment[id].length; t->id = id; - if ( par != 0 ) - { + if (par != 0) { t->s = mtree[id].script; t->size = mtree[id].size; } t->op = 0; - for ( i = 1; i <= segment[id].len && i <= NAMELEN; i++ ) - t->name[i-1] = segment[id].name[i]; - if ( mtree[id].orient ) - t->name[i-1] = '+'; + for (i = 1; i <= segment[id].len && i <= NAMELEN; i++) + t->name[i - 1] = segment[id].name[i]; + if (mtree[id].orient) + t->name[i - 1] = '+'; else - t->name[i-1] = '-'; + t->name[i - 1] = '-'; t->name[i] = '\0'; t->done = 0; t->loc = 1; t->child = 0; t->father = par; - if ( par != 0 ) - { + if (par != 0) { t->brother = par->child; par->child = t; NEXTOP(t); @@ -1757,146 +1739,120 @@ rowptr par; } /* get the next operation */ -NEXTOP(t) rowptr t; -{ - if ( t->size || t->op ) - if ( t->op == 0 && *t->s == 0 ) - { +NEXTOP(t) rowptr t; +{ + if (t->size || t->op) + if (t->op == 0 && *t->s == 0) { t->op = *t->s++; t->size--; t->up = 'L'; t->dw = 'L'; } - else - { - if ( t->op == 0 ) - { + else { + if (t->op == 0) { t->op = *t->s++; t->size--; } - if ( t->op > 0 ) - { + if (t->op > 0) { t->up = '-'; t->dw = 'L'; t->op--; } - else - { + else { t->up = 'L'; t->dw = '-'; t->op++; } } - else - if ( t->loc > t->length ) - t->done = 1; + else if (t->loc > t->length) + t->done = 1; } COLUMN(x) rowptr x; -{ - rowptr y; - rowptr start, end; /* first and last nodes for subtree */ +{ + rowptr y; + rowptr start, end; /* first and last nodes for subtree */ - if ( x->child == 0 ) - { + if (x->child == 0) { head = tail = 0; x->kind = 'L'; } - else - { + else { start = end = 0; x->kind = 'L'; - for ( y = x->child; y != 0; y = y->brother ) - { + for (y = x->child; y != 0; y = y->brother) { COLUMN(y); - if ( x->kind == y->up ) - if ( y->kind == y->dw ) - { - if ( head == 0 ) - { + if (x->kind == y->up) + if (y->kind == y->dw) { + if (head == 0) { y->link = 0; head = tail = y; } - else - { + else { y->link = head; head = y; } - if ( end == 0 ) + if (end == 0) start = head; else end->link = head; end = tail; } - else - if ( y->kind == '-' ) - { - start = head; - end = tail; - x->kind = '-'; + else if (y->kind == '-') { + start = head; + end = tail; + x->kind = '-'; + } + else { + y->link = 0; + y->kind = '-'; + if (end == 0) + start = end = y; + else { + end->link = y; + end = y; } - else - { + } + else if (y->kind == y->dw) + if (x->kind == '-') + ; + else { + if (head == 0) { y->link = 0; - y->kind = '-'; - if ( end == 0 ) - start = end = y; - else - { - end->link = y; - end = y; - } + head = tail = y; } - else - if ( y->kind == y->dw ) - if ( x->kind == '-' ) + else { + y->link = head; + head = y; + } + start = head; + end = tail; + x->kind = '-'; + } + else if (x->kind == '-') + if (y->kind == '-') { + if (end == 0) { + start = head; + end = tail; + } + else if (head == 0) + /* code folded from here */ ; - else - { - if ( head == 0 ) - { - y->link = 0; - head = tail = y; - } - else - { - y->link = head; - head = y; - } - start = head; + /* unfolding */ + else { + /* code folded from here */ + end->link = head; end = tail; - x->kind = '-'; + /* unfolding */ } + } else - if ( x->kind == '-' ) - if ( y->kind == '-' ) - { - if ( end == 0 ) - { - start = head; - end = tail; - } - else - if ( head == 0 ) -/* code folded from here */ - ; -/* unfolding */ - else - { -/* code folded from here */ - end->link = head; - end = tail; -/* unfolding */ - } - } - else - ; - else - { - start = head; - end = tail; - x->kind = '-'; - } + ; + else { + start = head; + end = tail; + x->kind = '-'; + } } head = start; tail = end; @@ -1905,107 +1861,104 @@ COLUMN(x) rowptr x; /* Display a summary of contigs */ GRAPH() -{ - int i, j, k; /* index variables */ - int group; /* serial number of contigs */ - char name[NAMELEN+2]; /* name of segment */ - char *t; /* temp var */ - int length; /* length of name */ +{ + int i, j, k; /* index variables */ + int group; /* serial number of contigs */ + char name[NAMELEN + 2]; /* name of segment */ + char *t; /* temp var */ + int length; /* length of name */ - (void) printf("\nOVERLAPS CONTAINMENTS\n\n"); + (void)printf("\nOVERLAPS CONTAINMENTS\n\n"); group = 1; - for ( i = 0; i < seg_num; i++ ) - if ( mtree[i].head ) - { - (void) printf("******************* Contig %d ********************\n", - group++ ); - for ( j = i; j != -1; j = mtree[j].next ) - { + for (i = 0; i < seg_num; i++) + if (mtree[i].head) { + (void)printf( + "******************* Contig %d " + "********************\n", + group++); + for (j = i; j != -1; j = mtree[j].next) { length = segment[j].len; t = segment[j].name + 1; - for ( k = 0; k < length && k < NAMELEN; k++ ) + for (k = 0; k < length && k < NAMELEN; k++) name[k] = *t++; - if ( mtree[j].orient ) + if (mtree[j].orient) name[k] = '+'; else name[k] = '-'; - name[k+1] = '\0'; - (void) printf("%s\n", name); + name[k + 1] = '\0'; + (void)printf("%s\n", name); CONTAIN(mtree[j].child, name); } } } -CONTAIN(id, f) int id; +CONTAIN(id, f) int id; char *f; -{ - int k; /* index variable */ - char name[NAMELEN+2]; /* name of segment */ - char *t; /* temp var */ - int length; /* length of name */ +{ + int k; /* index variable */ + char name[NAMELEN + 2]; /* name of segment */ + char *t; /* temp var */ + int length; /* length of name */ - if ( id != -1 ) - { + if (id != -1) { length = segment[id].len; t = segment[id].name + 1; - for ( k = 0; k < length && k < NAMELEN; k++ ) - name[k] = *t++; - if ( mtree[id].orient ) + for (k = 0; k < length && k < NAMELEN; k++) name[k] = *t++; + if (mtree[id].orient) name[k] = '+'; else name[k] = '-'; - name[k+1] = '\0'; - (void) printf(" %s is in %s\n", name,f); + name[k + 1] = '\0'; + (void)printf(" %s is in %s\n", name, f); CONTAIN(mtree[id].child, name); CONTAIN(mtree[id].brother, f); } } -big_pass(A,B,M,N,orienti,orientj) char A[],B[]; -int M,N; +big_pass(A, B, M, N, orienti, orientj) char A[], B[]; +int M, N; short orienti, orientj; -{ - register int i, j; /* row and column indices */ - register int c; /* best score at current point */ - register int f; /* best score ending with insertion */ - register int d; /* best score ending with deletion */ - register int p; /* best score at (i-1, j-1) */ - register int ci; /* end-point associated with c */ +{ + register int i, j; /* row and column indices */ + register int c; /* best score at current point */ + register int f; /* best score ending with insertion */ + register int d; /* best score ending with deletion */ + register int p; /* best score at (i-1, j-1) */ + register int ci; /* end-point associated with c */ - register int di; /* end-point associated with d */ - register int fi; /* end-point associated with f */ - register int pi; /* end-point associated with p */ - int *va; /* pointer to v(A[i], B[j]) */ - int x1, x2; /* regions of A before x1 or after x2 are lightly penalized */ - int y1, y2; /* regions of B before y1 or after y2 are lightly penalized */ - short heavy; /* 1 = heavy penalty */ - int ex, gx; /* current gap penalty scores */ + register int di; /* end-point associated with d */ + register int fi; /* end-point associated with f */ + register int pi; /* end-point associated with p */ + int *va; /* pointer to v(A[i], B[j]) */ + int x1, + x2; /* regions of A before x1 or after x2 are lightly penalized */ + int y1, + y2; /* regions of B before y1 or after y2 are lightly penalized */ + short heavy; /* 1 = heavy penalty */ + int ex, gx; /* current gap penalty scores */ /* determine x1, x2, y1, y2 */ - if ( POS5 >= POS3 ) - fatal("The value for POS5 must be less than the value for POS3"); - if ( orienti ) - { + if (POS5 >= POS3) + fatal( + "The value for POS5 must be less than the value for POS3"); + if (orienti) { x1 = POS5 >= M ? 1 : POS5; x2 = POS3 >= M ? M : POS3; } - else - { + else { x1 = POS3 >= M ? 1 : M - POS3 + 1; x2 = POS5 >= M ? M : M - POS5 + 1; } - if ( orientj ) - { + if (orientj) { y1 = POS5 >= N ? 1 : POS5; y2 = POS3 >= N ? N : POS3; } - else - { + else { y1 = POS3 >= N ? 1 : N - POS3 + 1; y2 = POS5 >= N ? N : N - POS5 + 1; } - if ( x1 + 1 <= x2 ) x1++; - if ( y1 + 1 <= y2 ) y1++; + if (x1 + 1 <= x2) x1++; + if (y1 + 1 <= y2) y1++; heavy = 0; /* Compute the matrix. @@ -2014,70 +1967,59 @@ short orienti, orientj; DD : the scores of the current row, ending with deletion SS : the starting point that leads to score DD */ /* Initialize the 0 th row */ - for ( j = 1; j <= N ; j++ ) - { + for (j = 1; j <= N; j++) { CC[j] = 0; - DD[j] = - (q); + DD[j] = -(q); RR[j] = SS[j] = -j; } - for ( i = 1; i <= M; i++) - { - if ( i == x1 ) heavy = 1 - heavy; - if ( i == x2 ) heavy = 1 - heavy; + for (i = 1; i <= M; i++) { + if (i == x1) heavy = 1 - heavy; + if (i == x2) heavy = 1 - heavy; ex = r1; gx = qr1; va = v1[A[i]]; - c = 0; /* Initialize column 0 */ - f = - (q); + c = 0; /* Initialize column 0 */ + f = -(q); ci = fi = i; p = 0; pi = i - 1; - for ( j = 1 ; j <= N ; j++ ) - { - if ( j == y1 ) - { - if ( heavy ) - { + for (j = 1; j <= N; j++) { + if (j == y1) { + if (heavy) { ex = r; gx = qr; /* S.S. - va = v[A[i]]; + va = v[A[i]]; */ va = v[A[i]]; } } - if ( j == y2 ) - { - if ( heavy ) - { + if (j == y2) { + if (heavy) { ex = r1; gx = qr1; va = v1[A[i]]; } } - if ( ( f = f - ex ) < ( c = c - gx ) ) - { - f = c; - fi = ci; + if ((f = f - ex) < (c = c - gx)) { + f = c; + fi = ci; } di = SS[j]; - if ( ( d = DD[j] - ex ) < ( c = CC[j] - gx ) ) - { - d = c; - di = RR[j]; + if ((d = DD[j] - ex) < (c = CC[j] - gx)) { + d = c; + di = RR[j]; } - c = p+va[B[j]]; /* diagonal */ + c = p + va[B[j]]; /* diagonal */ ci = pi; - if ( c < d ) - { - c = d; - ci = di; + if (c < d) { + c = d; + ci = di; } - if ( c < f ) - { - c = f; - ci = fi; + if (c < f) { + c = f; + ci = fi; } p = CC[j]; CC[j] = c; @@ -2085,8 +2027,7 @@ S.S. RR[j] = ci; DD[j] = d; SS[j] = di; - if ( ( j == N || i == M ) && c > SCORE ) - { + if ((j == N || i == M) && c > SCORE) { SCORE = c; ENDI = i; ENDJ = j; @@ -2094,10 +2035,9 @@ S.S. } } } - if ( SCORE ) - if ( STARI < 0 ) - { - STARJ = - STARI; + if (SCORE) + if (STARI < 0) { + STARJ = -STARI; STARI = 0; } else @@ -2108,85 +2048,75 @@ S.S. A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero and appends such a conversion to the current script. */ -int diff(A,B,M,N,tb,te) char *A, *B; -int M, N; -int tb, te; +int diff(A, B, M, N, tb, te) +char *A, *B; +int M, N; +int tb, te; -{ - int midi, midj, type; /* Midpoint, type, and cost */ - int midc; +{ + int midi, midj, type; /* Midpoint, type, and cost */ + int midc; - { - register int i, j; - register int c, e, d, s; - int t, *va; - char *ckalloc(); + { + register int i, j; + register int c, e, d, s; + int t, *va; + char *ckalloc(); /* Boundary cases: M <= 1 or N == 0 */ - if (N <= 0) - { + if (N <= 0) { if (M > 0) DEL(M) - return - gap(M); + return -gap(M); } - if (M <= 1) - { - if (M <= 0) - { + if (M <= 1) { + if (M <= 0) { INS(N); - return - gap(N); + return -gap(N); } if (tb > te) tb = te; - midc = - (tb + r + gap(N) ); + midc = -(tb + r + gap(N)); midj = 0; va = v[A[1]]; - for (j = 1; j <= N; j++) - { - c = va[B[j]] - ( gap(j-1) + gap(N-j) ); - if (c > midc) - { + for (j = 1; j <= N; j++) { + c = va[B[j]] - (gap(j - 1) + gap(N - j)); + if (c > midc) { midc = c; midj = j; } } - if (midj == 0) - { - INS(N) DEL(1) } - else - { - if (midj > 1) INS(midj-1) - REP - if ( (A[1]|32) == (B[midj]|32) ) - no_mat += 1; - else - no_mis += 1; - if (midj < N) INS(N-midj) + if (midj == 0) { + INS(N) DEL(1) + } + else { + if (midj > 1) INS(midj - 1) + REP if ((A[1] | 32) == (B[midj] | 32)) no_mat += + 1; + else no_mis += 1; + if (midj < N) INS(N - midj) } return midc; } /* Divide: Find optimum midpoint (midi,midj) of cost midc */ - midi = M/2; /* Forward phase: */ - CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */ + midi = M / 2; /* Forward phase: */ + CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */ t = -q; - for (j = 1; j <= N; j++) - { - CC[j] = t = t-r; - DD[j] = t-q; + for (j = 1; j <= N; j++) { + CC[j] = t = t - r; + DD[j] = t - q; } t = -tb; - for (i = 1; i <= midi; i++) - { + for (i = 1; i <= midi; i++) { s = CC[0]; - CC[0] = c = t = t-r; - e = t-q; + CC[0] = c = t = t - r; + e = t - q; va = v[A[i]]; - for (j = 1; j <= N; j++) - { + for (j = 1; j <= N; j++) { if ((c = c - qr) > (e = e - r)) e = c; if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c; - c = s+va[B[j]]; + c = s + va[B[j]]; if (c < d) c = d; if (c < e) c = e; s = CC[j]; @@ -2196,25 +2126,22 @@ int tb, te; } DD[0] = CC[0]; - RR[N] = 0; /* Reverse phase: */ - t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */ - for (j = N-1; j >= 0; j--) - { - RR[j] = t = t-r; - SS[j] = t-q; + RR[N] = 0; /* Reverse phase: */ + t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */ + for (j = N - 1; j >= 0; j--) { + RR[j] = t = t - r; + SS[j] = t - q; } t = -te; - for (i = M-1; i >= midi; i--) - { + for (i = M - 1; i >= midi; i--) { s = RR[N]; - RR[N] = c = t = t-r; - e = t-q; - va = v[A[i+1]]; - for (j = N-1; j >= 0; j--) - { + RR[N] = c = t = t - r; + e = t - q; + va = v[A[i + 1]]; + for (j = N - 1; j >= 0; j--) { if ((c = c - qr) > (e = e - r)) e = c; if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c; - c = s+va[B[j+1]]; + c = s + va[B[j + 1]]; if (c < d) c = d; if (c < e) c = e; s = RR[j]; @@ -2224,19 +2151,18 @@ int tb, te; } SS[N] = RR[N]; - midc = CC[0]+RR[0]; /* Find optimal midpoint */ + midc = CC[0] + RR[0]; /* Find optimal midpoint */ midj = 0; type = 1; for (j = 0; j <= N; j++) if ((c = CC[j] + RR[j]) >= midc) - if (c > midc || CC[j] != DD[j] && RR[j] == SS[j]) - { + if (c > midc || + CC[j] != DD[j] && RR[j] == SS[j]) { midc = c; midj = j; } for (j = N; j >= 0; j--) - if ((c = DD[j] + SS[j] + q) > midc) - { + if ((c = DD[j] + SS[j] + q) > midc) { midc = c; midj = j; type = 2; @@ -2245,16 +2171,15 @@ int tb, te; /* Conquer: recursively around midpoint */ - if (type == 1) - { - (void) diff(A,B,midi,midj,tb,q); - (void) diff(A+midi,B+midj,M-midi,N-midj,q,te); + if (type == 1) { + (void)diff(A, B, midi, midj, tb, q); + (void)diff(A + midi, B + midj, M - midi, N - midj, q, te); } - else - { - (void) diff(A,B,midi-1,midj,tb,zero); + else { + (void)diff(A, B, midi - 1, midj, tb, zero); DEL(2); - (void) diff(A+midi+1,B+midj,M-midi-1,N-midj,zero,te); + (void)diff(A + midi + 1, B + midj, M - midi - 1, N - midj, zero, + te); } return midc; } @@ -2262,19 +2187,17 @@ int tb, te; /* lib.c - library of C procedures. */ /* fatal - print message and die */ -fatal(msg) -char *msg; +fatal(msg) char *msg; { - (void) fprintf(stderr, "%s\n", msg); + (void)fprintf(stderr, "%s\n", msg); exit(1); } /* fatalf - format message, print it, and die */ -fatalf(msg, val) -char *msg, *val; +fatalf(msg, val) char *msg, *val; { - (void) fprintf(stderr, msg, val); - (void) putc('\n', stderr); + (void)fprintf(stderr, msg, val); + (void)putc('\n', stderr); exit(1); } @@ -2284,19 +2207,17 @@ char *name, *mode; { FILE *fopen(), *fp; - if ((fp = fopen(name, mode)) == NULL) - fatalf("Cannot open %s.", name); - return(fp); + if ((fp = fopen(name, mode)) == NULL) fatalf("Cannot open %s.", name); + return (fp); } /* ckalloc - allocate space; check for success */ char *ckalloc(amount) -int amount; +size_t amount; { - char *malloc(), *p; + void *malloc(), *p; - if ((p = malloc( (unsigned) amount)) == NULL) - fatal("Ran out of memory."); - return(p); + if ((p = malloc((unsigned)amount)) == NULL) fatal("Ran out of memory."); + return (p); } diff --git a/SUPPORT/Flatio.c b/SUPPORT/Flatio.c index e4e58d1..9c028de 100644 --- a/SUPPORT/Flatio.c +++ b/SUPPORT/Flatio.c @@ -1,12 +1,13 @@ #include #include +#include +#include #define TRUE 1 #define FALSE 0 -#define MAX(a,b) ((a)>(b)?(a):(b)) -#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) -struct data_format -{ +struct data_format { int length; char *nuc; int offset; @@ -14,155 +15,135 @@ struct data_format char type; }; +char *Realloc(char *block, int size); +char *Calloc(int count, int size); +int ErrorOut(int code, char *string); +int Errorout(char *string); +int ReadFlat(FILE *file, struct data_format align[], int maxseqs); +int WriteData(FILE *file, struct data_format data[], int count); -int ReadFlat(file,align,maxseqs) -FILE *file; -struct data_format align[]; -int maxseqs; +int ReadFlat(FILE *file, struct data_format align[], int maxseqs) { - int j,len=0, count=-1,offset; + int j, len = 0, count = -1, offset; unsigned maxlen = 1024; - char inline[1025]; - extern char *Calloc(),*Realloc(); + char cinline[1025]; + extern char *Calloc(), *Realloc(); - if(file == NULL) - Errorout("Cannot open data file"); + if (file == NULL) Errorout("Cannot open data file"); - for(;fgets(inline,1024,file) != NULL;) - { - inline[strlen(inline)-1] = '\0'; - switch(inline[0]) - { + for (; fgets(cinline, 1024, file) != NULL;) { + cinline[strlen(cinline) - 1] = '\0'; + switch (cinline[0]) { case '>': case '#': case '%': case '"': case '@': - offset = 0; - for(j=0;j maxseqs) - Errorout("Sorry, alignment is too large"); + if (count > maxseqs) + Errorout( + "Sorry, alignment is too large"); - align[count].nuc = Calloc(maxlen,sizeof(char)); - align[count].type = inline[0]; + align[count].nuc = Calloc(maxlen, sizeof(char)); + align[count].type = cinline[0]; align[count].offset = offset; - if( align[count].nuc == NULL) + if (align[count].nuc == NULL) Errorout("Calloc problem"); - sscanf((char*)(inline+1),"%s", - align[count].name); + sscanf((char *)(cinline + 1), "%s", + align[count].name); len = 0; break; default: - if(len+strlen(inline) > maxlen) - { - maxlen = (maxlen+strlen(inline))*2; + if (len + strlen(cinline) > maxlen) { + maxlen = (maxlen + strlen(cinline)) * 2; align[count].nuc = - Realloc(align[count].nuc, maxlen); + Realloc(align[count].nuc, maxlen); } - for(j=0;j +#include -typedef struct Sequence -{ - int len; - char name[80]; - char type[8]; - char *nuc; +typedef struct Sequence { + int len; + char name[80]; + char type[8]; + char *nuc; } Sequence; - main() { - char a[5000],b[5000],inline[132]; - int pos1,pos2,pos3,i,j,k,FLAG; + char a[5000], b[5000], cinline[132]; + int pos1, pos2, pos3, i, j, k, FLAG; Sequence pair[2]; - - for(j=0;j<5000;j++) - b[j]='-'; - FLAG = (int)gets(inline); - for(j=0;FLAG;j++) - { - FLAG = (int)gets(inline); - sscanf(inline,"%d",&pos1); - if((sscanf(inline,"%*6c %c %d %d %d",&(a[j]),&k,&pos2,&pos3) - == 4) && (FLAG)) - { - if(pos3!=0) - { - if(pos1 +#include "Flatio.c" + #define FALSE 0 #define TRUE 1 @@ -12,126 +13,118 @@ #define OLSEN 1 #define NONE 2 -#define Min(a,b) (a)<(b)?(a):(b) +#define Min(a, b) (a) < (b) ? (a) : (b) -int width,start,jump,usecase,sim,correction; -int tbl,numseq,num,denom,special; -char argtyp[255],argval[255]; +int width, start, jump, usecase, sim, correction; +int tbl, numseq, num, denom, special; +char argtyp[255], argval[255]; -float acwt=1.0, agwt=1.0, auwt=1.0, ucwt=1.0, ugwt=1.0, gcwt=1.0; +float acwt = 1.0, agwt = 1.0, auwt = 1.0, ucwt = 1.0, ugwt = 1.0, gcwt = 1.0; float dist[200][200]; struct data_format data[10000]; float parta[200], partc[200], partg[200], partu[200], setdist(); -main(ac,av) -int ac; +main(ac, av) int ac; char **av; { - int i,j,k; + int i, j, k; extern int ReadFlat(); FILE *file; width = 1; jump = 1; - if(ac==1) - { + if (ac == 1) { fprintf(stderr, - "usage: %s [-sim] [-case] [-c=] ",av[0]); - fprintf(stderr,"[-t] alignment_flat_file\n"); + "usage: %s [-sim] [-case] [-c=] ", + av[0]); + fprintf(stderr, "[-t] alignment_flat_file\n"); exit(1); } - for(j=1;jlength,db->length); + mn = Min(da->length, db->length); - for(j=0;jnuc[j+i] | casefix; - bc = db->nuc[j+i] | casefix; - if(ac == 't') - ac = 'u'; - if(ac == 'T') - ac = 'U'; - if(bc == 't') - bc = 'u'; - if(bc == 'T') - bc = 'U'; + for (i = 0; i < width; i++) { + ac = da->nuc[j + i] | casefix; + bc = db->nuc[j + i] | casefix; + if (ac == 't') ac = 'u'; + if (ac == 'T') ac = 'U'; + if (bc == 't') bc = 'u'; + if (bc == 'T') bc = 'U'; - if((ac=='-') || (ac|32)=='n' || (ac==' ') || - (bc== '-') || (bc|32)=='n' || (bc==' ')); + if ((ac == '-') || (ac | 32) == 'n' || (ac == ' ') || + (bc == '-') || (bc | 32) == 'n' || (bc == ' ')) + ; - else - { + else { blank = FALSE; - if(ac != bc) - { + if (ac != bc) { match = FALSE; - switch(ac) - { - case 'a': - if (bc == 'c') fnum+=acwt; - else if(bc == 'g') fnum+=agwt; - else if(bc == 'u') fnum+=auwt; - break; + switch (ac) { + case 'a': + if (bc == 'c') + fnum += acwt; + else if (bc == 'g') + fnum += agwt; + else if (bc == 'u') + fnum += auwt; + break; - case 'c': - if (bc == 'a') fnum+=acwt; - else if(bc == 'g') fnum+=gcwt; - else if(bc == 'u') fnum+=ucwt; - break; + case 'c': + if (bc == 'a') + fnum += acwt; + else if (bc == 'g') + fnum += gcwt; + else if (bc == 'u') + fnum += ucwt; + break; - case 'g': - if (bc == 'a') fnum+=agwt; - else if(bc == 'c') fnum+=gcwt; - else if(bc == 'u') fnum+=ugwt; - break; + case 'g': + if (bc == 'a') + fnum += agwt; + else if (bc == 'c') + fnum += gcwt; + else if (bc == 'u') + fnum += ugwt; + break; - case 'u': - if (bc == 'a') fnum+=auwt; - else if(bc == 'c') fnum+=ucwt; - else if(bc == 'g') fnum+=ugwt; - break; + case 'u': + if (bc == 'a') + fnum += auwt; + else if (bc == 'c') + fnum += ucwt; + else if (bc == 'g') + fnum += ugwt; + break; - case 't': - if (bc == 'a') fnum+=auwt; - else if(bc == 'c') fnum+=ucwt; - else if(bc == 'g') fnum+=ugwt; - break; + case 't': + if (bc == 'a') + fnum += auwt; + else if (bc == 'c') + fnum += ucwt; + else if (bc == 'g') + fnum += ugwt; + break; - default: - break; + default: + break; }; } } - if((blank == FALSE) && match) - { - (*num) ++; - (*denom) ++; + if ((blank == FALSE) && match) { + (*num)++; + (*denom)++; } - else if(!blank) - (*denom) ++; + else if (!blank) + (*denom)++; } } - if(special) - (*num) = *denom - (int)fnum; - return; + if (special) (*num) = *denom - (int)fnum; + return 0; } - -float setdist(num,denom,a,b) -int num,denom,a,b; +float setdist(num, denom, a, b) +int num, denom, a, b; { float cor; - switch (correction) - { - case OLSEN: - cor = parta[a]*parta[b]+ - partc[a]*partc[b]+ - partg[a]*partg[b]+ - partu[a]*partu[b]; - break; + switch (correction) { + case OLSEN: + cor = parta[a] * parta[b] + partc[a] * partc[b] + + partg[a] * partg[b] + partu[a] * partu[b]; + break; - case JUKES: - cor = 0.25; - break; + case JUKES: + cor = 0.25; + break; - case NONE: - cor = 0.0; - break; + case NONE: + cor = 0.0; + break; - default: - cor = 0.0; - break; + default: + cor = 0.0; + break; }; - if(correction == NONE) - return(1.0 - (float)num/(float)denom); + if (correction == NONE) + return (1.0 - (float)num / (float)denom); else - return( -(1.0-cor)*log(1.0 / (1.0-cor)*((float)num/(float)denom-cor))); + return (-(1.0 - cor) * log(1.0 / (1.0 - cor) * + ((float)num / (float)denom - cor))); } - -getarg(av,ndx,atype,aval) -char **av,atype[],aval[]; +getarg(av, ndx, atype, aval) char **av, atype[], aval[]; int ndx; { - int i,j; + int i, j; char c; - for(j=0;(c=av[ndx][j])!=' ' && c!= '=' && c!= '\0';j++) - atype[j]=c; - if (c=='=') - { + for (j = 0; (c = av[ndx][j]) != ' ' && c != '=' && c != '\0'; j++) + atype[j] = c; + if (c == '=') { atype[j++] = c; atype[j] = '\0'; } - else - { + else { atype[j] = '\0'; j++; } - if(c=='=') - { - for(i=0;(c=av[ndx][j]) != '\0' && c!= ' ';i++,j++) + if (c == '=') { + for (i = 0; (c = av[ndx][j]) != '\0' && c != ' '; i++, j++) aval[i] = c; aval[i] = '\0'; } - return; + return 0; } SetPart() { - int a,c,g,u,tot,i,j; + int a, c, g, u, tot, i, j; char nuc; - for(j=0;jC conversion by Mike Maciukenas, CPGA, Microbiology at University of Illinois. C----------------------------------------------------------------------- @@ -31,16 +31,17 @@ C ERRORS. C C----------------------------------------------------------------------- */ -#include -#include -#include #include -#include #include +#include +#include +#include +#include +#include +#include - -#define BUFLEN 1024 -#define MAXLEAVES 256 +#define BUFLEN 1024 +#define MAXLEAVES 256 static int m, n, dissim, pr, start, save, seed, nempty; static double ps1, ps2, f, empty, tol, c; @@ -60,34 +61,34 @@ extern double strtod(); double dabs(a) double a; { - return((a<0.0) ? -a : a); + return ((a < 0.0) ? -a : a); } double sqr(a) double a; { - return(a*a); + return (a * a); } double max(a, b) double a; double b; { - return((a>b)?a:b); + return ((a > b) ? a : b); } int imin(a, b) int a; int b; { - return((ai) - return(d[j][i]); + if (j < i) + return (d[i][j]); + else if (j > i) + return (d[j][i]); else show_error("gd: i=j -- programmer screwed up!"); } @@ -156,8 +155,9 @@ char *string; int ch; int num; { - for(string[num--] = '\0'; num >= 0; string[num--] = ch); - return(string); + for (string[num--] = '\0'; num >= 0; string[num--] = ch) + ; + return (string); } int getachar() @@ -165,31 +165,30 @@ int getachar() { static int oldchar = '\0'; int ch; - int more=1; + int more = 1; - while(more) - { + while (more) { ch = getchar(); - if(oldchar == '\n' && ch == '#') - { - while(ch!='\n'&&ch!=EOF) - ch=getchar(); + if (oldchar == '\n' && ch == '#') { + while (ch != '\n' && ch != EOF) ch = getchar(); oldchar = ch; } - else if(oldchar == '\n' && isspace(ch)) + else if (oldchar == '\n' && isspace(ch)) ; - else more=0; + else + more = 0; } oldchar = ch; - return(ch); + return (ch); } int skip_space() { int ch; - while(isspace(ch=getachar())); - return(ch); + while (isspace(ch = getachar())) + ; + return (ch); } int getaword(string, len) @@ -200,21 +199,18 @@ int len; int i; int ch; ch = skip_space(); - if(ch == EOF) - return(0); - for(i=0; !isspace(ch) && ch != EOF; i++) - { - if(i=0 && isspace(string[i]); i--); - string[i+1] = '\0'; - if(ch==EOF) - return(-1); + len = i < len ? i : len - 1; + for (i = len - 1; i >= 0 && isspace(string[i]); i--) + ; + string[i + 1] = '\0'; + if (ch == EOF) + return (-1); else - return(1); + return (1); } int readtobarorcolon(string, len) @@ -257,26 +250,23 @@ int len; int ch; ch = skip_space(); - if(ch==EOF) - return(0); - for(i=0; ch!=EOF && ch!='|' && ch!=':'; i++) - { - if(ch=='\n'||ch=='\r'||ch=='\t') + if (ch == EOF) return (0); + for (i = 0; ch != EOF && ch != '|' && ch != ':'; i++) { + if (ch == '\n' || ch == '\r' || ch == '\t') i--; - else - { - if(i=0 && isspace(string[i]); i--); - string[i+1] = '\0'; - if(ch==EOF) - return(-1); + len = i < len ? i : len - 1; + for (i = len - 1; i >= 0 && isspace(string[i]); i--) + ; + string[i + 1] = '\0'; + if (ch == EOF) + return (-1); else - return(1); + return (1); } char *getmem(nelem, elsize) @@ -284,9 +274,11 @@ unsigned nelem, elsize; { char *temp; - temp = (char *)calloc(nelem+1, elsize); - if(temp == NULL) show_error("Couldn't allocate memory."); - else return(temp); + temp = (char *)calloc(nelem + 1, elsize); + if (temp == NULL) + show_error("Couldn't allocate memory."); + else + return (temp); } int get_parms(argc, argv) @@ -316,88 +308,85 @@ char **argv; empty = 0.0; n = 0; cur_arg = 0; - for(i=1; i 3) start = 3; + if (save != 1) save = 0; + if (seed < 0) seed = 12345; -/* check validity of parameters */ - if(ps1<0.0) ps1 = 0.0001; - if(ps2<0.0) ps2 = 0.0001; - if(dissim<0) dissim = 0; - if(start < 1 || start > 3) start = 3; - if(save != 1) save = 0; - if(seed < 0) seed = 12345; - -/*printf("dissim=%d\n", dissim);*/ -/*printf("pr=%d\n", pr);*/ -/*printf("start=%d\n", start);*/ -/*printf("save=%d\n", save);*/ -/*printf("seed=%d\n", seed);*/ -/*printf("ps1=%f\n", ps1);*/ -/*printf("ps2=%f\n", ps2);*/ -/*printf("empty=%f\n", empty);*/ + /*printf("dissim=%d\n", dissim);*/ + /*printf("pr=%d\n", pr);*/ + /*printf("start=%d\n", start);*/ + /*printf("save=%d\n", save);*/ + /*printf("seed=%d\n", seed);*/ + /*printf("ps1=%f\n", ps1);*/ + /*printf("ps2=%f\n", ps2);*/ + /*printf("empty=%f\n", empty);*/ } int get_data() @@ -408,162 +397,146 @@ int get_data() char ch; int result; double temp, nfactor, datmin, datmax; - nempty = n = 0; more = 1; ptr = &ch; - while(more) - { - result=readtobarorcolon(buf, BUFLEN); - if(result == 0 || result == -1) + while (more) { + result = readtobarorcolon(buf, BUFLEN); + if (result == 0 || result == -1) more = 0; - else - { + else { n++; names[n] = getmem(BUFLEN, 1); - result=readtobar(buf, BUFLEN); - if(result != 1) - show_error("get_data: bad name syntax, or missing '|'"); + result = readtobar(buf, BUFLEN); + if (result != 1) + show_error( + "get_data: bad name syntax, or missing " + "'|'"); strcpy(names[n], buf); - if(n>1) - delta[n]=(double *)getmem(n, sizeof(double)); + if (n > 1) + delta[n] = (double *)getmem(n, sizeof(double)); else - delta[n]=NULL; - for(j=1; j datmax && delta[i][j] != empty) + for (i = 2; i <= n; i++) + for (j = 1; j < i; j++) + if (delta[i][j] > datmax && + delta[i][j] != empty) datmax = delta[i][j]; datmax += 1.0; - for(i=2; i<=n; i++) - for(j=1; j=d[l][i]+d[k][j])&& - (d[k][i]+d[l][j]>=d[l][i]+d[k][j])) - { - wijkl=d[j][i]+d[l][k] - -d[k][i]-d[l][j]; - fitp+=sqr(wijkl); - g[j][i]+=fac*wijkl; - g[l][k]+=fac*wijkl; - g[k][i]-=fac*wijkl; - g[l][j]-=fac*wijkl; + fac = 2.0 * r; + for (i = 1; i <= nm3; i++) + for (j = i + 1; j <= nm2; j++) + for (k = j + 1; k <= nm1; k++) + for (l = k + 1; l <= nm0; l++) + if ((d[j][i] + d[l][k] >= + d[l][i] + d[k][j]) && + (d[k][i] + d[l][j] >= + d[l][i] + d[k][j])) { + wijkl = d[j][i] + d[l][k] - + d[k][i] - d[l][j]; + fitp += sqr(wijkl); + g[j][i] += fac * wijkl; + g[l][k] += fac * wijkl; + g[k][i] -= fac * wijkl; + g[l][j] -= fac * wijkl; } - else - if((d[j][i]+d[l][k]>=d[k][i]+d[l][j])&& - (d[l][i]+d[k][j]>=d[k][i]+d[l][j])) - { - wijkl=d[j][i]+d[l][k] - -d[l][i]-d[k][j]; - fitp+=sqr(wijkl); - g[j][i]+=fac*wijkl; - g[l][k]+=fac*wijkl; - g[k][j]-=fac*wijkl; - g[l][i]-=fac*wijkl; + else if ((d[j][i] + d[l][k] >= + d[k][i] + d[l][j]) && + (d[l][i] + d[k][j] >= + d[k][i] + d[l][j])) { + wijkl = d[j][i] + d[l][k] - + d[l][i] - d[k][j]; + fitp += sqr(wijkl); + g[j][i] += fac * wijkl; + g[l][k] += fac * wijkl; + g[k][j] -= fac * wijkl; + g[l][i] -= fac * wijkl; } - else - if((d[k][i]+d[l][j]>=d[j][i]+d[l][k])&& - (d[l][i]+d[k][j]>=d[j][i]+d[l][k])) - { - wijkl=d[k][i]+d[l][j] - -d[l][i]-d[k][j]; - fitp+=sqr(wijkl); - g[k][i]+=fac*wijkl; - g[l][j]+=fac*wijkl; - g[l][i]-=fac*wijkl; - g[k][j]-=fac*wijkl; + else if ((d[k][i] + d[l][j] >= + d[j][i] + d[l][k]) && + (d[l][i] + d[k][j] >= + d[j][i] + d[l][k])) { + wijkl = d[k][i] + d[l][j] - + d[l][i] - d[k][j]; + fitp += sqr(wijkl); + g[k][i] += fac * wijkl; + g[l][j] += fac * wijkl; + g[l][i] -= fac * wijkl; + g[k][j] -= fac * wijkl; } - f = fitl+r*fitp; + f = fitl + r * fitp; } static double **dr, **dgr, **d1, **gs, **xx, **gg; static int iterc, prc; -print_iter(maxfnc, f) -int maxfnc; +print_iter(maxfnc, f) int maxfnc; double f; { int i, j; - if(pr == 0) - { + if (pr == 0) { iterc++; } - else if(prc < abs(pr)) - { + else if (prc < abs(pr)) { prc++; iterc++; } - else - { + else { printf("Iteration %6d", iterc); printf(": function values %6d", maxfnc); printf(" f = %24.16e\n", f); - if(pr < 0) - { + if (pr < 0) { printf(" d[] looks like this:\n"); - for(i=2;i<=n;i++) - { + for (i = 2; i <= n; i++) { printf(" "); - for(j=1;j 0) - { + if (flag > 0) { prc = 0; print_iter(maxfnc, f); return; } - if(itcrs>m) - { - for(i=2;i<=n;i++) - for(j=1;j m) { + for (i = 2; i <= n; i++) + for (j = 1; j < i; j++) d1[i][j] = -g[i][j]; + } print_iter(maxfnc, f); dginit = 0.0; - for(i=2;i<=n;i++) - for(j=1;j= 0.0) - { + if (dginit >= 0.0) { retry = -retry; - if(imin(retry, maxfnk)<2) - { - printf("minfungra: \ + if (imin(retry, maxfnk) < 2) { + printf( + "minfungra: \ gradient wrong or acc too small\n"); flag = 2; } else - itcrs = m+1; + itcrs = m + 1; goto L30; } xmin = 0.0; @@ -751,107 +708,95 @@ int maxfn; gmin = dginit; gm = dginit; xbound = -1.0; - xnew = xnew * min(1.0, dgstep/dginit); + xnew = xnew * min(1.0, dgstep / dginit); dgstep = dginit; - L170: - c = xnew-xmin; +L170: + c = xnew - xmin; dtest = 0.0; - for(i=1;i<=n;i++) - for(j=1;j1) - { - for(gnew = 0.0,i=1;i<=n;i++) - for(j=1;j1 && f1 && f==fmin && dabs(gnew) <= dabs(gmin))) - { + if (maxfnc > 1) { + for (gnew = 0.0, i = 1; i <= n; i++) + for (j = 1; j < i; j++) gnew += d1[i][j] * g[i][j]; + } + if (maxfnc <= 1 || (maxfnc > 1 && f < fmin) || + (maxfnc > 1 && f == fmin && dabs(gnew) <= dabs(gmin))) { maxfnk = maxfnc; gsumsq = 0.0; - for(i=1;i<=n;i++) - for(j=1;jclt*dabs(dginit) || - (dabs(gm)<=clt*dabs(dginit) && dabs(gm*beta) >= clt*gsumsq)) - { - L310: + for (i = 1; i <= n; i++) + for (j = 1; j < i; j++) ggstar += gs[i][j] * g[i][j]; + beta = (gsumsq - ggstar) / (gm - dginit); +L300: + if (dabs(gm) > clt * dabs(dginit) || + (dabs(gm) <= clt * dabs(dginit) && + dabs(gm * beta) >= clt * gsumsq)) { + L310: clt += 0.3; - if(clt>0.8) - { + if (clt > 0.8) { retry = -retry; - if(imin(retry, maxfnk)<2) - { - printf("minfungra: \ + if (imin(retry, maxfnk) < 2) { + printf( + "minfungra: \ gradient wrong or acc too small\n"); flag = 2; } else - itcrs = m+1; + itcrs = m + 1; goto L30; } xold = xnew; - xnew = .5*(xmin+xold); - if(maxfnk >= maxfnc && gmin*gnew > 0.0) - { - xnew = 10.0*xold; - if(xbound>=0.0) { - xnew = 0.5*(xold+xbound); + xnew = .5 * (xmin + xold); + if (maxfnk >= maxfnc && gmin * gnew > 0.0) { + xnew = 10.0 * xold; + if (xbound >= 0.0) { + xnew = 0.5 * (xold + xbound); } } - c = gnew-(3.0*gnew + gmin-4.0*(f-fmin)/(xold-xmin))* - (xold-xnew)/(xold-xmin); - if(maxfnk>=maxfnc) - { - if(gmin*gnew<=0.0) { - xbound = xmin; + c = gnew - + (3.0 * gnew + gmin - 4.0 * (f - fmin) / (xold - xmin)) * + (xold - xnew) / (xold - xmin); + if (maxfnk >= maxfnc) { + if (gmin * gnew <= 0.0) { + xbound = xmin; } xmin = xold; fmin = f; @@ -859,71 +804,63 @@ int maxfn; } else xbound = xold; - if(c*gmin < 0.0) - xnew = (xmin*c-xnew*gmin)/(c-gmin); + if (c * gmin < 0.0) + xnew = (xmin * c - xnew * gmin) / (c - gmin); goto L170; } - if(min(f, fmin)=finit && gsumsq < gsinit)) - { - if(itcrs= finit && gsumsq < gsinit)) { + if (itcrs < m && dabs(ggstar) < .2 * gsumsq) { gamma = 0.0; c = 0.0; - for(i=1;i<=n;i++) - for(j=1;jps2) - { + if (dif > ps2) { iter++; - r*=10.0; + r *= 10.0; } - } while (dif>ps2); + } while (dif > ps2); fungra(); - for(i=2; i<=n; i++) - { + for (i = 2; i <= n; i++) { free(dr[i]); free(dgr[i]); free(d1[i]); @@ -1016,28 +945,38 @@ double gttol() result = 0.0; nm0 = n; - nm1 = n-1; - nm2 = n-2; - nm3 = n-3; - for(i=1;i<=nm3;i++) - for(j=i+1;j<=nm2;j++) - for(k=j+1;k<=nm1;k++) - for(l=k+1;l<=nm0;l++) - if((d[j][i]+d[l][k]>=d[l][i]+d[k][j])&& - (d[k][i]+d[l][j]>=d[l][i]+d[k][j])) - result=max(result, - dabs(d[j][i]+d[l][k]-d[k][i]-d[l][j])); - else - if((d[j][i]+d[l][k]>=d[k][i]+d[l][j])&& - (d[l][i]+d[k][j]>=d[k][i]+d[l][j])) - result=max(result, - dabs(d[j][i]+d[l][k]-d[l][i]-d[k][j])); - else - if((d[k][i]+d[l][j]>=d[j][i]+d[l][k])&& - (d[l][i]+d[k][j]>=d[j][i]+d[l][k])) - result=max(result, - dabs(d[k][i]+d[l][j]-d[l][i]-d[k][j])); - return(result); + nm1 = n - 1; + nm2 = n - 2; + nm3 = n - 3; + for (i = 1; i <= nm3; i++) + for (j = i + 1; j <= nm2; j++) + for (k = j + 1; k <= nm1; k++) + for (l = k + 1; l <= nm0; l++) + if ((d[j][i] + d[l][k] >= + d[l][i] + d[k][j]) && + (d[k][i] + d[l][j] >= + d[l][i] + d[k][j])) + result = max( + result, + dabs(d[j][i] + d[l][k] - + d[k][i] - d[l][j])); + else if ((d[j][i] + d[l][k] >= + d[k][i] + d[l][j]) && + (d[l][i] + d[k][j] >= + d[k][i] + d[l][j])) + result = max( + result, + dabs(d[j][i] + d[l][k] - + d[l][i] - d[k][j])); + else if ((d[k][i] + d[l][j] >= + d[j][i] + d[l][k]) && + (d[l][i] + d[k][j] >= + d[j][i] + d[l][k])) + result = max( + result, + dabs(d[k][i] + d[l][j] - + d[l][i] - d[k][j])); + return (result); } gtcord() @@ -1046,51 +985,48 @@ gtcord() int i, j; sumx = sumy = ssqx = ssqy = scp = 0.0; - for(i=1;i<=n;i++) - for(j=1;j4) - { - maxcnt=0; - for(i=1;i<=nnode;i++) if(!act[i]) - for(j=1;jmaxcnt) - { - maxcnt = count; - arcim=max(0.0, arci/count); - arcjm=max(0.0, arcj/count); - im=i; - jm=j; - } - } - - nnode++; - if(nnode+2>maxnode) - show_error("get_tree: number of nodes exceeds 2N-2"); - ninv++; - mergei[ninv]=im; - mergej[ninv]=jm; - act[im]=true; - act[jm]=true; - d[nnode]=(double *)getmem(nnode-1, sizeof(double)); - d[nnode][im]=arcim; - d[nnode][jm]=arcjm; - for(i=1;i<=nnode-1;i++) - if(!act[i]) - d[nnode][i] = max(0.0, gd(im,i)-arcim); - nact--; + for (i = 1; i <= maxnode; i++) act[i] = false; + nact = n; + ninv = 0; + nnode = n; + while (nact > 4) { + maxcnt = 0; + for (i = 1; i <= nnode; i++) + if (!act[i]) + for (j = 1; j < i; j++) + if (!act[j]) { + count = 0; + arci = 0.0; + arcj = 0.0; + for (k = 2; k <= nnode; k++) + if (!act[k] && k != i && + k != j) + for (l = 1; + l < k; l++) + if (!act[l] && + l != + i && + l != + j) + if (gd(i, + j) + gd(k, + l) <= + gd(i, + k) + + gd(j, + l) && + gd(i, + j) + gd(k, + l) <= + gd(i, + l) + + gd(j, + k)) { + count++; + arci += + (gd(i, + j) + + gd(i, + k) - + gd(j, + k)) / + 2.0; + arcj += + (gd(i, + j) + + gd(j, + l) - + gd(i, + l)) / + 2.0; + } + if (count > maxcnt) { + maxcnt = count; + arcim = max( + 0.0, arci / count); + arcjm = max( + 0.0, arcj / count); + im = i; + jm = j; + } + } + + nnode++; + if (nnode + 2 > maxnode) + show_error("get_tree: number of nodes exceeds 2N-2"); + ninv++; + mergei[ninv] = im; + mergej[ninv] = jm; + act[im] = true; + act[jm] = true; + d[nnode] = (double *)getmem(nnode - 1, sizeof(double)); + d[nnode][im] = arcim; + d[nnode][jm] = arcjm; + for (i = 1; i <= nnode - 1; i++) + if (!act[i]) d[nnode][i] = max(0.0, gd(im, i) - arcim); + nact--; } - for(i=1;act[i];i++) - if(i>nnode) - show_error("get_tree: can't find last two invisible nodes"); - im=i; - for(i=im+1;act[i];i++) - if(i>nnode) - show_error("get_tree: can't find last two invisible nodes"); - jm=i; - for(i=jm+1;act[i];i++) - if(i>nnode) - show_error("get_tree: can't find last two invisible nodes"); - km=i; - for(i=km+1;act[i];i++) - if(i>nnode) - show_error("get_tree: can't find last two invisible nodes"); - lm=i; - if(gd(im,jm)+gd(km,lm)<=gd(im,km)+gd(jm,lm)+tol && - gd(im,jm)+gd(km,lm)<=gd(im,lm)+gd(jm,km)+tol) - { - i=im; - j=jm; - k=km; - l=lm; + for (i = 1; act[i]; i++) + if (i > nnode) + show_error( + "get_tree: can't find last two invisible nodes"); + im = i; + for (i = im + 1; act[i]; i++) + if (i > nnode) + show_error( + "get_tree: can't find last two invisible nodes"); + jm = i; + for (i = jm + 1; act[i]; i++) + if (i > nnode) + show_error( + "get_tree: can't find last two invisible nodes"); + km = i; + for (i = km + 1; act[i]; i++) + if (i > nnode) + show_error( + "get_tree: can't find last two invisible nodes"); + lm = i; + if (gd(im, jm) + gd(km, lm) <= gd(im, km) + gd(jm, lm) + tol && + gd(im, jm) + gd(km, lm) <= gd(im, lm) + gd(jm, km) + tol) { + i = im; + j = jm; + k = km; + l = lm; } - else if(gd(im,lm)+gd(jm,km)<=gd(im,km)+gd(jm,lm)+tol && - gd(im,lm)+gd(jm,km)<=gd(im,jm)+gd(km,lm)+tol) - { - i=im; - j=lm; - k=km; - l=jm; + else if (gd(im, lm) + gd(jm, km) <= gd(im, km) + gd(jm, lm) + tol && + gd(im, lm) + gd(jm, km) <= gd(im, jm) + gd(km, lm) + tol) { + i = im; + j = lm; + k = km; + l = jm; } - else if(gd(im,km)+gd(jm,lm)<=gd(im,jm)+gd(km,lm)+tol && - gd(im,km)+gd(jm,lm)<=gd(im,lm)+gd(jm,km)+tol) - { - i=im; - j=km; - k=lm; - l=jm; + else if (gd(im, km) + gd(jm, lm) <= gd(im, jm) + gd(km, lm) + tol && + gd(im, km) + gd(jm, lm) <= gd(im, lm) + gd(jm, km) + tol) { + i = im; + j = km; + k = lm; + l = jm; } nnode++; ninv++; - mergei[ninv]=i; - mergej[ninv]=j; - d[nnode]=(double *)getmem(nnode-1, sizeof(double)); - d[nnode][i] = max(0.0, (gd(i,j)+gd(i,k)-gd(j,k))/2.0); - d[nnode][j] = max(0.0, (gd(i,j)+gd(j,l)-gd(i,l))/2.0); + mergei[ninv] = i; + mergej[ninv] = j; + d[nnode] = (double *)getmem(nnode - 1, sizeof(double)); + d[nnode][i] = max(0.0, (gd(i, j) + gd(i, k) - gd(j, k)) / 2.0); + d[nnode][j] = max(0.0, (gd(i, j) + gd(j, l) - gd(i, l)) / 2.0); nnode++; ninv++; - mergei[ninv]=k; - mergej[ninv]=l; - d[nnode]=(double *)getmem(nnode-1, sizeof(double)); - d[nnode][k] = max(0.0, (gd(k,l)+gd(i,k)-gd(i,l))/2.0); - d[nnode][l] = max(0.0, (gd(k,l)+gd(j,l)-gd(j,k))/2.0); - d[nnode][nnode-1] = max(0.0, (gd(i,k)+gd(j,l)-gd(i,j)-gd(k,l))/2.0); - + mergei[ninv] = k; + mergej[ninv] = l; + d[nnode] = (double *)getmem(nnode - 1, sizeof(double)); + d[nnode][k] = max(0.0, (gd(k, l) + gd(i, k) - gd(i, l)) / 2.0); + d[nnode][l] = max(0.0, (gd(k, l) + gd(j, l) - gd(j, k)) / 2.0); + d[nnode][nnode - 1] = + max(0.0, (gd(i, k) + gd(j, l) - gd(i, j) - gd(k, l)) / 2.0); } -print_node(node, dist, indent) -int node; +print_node(node, dist, indent) int node; double dist; int indent; { static char buf[BUFLEN]; - if(node<=n) - printf("%s%s:%6.4f", - repeatch(buf, '\t', indent), - names[node], - dist/nfac); - else - { - printf("%s(\n", - repeatch(buf, '\t', indent)); - print_node(mergei[node-n], gd(node, mergei[node-n]), indent+1); + if (node <= n) + printf("%s%s:%6.4f", repeatch(buf, '\t', indent), names[node], + dist / nfac); + else { + printf("%s(\n", repeatch(buf, '\t', indent)); + print_node(mergei[node - n], gd(node, mergei[node - n]), + indent + 1); printf(",\n"); - print_node(mergej[node-n], gd(node, mergej[node-n]), indent+1); - printf("\n%s):%6.4f", repeatch(buf, '\t', indent), - dist/nfac); + print_node(mergej[node - n], gd(node, mergej[node - n]), + indent + 1); + printf("\n%s):%6.4f", repeatch(buf, '\t', indent), dist / nfac); } } @@ -1246,17 +1211,17 @@ show_tree() int i, j, current; int ij[2]; - current=0; - for(i=1;current<2;i++) - { - for(j=1;(mergei[j]!=i && mergej[j] != i) && j<=ninv;j++); - if(j>ninv) - ij[current++]=i; + current = 0; + for (i = 1; current < 2; i++) { + for (j = 1; (mergei[j] != i && mergej[j] != i) && j <= ninv; + j++) + ; + if (j > ninv) ij[current++] = i; } printf("(\n"); - print_node(ij[0], gd(ij[0],ij[1])/2.0, 1); + print_node(ij[0], gd(ij[0], ij[1]) / 2.0, 1); printf(",\n"); - print_node(ij[1], gd(ij[0],ij[1])/2.0, 1); + print_node(ij[1], gd(ij[0], ij[1]) / 2.0, 1); printf("\n);\n"); } @@ -1267,62 +1232,68 @@ show_help() printf(" -d - treat data as dissimilarities (default)\n"); printf(" -s - treat data as similarities\n"); printf(" -print 0 - don't print out iteration history (default)\n"); - printf(" -print n>0 - print out iteration history every n iterations\n"); - printf(" -print n<0 - print out iteration history every n iterations\n"); - printf(" (including current distance estimates & gradients)\n"); + printf( + " -print n>0 - print out iteration history every n iterations\n"); + printf( + " -print n<0 - print out iteration history every n iterations\n"); + printf( + " (including current distance estimates & " + "gradients)\n"); printf(" -init n - initial parameter estimates (default = 3)\n"); printf(" n=1 - uniformly distributed random numbers\n"); printf(" n=2 - error-perturbed data\n"); printf(" n=3 - original distance data from input matrix\n"); - printf(" -save - save final parameter estimates (default is don't save\n"); - printf(" -seed n - seed for random number generator (default = 12345)\n"); - printf(" -ps1 n - convergence criterion for inner iterations (default = 0.0001)\n"); - printf(" -ps2 n - convergence criterion for major iterations (default = 0.0001)\n"); + printf( + " -save - save final parameter estimates (default is don't " + "save\n"); + printf( + " -seed n - seed for random number generator (default = " + "12345)\n"); + printf( + " -ps1 n - convergence criterion for inner iterations (default " + "= 0.0001)\n"); + printf( + " -ps2 n - convergence criterion for major iterations (default " + "= 0.0001)\n"); printf(" -empty n - missing data indicator (default = 0.0)\n"); printf(" -help - show this help text\n"); exit(0); } -show_error(message) -char *message; +show_error(message) char *message; { printf("\n>>>>ERROR:\n>>>>%s\n", message); exit(0); } -main(argc, argv) -int argc; +main(argc, argv) int argc; char **argv; { int i; strcpy(fname, ""); get_parms(argc, argv); - if(strcmp(fname, "")) - { - report=1; + if (strcmp(fname, "")) { + report = 1; reportf = fopen(fname, "w"); - if(reportf==NULL) - { + if (reportf == NULL) { perror("lsadt"); exit(0); } - } else - report=0; + report = 0; get_data(); d = (double **)getmem(n, sizeof(delta[1])); g = (double **)getmem(n, sizeof(delta[1])); dold = (double **)getmem(n, sizeof(delta[1])); - d[1]=NULL; - g[1]=NULL; - dold[1]=NULL; - for(i=2; i<=n; i++) - { - d[i]=(double *)getmem(i-1, sizeof(double)); - g[i]=(double *)getmem(i-1, sizeof(double)); - dold[i]=(double *)getmem(i-1, sizeof(double)); + d[1] = NULL; + g[1] = NULL; + dold[1] = NULL; + for (i = 2; i <= n; i++) { + d[i] = (double *)getmem(i - 1, sizeof(double)); + g[i] = (double *)getmem(i - 1, sizeof(double)); + dold[i] = (double *)getmem(i - 1, sizeof(double)); } initd(); get_dist(); @@ -1330,6 +1301,5 @@ char **argv; additive_const(); get_tree(); show_tree(); - if(report) - close(reportf); + if (report) close(reportf); }