diff --git a/HGL_SRC/Alloc.c b/HGL_SRC/Alloc.c index 6b227a8..2be136d 100755 --- a/HGL_SRC/Alloc.c +++ b/HGL_SRC/Alloc.c @@ -1,133 +1,122 @@ -#include -#include "global_defs.h" +#include #include +#include +#include +#include "global_defs.h" /* -* Alloc.c -* Memory functions for Harvard Genome Laboratory. -* Last revised 6/3/91 -* -* Print error message, and die -*/ -void ErrorOut(code,string) -int code; + * Alloc.c + * Memory functions for Harvard Genome Laboratory. + * Last revised 6/3/91 + * + * Print error message, and die + */ +void ErrorOut(code, string) int code; char *string; { - if (code == 0) - { - fprintf(stderr,"Error:%s\n",string); - exit(1); - } - return; -} - - -/* -* Calloc count*size bytes with memory aligned to size. -* Return pointer to new block. -*/ -char *Calloc(count,size) -int count,size; -/*unsigned count,size;*/ -{ - char *temp; - temp = calloc(count,(unsigned)size); - - if(count*size == 0) - fprintf(stderr,"Allocate ZERO blocks?\n"); - ErrorOut(temp,"Cannot allocate memory"); - return(temp); -} - -/* -* Reallocate memory at block, expand to size. -* Return pointer to (possibly) new block. -*/ -char *Realloc(block,size) -char *block; -unsigned size; -{ - char *temp; - temp=realloc(block,size); - ErrorOut(temp,"Cannot change memory size"); - return(temp); -} - -/* -* Free block Allocated by Calloc. -* Return error code from free(). -*/ - -void Cfree(block) -char* block; -{ - extern void Warning(); - if(block != NULL) - { -#ifdef SUN4 - if(free(block) == 0) - Warning("Error in Cfree..."); -#endif - } -/* else - Warning("Error in Cfree, NULL block"); -*/ - return; -} - - - -/* -* Print Warning message to stderr. -*/ -void Warning(s) -char *s; -{ - fprintf(stderr,"Warning:%s\n",s); + if (code == 0) { + fprintf(stderr, "Error:%s\n", string); + exit(1); + } return; } - /* -* Get array element from a sequence structure. The index -* is relative to the alignment. -*/ -char GetElem(seq,indx) -Sequence *seq; /*Sequence to search*/ -int indx; /*Index relative to the global offset*/ + * Calloc count*size bytes with memory aligned to size. + * Return pointer to new block. + */ +char *Calloc(count, size) +int count, size; +/*unsigned count,size;*/ { - if((indxoffset) || (indx >= seq->offset + seq->seqlen)) - return('-'); - else - return((char)(seq->c_elem[indx-seq->offset])); + char *temp; + temp = calloc(count, (unsigned)size); + + if (count * size == 0) fprintf(stderr, "Allocate ZERO blocks?\n"); + ErrorOut(temp, "Cannot allocate memory"); + return (temp); } /* -* Replace the array element at seq[indx] with elem. The index -* is relative to the alignment. -*/ - -void ReplaceElem(seq,indx,elem) -Sequence *seq; /*Sequence */ -int indx; /*Position to overwrite (replace) */ -unsigned char elem; /*Character to replace with */ + * Reallocate memory at block, expand to size. + * Return pointer to (possibly) new block. + */ +char *Realloc(block, size) +char *block; +unsigned size; { - int j; + char *temp; + temp = realloc(block, size); + ErrorOut(temp, "Cannot change memory size"); + return (temp); +} + +/* + * Free block Allocated by Calloc. + * Return error code from free(). + */ + +void Cfree(block) char *block; +{ + extern void Warning(); + if (block != NULL) { +#ifdef SUN4 + if (free(block) == 0) Warning("Error in Cfree..."); +#endif + } + /* else + Warning("Error in Cfree, NULL block"); + */ + return; +} + +/* + * Print Warning message to stderr. + */ +void Warning(s) char *s; +{ + fprintf(stderr, "Warning:%s\n", s); + return; +} + +/* + * Get array element from a sequence structure. The index + * is relative to the alignment. + */ +char GetElem(seq, indx) +Sequence *seq; /*Sequence to search*/ +int indx; /*Index relative to the global offset*/ +{ + if ((indx < seq->offset) || (indx >= seq->offset + seq->seqlen)) + return ('-'); + else + return ((char)(seq->c_elem[indx - seq->offset])); +} + +/* + * Replace the array element at seq[indx] with elem. The index + * is relative to the alignment. + */ + +void ReplaceElem(seq, indx, elem) Sequence *seq; /*Sequence */ +int indx; /*Position to overwrite (replace) */ +unsigned char elem; /*Character to replace with */ +{ + int j; extern char *Calloc(); int width; -/* -* If no c_elem has been allocated yet... -*/ -/* if(index("abcdefghijklmnopqrstuvwxyz-0123456789",elem)==0) - fprintf(stderr,"Warning (ReplaceElem) elem = %c\n",elem); -*/ - width = seq->offset-indx; - if(seq->seqlen == 0 && elem != '-') - { - if(seq->seqmaxlen == 0 || seq->c_elem == NULL) - { - seq->c_elem = Calloc(4,sizeof(char)); + /* + * If no c_elem has been allocated yet... + */ + /* if(index("abcdefghijklmnopqrstuvwxyz-0123456789",elem)==0) + fprintf(stderr,"Warning (ReplaceElem) elem = + %c\n",elem); + */ + width = seq->offset - indx; + if (seq->seqlen == 0 && elem != '-') { + if (seq->seqmaxlen == 0 || seq->c_elem == NULL) { + seq->c_elem = Calloc(4, sizeof(char)); seq->offset = indx; seq->seqmaxlen = 4; } @@ -135,118 +124,109 @@ unsigned char elem; /*Character to replace with */ seq->c_elem[0] = elem; seq->offset = indx; } -/* -* If inserting before the c_elem (< offset) -*/ - else if((indxoffset) && (elem!='-')) - { + /* + * If inserting before the c_elem (< offset) + */ + else if ((indx < seq->offset) && (elem != '-')) { seq->seqmaxlen += width; - seq->c_elem = Realloc(seq->c_elem,seq->seqmaxlen*sizeof(char)); - for(j=seq->seqmaxlen-1;j>=width;j--) - seq->c_elem[j] = seq->c_elem[j-width]; - for(j=0;jc_elem[j] = '-'; + seq->c_elem = + Realloc(seq->c_elem, seq->seqmaxlen * sizeof(char)); + for (j = seq->seqmaxlen - 1; j >= width; j--) + seq->c_elem[j] = seq->c_elem[j - width]; + for (j = 0; j < width; j++) seq->c_elem[j] = '-'; seq->c_elem[0] = elem; seq->seqlen += width; seq->offset = indx; } -/* -* if inserting after c_elem (indx > offset + seqlen) -*/ - else if((indx>=seq->offset+seq->seqlen) && (elem!='-')) - { - if(indx-seq->offset >= seq->seqmaxlen) - { - seq->seqmaxlen = indx-seq->offset+256; - seq->c_elem = Realloc(seq->c_elem,seq->seqmaxlen* - sizeof(char)); + /* + * if inserting after c_elem (indx > offset + seqlen) + */ + else if ((indx >= seq->offset + seq->seqlen) && (elem != '-')) { + if (indx - seq->offset >= seq->seqmaxlen) { + seq->seqmaxlen = indx - seq->offset + 256; + seq->c_elem = + Realloc(seq->c_elem, seq->seqmaxlen * sizeof(char)); } - for(j=seq->seqlen;jseqmaxlen;j++) + for (j = seq->seqlen; j < seq->seqmaxlen; j++) seq->c_elem[j] = '-'; - seq->c_elem[indx-seq->offset] = elem; - seq->seqlen = indx-seq->offset+1; - } - else - { - if(indx-(seq->offset)>=0 && indx-(seq->offset)seqlen) - seq->c_elem[indx-(seq->offset)] = elem; - else if(elem!='-') - fprintf(stderr,"%c better be a -\n",elem); + seq->c_elem[indx - seq->offset] = elem; + seq->seqlen = indx - seq->offset + 1; } - return; + else { + if (indx - (seq->offset) >= 0 && + indx - (seq->offset) < seq->seqlen) + seq->c_elem[indx - (seq->offset)] = elem; + else if (elem != '-') + fprintf(stderr, "%c better be a -\n", elem); + } + return; } - /* -* InsertElem is a modification of InsertElems, and should be -* optimized. s.s.5/6/91 -*/ -int InsertElem(a,b,ch) -Sequence *a; /* Sequence */ -int b; /*Position to insert BEFORE*/ -char ch; /*element to insert */ + * InsertElem is a modification of InsertElems, and should be + * optimized. s.s.5/6/91 + */ +int InsertElem(a, b, ch) +Sequence *a; /* Sequence */ +int b; /*Position to insert BEFORE*/ +char ch; /*element to insert */ { - char c[2]; - c[0]=ch; - c[1] = '\0'; - - return (InsertElems(a,b,c)); + char c[2]; + c[0] = ch; + c[1] = '\0'; + + return (InsertElems(a, b, c)); } - /* -* Make a copy of Sequence one, place in Sequence two -*/ -void SeqCopy(one,two) -Sequence *one,*two; + * Make a copy of Sequence one, place in Sequence two + */ +void SeqCopy(one, two) Sequence *one, *two; { int j; *two = *one; - if(two->seqmaxlen) - two->c_elem = Calloc(one->seqmaxlen,sizeof(char)); - if(two->commentsmaxlen) - two->comments = Calloc(one->commentsmaxlen,sizeof(char)); - for(j=0;jseqlen;j++) - two->c_elem[j] = one->c_elem[j]; - for(j=0;jcommentslen;j++) + if (two->seqmaxlen) two->c_elem = Calloc(one->seqmaxlen, sizeof(char)); + if (two->commentsmaxlen) + two->comments = Calloc(one->commentsmaxlen, sizeof(char)); + for (j = 0; j < one->seqlen; j++) two->c_elem[j] = one->c_elem[j]; + for (j = 0; j < one->commentslen; j++) two->comments[j] = one->comments[j]; return; } - /* -* Normalize seq (remove leading indels in the c_elem; -*/ -void SeqNormal(seq) -Sequence *seq; + * Normalize seq (remove leading indels in the c_elem; + */ +void SeqNormal(seq) Sequence *seq; { - int len,j,shift_width,trailer; + int len, j, shift_width, trailer; char *c_elem; len = seq->seqlen; c_elem = seq->c_elem; - if(len == 0) return; + if (len == 0) return; - for(shift_width=0; (shift_widthseqlen -= shift_width; seq->offset += shift_width; - for(trailer=seq->seqlen-1;(c_elem[trailer] =='-' || - c_elem[trailer] == '\0') && trailer>=0; - trailer--) - c_elem[trailer] = '\0'; - seq->seqlen = trailer+1; + for (trailer = seq->seqlen - 1; + (c_elem[trailer] == '-' || c_elem[trailer] == '\0') && + trailer >= 0; + trailer--) + c_elem[trailer] = '\0'; + seq->seqlen = trailer + 1; return; } -void SeqRev(seq,min,max) -Sequence *seq; -int min,max; +void SeqRev(seq, min, max) Sequence *seq; +int min, max; /* SeqRev will reverse a given sequence within a window from min to max (inclusive). The idea is to allow several sequences @@ -260,72 +240,68 @@ int min,max; */ { int j; - char temp1,temp2; + char temp1, temp2; extern char GetElem(); extern void ReplaceElem(); - for(j=0;j<= (max-min)/2;j++) - { - temp1 = GetElem(seq,min+j); - temp2 = GetElem(seq,max-j); - ReplaceElem(seq,min+j,(unsigned char)temp2); - ReplaceElem(seq,max-j,(unsigned char)temp1); + for (j = 0; j <= (max - min) / 2; j++) { + temp1 = GetElem(seq, min + j); + temp2 = GetElem(seq, max - j); + ReplaceElem(seq, min + j, (unsigned char)temp2); + ReplaceElem(seq, max - j, (unsigned char)temp1); } seq->direction *= -1; - + SeqNormal(seq); return; -} - +} /* sequence complementing. */ -void SeqComp(seq) -Sequence *seq; +void SeqComp(seq) Sequence *seq; { - int j; - unsigned char in,out,case_bit; + int j; + unsigned char in, out, case_bit; char *c; - static int tmatr[16] = {'-','a','c','m','g','r','s','v', - 't','w','y','h','k','d','b','n'}; - - static int matr[128] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0x01,0x0e,0x02,0x0d,0,0,0x04,0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06, - 0x08,0x08,0x07,0x09,0x00,0x0a,0,0,0,0,0,0,0,0x01,0x0e,0x02,0x0d,0,0,0x04, - 0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,0x08,0x08,0x07,0x09,0x00,0x0a, - 0,0,0,0,0x00,0 - }; - + static int tmatr[16] = {'-', 'a', 'c', 'm', 'g', 'r', 's', 'v', + 't', 'w', 'y', 'h', 'k', 'd', 'b', 'n'}; + + static int matr[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, + 0x0e, 0x02, 0x0d, 0, 0, 0x04, 0x0b, 0, 0, 0x0c, 0, + 0x03, 0x0f, 0, 0x05, 0, 0x05, 0x06, 0x08, 0x08, 0x07, 0x09, + 0x00, 0x0a, 0, 0, 0, 0, 0, 0, 0, 0x01, 0x0e, + 0x02, 0x0d, 0, 0, 0x04, 0x0b, 0, 0, 0x0c, 0, 0x03, + 0x0f, 0, 0x05, 0, 0x05, 0x06, 0x08, 0x08, 0x07, 0x09, 0x00, + 0x0a, 0, 0, 0, 0, 0x00, 0}; + c = seq->c_elem; - for(j=0;jseqlen;j++) - { -/* -* Save Case bit... -*/ - case_bit = c[j] & 32; + for (j = 0; j < seq->seqlen; j++) { + /* + * Save Case bit... + */ + case_bit = c[j] & 32; out = 0; in = matr[c[j]]; - if(in&1) - out|=8; - if(in&2) - out|=4; - if(in&4) - out|=2; - if(in&8) - out|=1; + if (in & 1) out |= 8; + if (in & 2) out |= 4; + if (in & 4) out |= 2; + if (in & 8) out |= 1; - if(case_bit == 0) - c[j] = toupper(tmatr[out]); + if (case_bit == 0) + c[j] = toupper(tmatr[out]); else - c[j] = tmatr[out]; + c[j] = tmatr[out]; } seq->direction *= -1; - seq->strandedness = ( seq->strandedness == 2)?1: - ( seq->strandedness == 1)?2: - 0; + seq->strandedness = (seq->strandedness == 2) ? 1 + : (seq->strandedness == 1) ? 2 + : 0; return; - } diff --git a/HGL_SRC/HGLfuncs.c b/HGL_SRC/HGLfuncs.c index dfb1281..f9d9931 100755 --- a/HGL_SRC/HGLfuncs.c +++ b/HGL_SRC/HGLfuncs.c @@ -1,27 +1,28 @@ /**************************************************************** * - * This is a set of functions defined for the genome + * This is a set of functions defined for the genome * project. * ****************************************************************/ - #ifndef _GLOBAL_DEFS_H #define _GLOBAL_DEFS_H +#include +#include + #include "global_defs.h" #endif -#define MAXLINELEN 256 - -static char Default_DNA_Trans[16] = { -'-', 'a','c','m','g','r','s','v','t','w','y','h','k','d','b','n' }; +#define MAXLINELEN 256 +static char Default_DNA_Trans[16] = {'-', 'a', 'c', 'm', 'g', 'r', 's', 'v', + 't', 'w', 'y', 'h', 'k', 'd', 'b', 'n'}; /*********** * * WriteRecord() outputs one record at a time in HGL format. - * Only the fields in the fields_array will be output. All the + * Only the fields in the fields_array will be output. All the * fields will be output if fields_array is NULL. * * fp : pointer to the output file. @@ -35,716 +36,684 @@ static char Default_DNA_Trans[16] = { * **********/ -int -WriteRecord(fp, tSeq, fields_array, array_size) +int WriteRecord(fp, tSeq, fields_array, array_size) FILE *fp; const Sequence *tSeq; int *fields_array; int array_size; { - int i, save_str_size, tt; - int all_fields = FALSE; - int first_field = TRUE; - char temp_str[256]; - char *save_str; - char *ptr; + int i, save_str_size, tt; + int all_fields = FALSE; + int first_field = TRUE; + char temp_str[256]; + char *save_str; + char *ptr; - save_str = (char *)Calloc(256, 1); - save_str_size = 256; - - /* When all the fields are selected. */ - if(fields_array == NULL) - { - all_fields = TRUE; - fields_array = (int *)Calloc(NUM_OF_FIELDS, sizeof(int)); + save_str = (char *)Calloc(256, 1); + save_str_size = 256; - for(i=0; icreation_date[0] != 0 ) - { - sprintf(save_str,"\n%s\t%d/%d/%d ", - at[fields_array[i]], - tSeq->creation_date[1], - tSeq->creation_date[2], - tSeq->creation_date[0]); - - if(tSeq->creation_date[3]>=0) - { - if(tSeq->creation_date[4] < 0) - tSeq->creation_date[4] = 0; - if(tSeq->creation_date[5] < 0) - tSeq->creation_date[5] = 0; - sprintf(save_str, "%s%d:%d:%d", - save_str, - tSeq->creation_date[3], - tSeq->creation_date[4], - tSeq->creation_date[5]); - } - } - else if (fields_array[i] == e_probing_date && - tSeq->probing_date[0] != 0 ) - { - sprintf(save_str,"\n%s\t%d/%d/%d ", - at[fields_array[i]], - tSeq->probing_date[1], - tSeq->probing_date[2], - tSeq->probing_date[0]); - - if(tSeq->probing_date[3]>=0) - { - if(tSeq->probing_date[4] < 0) - tSeq->probing_date[4] = 0; - if(tSeq->probing_date[5] < 0) - tSeq->probing_date[5] = 0; - sprintf(save_str, "%s%d:%d:%d", - save_str, - tSeq->probing_date[3], - tSeq->probing_date[4], - tSeq->probing_date[5]); - } - } - else if (fields_array[i] == e_autorad_date && - tSeq->autorad_date[0] != 0 ) - { - sprintf(save_str,"\n%s\t%d/%d/%d ", - at[fields_array[i]], - tSeq->autorad_date[1], - tSeq->autorad_date[2], - tSeq->autorad_date[0]); - - if(tSeq->autorad_date[3]>=0) - { - if(tSeq->autorad_date[4] < 0) - tSeq->autorad_date[4] = 0; - if(tSeq->autorad_date[5] < 0) - tSeq->autorad_date[5] = 0; - sprintf(save_str, "%s%d:%d:%d", - save_str, - tSeq->autorad_date[3], - tSeq->autorad_date[4], - tSeq->autorad_date[5]); - } - } - else if ( fields_array[i] == e_c_elem && - tSeq->c_elem != NULL ) - { - ptr = tSeq->c_elem; - sprintf(save_str,"\n%s\t\"",at[fields_array[i]]); - while ( ptr < tSeq->c_elem + tSeq->seqlen ) - { - if ( ptr != tSeq->c_elem ) - strcat(save_str,"\n"); - strncpy(temp_str, ptr, MIN(60, tSeq->c_elem +tSeq->seqlen-ptr)); - temp_str[MIN(60, tSeq->c_elem+tSeq->seqlen - ptr)] = '\0'; - - /* Gurantee strlen(temp_str) chars for the string, - * one for \n, one for ", and one for \0. - */ - while(save_str_size - strlen(save_str) < strlen(temp_str)+3) - { - save_str_size *= 2; - save_str = (char *)Realloc(save_str,save_str_size); + for (i = 0; i < NUM_OF_FIELDS; i++) { + fields_array[i] = i; } - strcat(save_str, temp_str); - ptr += 60; - } - strcat(save_str,"\""); + array_size = NUM_OF_FIELDS; } - else if ( fields_array[i] == e_comments && - tSeq->commentslen != 0) - { - while(save_str_size < 20+tSeq->commentslen) - { - save_str_size *= 2; - save_str = (char *)Realloc(save_str,save_str_size); - } - - strcat(save_str,"\n"); - strcat(save_str,at[fields_array[i]]); - strcat(save_str,"\t\"\n"); - - /* put a \0 at the end of comments. */ - while(tSeq->commentslen + 1 > tSeq->commentsmaxlen) - { - tSeq->commentsmaxlen *= 2; - tSeq->comments = (char *) - Realloc(tSeq->comments, - tSeq->commentsmaxlen); - } - tSeq->comments[tSeq->commentslen] = '\0'; - - /* clean up the leading empty lines.*/ - tt = 0; - while(tSeq->comments[tt] == '\n' || tSeq->comments[tt] == ' ') - tt++; - tSeq->commentslen -= tt; - strcat(save_str,tSeq->comments+tt); - strcat(save_str,"\""); + + for (i = 0; i < array_size; i++) { + save_str[0] = '\0'; + + if (fields_array[i] == e_creation_date && + tSeq->creation_date[0] != 0) { + sprintf(save_str, "\n%s\t%d/%d/%d ", + at[fields_array[i]], tSeq->creation_date[1], + tSeq->creation_date[2], tSeq->creation_date[0]); + + if (tSeq->creation_date[3] >= 0) { + if (tSeq->creation_date[4] < 0) + tSeq->creation_date[4] = 0; + if (tSeq->creation_date[5] < 0) + tSeq->creation_date[5] = 0; + sprintf(save_str, "%s%d:%d:%d", save_str, + tSeq->creation_date[3], + tSeq->creation_date[4], + tSeq->creation_date[5]); + } + } + else if (fields_array[i] == e_probing_date && + tSeq->probing_date[0] != 0) { + sprintf(save_str, "\n%s\t%d/%d/%d ", + at[fields_array[i]], tSeq->probing_date[1], + tSeq->probing_date[2], tSeq->probing_date[0]); + + if (tSeq->probing_date[3] >= 0) { + if (tSeq->probing_date[4] < 0) + tSeq->probing_date[4] = 0; + if (tSeq->probing_date[5] < 0) + tSeq->probing_date[5] = 0; + sprintf(save_str, "%s%d:%d:%d", save_str, + tSeq->probing_date[3], + tSeq->probing_date[4], + tSeq->probing_date[5]); + } + } + else if (fields_array[i] == e_autorad_date && + tSeq->autorad_date[0] != 0) { + sprintf(save_str, "\n%s\t%d/%d/%d ", + at[fields_array[i]], tSeq->autorad_date[1], + tSeq->autorad_date[2], tSeq->autorad_date[0]); + + if (tSeq->autorad_date[3] >= 0) { + if (tSeq->autorad_date[4] < 0) + tSeq->autorad_date[4] = 0; + if (tSeq->autorad_date[5] < 0) + tSeq->autorad_date[5] = 0; + sprintf(save_str, "%s%d:%d:%d", save_str, + tSeq->autorad_date[3], + tSeq->autorad_date[4], + tSeq->autorad_date[5]); + } + } + else if (fields_array[i] == e_c_elem && tSeq->c_elem != NULL) { + ptr = tSeq->c_elem; + sprintf(save_str, "\n%s\t\"", at[fields_array[i]]); + while (ptr < tSeq->c_elem + tSeq->seqlen) { + if (ptr != tSeq->c_elem) strcat(save_str, "\n"); + strncpy( + temp_str, ptr, + MIN(60, tSeq->c_elem + tSeq->seqlen - ptr)); + temp_str[MIN(60, tSeq->c_elem + tSeq->seqlen - + ptr)] = '\0'; + + /* Gurantee strlen(temp_str) chars for the + * string, one for \n, one for ", and one for + * \0. + */ + while (save_str_size - strlen(save_str) < + strlen(temp_str) + 3) { + save_str_size *= 2; + save_str = (char *)Realloc( + save_str, save_str_size); + } + strcat(save_str, temp_str); + ptr += 60; + } + strcat(save_str, "\""); + } + else if (fields_array[i] == e_comments && + tSeq->commentslen != 0) { + while (save_str_size < 20 + tSeq->commentslen) { + save_str_size *= 2; + save_str = + (char *)Realloc(save_str, save_str_size); + } + + strcat(save_str, "\n"); + strcat(save_str, at[fields_array[i]]); + strcat(save_str, "\t\"\n"); + + /* put a \0 at the end of comments. */ + while (tSeq->commentslen + 1 > tSeq->commentsmaxlen) { + tSeq->commentsmaxlen *= 2; + tSeq->comments = (char *)Realloc( + tSeq->comments, tSeq->commentsmaxlen); + } + tSeq->comments[tSeq->commentslen] = '\0'; + + /* clean up the leading empty lines.*/ + tt = 0; + while (tSeq->comments[tt] == '\n' || + tSeq->comments[tt] == ' ') + tt++; + tSeq->commentslen -= tt; + strcat(save_str, tSeq->comments + tt); + strcat(save_str, "\""); + } + else if (fields_array[i] == e_laneset && tSeq->laneset != -1) + sprintf(save_str, "\n%s\t\t%d", at[fields_array[i]], + tSeq->laneset); + else if (fields_array[i] == e_strandedness && + tSeq->strandedness != 0) + sprintf(save_str, "\n%s\t%d", at[fields_array[i]], + tSeq->strandedness); + else if (fields_array[i] == e_direction && tSeq->direction != 0) + sprintf(save_str, "\n%s\t%d", at[fields_array[i]], + tSeq->direction); + else if (fields_array[i] == e_orig_strand && + tSeq->orig_strand != 0) + sprintf(save_str, "\n%s\t%d", at[fields_array[i]], + tSeq->orig_strand); + else if (fields_array[i] == e_orig_direction && + tSeq->orig_direction != 0) + sprintf(save_str, "\n%s\t%d", at[fields_array[i]], + tSeq->orig_direction); + else if (fields_array[i] == e_offset) + sprintf(save_str, "\n%s\t\t%d", at[fields_array[i]], + tSeq->offset); + else if (fields_array[i] == e_group_number && + tSeq->group_number != 0) + sprintf(save_str, "\n%s\t%d", at[fields_array[i]], + tSeq->group_number); + else if (fields_array[i] == e_group_ID) + sprintf(save_str, "\n%s\t%d", at[fields_array[i]], + tSeq->group_ID); + else if (fields_array[i] == e_type && tSeq->type[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->type); + else if (fields_array[i] == e_barcode && + tSeq->barcode[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->barcode); + else if (fields_array[i] == e_name && tSeq->name[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->name); + else if (fields_array[i] == e_status && tSeq->status[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->status); + else if (fields_array[i] == e_walk && tSeq->walk[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->walk); + else if (fields_array[i] == e_sequence_ID && + tSeq->sequence_ID[0] != '\0') + sprintf(save_str, "\n%s\t\"%s\"", at[fields_array[i]], + tSeq->sequence_ID); + else if (fields_array[i] == e_creator && + tSeq->creator[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->creator); + else if (fields_array[i] == e_film && tSeq->film[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->film); + else if (fields_array[i] == e_membrane && + tSeq->membrane[0] != '\0') + sprintf(save_str, "\n%s\t\"%s\"", at[fields_array[i]], + tSeq->membrane); + else if (fields_array[i] == e_source_ID && + tSeq->source_ID[0] != '\0') + sprintf(save_str, "\n%s\t\"%s\"", at[fields_array[i]], + tSeq->source_ID); + else if (fields_array[i] == e_contig && tSeq->contig[0] != '\0') + sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], + tSeq->contig); + else if (fields_array[i] == e_baggage && tSeq->baglen != 0) { + if (save_str_size < tSeq->baglen + 2) { + save_str_size = tSeq->baglen + 2; + save_str = + (char *)Realloc(save_str, save_str_size); + } + + save_str[0] = '\n'; + save_str[1] = '\0'; + + /* put a \0 at the end of baggage. */ + strncat(save_str, tSeq->baggage, tSeq->baglen); + while (save_str[tSeq->baglen - 1] == '\n') { + tSeq->baglen--; + } + save_str[tSeq->baglen] = '\0'; + } + if (save_str[0] != '\0') { + if (first_field == TRUE) { + first_field = FALSE; + fprintf(fp, "{"); + } + fprintf(fp, "%s", save_str); + } } - else if (fields_array[i] == e_laneset && tSeq->laneset != -1) - sprintf(save_str,"\n%s\t\t%d", - at[fields_array[i]],tSeq->laneset); - else if (fields_array[i] == e_strandedness && tSeq->strandedness != 0) - sprintf(save_str,"\n%s\t%d", - at[fields_array[i]],tSeq->strandedness); - else if (fields_array[i] == e_direction && tSeq->direction != 0) - sprintf(save_str,"\n%s\t%d", - at[fields_array[i]],tSeq->direction); - else if (fields_array[i] == e_orig_strand && tSeq->orig_strand != 0) - sprintf(save_str,"\n%s\t%d", - at[fields_array[i]],tSeq->orig_strand); - else if (fields_array[i] == e_orig_direction && tSeq->orig_direction != 0) - sprintf(save_str,"\n%s\t%d", - at[fields_array[i]],tSeq->orig_direction); - else if (fields_array[i] == e_offset) - sprintf(save_str,"\n%s\t\t%d", - at[fields_array[i]],tSeq->offset); - else if (fields_array[i] == e_group_number && tSeq->group_number != 0) - sprintf(save_str,"\n%s\t%d", - at[fields_array[i]],tSeq->group_number); - else if (fields_array[i] == e_group_ID) - sprintf(save_str,"\n%s\t%d", - at[fields_array[i]],tSeq->group_ID); - else if (fields_array[i] == e_type && tSeq->type[0] != '\0' ) - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->type); - else if (fields_array[i] == e_barcode && tSeq->barcode[0] != '\0' ) - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->barcode); - else if (fields_array[i] == e_name && tSeq->name[0] != '\0' ) - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->name); - else if (fields_array[i] == e_status && tSeq->status[0] != '\0' ) - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->status); - else if (fields_array[i] == e_walk && tSeq->walk[0] != '\0' ) - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->walk); - else if (fields_array[i] == e_sequence_ID && - tSeq->sequence_ID[0] != '\0' ) - sprintf(save_str,"\n%s\t\"%s\"", - at[fields_array[i]],tSeq->sequence_ID); - else if (fields_array[i] == e_creator && tSeq->creator[0] != '\0') - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->creator); - else if (fields_array[i]==e_film && tSeq->film[0]!='\0') - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->film); - else if (fields_array[i] == e_membrane && tSeq->membrane[0] != '\0') - sprintf(save_str,"\n%s\t\"%s\"", - at[fields_array[i]],tSeq->membrane); - else if (fields_array[i] == e_source_ID && tSeq->source_ID[0] != '\0') - sprintf(save_str,"\n%s\t\"%s\"", - at[fields_array[i]],tSeq->source_ID); - else if (fields_array[i] == e_contig && tSeq->contig[0] != '\0') - sprintf(save_str,"\n%s\t\t\"%s\"", - at[fields_array[i]],tSeq->contig); - else if (fields_array[i] == e_baggage && tSeq->baglen != 0) - { - if(save_str_size < tSeq->baglen+2) - { - save_str_size = tSeq->baglen+2; - save_str = (char *)Realloc(save_str,save_str_size); - } - - save_str[0] = '\n'; - save_str[1] = '\0'; - /* put a \0 at the end of baggage. */ - strncat(save_str, tSeq->baggage, tSeq->baglen); - while(save_str[tSeq->baglen-1] == '\n') - { - tSeq->baglen--; - } - save_str[tSeq->baglen] = '\0'; + if (first_field == FALSE) { + fprintf(fp, "\n}\n"); } - if(save_str[0] != '\0') - { - if (first_field == TRUE) - { - first_field = FALSE; - fprintf(fp,"{"); - } - fprintf(fp,"%s",save_str); + + if (all_fields == TRUE && fields_array != NULL) { + Cfree(fields_array); + fields_array = NULL; + } + if (save_str != NULL) { + Cfree(save_str); + save_str = NULL; } - } - if (first_field == FALSE) - { - fprintf(fp,"\n}\n"); - } - - if(all_fields == TRUE && fields_array != NULL) - { - Cfree(fields_array); - fields_array = NULL; - } - if(save_str != NULL) - { - Cfree(save_str); - save_str = NULL; - } - - if (first_field == TRUE) - return 0; - else - return 1; + if (first_field == TRUE) + return 0; + else + return 1; } - - /********* * * ReadRecord() reads one record from fp into tSeq. fp remains at - * the finishing position so that next time when ReadRecord() is + * the finishing position so that next time when ReadRecord() is * called, it reads the next record. * * The caller program should LOCATE MEMORY for the tSeq before calling. * * ReadRecord() returns: - * TRUE if no error; + * TRUE if no error; * FALSE if anything is wrong * -1 if end-of-file is reached * **********/ -int -ReadRecord(fp, tSeq) +int ReadRecord(fp, tSeq) FILE *fp; Sequence *tSeq; { - char field_name[20], line[256], orig_line[256]; - int temp_str_size, start, end, l, max_len = 255; - char *fgets_ret, *temp_str, *fgets_ret1; - int start_rec = FALSE; - int need_to_read = TRUE; - char started = 'F'; - void InitRecord(); - void FreeRecord(); + char field_name[20], line[256], orig_line[256]; + int temp_str_size, start, end, l, max_len = 255; + char *fgets_ret, *temp_str, *fgets_ret1; + int start_rec = FALSE; + int need_to_read = TRUE; + char started = 'F'; + void InitRecord(); + void FreeRecord(); - temp_str = (char *)Calloc(256, 1); - temp_str_size = 256; + temp_str = (char *)Calloc(256, 1); + temp_str_size = 256; - InitRecord(tSeq); + InitRecord(tSeq); - if(tSeq->c_elem == NULL) - { - tSeq->c_elem = (char *)Calloc(256, 1); - tSeq->seqmaxlen = 256; - } - tSeq->c_elem[0] = '\0'; - - - /* read file line-by-line. */ - while (need_to_read == TRUE && - ((fgets_ret = fgets(line, max_len, fp)) != NULL || - start_rec == TRUE)) - { - strcpy(orig_line, line); - end = strlen(line) -1; - while(end>=0 && (line[end] == ' ' || - line[end] == '\t' || - line[end] == ',' || - line[end] == '\n') ) - end--; - - /* ignore empty lines. */ - if(end == -1) - continue; - - if(line[end] == '{') - started = 'T'; - - /* to ignore the lines between a } and a {. */ - while(started == 'F' && fgets_ret != NULL) - { - fgets_ret = fgets(line, max_len, fp); - strcpy(orig_line, line); - end = strlen(line) -1; - while(end>=0 && (line[end] == ' ' || - line[end] == '\t' || - line[end] == ',' || - line[end] == '\n') ) - end--; - - /* ignore empty lines. */ - if(end == -1) - continue; - - if(line[end] == '{') - started = 'T'; + if (tSeq->c_elem == NULL) { + tSeq->c_elem = (char *)Calloc(256, 1); + tSeq->seqmaxlen = 256; } + tSeq->c_elem[0] = '\0'; - if(fgets_ret == NULL) - return -1; + /* read file line-by-line. */ + while (need_to_read == TRUE && + ((fgets_ret = fgets(line, max_len, fp)) != NULL || + start_rec == TRUE)) { + strcpy(orig_line, line); + end = strlen(line) - 1; + while (end >= 0 && (line[end] == ' ' || line[end] == '\t' || + line[end] == ',' || line[end] == '\n')) + end--; - if (end < 0) - { - } - else if ((line[end] == '}') && (end==0)) - { - start_rec = FALSE; - need_to_read = FALSE; - } - else if (line[end] == '{' && end <= 10) - { - start_rec = TRUE; - } - else - { - if (line[end]=='}') - { - need_to_read = FALSE; - start_rec = FALSE; - } + /* ignore empty lines. */ + if (end == -1) continue; - /* locate the tag. */ - start = 0; - while(line[start] == ' ' || - line[start] == '\t'|| - line[start] == '\n'|| - line[start] == '{' ) - start++; - - end = start +1; - while(line[end] != ' ' && - line[end] != '\t' && - line[end] != '\n' && - line[end] != '\0') - end++; - strncpy(field_name, line+start, end-start); - field_name[end-start] = '\0'; - - /* process the field value. */ - - /* - * creation_date, probing_date, or autorad_date - */ - - if ( strcmp(field_name,"creation-date") == 0) - { - while(!isdigit(line[end])) - end++; - if(strToDate(line + end, tSeq->creation_date) == -1) - { - return FALSE; + if (line[end] == '{') started = 'T'; + + /* to ignore the lines between a } and a {. */ + while (started == 'F' && fgets_ret != NULL) { + fgets_ret = fgets(line, max_len, fp); + strcpy(orig_line, line); + end = strlen(line) - 1; + while (end >= 0 && + (line[end] == ' ' || line[end] == '\t' || + line[end] == ',' || line[end] == '\n')) + end--; + + /* ignore empty lines. */ + if (end == -1) continue; + + if (line[end] == '{') started = 'T'; } - } - else if (strcmp(field_name,"probing-date") == 0) - { - while(line[end] != '\0' && !isdigit(line[end])) - end++; - - if(line[end] != '\0' && - strToDate(line + end, tSeq->probing_date) == -1) - { - return FALSE; - } - } - else if ( strcmp(field_name,"autorad-date") == 0) - { - while(line[end] != '\0' && !isdigit(line[end])) - end++; - if(line[end] != '\0' && - strToDate(line + end, tSeq->autorad_date) == -1) - { - return FALSE; - } - } - - /* - * sequence or comments. - */ - - else if (strcmp(field_name,"sequence") == 0 || - strcmp(field_name,"comments") == 0 ) - { - temp_str[0] = '\0'; - - /* locate the first ". */ - while(line[end++] != '"'); - start = end; - end = strlen(line); - /* ---"\n\0. */ - if(line[end-2] == '"') - end -= 2; - else if(line[end-1] == '\n' && - strcmp(field_name,"sequence") == 0) - end--; + if (fgets_ret == NULL) return -1; - while(temp_str_size < end-start+1 ) - { - temp_str_size *= 2; - temp_str = (char *)Realloc(temp_str, temp_str_size); + if (end < 0) { } - if(end - start > 0) - strncat(temp_str, line+start, end-start); - - /* Read the second line of the seq. or comments, if any. - end-start<0 is the case that " is the only char this line.*/ - if (line[strlen(line)-2] != '"' || end-start<0) - { - while((fgets_ret1 = fgets(line, max_len, fp)) != NULL) - { - /* IGNORE empty lines. 5/4/92 */ - int empty_line = 0; - while(line[empty_line] == ' ') - empty_line++; - if(line[empty_line] == '\n') - { - continue; - /* strncat(temp_str, line, end); 5/4/92 */ + else if ((line[end] == '}') && (end == 0)) { + start_rec = FALSE; + need_to_read = FALSE; + } + else if (line[end] == '{' && end <= 10) { + start_rec = TRUE; + } + else { + if (line[end] == '}') { + need_to_read = FALSE; + start_rec = FALSE; } - - l = strlen(line) -1; - if(line[l-1] == '"') - end = l-1; - else - end = l; - if(line[end] == '\n' && - strcmp(field_name,"comments") == 0) - end++; + /* locate the tag. */ + start = 0; + while (line[start] == ' ' || line[start] == '\t' || + line[start] == '\n' || line[start] == '{') + start++; - /* Gurantee 'end' chars for the string, one for ", - * and one for \0. - */ - while(temp_str_size - strlen(temp_str) < end+3 ) - { - temp_str_size *= 2; - temp_str=(char *)Realloc(temp_str,temp_str_size); - } - strncat(temp_str, line, end); + end = start + 1; + while (line[end] != ' ' && line[end] != '\t' && + line[end] != '\n' && line[end] != '\0') + end++; + strncpy(field_name, line + start, end - start); + field_name[end - start] = '\0'; - if(line[l-1] == '"') - break; - } - if(fgets_ret1 == NULL && need_to_read == TRUE) - { - fprintf(stderr, "ReadRecord(): incomplete record.\n"); - return FALSE; - } - } - - l = strlen(temp_str); - if(strcmp(field_name,"comments") == 0 ) - { - if(tSeq->commentsmaxlen == 0) - { - tSeq->comments = (char *)Calloc(l+1, 1); - tSeq->commentsmaxlen = l+1; - } - else - { - while(tSeq->commentslen+l+1>tSeq->commentsmaxlen) - { - tSeq->commentsmaxlen *= 2; - tSeq->comments = (char *) - Realloc(tSeq->comments, tSeq->commentsmaxlen); - } - } - tSeq->comments[tSeq->commentslen] = '\0'; - strcat(tSeq->comments, temp_str); - tSeq->commentslen += l; - } - else /* it is the sequence. */ - { - if(tSeq->seqmaxlen == 0) - { - tSeq->c_elem = (char *)Calloc(l+1, 1); - } - else if(l+1>tSeq->seqmaxlen) - { - tSeq->c_elem = (char *)Realloc(tSeq->c_elem, l+1); - } - tSeq->seqmaxlen = l+1; - tSeq->seqlen = l; - strcpy(tSeq->c_elem, temp_str); - } - } + /* process the field value. */ - /* - * Integer or String. - */ - - else - { - /* locate the value: a string or an integer. */ - - while(line[end] == ' ' || line[end] == '\t') - end++; - if (line[end] == '"') - { - /* It is a string. */ - end++; - start = end; - while(line[end] != '\0' && line[end] != '"') - end++; - /* - * strncat will not put a \0 at the end of a string - * if the copying string is longer than n. - */ - line[end++] = '\0'; - } - else - { - /* It is an integer. */ - start = end; - while(line[end] != ' ' && - line[end] != '\t' && - line[end] != '\n' && - line[end] != '\0') - end++; - strncpy(temp_str, line+start, end-start+1); /*4/26 add 1*/ - temp_str[end-start] = '\0'; - } - - /* assign to an integer field. */ - if (strcmp(field_name,"laneset") == 0 ) - tSeq->laneset = atoi(temp_str); - else if (strcmp(field_name,"strandedness") == 0 ) - tSeq->strandedness = atoi(temp_str); - else if (strcmp(field_name,"direction") == 0) - tSeq->direction = atoi(temp_str); - else if (strcmp(field_name,"orig_strand") == 0 ) - tSeq->orig_strand = atoi(temp_str); - else if (strcmp(field_name,"orig_direction") == 0 ) - tSeq->orig_direction = atoi(temp_str); - else if (strcmp(field_name,"offset") == 0 ) - tSeq->offset = atoi(temp_str); - else if (strcmp(field_name,"group-number") == 0 ) - tSeq->group_number = atoi(temp_str); - else if (strcmp(field_name,"group-ID") == 0 ) - tSeq->group_ID = atoi(temp_str); - - /* assign to a string field. */ - else if (strcmp(field_name,"type") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->type, line+start, end-start); - tSeq->type[end-start] = '\0'; - } - else if (strcmp(field_name,"barcode") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->barcode, line+start, end-start); - tSeq->barcode[end-start] = '\0'; - } - else if (strcmp(field_name,"name") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->name, line+start, end-start); - tSeq->name[end-start] = '\0'; - } - else if (strcmp(field_name,"status") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->status, line+start, end-start); - tSeq->status[end-start] = '\0'; - } - else if (strcmp(field_name,"walk") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->walk, line+start, end-start); - tSeq->walk[end-start] = '\0'; - } - else if (strcmp(field_name,"sequence-ID") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->sequence_ID, line+start, end-start); - tSeq->sequence_ID[end-start] = '\0'; - } - else if (strcmp(field_name,"creator") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->creator, line+start, end-start); - tSeq->creator[end-start] = '\0'; - } - else if (strcmp(field_name,"film") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->film, line+start, end-start); - tSeq->film[end-start] = '\0'; - } - else if (strcmp(field_name,"membrane") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->membrane, line+start, end-start); - tSeq->membrane[end-start] = '\0'; - } - else if (strcmp(field_name,"source-ID") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->source_ID, line+start, end-start); - tSeq->source_ID[end-start] = '\0'; - } - else if (strcmp(field_name,"contig") == 0 ) - { - if(end - start > 31) end = start + 31; - strncpy(tSeq->contig, line+start, end-start); - tSeq->contig[end-start] = '\0'; - } - else - { - if(tSeq->bagmaxlen == 0) - { - tSeq->bagmaxlen = 4*strlen(orig_line); - tSeq->baggage = (char *)Calloc(tSeq->bagmaxlen, 1); - } - else - { - while(tSeq->bagmaxlenbaglen+2+strlen(orig_line)) - { - tSeq->bagmaxlen *= 2; - tSeq->baggage = (char *) - Realloc(tSeq->baggage, tSeq->bagmaxlen); - } - } - if(tSeq->baglen == 0) - { /* - tSeq->baggage[0] = '\n'; - tSeq->baggage[1] = '\0'; - tSeq->baglen = 1; - */ - tSeq->baggage[0] = '\0'; - } + * creation_date, probing_date, or autorad_date + */ - /* strcat(tSeq->baggage, "\n");*/ - strcat(tSeq->baggage, orig_line); - tSeq->baglen += strlen(orig_line); + if (strcmp(field_name, "creation-date") == 0) { + while (!isdigit(line[end])) end++; + if (strToDate(line + end, + tSeq->creation_date) == -1) { + return FALSE; + } + } + else if (strcmp(field_name, "probing-date") == 0) { + while (line[end] != '\0' && !isdigit(line[end])) + end++; + + if (line[end] != '\0' && + strToDate(line + end, tSeq->probing_date) == + -1) { + return FALSE; + } + } + else if (strcmp(field_name, "autorad-date") == 0) { + while (line[end] != '\0' && !isdigit(line[end])) + end++; + if (line[end] != '\0' && + strToDate(line + end, tSeq->autorad_date) == + -1) { + return FALSE; + } + } + + /* + * sequence or comments. + */ + + else if (strcmp(field_name, "sequence") == 0 || + strcmp(field_name, "comments") == 0) { + temp_str[0] = '\0'; + + /* locate the first ". */ + while (line[end++] != '"') + ; + start = end; + end = strlen(line); + + /* ---"\n\0. */ + if (line[end - 2] == '"') + end -= 2; + else if (line[end - 1] == '\n' && + strcmp(field_name, "sequence") == 0) + end--; + + while (temp_str_size < end - start + 1) { + temp_str_size *= 2; + temp_str = (char *)Realloc( + temp_str, temp_str_size); + } + if (end - start > 0) + strncat(temp_str, line + start, + end - start); + + /* Read the second line of the seq. or comments, + if any. end-start<0 is the case that " is the + only char this line.*/ + if (line[strlen(line) - 2] != '"' || + end - start < 0) { + while ((fgets_ret1 = + fgets(line, max_len, fp)) != + NULL) { + /* IGNORE empty lines. 5/4/92 */ + int empty_line = 0; + while (line[empty_line] == ' ') + empty_line++; + if (line[empty_line] == '\n') { + continue; + /* strncat(temp_str, + * line, end); 5/4/92 */ + } + + l = strlen(line) - 1; + if (line[l - 1] == '"') + end = l - 1; + else + end = l; + + if (line[end] == '\n' && + strcmp(field_name, + "comments") == 0) + end++; + + /* Gurantee 'end' chars for the + * string, one for ", and one + * for \0. + */ + while (temp_str_size - + strlen(temp_str) < + end + 3) { + temp_str_size *= 2; + temp_str = + (char *)Realloc( + temp_str, + temp_str_size); + } + strncat(temp_str, line, end); + + if (line[l - 1] == '"') break; + } + if (fgets_ret1 == NULL && + need_to_read == TRUE) { + fprintf(stderr, + "ReadRecord(): " + "incomplete record.\n"); + return FALSE; + } + } + + l = strlen(temp_str); + if (strcmp(field_name, "comments") == 0) { + if (tSeq->commentsmaxlen == 0) { + tSeq->comments = + (char *)Calloc(l + 1, 1); + tSeq->commentsmaxlen = l + 1; + } + else { + while (tSeq->commentslen + l + + 1 > + tSeq->commentsmaxlen) { + tSeq->commentsmaxlen *= + 2; + tSeq->comments = + (char *)Realloc( + tSeq->comments, + tSeq->commentsmaxlen); + } + } + tSeq->comments[tSeq->commentslen] = + '\0'; + strcat(tSeq->comments, temp_str); + tSeq->commentslen += l; + } + else /* it is the sequence. */ + { + if (tSeq->seqmaxlen == 0) { + tSeq->c_elem = + (char *)Calloc(l + 1, 1); + } + else if (l + 1 > tSeq->seqmaxlen) { + tSeq->c_elem = (char *)Realloc( + tSeq->c_elem, l + 1); + } + tSeq->seqmaxlen = l + 1; + tSeq->seqlen = l; + strcpy(tSeq->c_elem, temp_str); + } + } + + /* + * Integer or String. + */ + + else { + /* locate the value: a string or an integer. */ + + while (line[end] == ' ' || line[end] == '\t') + end++; + if (line[end] == '"') { + /* It is a string. */ + end++; + start = end; + while (line[end] != '\0' && + line[end] != '"') + end++; + /* + * strncat will not put a \0 at the end + * of a string if the copying string is + * longer than n. + */ + line[end++] = '\0'; + } + else { + /* It is an integer. */ + start = end; + while (line[end] != ' ' && + line[end] != '\t' && + line[end] != '\n' && + line[end] != '\0') + end++; + strncpy(temp_str, line + start, + end - start + 1); /*4/26 add 1*/ + temp_str[end - start] = '\0'; + } + + /* assign to an integer field. */ + if (strcmp(field_name, "laneset") == 0) + tSeq->laneset = atoi(temp_str); + else if (strcmp(field_name, "strandedness") == + 0) + tSeq->strandedness = atoi(temp_str); + else if (strcmp(field_name, "direction") == 0) + tSeq->direction = atoi(temp_str); + else if (strcmp(field_name, "orig_strand") == 0) + tSeq->orig_strand = atoi(temp_str); + else if (strcmp(field_name, "orig_direction") == + 0) + tSeq->orig_direction = atoi(temp_str); + else if (strcmp(field_name, "offset") == 0) + tSeq->offset = atoi(temp_str); + else if (strcmp(field_name, "group-number") == + 0) + tSeq->group_number = atoi(temp_str); + else if (strcmp(field_name, "group-ID") == 0) + tSeq->group_ID = atoi(temp_str); + + /* assign to a string field. */ + else if (strcmp(field_name, "type") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->type, line + start, + end - start); + tSeq->type[end - start] = '\0'; + } + else if (strcmp(field_name, "barcode") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->barcode, line + start, + end - start); + tSeq->barcode[end - start] = '\0'; + } + else if (strcmp(field_name, "name") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->name, line + start, + end - start); + tSeq->name[end - start] = '\0'; + } + else if (strcmp(field_name, "status") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->status, line + start, + end - start); + tSeq->status[end - start] = '\0'; + } + else if (strcmp(field_name, "walk") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->walk, line + start, + end - start); + tSeq->walk[end - start] = '\0'; + } + else if (strcmp(field_name, "sequence-ID") == + 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->sequence_ID, line + start, + end - start); + tSeq->sequence_ID[end - start] = '\0'; + } + else if (strcmp(field_name, "creator") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->creator, line + start, + end - start); + tSeq->creator[end - start] = '\0'; + } + else if (strcmp(field_name, "film") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->film, line + start, + end - start); + tSeq->film[end - start] = '\0'; + } + else if (strcmp(field_name, "membrane") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->membrane, line + start, + end - start); + tSeq->membrane[end - start] = '\0'; + } + else if (strcmp(field_name, "source-ID") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->source_ID, line + start, + end - start); + tSeq->source_ID[end - start] = '\0'; + } + else if (strcmp(field_name, "contig") == 0) { + if (end - start > 31) end = start + 31; + strncpy(tSeq->contig, line + start, + end - start); + tSeq->contig[end - start] = '\0'; + } + else { + if (tSeq->bagmaxlen == 0) { + tSeq->bagmaxlen = + 4 * strlen(orig_line); + tSeq->baggage = (char *)Calloc( + tSeq->bagmaxlen, 1); + } + else { + while (tSeq->bagmaxlen < + tSeq->baglen + 2 + + strlen(orig_line)) { + tSeq->bagmaxlen *= 2; + tSeq->baggage = + (char *)Realloc( + tSeq->baggage, + tSeq->bagmaxlen); + } + } + if (tSeq->baglen == 0) { + /* + tSeq->baggage[0] = '\n'; + tSeq->baggage[1] = '\0'; + tSeq->baglen = 1; + */ + tSeq->baggage[0] = '\0'; + } + + /* strcat(tSeq->baggage, "\n");*/ + strcat(tSeq->baggage, orig_line); + tSeq->baglen += strlen(orig_line); + } + } } - } } - } - if(temp_str != NULL) - { - Cfree(temp_str); - temp_str = NULL; - } + if (temp_str != NULL) { + Cfree(temp_str); + temp_str = NULL; + } - if ( start_rec == FALSE && fgets_ret == NULL) - { - /* end of file, did not get a record. */ - return -1; - } - else - return TRUE; + if (start_rec == FALSE && fgets_ret == NULL) { + /* end of file, did not get a record. */ + return -1; + } + else + return TRUE; } - /********* * * Initialize a record. @@ -753,212 +722,184 @@ Sequence *tSeq; * **********/ -void -InitRecord(tSeq) -Sequence *tSeq; +void InitRecord(tSeq) Sequence *tSeq; { - int i; - - strcpy(tSeq->type, "DNA"); - tSeq->barcode[0] = '\0'; - tSeq->name[0] = '\0'; - tSeq->status[0] = '\0'; - strcpy(tSeq->walk, "FALSE"); - tSeq->sequence_ID[0] = '\0'; + int i; - tSeq->c_elem = NULL; - tSeq->seqlen = 0; - tSeq->seqmaxlen = 0; - - for (i = 0; i<6; i++) - { - tSeq->creation_date[i] = 0; - tSeq->probing_date[i] = 0; - tSeq->autorad_date[i] = 0; - } + strcpy(tSeq->type, "DNA"); + tSeq->barcode[0] = '\0'; + tSeq->name[0] = '\0'; + tSeq->status[0] = '\0'; + strcpy(tSeq->walk, "FALSE"); + tSeq->sequence_ID[0] = '\0'; - tSeq->creator[0] = '\0'; - tSeq->film[0] = '\0'; - tSeq->membrane[0] = '\0'; - tSeq->source_ID[0] = '\0'; - tSeq->contig[0] = '\0'; - tSeq->laneset = -1; - tSeq->direction = 1; /* (1/-1/0),default: 5 to 3. */ - tSeq->strandedness = 1; /* (1/2/0), default: primary.*/ - tSeq->orig_direction= 0; /* (0 unknown, -1:3'->5', 1:5'->3') */ - tSeq->orig_strand = 0; /* (0 unknown, 1:primary, 2:secondary) */ - tSeq->offset = 0; + tSeq->c_elem = NULL; + tSeq->seqlen = 0; + tSeq->seqmaxlen = 0; - tSeq->comments = NULL; - tSeq->commentslen = 0; - tSeq->commentsmaxlen = 0; + for (i = 0; i < 6; i++) { + tSeq->creation_date[i] = 0; + tSeq->probing_date[i] = 0; + tSeq->autorad_date[i] = 0; + } - tSeq->baggage = NULL; - tSeq->baglen = 0; - tSeq->bagmaxlen = 0; - tSeq->group_number = 0; - tSeq->group_ID = 0; + tSeq->creator[0] = '\0'; + tSeq->film[0] = '\0'; + tSeq->membrane[0] = '\0'; + tSeq->source_ID[0] = '\0'; + tSeq->contig[0] = '\0'; + tSeq->laneset = -1; + tSeq->direction = 1; /* (1/-1/0),default: 5 to 3. */ + tSeq->strandedness = 1; /* (1/2/0), default: primary.*/ + tSeq->orig_direction = 0; /* (0 unknown, -1:3'->5', 1:5'->3') */ + tSeq->orig_strand = 0; /* (0 unknown, 1:primary, 2:secondary) */ + tSeq->offset = 0; + + tSeq->comments = NULL; + tSeq->commentslen = 0; + tSeq->commentsmaxlen = 0; + + tSeq->baggage = NULL; + tSeq->baglen = 0; + tSeq->bagmaxlen = 0; + tSeq->group_number = 0; + tSeq->group_ID = 0; } - - -void -CopyRecord(to, from) -Sequence *from, *to; +void CopyRecord(to, from) Sequence *from, *to; { - int i; + int i; - InitRecord(to); + InitRecord(to); - strcpy(to->type, from->type); + strcpy(to->type, from->type); - strcpy(to->barcode, from->barcode); - strcpy(to->name, from->name); - strcpy(to->status,from->status); - strcpy(to->walk,from->walk); - strcpy(to->sequence_ID, from->sequence_ID); + strcpy(to->barcode, from->barcode); + strcpy(to->name, from->name); + strcpy(to->status, from->status); + strcpy(to->walk, from->walk); + strcpy(to->sequence_ID, from->sequence_ID); - if(from->c_elem != NULL) - { - to->seqlen = from->seqlen; - to->seqmaxlen = from->seqmaxlen; - to->c_elem = (char *)Calloc(to->seqmaxlen, 1); - strncpy(to->c_elem, from->c_elem, to->seqlen); - to->c_elem[to->seqlen] = '\0'; - } - - for (i = 0; i<6; i++) - { - to->creation_date[i] = from->creation_date[i]; - to->probing_date[i] = from->probing_date[i]; - to->autorad_date[i] = from->autorad_date[i]; - } + if (from->c_elem != NULL) { + to->seqlen = from->seqlen; + to->seqmaxlen = from->seqmaxlen; + to->c_elem = (char *)Calloc(to->seqmaxlen, 1); + strncpy(to->c_elem, from->c_elem, to->seqlen); + to->c_elem[to->seqlen] = '\0'; + } - strcpy(to->creator, from->creator); - strcpy(to->film, from->film); - strcpy(to->membrane, from->membrane); - strcpy(to->source_ID, from->source_ID); - strcpy(to->contig, from->contig); - to->laneset = from->laneset; - to->strandedness = from->strandedness; - to->orig_direction = from->orig_direction; - to->orig_strand = from->orig_strand; - to->direction = from->direction; - to->offset = from->offset; + for (i = 0; i < 6; i++) { + to->creation_date[i] = from->creation_date[i]; + to->probing_date[i] = from->probing_date[i]; + to->autorad_date[i] = from->autorad_date[i]; + } - if(from->comments != NULL) - { - to->commentsmaxlen = from->commentsmaxlen; - to->commentslen = from->commentslen; - to->comments = (char *)Calloc(to->commentsmaxlen, 1); - strncpy(to->comments, from->comments, to->commentslen); - to->comments[to->commentslen] = '\0'; - } + strcpy(to->creator, from->creator); + strcpy(to->film, from->film); + strcpy(to->membrane, from->membrane); + strcpy(to->source_ID, from->source_ID); + strcpy(to->contig, from->contig); + to->laneset = from->laneset; + to->strandedness = from->strandedness; + to->orig_direction = from->orig_direction; + to->orig_strand = from->orig_strand; + to->direction = from->direction; + to->offset = from->offset; - if(from->baggage != NULL) - { - to->baglen = from->baglen; - to->bagmaxlen = from->bagmaxlen; - to->baggage = (char *)Calloc(to->bagmaxlen, 1); - strncpy(to->baggage, from->baggage, to->baglen); - to->baggage[to->baglen] = '\0'; - } - - to->group_number = from->group_number; - to->group_ID = from->group_ID; + if (from->comments != NULL) { + to->commentsmaxlen = from->commentsmaxlen; + to->commentslen = from->commentslen; + to->comments = (char *)Calloc(to->commentsmaxlen, 1); + strncpy(to->comments, from->comments, to->commentslen); + to->comments[to->commentslen] = '\0'; + } + + if (from->baggage != NULL) { + to->baglen = from->baglen; + to->bagmaxlen = from->bagmaxlen; + to->baggage = (char *)Calloc(to->bagmaxlen, 1); + strncpy(to->baggage, from->baggage, to->baglen); + to->baggage[to->baglen] = '\0'; + } + + to->group_number = from->group_number; + to->group_ID = from->group_ID; } - - - /********* * * Clean the contents of a record without changing the memory size. * **********/ -void -CleanRecord(tSeq) -Sequence *tSeq; +void CleanRecord(tSeq) Sequence *tSeq; { - int i; - - strcpy(tSeq->type, "DNA"); - tSeq->name[0] = '\0'; - tSeq->barcode[0] = '\0'; - tSeq->status[0] = '\0'; - strcpy(tSeq->walk, "FALSE"); - tSeq->sequence_ID[0] = '\0'; + int i; - if(tSeq->c_elem != NULL) - tSeq->c_elem[0] = '\0'; - tSeq->seqlen = 0; - - for (i = 0; i<6; i++) - { - tSeq->creation_date[i] = 0; - tSeq->probing_date[i] = 0; - tSeq->autorad_date[i] = 0; - } + strcpy(tSeq->type, "DNA"); + tSeq->name[0] = '\0'; + tSeq->barcode[0] = '\0'; + tSeq->status[0] = '\0'; + strcpy(tSeq->walk, "FALSE"); + tSeq->sequence_ID[0] = '\0'; - tSeq->creator[0] = '\0'; - tSeq->film[0] = '\0'; - tSeq->membrane[0] = '\0'; - tSeq->source_ID[0] = '\0'; - tSeq->contig[0] = '\0'; - tSeq->laneset = -1; - tSeq->strandedness = 1; /* (1/2/0), default. primary. */ - tSeq->direction = 1; /* (1/-1/0),default. 5 to 3. */ - tSeq->orig_direction= 0; - tSeq->orig_strand = 0; - tSeq->offset = 0; + if (tSeq->c_elem != NULL) tSeq->c_elem[0] = '\0'; + tSeq->seqlen = 0; - if(tSeq->comments != NULL) - tSeq->comments[0] = '\0'; - tSeq->commentslen = 0; + for (i = 0; i < 6; i++) { + tSeq->creation_date[i] = 0; + tSeq->probing_date[i] = 0; + tSeq->autorad_date[i] = 0; + } - if(tSeq->baggage != NULL) - tSeq->baggage[0] = '\0'; - tSeq->baglen = 0; - tSeq->group_number = 0; - tSeq->group_ID = 0; + tSeq->creator[0] = '\0'; + tSeq->film[0] = '\0'; + tSeq->membrane[0] = '\0'; + tSeq->source_ID[0] = '\0'; + tSeq->contig[0] = '\0'; + tSeq->laneset = -1; + tSeq->strandedness = 1; /* (1/2/0), default. primary. */ + tSeq->direction = 1; /* (1/-1/0),default. 5 to 3. */ + tSeq->orig_direction = 0; + tSeq->orig_strand = 0; + tSeq->offset = 0; + + if (tSeq->comments != NULL) tSeq->comments[0] = '\0'; + tSeq->commentslen = 0; + + if (tSeq->baggage != NULL) tSeq->baggage[0] = '\0'; + tSeq->baglen = 0; + tSeq->group_number = 0; + tSeq->group_ID = 0; } - - /********* * * Free memory for a record. * **********/ -void -FreeRecord(tSeq) -Sequence **tSeq; +void FreeRecord(tSeq) Sequence **tSeq; { - Cfree((*tSeq)->c_elem); - Cfree((*tSeq)->comments); - Cfree((*tSeq)->baggage); - Cfree((*tSeq)); - (*tSeq)->c_elem = NULL; - (*tSeq)->comments = NULL; - (*tSeq)->baggage = NULL; - (*tSeq) = NULL; + Cfree((*tSeq)->c_elem); + Cfree((*tSeq)->comments); + Cfree((*tSeq)->baggage); + Cfree((*tSeq)); + (*tSeq)->c_elem = NULL; + (*tSeq)->comments = NULL; + (*tSeq)->baggage = NULL; + (*tSeq) = NULL; } - -static max_day[2][13] = { -{ 0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, -{ 0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} }; - - +static max_day[2][13] = {{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; /*********** * - * strToDate() locates first six integers and translates them + * strToDate() locates first six integers and translates them * into a date. * - * String should have the format of "mm/dd/yy hh/mn/sc xm", + * String should have the format of "mm/dd/yy hh/mn/sc xm", * with anything except digit as the delimiters. * * Order in the date array is (0->5): (yy mm dd hh mn sc). @@ -967,565 +908,515 @@ static max_day[2][13] = { * **********/ -int -strToDate(str, date) -const char *str; +int strToDate(str, date) const char *str; int date[]; { - int leap; - char temp_str[2]; - char longstr[256]; + int leap; + char temp_str[2]; + char longstr[256]; - /* locate 6 integers. */ - - strcpy(longstr, str); - strcat(longstr, " -1/-1/-1 "); - sscanf(longstr, "%d%*c%d%*c%d%*c%d%*c%d%*c%d%2s", - &date[1],&date[2],&date[0],&date[3], - &date[4],&date[5],temp_str); + /* locate 6 integers. */ - /* verify year. */ - if(date[0] >= 100) - date[0] -= 1900; + strcpy(longstr, str); + strcat(longstr, " -1/-1/-1 "); + sscanf(longstr, "%d%*c%d%*c%d%*c%d%*c%d%*c%d%2s", &date[1], &date[2], + &date[0], &date[3], &date[4], &date[5], temp_str); - /* verify month. */ - if(date[1] > 12 || date[1] < 1) - { - fprintf(stderr,"invalid month %s\n", str); - return FALSE; - } - - /* verify day. */ - if ((date[0] % 4 == 0 && date[0] % 100 != 0) || - date[0] % 400 == 0) - leap = 1; - else - leap = 0; - - if(date[2] > max_day[leap][date[1]] || - date[2] < 1) - { - fprintf(stderr,"invalid day %s\n", str); - return FALSE; - } + /* verify year. */ + if (date[0] >= 100) date[0] -= 1900; - /* verify time. */ - if (strncmp(temp_str,"pm",2)==0) - date[3] += 12; - if (date[3]<-1 || date[3]>23 || - date[4]<-1 || date[4]>59 || - date[5]<-1 || date[5]>59 ) - { - fprintf(stderr,"invalid time %s\n", str); - return FALSE; - } + /* verify month. */ + if (date[1] > 12 || date[1] < 1) { + fprintf(stderr, "invalid month %s\n", str); + return FALSE; + } - return TRUE; + /* verify day. */ + if ((date[0] % 4 == 0 && date[0] % 100 != 0) || date[0] % 400 == 0) + leap = 1; + else + leap = 0; + + if (date[2] > max_day[leap][date[1]] || date[2] < 1) { + fprintf(stderr, "invalid day %s\n", str); + return FALSE; + } + + /* verify time. */ + if (strncmp(temp_str, "pm", 2) == 0) date[3] += 12; + if (date[3] < -1 || date[3] > 23 || date[4] < -1 || date[4] > 59 || + date[5] < -1 || date[5] > 59) { + fprintf(stderr, "invalid time %s\n", str); + return FALSE; + } + + return TRUE; } - /********** - * + * * Default_IUPAC_Trans() translates an ASCII IUPAC code into * an (char) integer. * **********/ -char -Default_IUPAC_Trans(base) +char Default_IUPAC_Trans(base) char base; { - int i; - char c; - c = base | 32; + int i; + char c; + c = base | 32; - if(c == 'u') - return (char ) 8; + if (c == 'u') return (char)8; - if(c == 'p') - return (char) 5; + if (c == 'p') return (char)5; - for(i=0; i<16; i++) - { - if(c == Default_DNA_Trans[i]) - { - return ( (char) i); + for (i = 0; i < 16; i++) { + if (c == Default_DNA_Trans[i]) { + return ((char)i); + } } - } - fprintf(stderr, "Character %c is not IUPAC coded.\n", base); - return -1; + fprintf(stderr, "Character %c is not IUPAC coded.\n", base); + return -1; } char *uniqueID(); /*********** - * + * * MakeConsensus() takes an array of aligned sequence and an * initialized 'Sequence' consensus. It modifies the consensus. * - * The memory that 'consensus' has located will be reused, and + * The memory that 'consensus' has located will be reused, and * consensus->seqmaxlen will be modified if necessary. * * Returns TRUE if successful, FALSE otherwise. * **********/ -int -MakeConsensus(aligned, numOfAligned, consensus, group) -Sequence aligned[]; /* input. */ -int numOfAligned; /* input. */ -Sequence *consensus; /* input and output. */ -int group; /* Group number (if zero, use all groups) */ +int MakeConsensus(aligned, numOfAligned, consensus, group) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ +int group; /* Group number (if zero, use all groups) */ { - char occurence; - int i, j, index; - int max_cons = INT_MIN; - int min_offset = INT_MAX; - char temp_str[2]; - unsigned char case_bit; + char occurence; + int i, j, index; + int max_cons = INT_MIN; + int min_offset = INT_MAX; + char temp_str[2]; + unsigned char case_bit; - /* - * Search for the minimun offset. - */ - - for (i=0; ioffset = min_offset; - - if(aligned[0].contig[0] != '\0') - { - strcpy(consensus->name, aligned[0].contig); - strcat(consensus->name, "."); - } - else if(strncmp(aligned[0].name, "cons.", 5) != 0) - { - strcpy(consensus->name, "cons."); - strcat(consensus->name, aligned[0].name); - } - strcpy(consensus->sequence_ID, uniqueID()); - strcpy(consensus->contig, aligned[0].contig); - - for(j=min_offset; j= aligned[i].offset && - j < aligned[i].offset+aligned[i].seqlen) - { - index = j-aligned[i].offset; - - if(aligned[i].c_elem[index] == '-') - case_bit = 32; - else if(case_bit == 0) - case_bit |= (aligned[i].c_elem[index] & 32); - - occurence = occurence | - Default_IUPAC_Trans(aligned[i].c_elem[index]); - - if(occurence != 1 && occurence != 2 && - occurence != 4 && occurence != 8) - case_bit = 32; - /* - printf("%1c", aligned[i].c_elem[index]); - */ - } - /* - else - printf(" "); - */ - } - } - - sprintf(temp_str, "%1c", Default_DNA_Trans[(int) occurence]); - if(case_bit == 0) - temp_str[0] = toupper(temp_str[0]); - - if(InsertElems(consensus, j, temp_str)== FALSE) - return FALSE; /* - printf(" cons[%d]=%1c\n", j - min_offset, - consensus->c_elem[j - min_offset]); - */ - } - return TRUE; + * Search for the minimun offset. + */ + + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + SeqNormal(&aligned[i]); + min_offset = MIN(min_offset, aligned[i].offset); + max_cons = MAX(max_cons, + aligned[i].offset + aligned[i].seqlen); + } + } + + /* + * Decide consensus base by base. + */ + + CleanRecord(consensus); + consensus->offset = min_offset; + + if (aligned[0].contig[0] != '\0') { + strcpy(consensus->name, aligned[0].contig); + strcat(consensus->name, "."); + } + else if (strncmp(aligned[0].name, "cons.", 5) != 0) { + strcpy(consensus->name, "cons."); + strcat(consensus->name, aligned[0].name); + } + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + for (j = min_offset; j < max_cons; j++) { + occurence = 00; + case_bit = 0; + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + if (j >= aligned[i].offset && + j < aligned[i].offset + aligned[i].seqlen) { + index = j - aligned[i].offset; + + if (aligned[i].c_elem[index] == '-') + case_bit = 32; + else if (case_bit == 0) + case_bit |= + (aligned[i].c_elem[index] & + 32); + + occurence = + occurence | + Default_IUPAC_Trans( + aligned[i].c_elem[index]); + + if (occurence != 1 && occurence != 2 && + occurence != 4 && occurence != 8) + case_bit = 32; + /* + printf("%1c", aligned[i].c_elem[index]); + */ + } + /* + else + printf(" "); + */ + } + } + + sprintf(temp_str, "%1c", Default_DNA_Trans[(int)occurence]); + if (case_bit == 0) temp_str[0] = toupper(temp_str[0]); + + if (InsertElems(consensus, j, temp_str) == FALSE) return FALSE; + /* + printf(" cons[%d]=%1c\n", j - min_offset, + consensus->c_elem[j - min_offset]); + */ + } + return TRUE; } - - /*********** - * + * * MakeScore() takes an array of aligned sequence, and generates - * a consensus. Note, memory for (Sequence* consensus) should be + * a consensus. Note, memory for (Sequence* consensus) should be * located before it is passed to this function. - * + * * Returns TRUE if successful, FALSE otherwise. * **********/ -int -MakeScore(aligned, numOfAligned, consensus, group) -Sequence aligned[]; /* input. */ -int numOfAligned; /* input. */ -Sequence *consensus; /* input and output. */ +int MakeScore(aligned, numOfAligned, consensus, group) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ int group; { - int i, j, index, score; - int max_cons = INT_MIN; - int min_offset = INT_MAX; - int As, Cs, Ts, Gs, Ns, tot_in_grp; - char temp_str[2], occurence, base; - int max_occ; + int i, j, index, score; + int max_cons = INT_MIN; + int min_offset = INT_MAX; + int As, Cs, Ts, Gs, Ns, tot_in_grp; + char temp_str[2], occurence, base; + int max_occ; - static char map[17] = "0123456789ABCDEF"; + static char map[17] = "0123456789ABCDEF"; - /* - * Search for the minimum offset. - */ - - for (i=0; ioffset = min_offset; - - if(aligned[0].contig[0] != '\0') - { - strcpy(consensus->name, aligned[0].contig); - strcat(consensus->name, "."); - } - else if(strncmp(aligned[0].name, "cons.", 5) != 0) - { - strcpy(consensus->name, "cons."); - strcat(consensus->name, aligned[0].name); - } - strcpy(consensus->sequence_ID, uniqueID()); - strcpy(consensus->contig, aligned[0].contig); - - for(j=min_offset; j= aligned[i].offset && - j < aligned[i].offset+aligned[i].seqlen) - { - tot_in_grp++; - index = j-aligned[i].offset; - - /* - occurence = Default_IUPAC_Trans(aligned[i].c_elem[index]); - if((occurence & 01) == 01) - As++; - if((occurence & 02) == 02) - Cs++; - if((occurence & 04) == 04) - Gs++; - if((occurence & 010) == 010) - Ts++; - */ - - base = (aligned[i].c_elem[index]|32); - - if(base == 'a') - As++; - else if(base == 'c') - Cs++; - else if(base == 'g') - Gs++; - else if(base == 't') - Ts++; - else if(base == 'n' || base == '-') - Ns++; - /* - printf("%1c", aligned[i].c_elem[index]); - */ - } - /* - else - printf(" "); - */ - } - } - - max_occ = MAX(As, MAX(Cs, MAX(Gs,Ts))); - - /* socre = [0,E], F:all mismatches are either 'n' or '-' */ - if(Ns != 0 && max_occ+Ns == tot_in_grp) - score = 15; - else - score = max_occ*14/tot_in_grp; - - /* - if( score > 0xF ) - { - if (InsertElems(consensus, j, "F") == FALSE) - { - return FALSE; - } - } - else - { - */ - - sprintf(temp_str,"%1c", map[score]); - if(InsertElems(consensus, j, temp_str) == FALSE) - { - return FALSE; - } - /* - printf(" %2d-%2d-%2d-%2d %2d cons[%d]=%1c\n", - Ts, Gs, Cs, As, score, j, - consensus->c_elem[j]); - */ - } - return TRUE; + * Search for the minimum offset. + */ + + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + SeqNormal(&aligned[i]); + min_offset = MIN(min_offset, aligned[i].offset); + max_cons = MAX(max_cons, + aligned[i].offset + aligned[i].seqlen); + } + } + + /* + * Decide consensus base by base. + */ + CleanRecord(consensus); + consensus->offset = min_offset; + + if (aligned[0].contig[0] != '\0') { + strcpy(consensus->name, aligned[0].contig); + strcat(consensus->name, "."); + } + else if (strncmp(aligned[0].name, "cons.", 5) != 0) { + strcpy(consensus->name, "cons."); + strcat(consensus->name, aligned[0].name); + } + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + for (j = min_offset; j < max_cons; j++) { + As = Cs = Ts = Gs = Ns = 0; + tot_in_grp = 0; + occurence = 00; + + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + if (j >= aligned[i].offset && + j < aligned[i].offset + aligned[i].seqlen) { + tot_in_grp++; + index = j - aligned[i].offset; + + /* + occurence = + Default_IUPAC_Trans(aligned[i].c_elem[index]); + if((occurence & 01) == 01) + As++; + if((occurence & 02) == 02) + Cs++; + if((occurence & 04) == 04) + Gs++; + if((occurence & 010) == 010) + Ts++; + */ + + base = (aligned[i].c_elem[index] | 32); + + if (base == 'a') + As++; + else if (base == 'c') + Cs++; + else if (base == 'g') + Gs++; + else if (base == 't') + Ts++; + else if (base == 'n' || base == '-') + Ns++; + /* + printf("%1c", + aligned[i].c_elem[index]); + */ + } + /* + else + printf(" "); + */ + } + } + + max_occ = MAX(As, MAX(Cs, MAX(Gs, Ts))); + + /* socre = [0,E], F:all mismatches are either 'n' or '-' */ + if (Ns != 0 && max_occ + Ns == tot_in_grp) + score = 15; + else + score = max_occ * 14 / tot_in_grp; + + /* + if( score > 0xF ) + { + if (InsertElems(consensus, j, "F") == FALSE) + { + return FALSE; + } + } + else + { + */ + + sprintf(temp_str, "%1c", map[score]); + if (InsertElems(consensus, j, temp_str) == FALSE) { + return FALSE; + } + + /* + printf(" %2d-%2d-%2d-%2d %2d cons[%d]=%1c\n", + Ts, Gs, Cs, As, score, j, + consensus->c_elem[j]); + */ + } + return TRUE; } - /*********** - * + * * MakePhyloMask() takes an array of aligned sequence, and generates * a mask that has a '0' for all columns except the columns which contain * a, c, g, t and u only. - * + * * Returns TRUE if successful, FALSE otherwise. * **********/ -int -MakePhyloMask(aligned, numOfAligned, consensus, group, acgtu) -Sequence aligned[]; /* input. */ -int numOfAligned; /* input. */ -Sequence *consensus; /* input and output. */ +int MakePhyloMask(aligned, numOfAligned, consensus, group, acgtu) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ int acgtu[]; int group; { - int i, j, cnt, max_cons = INT_MIN, min_offset = INT_MAX; + int i, j, cnt, max_cons = INT_MIN, min_offset = INT_MAX; - /* - * Search for the minimum offset. - */ + /* + * Search for the minimum offset. + */ - for (i=0; ioffset = min_offset; - strcpy(consensus->name, "mask"); - strcpy(consensus->type, "MASK"); - strcpy(consensus->sequence_ID, uniqueID()); - strcpy(consensus->contig, aligned[0].contig); - - consensus->seqlen = max_cons - min_offset; - if(consensus->seqmaxlen == 0) - { - consensus->c_elem = (char *)Calloc(max_cons - min_offset+5, 1); - consensus->seqmaxlen = max_cons - min_offset + 5; - } - else if(consensus->seqmaxlen < max_cons - min_offset) - { - consensus->seqmaxlen = max_cons - min_offset + 5; - consensus->c_elem = (char *)Realloc(consensus->c_elem, - max_cons - min_offset + 5); - } - - cnt = 0; - for(j=min_offset; jc_elem[j-min_offset] = '1'; - for(i=0; i= aligned[i].offset+aligned[i].seqlen || - acgtu[aligned[i].c_elem[j-aligned[i].offset]] == 0) - { - consensus->c_elem[j-min_offset] = '0'; - cnt++; - break; + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + SeqNormal(&aligned[i]); + min_offset = MIN(min_offset, aligned[i].offset); + max_cons = MAX(max_cons, + aligned[i].offset + aligned[i].seqlen); } - } } - } - fprintf(stderr, "\nNumber of 1s in mask: %d\n", max_cons-min_offset-cnt); - fprintf(stderr, "Number of 0s in mask: %d\n\n", cnt); - return TRUE; + + /* + * Decide consensus base by base. + */ + CleanRecord(consensus); + consensus->offset = min_offset; + strcpy(consensus->name, "mask"); + strcpy(consensus->type, "MASK"); + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + consensus->seqlen = max_cons - min_offset; + if (consensus->seqmaxlen == 0) { + consensus->c_elem = + (char *)Calloc(max_cons - min_offset + 5, 1); + consensus->seqmaxlen = max_cons - min_offset + 5; + } + else if (consensus->seqmaxlen < max_cons - min_offset) { + consensus->seqmaxlen = max_cons - min_offset + 5; + consensus->c_elem = (char *)Realloc(consensus->c_elem, + max_cons - min_offset + 5); + } + + cnt = 0; + for (j = min_offset; j < max_cons; j++) { + consensus->c_elem[j - min_offset] = '1'; + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + if (j < aligned[i].offset || + j >= + aligned[i].offset + aligned[i].seqlen || + acgtu[aligned[i] + .c_elem[j - aligned[i].offset]] == + 0) { + consensus->c_elem[j - min_offset] = '0'; + cnt++; + break; + } + } + } + } + fprintf(stderr, "\nNumber of 1s in mask: %d\n", + max_cons - min_offset - cnt); + fprintf(stderr, "Number of 0s in mask: %d\n\n", cnt); + return TRUE; } - /*********** - * + * * MajorityCons() takes an array of aligned sequence, and generates - * a MAJORITY consensus. - * Note, memory for (Sequence* consensus) should be + * a MAJORITY consensus. + * Note, memory for (Sequence* consensus) should be * located before it is passed to this function. - * + * * Returns TRUE if successful, FALSE otherwise. * **********/ -int -MajorityCons(aligned, numOfAligned, consensus, group, major_perc) -Sequence aligned[]; /* input. */ -int numOfAligned; /* input. */ -Sequence *consensus; /* input and output. */ +int MajorityCons(aligned, numOfAligned, consensus, group, major_perc) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ int group, major_perc; { - int i, j, index, score, ii, base, max; - int max_cons = INT_MIN; - int min_offset = INT_MAX; - char temp_str[2], occurence; - int *cnts, tot_in_grp; - unsigned char case_bit; + int i, j, index, score, ii, base, max; + int max_cons = INT_MIN; + int min_offset = INT_MAX; + char temp_str[2], occurence; + int *cnts, tot_in_grp; + unsigned char case_bit; - cnts = (int *)Calloc(16, sizeof(int)); + cnts = (int *)Calloc(16, sizeof(int)); - /* - * Search for the minimum offset. - */ + /* + * Search for the minimum offset. + */ - for (i=0; ioffset = min_offset; - - if(aligned[0].contig[0] != '\0') - { - strcpy(consensus->name, aligned[0].contig); - strcat(consensus->name, "."); - } - else if(strncmp(aligned[0].name, "cons.", 5) != 0) - { - strcpy(consensus->name, "cons."); - strcat(consensus->name, aligned[0].name); - } - strcpy(consensus->sequence_ID, uniqueID()); - strcpy(consensus->contig, aligned[0].contig); - - for(j=min_offset; j= aligned[i].offset && - j < aligned[i].offset+aligned[i].seqlen) - { - tot_in_grp++; - index = j-aligned[i].offset; - - if(aligned[i].c_elem[index] == '-') - case_bit = 32; - else if(case_bit == 0) - case_bit |= (aligned[i].c_elem[index] & 32); - - occurence |= - Default_IUPAC_Trans(aligned[i].c_elem[index]); - cnts[(int)Default_IUPAC_Trans(aligned[i].c_elem[index])]++; - - if(case_bit == 0 && - occurence != 1 && occurence != 2 && - occurence != 4 && occurence != 8) - case_bit = 32; + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + SeqNormal(&aligned[i]); + min_offset = MIN(min_offset, aligned[i].offset); + max_cons = MAX(max_cons, + aligned[i].offset + aligned[i].seqlen); } - } } - max = 0; - for(ii = 0; ii < 16; ii++) - { - if(cnts[ii] > max) - { - max = cnts[ii]; - base = ii; - } - } - if(max*100/tot_in_grp >= major_perc) - { - /* follow the majority rule. */ - sprintf(temp_str,"%1c", Default_DNA_Trans[base]); - } - else - { - /* use IUPAC code. */ - sprintf(temp_str,"%1c", - Default_DNA_Trans[(int) occurence]); - } + /* + * Decide consensus base by base. + */ - if(case_bit == 0) - temp_str[0] = toupper(temp_str[0]); - - if(InsertElems(consensus, j, temp_str) == FALSE) - { - return FALSE; + CleanRecord(consensus); + consensus->offset = min_offset; + + if (aligned[0].contig[0] != '\0') { + strcpy(consensus->name, aligned[0].contig); + strcat(consensus->name, "."); } - } - return TRUE; + else if (strncmp(aligned[0].name, "cons.", 5) != 0) { + strcpy(consensus->name, "cons."); + strcat(consensus->name, aligned[0].name); + } + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + for (j = min_offset; j < max_cons; j++) { + case_bit = 0; + occurence = 00; + tot_in_grp = 0; + for (ii = 0; ii < 16; ii++) cnts[ii] = 0; + + for (i = 0; i < numOfAligned; i++) { + if (group == 0 || aligned[i].group_number == group) { + if (j >= aligned[i].offset && + j < aligned[i].offset + aligned[i].seqlen) { + tot_in_grp++; + index = j - aligned[i].offset; + + if (aligned[i].c_elem[index] == '-') + case_bit = 32; + else if (case_bit == 0) + case_bit |= + (aligned[i].c_elem[index] & + 32); + + occurence |= Default_IUPAC_Trans( + aligned[i].c_elem[index]); + cnts[(int)Default_IUPAC_Trans( + aligned[i].c_elem[index])]++; + + if (case_bit == 0 && occurence != 1 && + occurence != 2 && occurence != 4 && + occurence != 8) + case_bit = 32; + } + } + } + + max = 0; + for (ii = 0; ii < 16; ii++) { + if (cnts[ii] > max) { + max = cnts[ii]; + base = ii; + } + } + if (max * 100 / tot_in_grp >= major_perc) { + /* follow the majority rule. */ + sprintf(temp_str, "%1c", Default_DNA_Trans[base]); + } + else { + /* use IUPAC code. */ + sprintf(temp_str, "%1c", + Default_DNA_Trans[(int)occurence]); + } + + if (case_bit == 0) temp_str[0] = toupper(temp_str[0]); + + if (InsertElems(consensus, j, temp_str) == FALSE) { + return FALSE; + } + } + return TRUE; } - /*********** * * ReadGDEtoHGL() reads a GDE formated file into an array of HGL structure. @@ -1534,292 +1425,277 @@ int group, major_perc; * ***********/ -int -ReadGDEtoHGL(fp, tSeq_arr) +int ReadGDEtoHGL(fp, tSeq_arr) FILE *fp; Sequence **tSeq_arr; { - char line[MAXLINELEN]; - int ptr, num_seq, max_num_seq = 20; - int seq_len = 200; - char *newline; + char line[MAXLINELEN]; + int ptr, num_seq, max_num_seq = 20; + int seq_len = 200; + char *newline; - (*tSeq_arr) = (Sequence *)Calloc(max_num_seq, sizeof(Sequence)); - num_seq = -1; - while(fgets(line, MAXLINELEN-2, fp) != NULL) /* spaces for \n\0 */ - { - /* ptr points to the last char. */ - ptr = strlen(line)-1; + (*tSeq_arr) = (Sequence *)Calloc(max_num_seq, sizeof(Sequence)); + num_seq = -1; + while (fgets(line, MAXLINELEN - 2, fp) != NULL) /* spaces for \n\0 */ + { + /* ptr points to the last char. */ + ptr = strlen(line) - 1; - /* clear up the tail. */ - while(ptr>=0 && (line[ptr] == '\n' || - line[ptr] == ' ' || - line[ptr] == '\t')) - ptr--; - line[ptr+1] = '\0'; + /* clear up the tail. */ + while (ptr >= 0 && (line[ptr] == '\n' || line[ptr] == ' ' || + line[ptr] == '\t')) + ptr--; + line[ptr + 1] = '\0'; - if(ptr <= 0) - { - /* it is an empty line. */ - } - else if(line[0] == '#') - { - if(++num_seq == max_num_seq) - { - max_num_seq *= 2; - /* printf("max_num_seq = %d\n", max_num_seq); */ - (*tSeq_arr) = (Sequence *)Realloc((*tSeq_arr), - max_num_seq*sizeof(Sequence)); - } - - InitRecord((*tSeq_arr)[num_seq]); - - if (line[ptr] == '<') - { - (*tSeq_arr)[num_seq].direction = 2; /* 3to5 */ - line[ptr] = '\0'; - } - else if (line[ptr] == '>') - { - (*tSeq_arr)[num_seq].direction = 1; /* 5to3 */ - line[ptr] = '\0'; - } - strcpy((*tSeq_arr)[num_seq].sequence_ID, line+1); - } - else - { - ptr = 0; - if((*tSeq_arr)[num_seq].seqlen == 0) - { - /* determine the offset. */ - while(line[ptr] != '\0' && line[ptr] == '-') - { - ptr++; - } - (*tSeq_arr)[num_seq].offset += ptr; - } - - if(line[ptr] != '\0') - { - newline = line + ptr; - - if((*tSeq_arr)[num_seq].seqmaxlen == 0) - { - (*tSeq_arr)[num_seq].c_elem = - (char *)Calloc(seq_len, 1); - (*tSeq_arr)[num_seq].c_elem[0] = '\0'; - (*tSeq_arr)[num_seq].seqmaxlen = seq_len; + if (ptr <= 0) { + /* it is an empty line. */ } - else - { - while((*tSeq_arr)[num_seq].seqlen + strlen(newline) + 1 - > (*tSeq_arr)[num_seq].seqmaxlen) - { - seq_len *= 2; - (*tSeq_arr)[num_seq].c_elem = (char *) - Realloc((*tSeq_arr)[num_seq].c_elem, seq_len); - (*tSeq_arr)[num_seq].seqmaxlen = seq_len; - } - } - strcat((*tSeq_arr)[num_seq].c_elem, newline); - (*tSeq_arr)[num_seq].seqlen = strlen((*tSeq_arr)[num_seq].c_elem); - } - } - } + else if (line[0] == '#') { + if (++num_seq == max_num_seq) { + max_num_seq *= 2; + /* printf("max_num_seq = %d\n", max_num_seq); */ + (*tSeq_arr) = (Sequence *)Realloc( + (*tSeq_arr), + max_num_seq * sizeof(Sequence)); + } - return (num_seq + 1); + InitRecord((*tSeq_arr)[num_seq]); + + if (line[ptr] == '<') { + (*tSeq_arr)[num_seq].direction = 2; /* 3to5 */ + line[ptr] = '\0'; + } + else if (line[ptr] == '>') { + (*tSeq_arr)[num_seq].direction = 1; /* 5to3 */ + line[ptr] = '\0'; + } + strcpy((*tSeq_arr)[num_seq].sequence_ID, line + 1); + } + else { + ptr = 0; + if ((*tSeq_arr)[num_seq].seqlen == 0) { + /* determine the offset. */ + while (line[ptr] != '\0' && line[ptr] == '-') { + ptr++; + } + (*tSeq_arr)[num_seq].offset += ptr; + } + + if (line[ptr] != '\0') { + newline = line + ptr; + + if ((*tSeq_arr)[num_seq].seqmaxlen == 0) { + (*tSeq_arr)[num_seq].c_elem = + (char *)Calloc(seq_len, 1); + (*tSeq_arr)[num_seq].c_elem[0] = '\0'; + (*tSeq_arr)[num_seq].seqmaxlen = + seq_len; + } + else { + while ((*tSeq_arr)[num_seq].seqlen + + strlen(newline) + 1 > + (*tSeq_arr)[num_seq].seqmaxlen) { + seq_len *= 2; + (*tSeq_arr)[num_seq] + .c_elem = (char *)Realloc( + (*tSeq_arr)[num_seq].c_elem, + seq_len); + (*tSeq_arr)[num_seq].seqmaxlen = + seq_len; + } + } + strcat((*tSeq_arr)[num_seq].c_elem, newline); + (*tSeq_arr)[num_seq].seqlen = + strlen((*tSeq_arr)[num_seq].c_elem); + } + } + } + + return (num_seq + 1); } - - - /******** * * InsertElems returns TRUE if successful, FALSE otherwise. * ********/ -int -InsertElems(seq,pos,c) -Sequence *seq; /* Sequence */ -int pos; /* Position (in respect to the master consensus) - * to insert BEFORE - * always move string to the right. */ -char c[]; /*Null terminated array of elements to insert */ +int InsertElems(seq, pos, c) +Sequence *seq; /* Sequence */ +int pos; /* Position (in respect to the master consensus) + * to insert BEFORE + * always move string to the right. */ +char c[]; /*Null terminated array of elements to insert */ { - int dashes, j,len; + int dashes, j, len; - len = strlen(c); - - if(seq->seqlen == 0) - { - /* get rid of '-'s at right. */ - /* - dashes = len-1; - while(dashes >= 0 && c[dashes] == '-') - dashes--; - if(dashes < 0) - { - seq->offset = pos; - return TRUE; - } - c[dashes+1] = '\0'; - */ - - /* clear out '-'s at left. */ - dashes = 0; - /* - while(c[dashes] == '-') - dashes++; - - c += dashes; len = strlen(c); - pos += dashes; - */ - if(seq->seqmaxlen == 0) - { - seq->c_elem = (char *)Calloc(len+1, 1); - seq->seqmaxlen = len + 1; - } - else if(len+1 >= seq->seqmaxlen) - { - seq->c_elem = (char *)Realloc(seq->c_elem, len+1); - seq->seqmaxlen = len+1; + if (seq->seqlen == 0) { + /* get rid of '-'s at right. */ + /* + dashes = len-1; + while(dashes >= 0 && c[dashes] == '-') + dashes--; + if(dashes < 0) + { + seq->offset = pos; + return TRUE; + } + c[dashes+1] = '\0'; + */ + + /* clear out '-'s at left. */ + dashes = 0; + /* + while(c[dashes] == '-') + dashes++; + + c += dashes; + len = strlen(c); + pos += dashes; + */ + + if (seq->seqmaxlen == 0) { + seq->c_elem = (char *)Calloc(len + 1, 1); + seq->seqmaxlen = len + 1; + } + else if (len + 1 >= seq->seqmaxlen) { + seq->c_elem = (char *)Realloc(seq->c_elem, len + 1); + seq->seqmaxlen = len + 1; + } + + strcpy(seq->c_elem, c); + seq->seqlen = len; + seq->offset = pos; + return TRUE; } - strcpy(seq->c_elem, c); - seq->seqlen = len; - seq->offset = pos; - return TRUE; - } - - /* to make sure there is a space for '\0'. */ - if(seq->seqlen > seq->seqmaxlen) - { - fprintf(stderr, - "InsertElems(): seqlen>seqmaxlen. Something is wrong.\n"); - return FALSE; - } - else - { - while(seq->seqlen+1 >= seq->seqmaxlen) - { - seq->seqmaxlen *= 2; - seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); + /* to make sure there is a space for '\0'. */ + if (seq->seqlen > seq->seqmaxlen) { + fprintf( + stderr, + "InsertElems(): seqlen>seqmaxlen. Something is wrong.\n"); + return FALSE; } - } - seq->c_elem[seq->seqlen] = '\0'; - - if(pos < seq->offset) /* insert to the left of the seq. */ - { - /* ignore the dashes at the left. */ - dashes = 0; - /* - while(dashes < len && c[dashes] == '-') - dashes++; - if(c[dashes] == '\0') - { - seq->offset += len; - return TRUE; + else { + while (seq->seqlen + 1 >= seq->seqmaxlen) { + seq->seqmaxlen *= 2; + seq->c_elem = + (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } } - c += dashes; - len -= dashes; - */ + seq->c_elem[seq->seqlen] = '\0'; - if(seq->seqlen + len + seq->offset - pos > seq->seqmaxlen) - { - seq->seqmaxlen = seq->seqlen+len+seq->offset-pos+256; - seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); - } + if (pos < seq->offset) /* insert to the left of the seq. */ + { + /* ignore the dashes at the left. */ + dashes = 0; + /* + while(dashes < len && c[dashes] == '-') + dashes++; + if(c[dashes] == '\0') + { + seq->offset += len; + return TRUE; + } + c += dashes; + len -= dashes; + */ - /* copy the old string including the last '\0'. */ - for(j=seq->seqlen; j>=0; j--) - seq->c_elem[j+len+seq->offset-pos] = seq->c_elem[j]; + if (seq->seqlen + len + seq->offset - pos > seq->seqmaxlen) { + seq->seqmaxlen = + seq->seqlen + len + seq->offset - pos + 256; + seq->c_elem = + (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } - /* insert dashes. */ - for(j=len; joffset-pos; j++) - seq->c_elem[j] = '-'; + /* copy the old string including the last '\0'. */ + for (j = seq->seqlen; j >= 0; j--) + seq->c_elem[j + len + seq->offset - pos] = + seq->c_elem[j]; - /* copy the inserted string. */ - for(j=0; jc_elem[j] = c[j]; + /* insert dashes. */ + for (j = len; j < len + seq->offset - pos; j++) + seq->c_elem[j] = '-'; - /* detector. */ - if(c[j] != '\0') - fprintf(stderr, "InsertElems: Problem.....\n"); + /* copy the inserted string. */ + for (j = 0; j < len; j++) seq->c_elem[j] = c[j]; - seq->seqlen = strlen(seq->c_elem); + /* detector. */ + if (c[j] != '\0') + fprintf(stderr, "InsertElems: Problem.....\n"); - /* seq->offset = pos; commented on 6-3-91 */ - seq->offset = pos + dashes; - if(dashes > 0) - printf("\nInsertElems(): dashes is not zero.\n\n"); - } + seq->seqlen = strlen(seq->c_elem); - else if(pos - seq->offset >= seq->seqlen) /* insert to the right. */ - { - /* ignore the dashes at the right. */ - /* - dashes = len -1; - while(dashes >= 0 && c[dashes] == '-') - dashes--; - if(dashes < 0) - return TRUE; - len = dashes+1; - c[len] = '\0'; - */ + /* seq->offset = pos; commented on 6-3-91 */ + seq->offset = pos + dashes; + if (dashes > 0) + printf("\nInsertElems(): dashes is not zero.\n\n"); + } - if(pos - seq->offset + len > seq->seqmaxlen) - { - seq->seqmaxlen = pos - seq->offset + len + 256; - seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); - } + else if (pos - seq->offset >= seq->seqlen) /* insert to the right. */ + { + /* ignore the dashes at the right. */ + /* + dashes = len -1; + while(dashes >= 0 && c[dashes] == '-') + dashes--; + if(dashes < 0) + return TRUE; + len = dashes+1; + c[len] = '\0'; + */ - /* insert dashes. */ - for(j=seq->seqlen; joffset; j++) - seq->c_elem[j] = '-'; + if (pos - seq->offset + len > seq->seqmaxlen) { + seq->seqmaxlen = pos - seq->offset + len + 256; + seq->c_elem = + (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } - /* copy the inserted string. */ - for(j=0; jc_elem[pos - seq->offset + j] = c[j]; - seq->c_elem[pos-seq->offset+len] = '\0'; + /* insert dashes. */ + for (j = seq->seqlen; j < pos - seq->offset; j++) + seq->c_elem[j] = '-'; - /* detector. */ - if(c[j] != '\0') - fprintf(stderr, "InsertElems: Problem too .....\n"); + /* copy the inserted string. */ + for (j = 0; j < len; j++) + seq->c_elem[pos - seq->offset + j] = c[j]; + seq->c_elem[pos - seq->offset + len] = '\0'; - seq->seqlen = strlen(seq->c_elem); - } - else /* insert into the seq. */ - { - if(seq->seqlen + len > seq->seqmaxlen) - { - seq->seqmaxlen = seq->seqlen + len + 256; - seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); - } + /* detector. */ + if (c[j] != '\0') + fprintf(stderr, "InsertElems: Problem too .....\n"); - /* move the bottom part of the older string including the last '\0'. */ - for(j=seq->seqlen; j>=pos-seq->offset; j--) - seq->c_elem[j+len] = seq->c_elem[j]; + seq->seqlen = strlen(seq->c_elem); + } + else /* insert into the seq. */ + { + if (seq->seqlen + len > seq->seqmaxlen) { + seq->seqmaxlen = seq->seqlen + len + 256; + seq->c_elem = + (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } - /* copy the inserted string. */ - for(j=0; jc_elem[pos - seq->offset + j] = c[j]; + /* move the bottom part of the older string including the last + * '\0'. */ + for (j = seq->seqlen; j >= pos - seq->offset; j--) + seq->c_elem[j + len] = seq->c_elem[j]; - /* detector. */ - if(c[j] != '\0') - fprintf(stderr, "InsertElems: Problem too too .....\n"); + /* copy the inserted string. */ + for (j = 0; j < len; j++) + seq->c_elem[pos - seq->offset + j] = c[j]; - seq->seqlen = strlen(seq->c_elem); - } + /* detector. */ + if (c[j] != '\0') + fprintf(stderr, + "InsertElems: Problem too too .....\n"); - return TRUE; + seq->seqlen = strlen(seq->c_elem); + } + + return TRUE; } - - - /****************************************************************** * * int GetArgs(argArray, numArgs) @@ -1830,98 +1706,88 @@ char c[]; /*Null terminated array of elements to insert */ * ******************************************************************/ -#define MAX_ARGS 50 /* maximum args this can process */ +#define MAX_ARGS 50 /* maximum args this can process */ -int -GetArgs(argArray, numArgs, argc, argv) +int GetArgs(argArray, numArgs, argc, argv) Args *argArray; -int numArgs; +int numArgs; int argc; char **argv; { - int i, j; - Args *curarg; - int noArgOK = TRUE; + int i, j; + Args *curarg; + int noArgOK = TRUE; - if ((argArray == NULL) || (numArgs == 0) || (numArgs > MAX_ARGS)) - { - fprintf(stderr, "GetArgs: Invalid number of args.\n"); - return FALSE; - } - - /* - * Test if all are either 'default' or 'optional'. - */ - curarg = argArray; - for (i=0; istrvalue[0] == '\0' && curarg->optional == 'F') - { - noArgOK = FALSE; - break; + if ((argArray == NULL) || (numArgs == 0) || (numArgs > MAX_ARGS)) { + fprintf(stderr, "GetArgs: Invalid number of args.\n"); + return FALSE; } - } - - /* - * show usage if some arg is required but no arg is - * supllied on command line. - */ - if(noArgOK == FALSE && argc == 1) - { - fprintf(stderr, "\n%s arguments:\n\n", argv[0]); + + /* + * Test if all are either 'default' or 'optional'. + */ curarg = argArray; - - for (i = 0; i < numArgs; i++, curarg++) - { - fprintf(stderr, " -%c %s ", curarg->tag, curarg->prompt); - if (curarg->optional == 'T') - fprintf(stderr, " [Optional]"); - fprintf(stderr, "\n"); - if (curarg->strvalue[0] != '\0') - fprintf(stderr, " default = %s\n", curarg->strvalue); - } - fprintf(stderr, "\n"); - return FALSE; - } - - /* - * Process - */ - for (i = 1; i < argc; i++) - { - if (argv[i][0] != '-') - { - fprintf(stderr, "Arguments must start with -"); - return FALSE; + for (i = 0; i < numArgs; i++, curarg++) { + if (curarg->strvalue[0] == '\0' && curarg->optional == 'F') { + noArgOK = FALSE; + break; + } } - /* check the tag. */ - curarg = argArray; - for (j = 0; j < numArgs; j++, curarg++) - { - if ((argv[i][1]|32) == (curarg->tag|32)) - break; + /* + * show usage if some arg is required but no arg is + * supllied on command line. + */ + if (noArgOK == FALSE && argc == 1) { + fprintf(stderr, "\n%s arguments:\n\n", argv[0]); + curarg = argArray; + + for (i = 0; i < numArgs; i++, curarg++) { + fprintf(stderr, " -%c %s ", curarg->tag, + curarg->prompt); + if (curarg->optional == 'T') + fprintf(stderr, " [Optional]"); + fprintf(stderr, "\n"); + if (curarg->strvalue[0] != '\0') + fprintf(stderr, " default = %s\n", + curarg->strvalue); + } + fprintf(stderr, "\n"); + return FALSE; } - if (j == numArgs) - { - fprintf(stderr, "Invalid argument tag in %s\n", argv[i]); - return FALSE; + + /* + * Process + */ + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') { + fprintf(stderr, "Arguments must start with -"); + return FALSE; + } + + /* check the tag. */ + curarg = argArray; + for (j = 0; j < numArgs; j++, curarg++) { + if ((argv[i][1] | 32) == (curarg->tag | 32)) break; + } + if (j == numArgs) { + fprintf(stderr, "Invalid argument tag in %s\n", + argv[i]); + return FALSE; + } + + strcpy(curarg->strvalue, argv[i] + 2); + if (curarg->strvalue[0] == '\'' && + curarg->strvalue[strlen(curarg->strvalue) - 1] == '\'') { + char ttmm[256]; + strcpy(ttmm, curarg->strvalue + 1); + ttmm[strlen(ttmm) - 1] = '\0'; + strcpy(curarg->strvalue, ttmm); + } } - - strcpy(curarg->strvalue, argv[i]+2); - if(curarg->strvalue[0] == '\'' - && curarg->strvalue[strlen(curarg->strvalue)-1] == '\'') - { - char ttmm[256]; - strcpy(ttmm, curarg->strvalue+1); - ttmm[strlen(ttmm)-1] = '\0'; - strcpy(curarg->strvalue, ttmm); - } - } - return TRUE; + return TRUE; } - /********* * * GetCond interprets the -c argument, the condition. @@ -1934,162 +1800,135 @@ char **argv; * *********/ -int -GetCond(arg, cond) +int GetCond(arg, cond) char *arg; str_cond **cond; { - int start, end, i, found; - char message_buf[1000]; + int start, end, i, found; + char message_buf[1000]; - if ( strcmp(arg, "null")==0) - { - (*cond) = NULL; - return TRUE; - } - else - { - (*cond) = (str_cond *)Calloc(1, sizeof(str_cond)); - - start = end = 0; - - /* find the field name. */ - while (('a'<= arg[end] && arg[end]<='z') || - ('A'<= arg[end] && arg[end]<='Z') || - arg[end] == '-' ) - end++; - - found = FALSE; - for (i=0; ifield = i; /* condition on field &at[i]. */ - found = TRUE; - break; - } - } - if (found == FALSE) - { - strncpy(message_buf, arg, end-start); - message_buf[end-start] = '\0'; - fprintf(stderr, "Field %s not found.\n", message_buf); - return FALSE; - } - - start = end; - end++; - while (arg[end] == '=' || - arg[end] == '!' || - arg[end] == '>' || - arg[end] == '<' ) - end++; - strncpy((*cond)->symbol, arg+start, end-start); - (*cond)->symbol[end-start] = '\0'; - if (strlen((*cond)->symbol)>2 || - strlen((*cond)->symbol)<1 || - (strlen((*cond)->symbol)==1 && - *((*cond)->symbol) !='>' && - *((*cond)->symbol) != '<') || - (strlen((*cond)->symbol)==2 && - (strncmp((*cond)->symbol,"!=",2)!= 0 ) && - (strncmp((*cond)->symbol,"==",2)!= 0 ) && - (strncmp((*cond)->symbol,">=",2)!= 0 ) && - (strncmp((*cond)->symbol,"<=",2)!= 0 ) - ) - ) - { - fprintf(stderr, "Invalid condition.\n"); - return FALSE; - } - - if(arg[end] == '"' && arg[strlen(arg) - 1] == '"') - { - end++; - arg[strlen(arg) - 1] = '\0'; + if (strcmp(arg, "null") == 0) { + (*cond) = NULL; + return TRUE; } + else { + (*cond) = (str_cond *)Calloc(1, sizeof(str_cond)); - (*cond)->value = (char *)Calloc(strlen(arg) - end + 2, 1); - strcpy((*cond)->value, arg+end); - } - return TRUE; + start = end = 0; + + /* find the field name. */ + while (('a' <= arg[end] && arg[end] <= 'z') || + ('A' <= arg[end] && arg[end] <= 'Z') || arg[end] == '-') + end++; + + found = FALSE; + for (i = 0; i < NUM_OF_FIELDS && found == FALSE; i++) { + if (strncmp(arg, at[i], strlen(at[i])) == 0) { + (*cond)->field = + i; /* condition on field &at[i]. */ + found = TRUE; + break; + } + } + if (found == FALSE) { + strncpy(message_buf, arg, end - start); + message_buf[end - start] = '\0'; + fprintf(stderr, "Field %s not found.\n", message_buf); + return FALSE; + } + + start = end; + end++; + while (arg[end] == '=' || arg[end] == '!' || arg[end] == '>' || + arg[end] == '<') + end++; + strncpy((*cond)->symbol, arg + start, end - start); + (*cond)->symbol[end - start] = '\0'; + if (strlen((*cond)->symbol) > 2 || + strlen((*cond)->symbol) < 1 || + (strlen((*cond)->symbol) == 1 && + *((*cond)->symbol) != '>' && *((*cond)->symbol) != '<') || + (strlen((*cond)->symbol) == 2 && + (strncmp((*cond)->symbol, "!=", 2) != 0) && + (strncmp((*cond)->symbol, "==", 2) != 0) && + (strncmp((*cond)->symbol, ">=", 2) != 0) && + (strncmp((*cond)->symbol, "<=", 2) != 0))) { + fprintf(stderr, "Invalid condition.\n"); + return FALSE; + } + + if (arg[end] == '"' && arg[strlen(arg) - 1] == '"') { + end++; + arg[strlen(arg) - 1] = '\0'; + } + + (*cond)->value = (char *)Calloc(strlen(arg) - end + 2, 1); + strcpy((*cond)->value, arg + end); + } + return TRUE; } - /********* * * GetFields interprets the -f arguments, the fields list. * - * Returns number of selected fields, 0 if anything is wrong. + * Returns number of selected fields, 0 if anything is wrong. * *********/ -int -GetFields(arg, selected_fields) +int GetFields(arg, selected_fields) char *arg; int selected_fields[]; { - int start, end, i, found, list_done, i_selected; - char message_buf[1000]; + int start, end, i, found, list_done, i_selected; + char message_buf[1000]; - if ( strcmp(arg, "all") == 0 ) - { - selected_fields[0] = -1; - return NUM_OF_FIELDS; - } - else - { - start = end = 0; - list_done = FALSE; - i_selected = 0; + if (strcmp(arg, "all") == 0) { + selected_fields[0] = -1; + return NUM_OF_FIELDS; + } + else { + start = end = 0; + list_done = FALSE; + i_selected = 0; - while ( list_done == FALSE ) - { - while (arg[end] != '\0' && arg[end] != ',') - { - end++ ; - } - if (arg[end] == '\0') - { - list_done = TRUE; - } - found = FALSE; - for (i=0; i= pl && - strncmp(string+i, pattern, pl) == 0) - num_app++; - } + for (i = 0; i <= sl - pl; i++) { + if (abs(i - orig_loc) >= pl && + strncmp(string + i, pattern, pl) == 0) + num_app++; + } - return num_app; + return num_app; } - /******* * * FindPatternNC() searches string for pattern , CASE INSENSITIVE. - * Returns the number of appearences. + * Returns the number of appearences. * *******/ -int -FindPatternNC(string, pattern) -const char *string; +int FindPatternNC(string, pattern) const char *string; const char *pattern; { - int i, j, sl, pl, num_app = 0; + int i, j, sl, pl, num_app = 0; - if(string == NULL || (sl = strlen(string)) == 0) - return 0; + if (string == NULL || (sl = strlen(string)) == 0) return 0; - pl = strlen(pattern); + pl = strlen(pattern); - for(i = 0; i <= sl-pl; i++) - { - j = 0; - while(j < pl && (string[i+j]|32) == (pattern[j]|32)) - j++; + for (i = 0; i <= sl - pl; i++) { + j = 0; + while (j < pl && (string[i + j] | 32) == (pattern[j] | 32)) j++; - if(j == pl) - num_app++; - } + if (j == pl) num_app++; + } - return num_app; + return num_app; } - /******* * * Complementary() CHANGES the given DNA/RNA string to its complementary, @@ -2284,71 +2090,65 @@ const char *pattern; * *******/ -int -Complementary(sequence, type) +int Complementary(sequence, type) char *sequence; char type; { - int i, l; - char *temp_str; + int i, l; + char *temp_str; - l = strlen(sequence); - temp_str = (char *)Calloc(l+1, sizeof(char)); - if( type == 'D' || type == 'd') - type = 0; - else if(type == 'R' || type == 'r') - type = 1; - else - { - fprintf(stderr, - "Complementary(): type unknown. Type is D/d/R/r\n"); - return (int) NULL; - } + l = strlen(sequence); + temp_str = (char *)Calloc(l + 1, sizeof(char)); + if (type == 'D' || type == 'd') + type = 0; + else if (type == 'R' || type == 'r') + type = 1; + else { + fprintf(stderr, + "Complementary(): type unknown. Type is D/d/R/r\n"); + return (int)NULL; + } - for(i=0; i 1) - { - fprintf(stderr, - "%s has 15 repatitive base(s) %s\n", - PossibleOligo, subseq); - i++; - BadOligo = TRUE; - } - } - */ + /* check if there is a substr of len(no_repeat_len) + * repeat itself in the PossibleOligo. + DOESN'T MATTER! IT COULD MESS UP AT MOST SEVERAL + BASES READ INTO THE PROBE. CUT_SITE IS WHAT REALLY + MATTERS. - /* - * To ensure that the probe is not going to hybridize - * with itself: - */ - for(PO_index = 0; - BadOligo==FALSE && PO_index<=PO_len-no_repeat_len; - PO_index++) - { - SubStr(PossibleOligo, PO_index, no_repeat_len, subseq); - strcpy(scd_str, subseq); - Complementary(scd_str, 'd'); - Reverse(scd_str); + for(PO_index = 0; + BadOligo==FALSE && PO_index<=PO_len-no_repeat_len; + PO_index++) + { + SubStr(PossibleOligo,PO_index,no_repeat_len,subseq); + if(FindPattern(PossibleOligo, subseq) > 1) + { + fprintf(stderr, + "%s has 15 repatitive base(s) %s\n", + PossibleOligo, subseq); + i++; + BadOligo = TRUE; + } + } + */ - if(FindPattern(PossibleOligo, scd_str) > 0) - { - fprintf(stderr, - "%s may hybridize with itself: %s vs. %s.\n", - PossibleOligo, subseq, scd_str); - i++; - BadOligo = TRUE; - } - } + /* + * To ensure that the probe is not going to hybridize + * with itself: + */ + for (PO_index = 0; BadOligo == FALSE && + PO_index <= PO_len - no_repeat_len; + PO_index++) { + SubStr(PossibleOligo, PO_index, no_repeat_len, + subseq); + strcpy(scd_str, subseq); + Complementary(scd_str, 'd'); + Reverse(scd_str); - for(PO_index = 0; - BadOligo == FALSE && PO_index <= PO_len-2*check_len; - PO_index++) - { - SubStr(PossibleOligo, PO_index, check_len, subseq); - Complementary(subseq, 'd'); - strcpy(scd_str, subseq); - Reverse(scd_str); - - /* - if(FindPattern2(PossibleOligo,subseq,PO_index)>0) - { - fprintf(stderr, "%s has self-compl %s\n", - PossibleOligo, subseq); - i += PO_index+1; - BadOligo = TRUE; - } - else - */ + if (FindPattern(PossibleOligo, scd_str) > 0) { + fprintf(stderr, + "%s may hybridize with itself: " + "%s vs. %s.\n", + PossibleOligo, subseq, scd_str); + i++; + BadOligo = TRUE; + } + } - if(FindPattern2(PossibleOligo,scd_str,PO_index)>0) - { - fprintf(stderr, "%s has 2nd struct %s\n", - PossibleOligo, scd_str); - i += PO_index+1; - BadOligo = TRUE; - } - } - if(BadOligo == FALSE) - { - seq_set[seq_cnt] = (char *) - Calloc(strlen(PossibleOligo)+1, sizeof(char)); - strcpy(seq_set[seq_cnt], PossibleOligo); + for (PO_index = 0; BadOligo == FALSE && + PO_index <= PO_len - 2 * check_len; + PO_index++) { + SubStr(PossibleOligo, PO_index, check_len, + subseq); + Complementary(subseq, 'd'); + strcpy(scd_str, subseq); + Reverse(scd_str); - if(++seq_cnt == max_num_probe) - { - max_num_probe *= 2; - seq_set = (char **) - Realloc(seq_set, max_num_probe*sizeof(char *)); - } - i++; - } - } /* end of l. */ - } /* end of i. */ + /* + if(FindPattern2(PossibleOligo,subseq,PO_index)>0) + { + fprintf(stderr, "%s has self-compl %s\n", + PossibleOligo, subseq); + i += PO_index+1; + BadOligo = TRUE; + } + else + */ - seq_set[seq_cnt] = NULL; + if (FindPattern2(PossibleOligo, scd_str, + PO_index) > 0) { + fprintf(stderr, + "%s has 2nd struct %s\n", + PossibleOligo, scd_str); + i += PO_index + 1; + BadOligo = TRUE; + } + } + if (BadOligo == FALSE) { + seq_set[seq_cnt] = (char *)Calloc( + strlen(PossibleOligo) + 1, sizeof(char)); + strcpy(seq_set[seq_cnt], PossibleOligo); - if(seq_cnt == 0) - return NULL; + if (++seq_cnt == max_num_probe) { + max_num_probe *= 2; + seq_set = (char **)Realloc( + seq_set, + max_num_probe * sizeof(char *)); + } + i++; + } + } /* end of l. */ + } /* end of i. */ - return seq_set; + seq_set[seq_cnt] = NULL; + + if (seq_cnt == 0) return NULL; + + return seq_set; } - - /* ALWAYS COPY the result from uniqueID() to a char[32], * (strlen(hostname)+1+10). Memory is lost when the function * is finished. @@ -2586,61 +2374,51 @@ int check_len, min_len, max_len, l_bnd, r_bnd; char vname[32]; char *uniqueID() { - char hname[32],/* vname[32], rtm 18.III.98 */ tstr[32]; - time_t *tp; - static cnt = 0; - int ll; + char hname[32], /* vname[32], rtm 18.III.98 */ tstr[32]; + time_t *tp; + static cnt = 0; + int ll; - tp = (time_t *)Calloc(1, sizeof(time_t)); + tp = (time_t *)Calloc(1, sizeof(time_t)); - if(gethostname(hname, 32) == -1) - { - fprintf(stderr, "UniqueID(): Failed to get host name.\n"); - exit(1); - } + if (gethostname(hname, 32) == -1) { + fprintf(stderr, "UniqueID(): Failed to get host name.\n"); + exit(1); + } - time(tp); - sprintf(tstr, ":%d:%ld", cnt, *tp); - if((ll = strlen(tstr)) > 31) - { - strncpy(vname, tstr, 31); - vname[31] = '\0'; - } - else - { - ll = strlen(hname)-(31-ll); - if(ll < 0) - ll = 0; - sprintf(vname, "%s%s", hname+ll, tstr); - } - cnt++; - Cfree(tp); - return(vname); + time(tp); + sprintf(tstr, ":%d:%ld", cnt, *tp); + if ((ll = strlen(tstr)) > 31) { + strncpy(vname, tstr, 31); + vname[31] = '\0'; + } + else { + ll = strlen(hname) - (31 - ll); + if (ll < 0) ll = 0; + sprintf(vname, "%s%s", hname + ll, tstr); + } + cnt++; + Cfree(tp); + return (vname); } - - /* return the percentage of GCcontents. */ int GCcontent(seq) char *seq; { - int l, gc=0, j; + int l, gc = 0, j; - l = strlen(seq); + l = strlen(seq); - for (j=0; jcomments, tSeq->c_elem); + if ((fp = fopen(fname, "w")) == NULL) { + fprintf(stderr, "Can't open IQ file: %s\n", fname); + exit(1); + } + fprintf(fp, "%s %s\n", tSeq->comments, tSeq->c_elem); } - - -Find2(string,key) - char *key,*string; - /* - * Like find, but returns the index of the leftmost - * occurence, and -1 if not found. - * Note in this program, T==U, and case insensitive. - */ +Find2(string, key) char *key, *string; +/* + * Like find, but returns the index of the leftmost + * occurence, and -1 if not found. + * Note in this program, T==U, and case insensitive. + */ { - int i,j,len1,len2,dif,flag = FALSE; - char *target; + int i, j, len1, len2, dif, flag = FALSE; + char *target; - if(string == NULL || string[0] == '\0') - return -1; + if (string == NULL || string[0] == '\0') return -1; - len2 = strlen(string); - target = (char *) Calloc(len2+1, 1); - for(i = 0; i0) - for(j=0;j 0) + for (j = 0; j < dif && flag == FALSE; j++) { + flag = TRUE; + for (i = 0; i < len1 && flag; i++) + flag = (key[i] == target[i + j]) ? TRUE : FALSE; + } + Cfree(target); + return (flag ? j - 1 : -1); } - - - /* return -1 if end-of-file. FALSE if anything is wrong. */ -int - ReadGDE(fp, seq) +int ReadGDE(fp, seq) FILE *fp; Sequence *seq; { - char temp_line[1000], waste[64]; - int ii, l1; + char temp_line[1000], waste[64]; + int ii, l1; - while(fgets(temp_line, 1000, fp) != NULL ) - { - if(strncmp(temp_line, "sequence-ID", 11) == 0) - { - sscanf(temp_line,"%s%s",waste,seq->sequence_ID); - } - else if(temp_line[0] == '#') - { - strncpy(seq->name, temp_line+1, 31); - seq->name[31] = '\0'; - ii = 0; - while(ii < strlen(seq->name) && - seq->name[ii] != ' ' && - seq->name[ii] != '\n') - ii++; - seq->name[ii] = '\0'; + while (fgets(temp_line, 1000, fp) != NULL) { + if (strncmp(temp_line, "sequence-ID", 11) == 0) { + sscanf(temp_line, "%s%s", waste, seq->sequence_ID); + } + else if (temp_line[0] == '#') { + strncpy(seq->name, temp_line + 1, 31); + seq->name[31] = '\0'; + ii = 0; + while (ii < strlen(seq->name) && seq->name[ii] != ' ' && + seq->name[ii] != '\n') + ii++; + seq->name[ii] = '\0'; - seq->seqmaxlen = 256; - seq->c_elem=(char *)Calloc(seq->seqmaxlen,1); - seq->seqlen = 0; - while(fgets(temp_line, 1000, fp) != NULL) - { - l1 = strlen(temp_line); + seq->seqmaxlen = 256; + seq->c_elem = (char *)Calloc(seq->seqmaxlen, 1); + seq->seqlen = 0; + while (fgets(temp_line, 1000, fp) != NULL) { + l1 = strlen(temp_line); - if(temp_line[l1 - 1] == '\n') - { - l1--; - temp_line[l1] = '\0'; - } + if (temp_line[l1 - 1] == '\n') { + l1--; + temp_line[l1] = '\0'; + } - while(seq->seqmaxlen < - seq->seqlen + strlen(temp_line) + 1) - { - seq->seqmaxlen *= 2; - seq->c_elem = (char *) - Realloc(seq->c_elem, seq->seqmaxlen); - } + while (seq->seqmaxlen < + seq->seqlen + strlen(temp_line) + 1) { + seq->seqmaxlen *= 2; + seq->c_elem = (char *)Realloc( + seq->c_elem, seq->seqmaxlen); + } - strcat(seq->c_elem, temp_line); - seq->seqlen += strlen(temp_line); - } + strcat(seq->c_elem, temp_line); + seq->seqlen += strlen(temp_line); + } - if(seq->seqlen == 0) - { - fprintf(stderr, "\n%s\n","Sequence is empty."); - return FALSE; - } - } - } - return -1; + if (seq->seqlen == 0) { + fprintf(stderr, "\n%s\n", "Sequence is empty."); + return FALSE; + } + } + } + return -1; } - -void heapify(seq_set, seq_size, heap_size, elem, Pkey, Skey, order) -int seq_size, elem, heap_size, **order; +void heapify(seq_set, seq_size, heap_size, elem, Pkey, Skey, order) int seq_size + , + elem, heap_size, **order; char Pkey[], Skey[]; Sequence *seq_set; { - int l, r, temp, largest; + int l, r, temp, largest; - l = 2*elem+1; - r = 2*elem+2; + l = 2 * elem + 1; + r = 2 * elem + 2; - if(l <= heap_size && - CompKey(seq_set[(*order)[l]], seq_set[(*order)[elem]], - Pkey, Skey) > 0) - largest = l; - else - largest = elem; + if (l <= heap_size && CompKey(seq_set[(*order)[l]], + seq_set[(*order)[elem]], Pkey, Skey) > 0) + largest = l; + else + largest = elem; - if(r <= heap_size && - CompKey(seq_set[(*order)[r]], seq_set[(*order)[largest]], - Pkey, Skey) > 0) - largest = r; + if (r <= heap_size && + CompKey(seq_set[(*order)[r]], seq_set[(*order)[largest]], Pkey, + Skey) > 0) + largest = r; - if(largest != elem) - { - temp = (*order)[elem]; - (*order)[elem] = (*order)[largest]; - (*order)[largest] = temp; - heapify(seq_set,seq_size,heap_size,largest,Pkey,Skey,order); - } + if (largest != elem) { + temp = (*order)[elem]; + (*order)[elem] = (*order)[largest]; + (*order)[largest] = temp; + heapify(seq_set, seq_size, heap_size, largest, Pkey, Skey, + order); + } } - -heapsort(seq_set, seq_size, Pkey, Skey, order) -int seq_size, **order; -char Pkey[], Skey[]; +heapsort(seq_set, seq_size, Pkey, Skey, order) int seq_size, **order; +char Pkey[], Skey[]; Sequence *seq_set; { - int ii, temp, heap_size; + int ii, temp, heap_size; - /* - * build_heap(seq_set, seq_size, &heap_size, order); - */ - heap_size = seq_size-1; + /* + * build_heap(seq_set, seq_size, &heap_size, order); + */ + heap_size = seq_size - 1; - for(ii = (seq_size-1)/2; ii>=0; ii--) /* (L-1)/2-1?? */ - { - heapify(seq_set, seq_size, heap_size, ii,Pkey,Skey,order); - } + for (ii = (seq_size - 1) / 2; ii >= 0; ii--) /* (L-1)/2-1?? */ + { + heapify(seq_set, seq_size, heap_size, ii, Pkey, Skey, order); + } - for(ii = seq_size-1; ii>0; ii--) - { - temp = (*order)[0]; - (*order)[0] = (*order)[ii]; - (*order)[ii] = temp; - heap_size--; - heapify(seq_set, seq_size, heap_size, 0, Pkey,Skey,order); - } + for (ii = seq_size - 1; ii > 0; ii--) { + temp = (*order)[0]; + (*order)[0] = (*order)[ii]; + (*order)[ii] = temp; + heap_size--; + heapify(seq_set, seq_size, heap_size, 0, Pkey, Skey, order); + } } - - - /* - * Return >0, ==0, <0. + * Return >0, ==0, <0. */ int CompKey(seq1, seq2, Pkey, Skey) - Sequence seq1, seq2; - char Pkey[], Skey[]; +Sequence seq1, seq2; +char Pkey[], Skey[]; { - int ii, jj, Pret; - char b1[32], b2[32]; + int ii, jj, Pret; + char b1[32], b2[32]; - if(strcmp(Pkey, "type") == 0) - { - Pret = strcmp(seq1.type, seq2.type); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "name") == 0) - { - Pret = strcmp(seq1.name, seq2.name); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "sequence-ID") == 0) - { - Pret = strcmp(seq1.sequence_ID, seq2.sequence_ID); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "creator") == 0) - { - Pret = strcmp(seq1.creator, seq2.creator); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "offset") == 0) - { - Pret = seq1.offset - seq2.offset; - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "group-ID") == 0) - { - Pret = seq1.group_ID - seq2.group_ID; - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "barcode") == 0) - { - if(seq1.barcode[0] == 'P') - strcpy(b1, seq1.barcode+2); - else - strcpy(b1, seq1.barcode); - - if(seq2.barcode[0] == 'P') - strcpy(b2, seq2.barcode+2); - else - strcpy(b2, seq2.barcode); - - Pret = strcmp(b1, b2); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "seqlen") == 0) - { - Pret = seq1.seqlen - seq2.seqlen; - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "creation-date") == 0) - { - seq1.creation_date[0] %= 100; - seq2.creation_date[0] %= 100; - Pret = seq1.creation_date[0]*10000 - + seq1.creation_date[1]*100 - + seq1.creation_date[2] - - seq2.creation_date[0]*10000 - - seq2.creation_date[1]*100 - - seq2.creation_date[2]; - if(Pret == 0) - { - Pret = seq1.creation_date[3]*10000 - + seq1.creation_date[4]*100 - + seq1.creation_date[5] - - seq2.creation_date[3]*10000 - - seq2.creation_date[4]*100 - - seq2.creation_date[5]; + if (strcmp(Pkey, "type") == 0) { + Pret = strcmp(seq1.type, seq2.type); + if (Pret != 0 || Skey[0] == '\0') return Pret; } - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "probing-date") == 0) - { - seq1.probing_date[0] %= 100; - seq2.probing_date[0] %= 100; - Pret = seq1.probing_date[0]*10000 - + seq1.probing_date[1]*100 - + seq1.probing_date[2] - - seq2.probing_date[0]*10000 - - seq2.probing_date[1]*100 - - seq2.probing_date[2]; - if(Pret == 0) - { - Pret = seq1.probing_date[3]*10000 - + seq1.probing_date[4]*100 - + seq1.probing_date[5] - - seq2.probing_date[3]*10000 - - seq2.probing_date[4]*100 - - seq2.probing_date[5]; + else if (strcmp(Pkey, "name") == 0) { + Pret = strcmp(seq1.name, seq2.name); + if (Pret != 0 || Skey[0] == '\0') return Pret; } - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "autorad_date") == 0) - { - seq1.autorad_date[0] %= 100; - seq2.autorad_date[0] %= 100; - Pret = seq1.autorad_date[0]*10000 - + seq1.autorad_date[1]*100 - + seq1.autorad_date[2] - - seq2.autorad_date[0]*10000 - - seq2.autorad_date[1]*100 - - seq2.autorad_date[2]; - if(Pret == 0) - { - Pret = seq1.autorad_date[3]*10000 - + seq1.autorad_date[4]*100 - + seq1.autorad_date[5] - - seq2.autorad_date[3]*10000 - - seq2.autorad_date[4]*100 - - seq2.autorad_date[5]; + else if (strcmp(Pkey, "sequence-ID") == 0) { + Pret = strcmp(seq1.sequence_ID, seq2.sequence_ID); + if (Pret != 0 || Skey[0] == '\0') return Pret; } - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "film") == 0) - { - Pret = strcmp(seq1.film, seq2.film); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "membrane") == 0) - { - Pret = strcmp(seq1.membrane, seq2.membrane); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - else if(strcmp(Pkey, "contig") == 0) - { - Pret = strcmp(seq1.contig, seq2.contig); - if(Pret != 0 || Skey[0] == '\0') return Pret; - } - - else - { - fprintf(stderr,"CompKey(): Invalid primary key %s.\n",Pkey); - exit(1); - } + else if (strcmp(Pkey, "creator") == 0) { + Pret = strcmp(seq1.creator, seq2.creator); + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "offset") == 0) { + Pret = seq1.offset - seq2.offset; + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "group-ID") == 0) { + Pret = seq1.group_ID - seq2.group_ID; + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "barcode") == 0) { + if (seq1.barcode[0] == 'P') + strcpy(b1, seq1.barcode + 2); + else + strcpy(b1, seq1.barcode); - if(strcmp(Skey, "type") == 0) - { - return (strcmp(seq1.type, seq2.type)); - } - else if(strcmp(Skey, "name") == 0) - { - return (strcmp(seq1.name, seq2.name)); - } - else if(strcmp(Skey, "sequence-ID") == 0) - { - return (strcmp(seq1.sequence_ID, seq2.sequence_ID)); - } - else if(strcmp(Skey, "creator") == 0) - { - return (strcmp(seq1.creator, seq2.creator)); - } - else if(strcmp(Skey, "offset") == 0) - { - return (seq1.offset - seq2.offset); - } - else if(strcmp(Skey, "group-ID") == 0) - { - return (seq1.group_ID - seq2.group_ID); - } - else if(strcmp(Skey, "barcode") == 0) - { - if(seq1.barcode[0] == 'P') - strcpy(b1, seq1.barcode+2); - else - strcpy(b1, seq1.barcode); + if (seq2.barcode[0] == 'P') + strcpy(b2, seq2.barcode + 2); + else + strcpy(b2, seq2.barcode); - if(seq2.barcode[0] == 'P') - strcpy(b2, seq2.barcode+2); - else - strcpy(b2, seq2.barcode); - - return (strcmp(b1, b2)); - } - else if(strcmp(Skey, "seqlen") == 0) - { - return(seq1.seqlen - seq2.seqlen); - } - else if(strcmp(Skey, "creation-date") == 0) - { - seq1.creation_date[0] %= 100; - seq2.creation_date[0] %= 100; - Pret = seq1.creation_date[0]*10000 - + seq1.creation_date[1]*100 - + seq1.creation_date[2] - - seq2.creation_date[0]*10000 - - seq2.creation_date[1]*100 - - seq2.creation_date[2]; - if(Pret != 0) - return Pret; + Pret = strcmp(b1, b2); + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "seqlen") == 0) { + Pret = seq1.seqlen - seq2.seqlen; + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "creation-date") == 0) { + seq1.creation_date[0] %= 100; + seq2.creation_date[0] %= 100; + Pret = seq1.creation_date[0] * 10000 + + seq1.creation_date[1] * 100 + seq1.creation_date[2] - + seq2.creation_date[0] * 10000 - + seq2.creation_date[1] * 100 - seq2.creation_date[2]; + if (Pret == 0) { + Pret = seq1.creation_date[3] * 10000 + + seq1.creation_date[4] * 100 + + seq1.creation_date[5] - + seq2.creation_date[3] * 10000 - + seq2.creation_date[4] * 100 - + seq2.creation_date[5]; + } + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "probing-date") == 0) { + seq1.probing_date[0] %= 100; + seq2.probing_date[0] %= 100; + Pret = seq1.probing_date[0] * 10000 + + seq1.probing_date[1] * 100 + seq1.probing_date[2] - + seq2.probing_date[0] * 10000 - + seq2.probing_date[1] * 100 - seq2.probing_date[2]; + if (Pret == 0) { + Pret = + seq1.probing_date[3] * 10000 + + seq1.probing_date[4] * 100 + seq1.probing_date[5] - + seq2.probing_date[3] * 10000 - + seq2.probing_date[4] * 100 - seq2.probing_date[5]; + } + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "autorad_date") == 0) { + seq1.autorad_date[0] %= 100; + seq2.autorad_date[0] %= 100; + Pret = seq1.autorad_date[0] * 10000 + + seq1.autorad_date[1] * 100 + seq1.autorad_date[2] - + seq2.autorad_date[0] * 10000 - + seq2.autorad_date[1] * 100 - seq2.autorad_date[2]; + if (Pret == 0) { + Pret = + seq1.autorad_date[3] * 10000 + + seq1.autorad_date[4] * 100 + seq1.autorad_date[5] - + seq2.autorad_date[3] * 10000 - + seq2.autorad_date[4] * 100 - seq2.autorad_date[5]; + } + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "film") == 0) { + Pret = strcmp(seq1.film, seq2.film); + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "membrane") == 0) { + Pret = strcmp(seq1.membrane, seq2.membrane); + if (Pret != 0 || Skey[0] == '\0') return Pret; + } + else if (strcmp(Pkey, "contig") == 0) { + Pret = strcmp(seq1.contig, seq2.contig); + if (Pret != 0 || Skey[0] == '\0') return Pret; + } - return(seq1.creation_date[3]*10000 - + seq1.creation_date[4]*100 - + seq1.creation_date[5] - - seq2.creation_date[3]*10000 - - seq2.creation_date[4]*100 - - seq2.creation_date[5]); - } - else if(strcmp(Skey, "probing-date") == 0) - { - seq1.probing_date[0] %= 100; - seq2.probing_date[0] %= 100; - Pret = seq1.probing_date[0]*10000 - + seq1.probing_date[1]*100 - + seq1.probing_date[2] - - seq2.probing_date[0]*10000 - - seq2.probing_date[1]*100 - - seq2.probing_date[2]; - if(Pret != 0) - return Pret; + else { + fprintf(stderr, "CompKey(): Invalid primary key %s.\n", Pkey); + exit(1); + } - return(seq1.probing_date[3]*10000 - + seq1.probing_date[4]*100 - + seq1.probing_date[5] - - seq2.probing_date[3]*10000 - - seq2.probing_date[4]*100 - - seq2.probing_date[5]); - } - else if(strcmp(Skey, "autorad_date") == 0) - { - seq1.autorad_date[0] %= 100; - seq2.autorad_date[0] %= 100; - Pret = seq1.autorad_date[0]*10000 - + seq1.autorad_date[1]*100 - + seq1.autorad_date[2] - - seq2.autorad_date[0]*10000 - - seq2.autorad_date[1]*100 - - seq2.autorad_date[2]; - if(Pret != 0) - return Pret; + if (strcmp(Skey, "type") == 0) { + return (strcmp(seq1.type, seq2.type)); + } + else if (strcmp(Skey, "name") == 0) { + return (strcmp(seq1.name, seq2.name)); + } + else if (strcmp(Skey, "sequence-ID") == 0) { + return (strcmp(seq1.sequence_ID, seq2.sequence_ID)); + } + else if (strcmp(Skey, "creator") == 0) { + return (strcmp(seq1.creator, seq2.creator)); + } + else if (strcmp(Skey, "offset") == 0) { + return (seq1.offset - seq2.offset); + } + else if (strcmp(Skey, "group-ID") == 0) { + return (seq1.group_ID - seq2.group_ID); + } + else if (strcmp(Skey, "barcode") == 0) { + if (seq1.barcode[0] == 'P') + strcpy(b1, seq1.barcode + 2); + else + strcpy(b1, seq1.barcode); - return(seq1.autorad_date[3]*10000 - + seq1.autorad_date[4]*100 - + seq1.autorad_date[5] - - seq2.autorad_date[3]*10000 - - seq2.autorad_date[4]*100 - - seq2.autorad_date[5]); - } - else if(strcmp(Skey, "film") == 0) - { - return(strcmp(seq1.film, seq2.film)); - } - else if(strcmp(Skey, "membrane") == 0) - { - return(strcmp(seq1.membrane, seq2.membrane)); - } - else if(strcmp(Skey, "contig") == 0) - { - return(strcmp(seq1.contig, seq2.contig)); - } - else - { - fprintf(stderr, "CompKey(): Invalid secondary key %s.\n",Skey); - exit(1); - } + if (seq2.barcode[0] == 'P') + strcpy(b2, seq2.barcode + 2); + else + strcpy(b2, seq2.barcode); + + return (strcmp(b1, b2)); + } + else if (strcmp(Skey, "seqlen") == 0) { + return (seq1.seqlen - seq2.seqlen); + } + else if (strcmp(Skey, "creation-date") == 0) { + seq1.creation_date[0] %= 100; + seq2.creation_date[0] %= 100; + Pret = seq1.creation_date[0] * 10000 + + seq1.creation_date[1] * 100 + seq1.creation_date[2] - + seq2.creation_date[0] * 10000 - + seq2.creation_date[1] * 100 - seq2.creation_date[2]; + if (Pret != 0) return Pret; + + return (seq1.creation_date[3] * 10000 + + seq1.creation_date[4] * 100 + seq1.creation_date[5] - + seq2.creation_date[3] * 10000 - + seq2.creation_date[4] * 100 - seq2.creation_date[5]); + } + else if (strcmp(Skey, "probing-date") == 0) { + seq1.probing_date[0] %= 100; + seq2.probing_date[0] %= 100; + Pret = seq1.probing_date[0] * 10000 + + seq1.probing_date[1] * 100 + seq1.probing_date[2] - + seq2.probing_date[0] * 10000 - + seq2.probing_date[1] * 100 - seq2.probing_date[2]; + if (Pret != 0) return Pret; + + return (seq1.probing_date[3] * 10000 + + seq1.probing_date[4] * 100 + seq1.probing_date[5] - + seq2.probing_date[3] * 10000 - + seq2.probing_date[4] * 100 - seq2.probing_date[5]); + } + else if (strcmp(Skey, "autorad_date") == 0) { + seq1.autorad_date[0] %= 100; + seq2.autorad_date[0] %= 100; + Pret = seq1.autorad_date[0] * 10000 + + seq1.autorad_date[1] * 100 + seq1.autorad_date[2] - + seq2.autorad_date[0] * 10000 - + seq2.autorad_date[1] * 100 - seq2.autorad_date[2]; + if (Pret != 0) return Pret; + + return (seq1.autorad_date[3] * 10000 + + seq1.autorad_date[4] * 100 + seq1.autorad_date[5] - + seq2.autorad_date[3] * 10000 - + seq2.autorad_date[4] * 100 - seq2.autorad_date[5]); + } + else if (strcmp(Skey, "film") == 0) { + return (strcmp(seq1.film, seq2.film)); + } + else if (strcmp(Skey, "membrane") == 0) { + return (strcmp(seq1.membrane, seq2.membrane)); + } + else if (strcmp(Skey, "contig") == 0) { + return (strcmp(seq1.contig, seq2.contig)); + } + else { + fprintf(stderr, "CompKey(): Invalid secondary key %s.\n", Skey); + exit(1); + } } - - int Lock(fname) - char *fname; -{ - char buffer[1024]; - FILE *fp; - int wait = 0; - - while((fp = fopen(fname, "r")) == NULL) - { - sleep(1); - if(++wait == 30) - { - fprintf(stderr, "File %s not available, Try later.\n\n", fname); - return FALSE; - } - } - fclose(fp); - sprintf(buffer, "mv %s %s.locked", fname, fname); - system(buffer); - return TRUE; -} - - -void Unlock(fname) char *fname; { - char buffer[1024]; - sprintf(buffer, "mv %s.locked %s", fname, fname); - system(buffer); + char buffer[1024]; + FILE *fp; + int wait = 0; + + while ((fp = fopen(fname, "r")) == NULL) { + sleep(1); + if (++wait == 30) { + fprintf(stderr, + "File %s not available, Try later.\n\n", + fname); + return FALSE; + } + } + fclose(fp); + sprintf(buffer, "mv %s %s.locked", fname, fname); + system(buffer); + return TRUE; } +void Unlock(fname) char *fname; +{ + char buffer[1024]; + sprintf(buffer, "mv %s.locked %s", fname, fname); + system(buffer); +} -AppendComments(seq, str) -Sequence *seq; +AppendComments(seq, str) Sequence *seq; char *str; { - int ii, jj, kk; + int ii, jj, kk; - kk = strlen(str); + kk = strlen(str); - if(seq->commentsmaxlen == 0) - { - seq->comments = (char *)Calloc(kk+1, 1); - seq->commentsmaxlen = kk+1; - seq->commentslen = 0; - } - else if(seq->commentslen+kk+1>seq->commentsmaxlen) - { - seq->commentsmaxlen += 2*(kk+1); - seq->comments = (char *) - Realloc(seq->comments, seq->commentsmaxlen); - } - seq->comments[seq->commentslen] = '\0'; - seq->comments[seq->commentslen] = '\0'; - strcat(seq->comments, str); - seq->commentslen = strlen(seq->comments); + if (seq->commentsmaxlen == 0) { + seq->comments = (char *)Calloc(kk + 1, 1); + seq->commentsmaxlen = kk + 1; + seq->commentslen = 0; + } + else if (seq->commentslen + kk + 1 > seq->commentsmaxlen) { + seq->commentsmaxlen += 2 * (kk + 1); + seq->comments = + (char *)Realloc(seq->comments, seq->commentsmaxlen); + } + seq->comments[seq->commentslen] = '\0'; + seq->comments[seq->commentslen] = '\0'; + strcat(seq->comments, str); + seq->commentslen = strlen(seq->comments); }