/**************************************************************** * * This is a set of functions defined for the genome * project. * ****************************************************************/ #ifndef _GLOBAL_DEFS_H #define _GLOBAL_DEFS_H #include #include #include "global_defs.h" #endif #define MAXLINELEN 256 static char Default_DNA_Trans[16] = {'-', 'a', 'c', 'm', 'g', 'r', 's', 'v', 't', 'w', 'y', 'h', 'k', 'd', 'b', 'n'}; /*********** * * WriteRecord() outputs one record at a time in HGL format. * Only the fields in the fields_array will be output. All the * fields will be output if fields_array is NULL. * * fp : pointer to the output file. * tSeq: pointer to the record. * fields_array: contains the field ids of the selected fields. * array_size: number of selected fields. * * Returns: 1 if any field is printed; * 0 if no field is printed; * -1 if anything is wrong. * **********/ int WriteRecord(fp, tSeq, fields_array, array_size) FILE *fp; const Sequence *tSeq; int *fields_array; int array_size; { int i, save_str_size, tt; int all_fields = FALSE; int first_field = TRUE; char temp_str[256]; char *save_str; char *ptr; save_str = (char *)Calloc(256, 1); save_str_size = 256; /* When all the fields are selected. */ if (fields_array == NULL) { all_fields = TRUE; fields_array = (int *)Calloc(NUM_OF_FIELDS, sizeof(int)); for (i = 0; i < NUM_OF_FIELDS; i++) { fields_array[i] = i; } array_size = NUM_OF_FIELDS; } for (i = 0; i < array_size; i++) { save_str[0] = '\0'; if (fields_array[i] == e_creation_date && tSeq->creation_date[0] != 0) { sprintf(save_str, "\n%s\t%d/%d/%d ", at[fields_array[i]], tSeq->creation_date[1], tSeq->creation_date[2], tSeq->creation_date[0]); if (tSeq->creation_date[3] >= 0) { if (tSeq->creation_date[4] < 0) tSeq->creation_date[4] = 0; if (tSeq->creation_date[5] < 0) tSeq->creation_date[5] = 0; sprintf(save_str, "%s%d:%d:%d", save_str, tSeq->creation_date[3], tSeq->creation_date[4], tSeq->creation_date[5]); } } else if (fields_array[i] == e_probing_date && tSeq->probing_date[0] != 0) { sprintf(save_str, "\n%s\t%d/%d/%d ", at[fields_array[i]], tSeq->probing_date[1], tSeq->probing_date[2], tSeq->probing_date[0]); if (tSeq->probing_date[3] >= 0) { if (tSeq->probing_date[4] < 0) tSeq->probing_date[4] = 0; if (tSeq->probing_date[5] < 0) tSeq->probing_date[5] = 0; sprintf(save_str, "%s%d:%d:%d", save_str, tSeq->probing_date[3], tSeq->probing_date[4], tSeq->probing_date[5]); } } else if (fields_array[i] == e_autorad_date && tSeq->autorad_date[0] != 0) { sprintf(save_str, "\n%s\t%d/%d/%d ", at[fields_array[i]], tSeq->autorad_date[1], tSeq->autorad_date[2], tSeq->autorad_date[0]); if (tSeq->autorad_date[3] >= 0) { if (tSeq->autorad_date[4] < 0) tSeq->autorad_date[4] = 0; if (tSeq->autorad_date[5] < 0) tSeq->autorad_date[5] = 0; sprintf(save_str, "%s%d:%d:%d", save_str, tSeq->autorad_date[3], tSeq->autorad_date[4], tSeq->autorad_date[5]); } } else if (fields_array[i] == e_c_elem && tSeq->c_elem != NULL) { ptr = tSeq->c_elem; sprintf(save_str, "\n%s\t\"", at[fields_array[i]]); while (ptr < tSeq->c_elem + tSeq->seqlen) { if (ptr != tSeq->c_elem) strcat(save_str, "\n"); strncpy( temp_str, ptr, MIN(60, tSeq->c_elem + tSeq->seqlen - ptr)); temp_str[MIN(60, tSeq->c_elem + tSeq->seqlen - ptr)] = '\0'; /* Gurantee strlen(temp_str) chars for the * string, one for \n, one for ", and one for * \0. */ while (save_str_size - strlen(save_str) < strlen(temp_str) + 3) { save_str_size *= 2; save_str = (char *)Realloc( save_str, save_str_size); } strcat(save_str, temp_str); ptr += 60; } strcat(save_str, "\""); } else if (fields_array[i] == e_comments && tSeq->commentslen != 0) { while (save_str_size < 20 + tSeq->commentslen) { save_str_size *= 2; save_str = (char *)Realloc(save_str, save_str_size); } strcat(save_str, "\n"); strcat(save_str, at[fields_array[i]]); strcat(save_str, "\t\"\n"); /* put a \0 at the end of comments. */ while (tSeq->commentslen + 1 > tSeq->commentsmaxlen) { tSeq->commentsmaxlen *= 2; tSeq->comments = (char *)Realloc( tSeq->comments, tSeq->commentsmaxlen); } tSeq->comments[tSeq->commentslen] = '\0'; /* clean up the leading empty lines.*/ tt = 0; while (tSeq->comments[tt] == '\n' || tSeq->comments[tt] == ' ') tt++; tSeq->commentslen -= tt; strcat(save_str, tSeq->comments + tt); strcat(save_str, "\""); } else if (fields_array[i] == e_laneset && tSeq->laneset != -1) sprintf(save_str, "\n%s\t\t%d", at[fields_array[i]], tSeq->laneset); else if (fields_array[i] == e_strandedness && tSeq->strandedness != 0) sprintf(save_str, "\n%s\t%d", at[fields_array[i]], tSeq->strandedness); else if (fields_array[i] == e_direction && tSeq->direction != 0) sprintf(save_str, "\n%s\t%d", at[fields_array[i]], tSeq->direction); else if (fields_array[i] == e_orig_strand && tSeq->orig_strand != 0) sprintf(save_str, "\n%s\t%d", at[fields_array[i]], tSeq->orig_strand); else if (fields_array[i] == e_orig_direction && tSeq->orig_direction != 0) sprintf(save_str, "\n%s\t%d", at[fields_array[i]], tSeq->orig_direction); else if (fields_array[i] == e_offset) sprintf(save_str, "\n%s\t\t%d", at[fields_array[i]], tSeq->offset); else if (fields_array[i] == e_group_number && tSeq->group_number != 0) sprintf(save_str, "\n%s\t%d", at[fields_array[i]], tSeq->group_number); else if (fields_array[i] == e_group_ID) sprintf(save_str, "\n%s\t%d", at[fields_array[i]], tSeq->group_ID); else if (fields_array[i] == e_type && tSeq->type[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->type); else if (fields_array[i] == e_barcode && tSeq->barcode[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->barcode); else if (fields_array[i] == e_name && tSeq->name[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->name); else if (fields_array[i] == e_status && tSeq->status[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->status); else if (fields_array[i] == e_walk && tSeq->walk[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->walk); else if (fields_array[i] == e_sequence_ID && tSeq->sequence_ID[0] != '\0') sprintf(save_str, "\n%s\t\"%s\"", at[fields_array[i]], tSeq->sequence_ID); else if (fields_array[i] == e_creator && tSeq->creator[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->creator); else if (fields_array[i] == e_film && tSeq->film[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->film); else if (fields_array[i] == e_membrane && tSeq->membrane[0] != '\0') sprintf(save_str, "\n%s\t\"%s\"", at[fields_array[i]], tSeq->membrane); else if (fields_array[i] == e_source_ID && tSeq->source_ID[0] != '\0') sprintf(save_str, "\n%s\t\"%s\"", at[fields_array[i]], tSeq->source_ID); else if (fields_array[i] == e_contig && tSeq->contig[0] != '\0') sprintf(save_str, "\n%s\t\t\"%s\"", at[fields_array[i]], tSeq->contig); else if (fields_array[i] == e_baggage && tSeq->baglen != 0) { if (save_str_size < tSeq->baglen + 2) { save_str_size = tSeq->baglen + 2; save_str = (char *)Realloc(save_str, save_str_size); } save_str[0] = '\n'; save_str[1] = '\0'; /* put a \0 at the end of baggage. */ strncat(save_str, tSeq->baggage, tSeq->baglen); while (save_str[tSeq->baglen - 1] == '\n') { tSeq->baglen--; } save_str[tSeq->baglen] = '\0'; } if (save_str[0] != '\0') { if (first_field == TRUE) { first_field = FALSE; fprintf(fp, "{"); } fprintf(fp, "%s", save_str); } } if (first_field == FALSE) { fprintf(fp, "\n}\n"); } if (all_fields == TRUE && fields_array != NULL) { Cfree(fields_array); fields_array = NULL; } if (save_str != NULL) { Cfree(save_str); save_str = NULL; } if (first_field == TRUE) return 0; else return 1; } /********* * * ReadRecord() reads one record from fp into tSeq. fp remains at * the finishing position so that next time when ReadRecord() is * called, it reads the next record. * * The caller program should LOCATE MEMORY for the tSeq before calling. * * ReadRecord() returns: * TRUE if no error; * FALSE if anything is wrong * -1 if end-of-file is reached * **********/ int ReadRecord(fp, tSeq) FILE *fp; Sequence *tSeq; { char field_name[20], line[256], orig_line[256]; int temp_str_size, start, end, l, max_len = 255; char *fgets_ret, *temp_str, *fgets_ret1; int start_rec = FALSE; int need_to_read = TRUE; char started = 'F'; void InitRecord(); void FreeRecord(); temp_str = (char *)Calloc(256, 1); temp_str_size = 256; InitRecord(tSeq); if (tSeq->c_elem == NULL) { tSeq->c_elem = (char *)Calloc(256, 1); tSeq->seqmaxlen = 256; } tSeq->c_elem[0] = '\0'; /* read file line-by-line. */ while (need_to_read == TRUE && ((fgets_ret = fgets(line, max_len, fp)) != NULL || start_rec == TRUE)) { strcpy(orig_line, line); end = strlen(line) - 1; while (end >= 0 && (line[end] == ' ' || line[end] == '\t' || line[end] == ',' || line[end] == '\n')) end--; /* ignore empty lines. */ if (end == -1) continue; if (line[end] == '{') started = 'T'; /* to ignore the lines between a } and a {. */ while (started == 'F' && fgets_ret != NULL) { fgets_ret = fgets(line, max_len, fp); strcpy(orig_line, line); end = strlen(line) - 1; while (end >= 0 && (line[end] == ' ' || line[end] == '\t' || line[end] == ',' || line[end] == '\n')) end--; /* ignore empty lines. */ if (end == -1) continue; if (line[end] == '{') started = 'T'; } if (fgets_ret == NULL) return -1; if (end < 0) { } else if ((line[end] == '}') && (end == 0)) { start_rec = FALSE; need_to_read = FALSE; } else if (line[end] == '{' && end <= 10) { start_rec = TRUE; } else { if (line[end] == '}') { need_to_read = FALSE; start_rec = FALSE; } /* locate the tag. */ start = 0; while (line[start] == ' ' || line[start] == '\t' || line[start] == '\n' || line[start] == '{') start++; end = start + 1; while (line[end] != ' ' && line[end] != '\t' && line[end] != '\n' && line[end] != '\0') end++; strncpy(field_name, line + start, end - start); field_name[end - start] = '\0'; /* process the field value. */ /* * creation_date, probing_date, or autorad_date */ if (strcmp(field_name, "creation-date") == 0) { while (!isdigit(line[end])) end++; if (strToDate(line + end, tSeq->creation_date) == -1) { return FALSE; } } else if (strcmp(field_name, "probing-date") == 0) { while (line[end] != '\0' && !isdigit(line[end])) end++; if (line[end] != '\0' && strToDate(line + end, tSeq->probing_date) == -1) { return FALSE; } } else if (strcmp(field_name, "autorad-date") == 0) { while (line[end] != '\0' && !isdigit(line[end])) end++; if (line[end] != '\0' && strToDate(line + end, tSeq->autorad_date) == -1) { return FALSE; } } /* * sequence or comments. */ else if (strcmp(field_name, "sequence") == 0 || strcmp(field_name, "comments") == 0) { temp_str[0] = '\0'; /* locate the first ". */ while (line[end++] != '"') ; start = end; end = strlen(line); /* ---"\n\0. */ if (line[end - 2] == '"') end -= 2; else if (line[end - 1] == '\n' && strcmp(field_name, "sequence") == 0) end--; while (temp_str_size < end - start + 1) { temp_str_size *= 2; temp_str = (char *)Realloc( temp_str, temp_str_size); } if (end - start > 0) strncat(temp_str, line + start, end - start); /* Read the second line of the seq. or comments, if any. end-start<0 is the case that " is the only char this line.*/ if (line[strlen(line) - 2] != '"' || end - start < 0) { while ((fgets_ret1 = fgets(line, max_len, fp)) != NULL) { /* IGNORE empty lines. 5/4/92 */ int empty_line = 0; while (line[empty_line] == ' ') empty_line++; if (line[empty_line] == '\n') { continue; /* strncat(temp_str, * line, end); 5/4/92 */ } l = strlen(line) - 1; if (line[l - 1] == '"') end = l - 1; else end = l; if (line[end] == '\n' && strcmp(field_name, "comments") == 0) end++; /* Gurantee 'end' chars for the * string, one for ", and one * for \0. */ while (temp_str_size - strlen(temp_str) < end + 3) { temp_str_size *= 2; temp_str = (char *)Realloc( temp_str, temp_str_size); } strncat(temp_str, line, end); if (line[l - 1] == '"') break; } if (fgets_ret1 == NULL && need_to_read == TRUE) { fprintf(stderr, "ReadRecord(): " "incomplete record.\n"); return FALSE; } } l = strlen(temp_str); if (strcmp(field_name, "comments") == 0) { if (tSeq->commentsmaxlen == 0) { tSeq->comments = (char *)Calloc(l + 1, 1); tSeq->commentsmaxlen = l + 1; } else { while (tSeq->commentslen + l + 1 > tSeq->commentsmaxlen) { tSeq->commentsmaxlen *= 2; tSeq->comments = (char *)Realloc( tSeq->comments, tSeq->commentsmaxlen); } } tSeq->comments[tSeq->commentslen] = '\0'; strcat(tSeq->comments, temp_str); tSeq->commentslen += l; } else /* it is the sequence. */ { if (tSeq->seqmaxlen == 0) { tSeq->c_elem = (char *)Calloc(l + 1, 1); } else if (l + 1 > tSeq->seqmaxlen) { tSeq->c_elem = (char *)Realloc( tSeq->c_elem, l + 1); } tSeq->seqmaxlen = l + 1; tSeq->seqlen = l; strcpy(tSeq->c_elem, temp_str); } } /* * Integer or String. */ else { /* locate the value: a string or an integer. */ while (line[end] == ' ' || line[end] == '\t') end++; if (line[end] == '"') { /* It is a string. */ end++; start = end; while (line[end] != '\0' && line[end] != '"') end++; /* * strncat will not put a \0 at the end * of a string if the copying string is * longer than n. */ line[end++] = '\0'; } else { /* It is an integer. */ start = end; while (line[end] != ' ' && line[end] != '\t' && line[end] != '\n' && line[end] != '\0') end++; strncpy(temp_str, line + start, end - start + 1); /*4/26 add 1*/ temp_str[end - start] = '\0'; } /* assign to an integer field. */ if (strcmp(field_name, "laneset") == 0) tSeq->laneset = atoi(temp_str); else if (strcmp(field_name, "strandedness") == 0) tSeq->strandedness = atoi(temp_str); else if (strcmp(field_name, "direction") == 0) tSeq->direction = atoi(temp_str); else if (strcmp(field_name, "orig_strand") == 0) tSeq->orig_strand = atoi(temp_str); else if (strcmp(field_name, "orig_direction") == 0) tSeq->orig_direction = atoi(temp_str); else if (strcmp(field_name, "offset") == 0) tSeq->offset = atoi(temp_str); else if (strcmp(field_name, "group-number") == 0) tSeq->group_number = atoi(temp_str); else if (strcmp(field_name, "group-ID") == 0) tSeq->group_ID = atoi(temp_str); /* assign to a string field. */ else if (strcmp(field_name, "type") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->type, line + start, end - start); tSeq->type[end - start] = '\0'; } else if (strcmp(field_name, "barcode") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->barcode, line + start, end - start); tSeq->barcode[end - start] = '\0'; } else if (strcmp(field_name, "name") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->name, line + start, end - start); tSeq->name[end - start] = '\0'; } else if (strcmp(field_name, "status") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->status, line + start, end - start); tSeq->status[end - start] = '\0'; } else if (strcmp(field_name, "walk") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->walk, line + start, end - start); tSeq->walk[end - start] = '\0'; } else if (strcmp(field_name, "sequence-ID") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->sequence_ID, line + start, end - start); tSeq->sequence_ID[end - start] = '\0'; } else if (strcmp(field_name, "creator") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->creator, line + start, end - start); tSeq->creator[end - start] = '\0'; } else if (strcmp(field_name, "film") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->film, line + start, end - start); tSeq->film[end - start] = '\0'; } else if (strcmp(field_name, "membrane") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->membrane, line + start, end - start); tSeq->membrane[end - start] = '\0'; } else if (strcmp(field_name, "source-ID") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->source_ID, line + start, end - start); tSeq->source_ID[end - start] = '\0'; } else if (strcmp(field_name, "contig") == 0) { if (end - start > 31) end = start + 31; strncpy(tSeq->contig, line + start, end - start); tSeq->contig[end - start] = '\0'; } else { if (tSeq->bagmaxlen == 0) { tSeq->bagmaxlen = 4 * strlen(orig_line); tSeq->baggage = (char *)Calloc( tSeq->bagmaxlen, 1); } else { while (tSeq->bagmaxlen < tSeq->baglen + 2 + strlen(orig_line)) { tSeq->bagmaxlen *= 2; tSeq->baggage = (char *)Realloc( tSeq->baggage, tSeq->bagmaxlen); } } if (tSeq->baglen == 0) { /* tSeq->baggage[0] = '\n'; tSeq->baggage[1] = '\0'; tSeq->baglen = 1; */ tSeq->baggage[0] = '\0'; } /* strcat(tSeq->baggage, "\n");*/ strcat(tSeq->baggage, orig_line); tSeq->baglen += strlen(orig_line); } } } } if (temp_str != NULL) { Cfree(temp_str); temp_str = NULL; } if (start_rec == FALSE && fgets_ret == NULL) { /* end of file, did not get a record. */ return -1; } else return TRUE; } /********* * * Initialize a record. * * Note: no memory allocation is performed. * **********/ void InitRecord(tSeq) Sequence *tSeq; { int i; strcpy(tSeq->type, "DNA"); tSeq->barcode[0] = '\0'; tSeq->name[0] = '\0'; tSeq->status[0] = '\0'; strcpy(tSeq->walk, "FALSE"); tSeq->sequence_ID[0] = '\0'; tSeq->c_elem = NULL; tSeq->seqlen = 0; tSeq->seqmaxlen = 0; for (i = 0; i < 6; i++) { tSeq->creation_date[i] = 0; tSeq->probing_date[i] = 0; tSeq->autorad_date[i] = 0; } tSeq->creator[0] = '\0'; tSeq->film[0] = '\0'; tSeq->membrane[0] = '\0'; tSeq->source_ID[0] = '\0'; tSeq->contig[0] = '\0'; tSeq->laneset = -1; tSeq->direction = 1; /* (1/-1/0),default: 5 to 3. */ tSeq->strandedness = 1; /* (1/2/0), default: primary.*/ tSeq->orig_direction = 0; /* (0 unknown, -1:3'->5', 1:5'->3') */ tSeq->orig_strand = 0; /* (0 unknown, 1:primary, 2:secondary) */ tSeq->offset = 0; tSeq->comments = NULL; tSeq->commentslen = 0; tSeq->commentsmaxlen = 0; tSeq->baggage = NULL; tSeq->baglen = 0; tSeq->bagmaxlen = 0; tSeq->group_number = 0; tSeq->group_ID = 0; } void CopyRecord(to, from) Sequence *from, *to; { int i; InitRecord(to); strcpy(to->type, from->type); strcpy(to->barcode, from->barcode); strcpy(to->name, from->name); strcpy(to->status, from->status); strcpy(to->walk, from->walk); strcpy(to->sequence_ID, from->sequence_ID); if (from->c_elem != NULL) { to->seqlen = from->seqlen; to->seqmaxlen = from->seqmaxlen; to->c_elem = (char *)Calloc(to->seqmaxlen, 1); strncpy(to->c_elem, from->c_elem, to->seqlen); to->c_elem[to->seqlen] = '\0'; } for (i = 0; i < 6; i++) { to->creation_date[i] = from->creation_date[i]; to->probing_date[i] = from->probing_date[i]; to->autorad_date[i] = from->autorad_date[i]; } strcpy(to->creator, from->creator); strcpy(to->film, from->film); strcpy(to->membrane, from->membrane); strcpy(to->source_ID, from->source_ID); strcpy(to->contig, from->contig); to->laneset = from->laneset; to->strandedness = from->strandedness; to->orig_direction = from->orig_direction; to->orig_strand = from->orig_strand; to->direction = from->direction; to->offset = from->offset; if (from->comments != NULL) { to->commentsmaxlen = from->commentsmaxlen; to->commentslen = from->commentslen; to->comments = (char *)Calloc(to->commentsmaxlen, 1); strncpy(to->comments, from->comments, to->commentslen); to->comments[to->commentslen] = '\0'; } if (from->baggage != NULL) { to->baglen = from->baglen; to->bagmaxlen = from->bagmaxlen; to->baggage = (char *)Calloc(to->bagmaxlen, 1); strncpy(to->baggage, from->baggage, to->baglen); to->baggage[to->baglen] = '\0'; } to->group_number = from->group_number; to->group_ID = from->group_ID; } /********* * * Clean the contents of a record without changing the memory size. * **********/ void CleanRecord(tSeq) Sequence *tSeq; { int i; strcpy(tSeq->type, "DNA"); tSeq->name[0] = '\0'; tSeq->barcode[0] = '\0'; tSeq->status[0] = '\0'; strcpy(tSeq->walk, "FALSE"); tSeq->sequence_ID[0] = '\0'; if (tSeq->c_elem != NULL) tSeq->c_elem[0] = '\0'; tSeq->seqlen = 0; for (i = 0; i < 6; i++) { tSeq->creation_date[i] = 0; tSeq->probing_date[i] = 0; tSeq->autorad_date[i] = 0; } tSeq->creator[0] = '\0'; tSeq->film[0] = '\0'; tSeq->membrane[0] = '\0'; tSeq->source_ID[0] = '\0'; tSeq->contig[0] = '\0'; tSeq->laneset = -1; tSeq->strandedness = 1; /* (1/2/0), default. primary. */ tSeq->direction = 1; /* (1/-1/0),default. 5 to 3. */ tSeq->orig_direction = 0; tSeq->orig_strand = 0; tSeq->offset = 0; if (tSeq->comments != NULL) tSeq->comments[0] = '\0'; tSeq->commentslen = 0; if (tSeq->baggage != NULL) tSeq->baggage[0] = '\0'; tSeq->baglen = 0; tSeq->group_number = 0; tSeq->group_ID = 0; } /********* * * Free memory for a record. * **********/ void FreeRecord(tSeq) Sequence **tSeq; { Cfree((*tSeq)->c_elem); Cfree((*tSeq)->comments); Cfree((*tSeq)->baggage); Cfree((*tSeq)); (*tSeq)->c_elem = NULL; (*tSeq)->comments = NULL; (*tSeq)->baggage = NULL; (*tSeq) = NULL; } static max_day[2][13] = {{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; /*********** * * strToDate() locates first six integers and translates them * into a date. * * String should have the format of "mm/dd/yy hh/mn/sc xm", * with anything except digit as the delimiters. * * Order in the date array is (0->5): (yy mm dd hh mn sc). * * Returns FALSE if anything is wrong, TRUE otherwise. * **********/ int strToDate(str, date) const char *str; int date[]; { int leap; char temp_str[2]; char longstr[256]; /* locate 6 integers. */ strcpy(longstr, str); strcat(longstr, " -1/-1/-1 "); sscanf(longstr, "%d%*c%d%*c%d%*c%d%*c%d%*c%d%2s", &date[1], &date[2], &date[0], &date[3], &date[4], &date[5], temp_str); /* verify year. */ if (date[0] >= 100) date[0] -= 1900; /* verify month. */ if (date[1] > 12 || date[1] < 1) { fprintf(stderr, "invalid month %s\n", str); return FALSE; } /* verify day. */ if ((date[0] % 4 == 0 && date[0] % 100 != 0) || date[0] % 400 == 0) leap = 1; else leap = 0; if (date[2] > max_day[leap][date[1]] || date[2] < 1) { fprintf(stderr, "invalid day %s\n", str); return FALSE; } /* verify time. */ if (strncmp(temp_str, "pm", 2) == 0) date[3] += 12; if (date[3] < -1 || date[3] > 23 || date[4] < -1 || date[4] > 59 || date[5] < -1 || date[5] > 59) { fprintf(stderr, "invalid time %s\n", str); return FALSE; } return TRUE; } /********** * * Default_IUPAC_Trans() translates an ASCII IUPAC code into * an (char) integer. * **********/ char Default_IUPAC_Trans(base) char base; { int i; char c; c = base | 32; if (c == 'u') return (char)8; if (c == 'p') return (char)5; for (i = 0; i < 16; i++) { if (c == Default_DNA_Trans[i]) { return ((char)i); } } fprintf(stderr, "Character %c is not IUPAC coded.\n", base); return -1; } char *uniqueID(); /*********** * * MakeConsensus() takes an array of aligned sequence and an * initialized 'Sequence' consensus. It modifies the consensus. * * The memory that 'consensus' has located will be reused, and * consensus->seqmaxlen will be modified if necessary. * * Returns TRUE if successful, FALSE otherwise. * **********/ int MakeConsensus(aligned, numOfAligned, consensus, group) Sequence aligned[]; /* input. */ int numOfAligned; /* input. */ Sequence *consensus; /* input and output. */ int group; /* Group number (if zero, use all groups) */ { char occurence; int i, j, index; int max_cons = INT_MIN; int min_offset = INT_MAX; char temp_str[2]; unsigned char case_bit; /* * Search for the minimun offset. */ for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { SeqNormal(&aligned[i]); min_offset = MIN(min_offset, aligned[i].offset); max_cons = MAX(max_cons, aligned[i].offset + aligned[i].seqlen); } } /* * Decide consensus base by base. */ CleanRecord(consensus); consensus->offset = min_offset; if (aligned[0].contig[0] != '\0') { strcpy(consensus->name, aligned[0].contig); strcat(consensus->name, "."); } else if (strncmp(aligned[0].name, "cons.", 5) != 0) { strcpy(consensus->name, "cons."); strcat(consensus->name, aligned[0].name); } strcpy(consensus->sequence_ID, uniqueID()); strcpy(consensus->contig, aligned[0].contig); for (j = min_offset; j < max_cons; j++) { occurence = 00; case_bit = 0; for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { if (j >= aligned[i].offset && j < aligned[i].offset + aligned[i].seqlen) { index = j - aligned[i].offset; if (aligned[i].c_elem[index] == '-') case_bit = 32; else if (case_bit == 0) case_bit |= (aligned[i].c_elem[index] & 32); occurence = occurence | Default_IUPAC_Trans( aligned[i].c_elem[index]); if (occurence != 1 && occurence != 2 && occurence != 4 && occurence != 8) case_bit = 32; /* printf("%1c", aligned[i].c_elem[index]); */ } /* else printf(" "); */ } } sprintf(temp_str, "%1c", Default_DNA_Trans[(int)occurence]); if (case_bit == 0) temp_str[0] = toupper(temp_str[0]); if (InsertElems(consensus, j, temp_str) == FALSE) return FALSE; /* printf(" cons[%d]=%1c\n", j - min_offset, consensus->c_elem[j - min_offset]); */ } return TRUE; } /*********** * * MakeScore() takes an array of aligned sequence, and generates * a consensus. Note, memory for (Sequence* consensus) should be * located before it is passed to this function. * * Returns TRUE if successful, FALSE otherwise. * **********/ int MakeScore(aligned, numOfAligned, consensus, group) Sequence aligned[]; /* input. */ int numOfAligned; /* input. */ Sequence *consensus; /* input and output. */ int group; { int i, j, index, score; int max_cons = INT_MIN; int min_offset = INT_MAX; int As, Cs, Ts, Gs, Ns, tot_in_grp; char temp_str[2], occurence, base; int max_occ; static char map[17] = "0123456789ABCDEF"; /* * Search for the minimum offset. */ for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { SeqNormal(&aligned[i]); min_offset = MIN(min_offset, aligned[i].offset); max_cons = MAX(max_cons, aligned[i].offset + aligned[i].seqlen); } } /* * Decide consensus base by base. */ CleanRecord(consensus); consensus->offset = min_offset; if (aligned[0].contig[0] != '\0') { strcpy(consensus->name, aligned[0].contig); strcat(consensus->name, "."); } else if (strncmp(aligned[0].name, "cons.", 5) != 0) { strcpy(consensus->name, "cons."); strcat(consensus->name, aligned[0].name); } strcpy(consensus->sequence_ID, uniqueID()); strcpy(consensus->contig, aligned[0].contig); for (j = min_offset; j < max_cons; j++) { As = Cs = Ts = Gs = Ns = 0; tot_in_grp = 0; occurence = 00; for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { if (j >= aligned[i].offset && j < aligned[i].offset + aligned[i].seqlen) { tot_in_grp++; index = j - aligned[i].offset; /* occurence = Default_IUPAC_Trans(aligned[i].c_elem[index]); if((occurence & 01) == 01) As++; if((occurence & 02) == 02) Cs++; if((occurence & 04) == 04) Gs++; if((occurence & 010) == 010) Ts++; */ base = (aligned[i].c_elem[index] | 32); if (base == 'a') As++; else if (base == 'c') Cs++; else if (base == 'g') Gs++; else if (base == 't') Ts++; else if (base == 'n' || base == '-') Ns++; /* printf("%1c", aligned[i].c_elem[index]); */ } /* else printf(" "); */ } } max_occ = MAX(As, MAX(Cs, MAX(Gs, Ts))); /* socre = [0,E], F:all mismatches are either 'n' or '-' */ if (Ns != 0 && max_occ + Ns == tot_in_grp) score = 15; else score = max_occ * 14 / tot_in_grp; /* if( score > 0xF ) { if (InsertElems(consensus, j, "F") == FALSE) { return FALSE; } } else { */ sprintf(temp_str, "%1c", map[score]); if (InsertElems(consensus, j, temp_str) == FALSE) { return FALSE; } /* printf(" %2d-%2d-%2d-%2d %2d cons[%d]=%1c\n", Ts, Gs, Cs, As, score, j, consensus->c_elem[j]); */ } return TRUE; } /*********** * * MakePhyloMask() takes an array of aligned sequence, and generates * a mask that has a '0' for all columns except the columns which contain * a, c, g, t and u only. * * Returns TRUE if successful, FALSE otherwise. * **********/ int MakePhyloMask(aligned, numOfAligned, consensus, group, acgtu) Sequence aligned[]; /* input. */ int numOfAligned; /* input. */ Sequence *consensus; /* input and output. */ int acgtu[]; int group; { int i, j, cnt, max_cons = INT_MIN, min_offset = INT_MAX; /* * Search for the minimum offset. */ for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { SeqNormal(&aligned[i]); min_offset = MIN(min_offset, aligned[i].offset); max_cons = MAX(max_cons, aligned[i].offset + aligned[i].seqlen); } } /* * Decide consensus base by base. */ CleanRecord(consensus); consensus->offset = min_offset; strcpy(consensus->name, "mask"); strcpy(consensus->type, "MASK"); strcpy(consensus->sequence_ID, uniqueID()); strcpy(consensus->contig, aligned[0].contig); consensus->seqlen = max_cons - min_offset; if (consensus->seqmaxlen == 0) { consensus->c_elem = (char *)Calloc(max_cons - min_offset + 5, 1); consensus->seqmaxlen = max_cons - min_offset + 5; } else if (consensus->seqmaxlen < max_cons - min_offset) { consensus->seqmaxlen = max_cons - min_offset + 5; consensus->c_elem = (char *)Realloc(consensus->c_elem, max_cons - min_offset + 5); } cnt = 0; for (j = min_offset; j < max_cons; j++) { consensus->c_elem[j - min_offset] = '1'; for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { if (j < aligned[i].offset || j >= aligned[i].offset + aligned[i].seqlen || acgtu[aligned[i] .c_elem[j - aligned[i].offset]] == 0) { consensus->c_elem[j - min_offset] = '0'; cnt++; break; } } } } fprintf(stderr, "\nNumber of 1s in mask: %d\n", max_cons - min_offset - cnt); fprintf(stderr, "Number of 0s in mask: %d\n\n", cnt); return TRUE; } /*********** * * MajorityCons() takes an array of aligned sequence, and generates * a MAJORITY consensus. * Note, memory for (Sequence* consensus) should be * located before it is passed to this function. * * Returns TRUE if successful, FALSE otherwise. * **********/ int MajorityCons(aligned, numOfAligned, consensus, group, major_perc) Sequence aligned[]; /* input. */ int numOfAligned; /* input. */ Sequence *consensus; /* input and output. */ int group, major_perc; { int i, j, index, score, ii, base, max; int max_cons = INT_MIN; int min_offset = INT_MAX; char temp_str[2], occurence; int *cnts, tot_in_grp; unsigned char case_bit; cnts = (int *)Calloc(16, sizeof(int)); /* * Search for the minimum offset. */ for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { SeqNormal(&aligned[i]); min_offset = MIN(min_offset, aligned[i].offset); max_cons = MAX(max_cons, aligned[i].offset + aligned[i].seqlen); } } /* * Decide consensus base by base. */ CleanRecord(consensus); consensus->offset = min_offset; if (aligned[0].contig[0] != '\0') { strcpy(consensus->name, aligned[0].contig); strcat(consensus->name, "."); } else if (strncmp(aligned[0].name, "cons.", 5) != 0) { strcpy(consensus->name, "cons."); strcat(consensus->name, aligned[0].name); } strcpy(consensus->sequence_ID, uniqueID()); strcpy(consensus->contig, aligned[0].contig); for (j = min_offset; j < max_cons; j++) { case_bit = 0; occurence = 00; tot_in_grp = 0; for (ii = 0; ii < 16; ii++) cnts[ii] = 0; for (i = 0; i < numOfAligned; i++) { if (group == 0 || aligned[i].group_number == group) { if (j >= aligned[i].offset && j < aligned[i].offset + aligned[i].seqlen) { tot_in_grp++; index = j - aligned[i].offset; if (aligned[i].c_elem[index] == '-') case_bit = 32; else if (case_bit == 0) case_bit |= (aligned[i].c_elem[index] & 32); occurence |= Default_IUPAC_Trans( aligned[i].c_elem[index]); cnts[(int)Default_IUPAC_Trans( aligned[i].c_elem[index])]++; if (case_bit == 0 && occurence != 1 && occurence != 2 && occurence != 4 && occurence != 8) case_bit = 32; } } } max = 0; for (ii = 0; ii < 16; ii++) { if (cnts[ii] > max) { max = cnts[ii]; base = ii; } } if (max * 100 / tot_in_grp >= major_perc) { /* follow the majority rule. */ sprintf(temp_str, "%1c", Default_DNA_Trans[base]); } else { /* use IUPAC code. */ sprintf(temp_str, "%1c", Default_DNA_Trans[(int)occurence]); } if (case_bit == 0) temp_str[0] = toupper(temp_str[0]); if (InsertElems(consensus, j, temp_str) == FALSE) { return FALSE; } } return TRUE; } /*********** * * ReadGDEtoHGL() reads a GDE formated file into an array of HGL structure. * * Return -1 if anything is wrong, number_of_sequence otherwise. * ***********/ int ReadGDEtoHGL(fp, tSeq_arr) FILE *fp; Sequence **tSeq_arr; { char line[MAXLINELEN]; int ptr, num_seq, max_num_seq = 20; int seq_len = 200; char *newline; (*tSeq_arr) = (Sequence *)Calloc(max_num_seq, sizeof(Sequence)); num_seq = -1; while (fgets(line, MAXLINELEN - 2, fp) != NULL) /* spaces for \n\0 */ { /* ptr points to the last char. */ ptr = strlen(line) - 1; /* clear up the tail. */ while (ptr >= 0 && (line[ptr] == '\n' || line[ptr] == ' ' || line[ptr] == '\t')) ptr--; line[ptr + 1] = '\0'; if (ptr <= 0) { /* it is an empty line. */ } else if (line[0] == '#') { if (++num_seq == max_num_seq) { max_num_seq *= 2; /* printf("max_num_seq = %d\n", max_num_seq); */ (*tSeq_arr) = (Sequence *)Realloc( (*tSeq_arr), max_num_seq * sizeof(Sequence)); } InitRecord((*tSeq_arr)[num_seq]); if (line[ptr] == '<') { (*tSeq_arr)[num_seq].direction = 2; /* 3to5 */ line[ptr] = '\0'; } else if (line[ptr] == '>') { (*tSeq_arr)[num_seq].direction = 1; /* 5to3 */ line[ptr] = '\0'; } strcpy((*tSeq_arr)[num_seq].sequence_ID, line + 1); } else { ptr = 0; if ((*tSeq_arr)[num_seq].seqlen == 0) { /* determine the offset. */ while (line[ptr] != '\0' && line[ptr] == '-') { ptr++; } (*tSeq_arr)[num_seq].offset += ptr; } if (line[ptr] != '\0') { newline = line + ptr; if ((*tSeq_arr)[num_seq].seqmaxlen == 0) { (*tSeq_arr)[num_seq].c_elem = (char *)Calloc(seq_len, 1); (*tSeq_arr)[num_seq].c_elem[0] = '\0'; (*tSeq_arr)[num_seq].seqmaxlen = seq_len; } else { while ((*tSeq_arr)[num_seq].seqlen + strlen(newline) + 1 > (*tSeq_arr)[num_seq].seqmaxlen) { seq_len *= 2; (*tSeq_arr)[num_seq] .c_elem = (char *)Realloc( (*tSeq_arr)[num_seq].c_elem, seq_len); (*tSeq_arr)[num_seq].seqmaxlen = seq_len; } } strcat((*tSeq_arr)[num_seq].c_elem, newline); (*tSeq_arr)[num_seq].seqlen = strlen((*tSeq_arr)[num_seq].c_elem); } } } return (num_seq + 1); } /******** * * InsertElems returns TRUE if successful, FALSE otherwise. * ********/ int InsertElems(seq, pos, c) Sequence *seq; /* Sequence */ int pos; /* Position (in respect to the master consensus) * to insert BEFORE * always move string to the right. */ char c[]; /*Null terminated array of elements to insert */ { int dashes, j, len; len = strlen(c); if (seq->seqlen == 0) { /* get rid of '-'s at right. */ /* dashes = len-1; while(dashes >= 0 && c[dashes] == '-') dashes--; if(dashes < 0) { seq->offset = pos; return TRUE; } c[dashes+1] = '\0'; */ /* clear out '-'s at left. */ dashes = 0; /* while(c[dashes] == '-') dashes++; c += dashes; len = strlen(c); pos += dashes; */ if (seq->seqmaxlen == 0) { seq->c_elem = (char *)Calloc(len + 1, 1); seq->seqmaxlen = len + 1; } else if (len + 1 >= seq->seqmaxlen) { seq->c_elem = (char *)Realloc(seq->c_elem, len + 1); seq->seqmaxlen = len + 1; } strcpy(seq->c_elem, c); seq->seqlen = len; seq->offset = pos; return TRUE; } /* to make sure there is a space for '\0'. */ if (seq->seqlen > seq->seqmaxlen) { fprintf( stderr, "InsertElems(): seqlen>seqmaxlen. Something is wrong.\n"); return FALSE; } else { while (seq->seqlen + 1 >= seq->seqmaxlen) { seq->seqmaxlen *= 2; seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); } } seq->c_elem[seq->seqlen] = '\0'; if (pos < seq->offset) /* insert to the left of the seq. */ { /* ignore the dashes at the left. */ dashes = 0; /* while(dashes < len && c[dashes] == '-') dashes++; if(c[dashes] == '\0') { seq->offset += len; return TRUE; } c += dashes; len -= dashes; */ if (seq->seqlen + len + seq->offset - pos > seq->seqmaxlen) { seq->seqmaxlen = seq->seqlen + len + seq->offset - pos + 256; seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); } /* copy the old string including the last '\0'. */ for (j = seq->seqlen; j >= 0; j--) seq->c_elem[j + len + seq->offset - pos] = seq->c_elem[j]; /* insert dashes. */ for (j = len; j < len + seq->offset - pos; j++) seq->c_elem[j] = '-'; /* copy the inserted string. */ for (j = 0; j < len; j++) seq->c_elem[j] = c[j]; /* detector. */ if (c[j] != '\0') fprintf(stderr, "InsertElems: Problem.....\n"); seq->seqlen = strlen(seq->c_elem); /* seq->offset = pos; commented on 6-3-91 */ seq->offset = pos + dashes; if (dashes > 0) printf("\nInsertElems(): dashes is not zero.\n\n"); } else if (pos - seq->offset >= seq->seqlen) /* insert to the right. */ { /* ignore the dashes at the right. */ /* dashes = len -1; while(dashes >= 0 && c[dashes] == '-') dashes--; if(dashes < 0) return TRUE; len = dashes+1; c[len] = '\0'; */ if (pos - seq->offset + len > seq->seqmaxlen) { seq->seqmaxlen = pos - seq->offset + len + 256; seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); } /* insert dashes. */ for (j = seq->seqlen; j < pos - seq->offset; j++) seq->c_elem[j] = '-'; /* copy the inserted string. */ for (j = 0; j < len; j++) seq->c_elem[pos - seq->offset + j] = c[j]; seq->c_elem[pos - seq->offset + len] = '\0'; /* detector. */ if (c[j] != '\0') fprintf(stderr, "InsertElems: Problem too .....\n"); seq->seqlen = strlen(seq->c_elem); } else /* insert into the seq. */ { if (seq->seqlen + len > seq->seqmaxlen) { seq->seqmaxlen = seq->seqlen + len + 256; seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); } /* move the bottom part of the older string including the last * '\0'. */ for (j = seq->seqlen; j >= pos - seq->offset; j--) seq->c_elem[j + len] = seq->c_elem[j]; /* copy the inserted string. */ for (j = 0; j < len; j++) seq->c_elem[pos - seq->offset + j] = c[j]; /* detector. */ if (c[j] != '\0') fprintf(stderr, "InsertElems: Problem too too .....\n"); seq->seqlen = strlen(seq->c_elem); } return TRUE; } /****************************************************************** * * int GetArgs(argArray, numArgs) * Arg *argArray; * int numArgs; * * Return TRUE if successful, FALSE otherwise. * ******************************************************************/ #define MAX_ARGS 50 /* maximum args this can process */ int GetArgs(argArray, numArgs, argc, argv) Args *argArray; int numArgs; int argc; char **argv; { int i, j; Args *curarg; int noArgOK = TRUE; if ((argArray == NULL) || (numArgs == 0) || (numArgs > MAX_ARGS)) { fprintf(stderr, "GetArgs: Invalid number of args.\n"); return FALSE; } /* * Test if all are either 'default' or 'optional'. */ curarg = argArray; for (i = 0; i < numArgs; i++, curarg++) { if (curarg->strvalue[0] == '\0' && curarg->optional == 'F') { noArgOK = FALSE; break; } } /* * show usage if some arg is required but no arg is * supllied on command line. */ if (noArgOK == FALSE && argc == 1) { fprintf(stderr, "\n%s arguments:\n\n", argv[0]); curarg = argArray; for (i = 0; i < numArgs; i++, curarg++) { fprintf(stderr, " -%c %s ", curarg->tag, curarg->prompt); if (curarg->optional == 'T') fprintf(stderr, " [Optional]"); fprintf(stderr, "\n"); if (curarg->strvalue[0] != '\0') fprintf(stderr, " default = %s\n", curarg->strvalue); } fprintf(stderr, "\n"); return FALSE; } /* * Process */ for (i = 1; i < argc; i++) { if (argv[i][0] != '-') { fprintf(stderr, "Arguments must start with -"); return FALSE; } /* check the tag. */ curarg = argArray; for (j = 0; j < numArgs; j++, curarg++) { if ((argv[i][1] | 32) == (curarg->tag | 32)) break; } if (j == numArgs) { fprintf(stderr, "Invalid argument tag in %s\n", argv[i]); return FALSE; } strcpy(curarg->strvalue, argv[i] + 2); if (curarg->strvalue[0] == '\'' && curarg->strvalue[strlen(curarg->strvalue) - 1] == '\'') { char ttmm[256]; strcpy(ttmm, curarg->strvalue + 1); ttmm[strlen(ttmm) - 1] = '\0'; strcpy(curarg->strvalue, ttmm); } } return TRUE; } /********* * * GetCond interprets the -c argument, the condition. * * The condition will be set to NULL if no condition is specified, * that is, if you pass '&p' as the address of a cond* structure, * p will be set to NULL if no condition [(p == NULL) = TRUE]. * * Return TRUE if successful, FALSE otherwise. * *********/ int GetCond(arg, cond) char *arg; str_cond **cond; { int start, end, i, found; char message_buf[1000]; if (strcmp(arg, "null") == 0) { (*cond) = NULL; return TRUE; } else { (*cond) = (str_cond *)Calloc(1, sizeof(str_cond)); start = end = 0; /* find the field name. */ while (('a' <= arg[end] && arg[end] <= 'z') || ('A' <= arg[end] && arg[end] <= 'Z') || arg[end] == '-') end++; found = FALSE; for (i = 0; i < NUM_OF_FIELDS && found == FALSE; i++) { if (strncmp(arg, at[i], strlen(at[i])) == 0) { (*cond)->field = i; /* condition on field &at[i]. */ found = TRUE; break; } } if (found == FALSE) { strncpy(message_buf, arg, end - start); message_buf[end - start] = '\0'; fprintf(stderr, "Field %s not found.\n", message_buf); return FALSE; } start = end; end++; while (arg[end] == '=' || arg[end] == '!' || arg[end] == '>' || arg[end] == '<') end++; strncpy((*cond)->symbol, arg + start, end - start); (*cond)->symbol[end - start] = '\0'; if (strlen((*cond)->symbol) > 2 || strlen((*cond)->symbol) < 1 || (strlen((*cond)->symbol) == 1 && *((*cond)->symbol) != '>' && *((*cond)->symbol) != '<') || (strlen((*cond)->symbol) == 2 && (strncmp((*cond)->symbol, "!=", 2) != 0) && (strncmp((*cond)->symbol, "==", 2) != 0) && (strncmp((*cond)->symbol, ">=", 2) != 0) && (strncmp((*cond)->symbol, "<=", 2) != 0))) { fprintf(stderr, "Invalid condition.\n"); return FALSE; } if (arg[end] == '"' && arg[strlen(arg) - 1] == '"') { end++; arg[strlen(arg) - 1] = '\0'; } (*cond)->value = (char *)Calloc(strlen(arg) - end + 2, 1); strcpy((*cond)->value, arg + end); } return TRUE; } /********* * * GetFields interprets the -f arguments, the fields list. * * Returns number of selected fields, 0 if anything is wrong. * *********/ int GetFields(arg, selected_fields) char *arg; int selected_fields[]; { int start, end, i, found, list_done, i_selected; char message_buf[1000]; if (strcmp(arg, "all") == 0) { selected_fields[0] = -1; return NUM_OF_FIELDS; } else { start = end = 0; list_done = FALSE; i_selected = 0; while (list_done == FALSE) { while (arg[end] != '\0' && arg[end] != ',') { end++; } if (arg[end] == '\0') { list_done = TRUE; } found = FALSE; for (i = 0; i < NUM_OF_FIELDS && found == FALSE; i++) { if (strncmp(arg + start, at[i], strlen(at[i])) == 0) { selected_fields[i_selected++] = i; found = TRUE; start = end + 1; break; } } if (found == FALSE) { strncpy(message_buf, (arg + start), end - start); message_buf[end - start] = '\0'; fprintf(stderr, "Field %s not found.\n", message_buf); return 0; } end++; } } return i_selected; } static char *pairs[] = {"aa", "ac", "ag", "at", "ca", "cc", "cg", "ct", "ga", "gc", "gg", "gt", "ta", "tc", "tg", "tt"}; static int stemp[16] = {55, 98, 58, 57, 55, 86, 73, 58, 87, 136, 86, 98, 37, 87, 55, 55}; /******* * * MST() returns Mean Stacking Temperature for the given sequence, * returns -1 if anything is wrong. * *******/ float MST(c_elem) const char *c_elem; { int i, j, l; int tot_stemp = 0, non_amb_pairs = 0; char *seq; l = strlen(c_elem); seq = (char *)Calloc(l, 1 + 1); /* clean out dashes. */ j = 0; for (i = 0; i < l; i++) { if (c_elem[i] != '-') { seq[j] = c_elem[i] | 32; if (seq[j] == 'u') seq[j] = 't'; j++; } } seq[j] = '\0'; l = j; for (i = 0; i < l - 1; i++) { j = 0; while (j < 16 && strncmp(seq + i, pairs[j], 2) != 0) { j++; } /* ignore the pairing of an ambiguous base. */ if (j != 16) { tot_stemp += stemp[j]; non_amb_pairs++; } } if (seq != NULL) { Cfree(seq); seq = NULL; } return ((float)tot_stemp / (float)non_amb_pairs); } /******** * * SubStr() fill ss with a substring of at most 'length' chars and returns * TRUE. If anything is wrong, it sets ss to be empty and returns FALSE. * ********/ int SubStr(string, start, length, ss) const char *string; int start, length; char *ss; { int i; if (strlen(string) <= start) { fprintf(stderr, "SubStr(): starting point is beyond the boundary.\n"); ss[0] = '\0'; return FALSE; } for (i = start; string[i] != '\0' && i < start + length; i++) { ss[i - start] = string[i]; } ss[i - start] = '\0'; return TRUE; } /******* * * FindPattern() searches string for pattern. * Returns the number of appearences. * *******/ int FindPattern(string, pattern) const char *string; const char *pattern; { int i, sl, pl, num_app = 0; if (string == NULL || (sl = strlen(string)) == 0) return 0; pl = strlen(pattern); for (i = 0; i <= sl - pl; i++) { if (strncmp(string + i, pattern, pl) == 0) num_app++; } return num_app; } /******* * * FindPattern2(), same as FindPattern(), but returns the # * of appearences that do not overlap only. * *******/ int FindPattern2(string, pattern, orig_loc) const char *string; const char *pattern; int orig_loc; { int i, sl, pl, num_app = 0; if (string == NULL || (sl = strlen(string)) == 0) return 0; pl = strlen(pattern); for (i = 0; i <= sl - pl; i++) { if (abs(i - orig_loc) >= pl && strncmp(string + i, pattern, pl) == 0) num_app++; } return num_app; } /******* * * FindPatternNC() searches string for pattern , CASE INSENSITIVE. * Returns the number of appearences. * *******/ int FindPatternNC(string, pattern) const char *string; const char *pattern; { int i, j, sl, pl, num_app = 0; if (string == NULL || (sl = strlen(string)) == 0) return 0; pl = strlen(pattern); for (i = 0; i <= sl - pl; i++) { j = 0; while (j < pl && (string[i + j] | 32) == (pattern[j] | 32)) j++; if (j == pl) num_app++; } return num_app; } /******* * * Complementary() CHANGES the given DNA/RNA string to its complementary, * and returns TRUE. Returns FALSE if anything is wrong and keep the * given string unchanged. * *******/ int Complementary(sequence, type) char *sequence; char type; { int i, l; char *temp_str; l = strlen(sequence); temp_str = (char *)Calloc(l + 1, sizeof(char)); if (type == 'D' || type == 'd') type = 0; else if (type == 'R' || type == 'r') type = 1; else { fprintf(stderr, "Complementary(): type unknown. Type is D/d/R/r\n"); return (int)NULL; } for (i = 0; i < l; i++) { switch (sequence[i]) { case 'A': temp_str[i] = (type == 0) ? 'T' : 'U'; break; case 'a': temp_str[i] = (type == 0) ? 't' : 'u'; break; case 'C': temp_str[i] = 'G'; break; case 'c': temp_str[i] = 'g'; break; case 'G': temp_str[i] = 'C'; break; case 'g': temp_str[i] = 'c'; break; case 'T': case 'U': temp_str[i] = 'A'; break; case 't': case 'u': temp_str[i] = 'a'; break; } } temp_str[i] = '\0'; strcpy(sequence, temp_str); if (temp_str != NULL) { Cfree(temp_str); temp_str = NULL; } return TRUE; } /******** * * KnownSeq() returns an integer which is the index of the first * occurence of an ambiguous base in the seq. -1 if no ambiguous * base in the seq. * ********/ int KnownSeq(seq) char *seq; { int i; char c; for (i = 0; i < strlen(seq); i++) { c = seq[i] | 32; if (c != 'a' && c != 't' && c != 'g' && c != 'c' && c != 'u') return i; } return -1; } /******** * * Reverse() reverses the given string and returns TRUE. * (NOTE: Reverse() actually changes the string). * If anything goes wrong, leave seq unchanged. * * ********/ int Reverse(seq) char *seq; { int i, l; char c; l = strlen(seq); if (l < 2) { return TRUE; } for (i = 0; i < l / 2; i++) { c = seq[i]; seq[i] = seq[l - i - 1]; seq[l - i - 1] = c; } return TRUE; } /******** * * GoodOligos() returns a pointer to an array of subsequences that * do not contant secondary structure, nor self complementary structure. * Returns NULL if anything is wrong. * * l_bnd and r_bnd are regards to the head of the probe. * * Note: this program Calloc-s memory for the returned pointer. * The caller program is responsible of Freeing the memory when * not needed. * ********/ char **GoodOligos(c_elem, check_len, min_len, max_len, l_bnd, r_bnd) char *c_elem; int check_len, min_len, max_len, l_bnd, r_bnd; /* l_bnd and r_bnd are relative to c_elem, so they should be in [0,strlen(c_elem)] */ { int i, l, seq_len, max_num_probe, seq_cnt = 0; char **seq_set; char *seq, *subseq, *scd_str, *PossibleOligo; int BadOligo, PO_len, PO_index, PO_l; /* constant(s): */ /* to check if there is a substr of length 'no_repeat_len' appears * more than once in the PossibleOligo. */ int no_repeat_len = 15; seq_len = strlen(c_elem); /* A lower case copy of the c_elem. */ seq = (char *)Calloc(seq_len + 1, sizeof(char)); /* String used to check the PossibleOligo. */ PossibleOligo = (char *)Calloc(max_len + 1, sizeof(char)); subseq = (char *)Calloc(max_len + 1, sizeof(char)); scd_str = (char *)Calloc(max_len + 1, sizeof(char)); /* The output. A set of possibly good oligos. */ max_num_probe = 20; seq_set = (char **)Calloc(max_num_probe, sizeof(char *)); for (i = 0; i < seq_len; i++) { seq[i] = c_elem[i] | 32; } i = MAX(l_bnd, 0); while (i <= MIN(r_bnd, seq_len - min_len)) { BadOligo = FALSE; for (l = min_len; BadOligo == FALSE && l <= seq_len - i && l <= max_len; l++) { int uk; SubStr(seq, i, l, PossibleOligo); /* Any unknow base? */ if ((uk = KnownSeq(PossibleOligo)) != -1) { fprintf(stderr, "%s has ambiguous base(s)\n", PossibleOligo); i += uk + 1; BadOligo = TRUE; } PO_len = strlen(PossibleOligo); /* check if there is a substr of len(no_repeat_len) * repeat itself in the PossibleOligo. DOESN'T MATTER! IT COULD MESS UP AT MOST SEVERAL BASES READ INTO THE PROBE. CUT_SITE IS WHAT REALLY MATTERS. for(PO_index = 0; BadOligo==FALSE && PO_index<=PO_len-no_repeat_len; PO_index++) { SubStr(PossibleOligo,PO_index,no_repeat_len,subseq); if(FindPattern(PossibleOligo, subseq) > 1) { fprintf(stderr, "%s has 15 repatitive base(s) %s\n", PossibleOligo, subseq); i++; BadOligo = TRUE; } } */ /* * To ensure that the probe is not going to hybridize * with itself: */ for (PO_index = 0; BadOligo == FALSE && PO_index <= PO_len - no_repeat_len; PO_index++) { SubStr(PossibleOligo, PO_index, no_repeat_len, subseq); strcpy(scd_str, subseq); Complementary(scd_str, 'd'); Reverse(scd_str); if (FindPattern(PossibleOligo, scd_str) > 0) { fprintf(stderr, "%s may hybridize with itself: " "%s vs. %s.\n", PossibleOligo, subseq, scd_str); i++; BadOligo = TRUE; } } for (PO_index = 0; BadOligo == FALSE && PO_index <= PO_len - 2 * check_len; PO_index++) { SubStr(PossibleOligo, PO_index, check_len, subseq); Complementary(subseq, 'd'); strcpy(scd_str, subseq); Reverse(scd_str); /* if(FindPattern2(PossibleOligo,subseq,PO_index)>0) { fprintf(stderr, "%s has self-compl %s\n", PossibleOligo, subseq); i += PO_index+1; BadOligo = TRUE; } else */ if (FindPattern2(PossibleOligo, scd_str, PO_index) > 0) { fprintf(stderr, "%s has 2nd struct %s\n", PossibleOligo, scd_str); i += PO_index + 1; BadOligo = TRUE; } } if (BadOligo == FALSE) { seq_set[seq_cnt] = (char *)Calloc( strlen(PossibleOligo) + 1, sizeof(char)); strcpy(seq_set[seq_cnt], PossibleOligo); if (++seq_cnt == max_num_probe) { max_num_probe *= 2; seq_set = (char **)Realloc( seq_set, max_num_probe * sizeof(char *)); } i++; } } /* end of l. */ } /* end of i. */ seq_set[seq_cnt] = NULL; if (seq_cnt == 0) return NULL; return seq_set; } /* ALWAYS COPY the result from uniqueID() to a char[32], * (strlen(hostname)+1+10). Memory is lost when the function * is finished. */ char vname[32]; char *uniqueID() { char hname[32], /* vname[32], rtm 18.III.98 */ tstr[32]; time_t *tp; static cnt = 0; int ll; tp = (time_t *)Calloc(1, sizeof(time_t)); if (gethostname(hname, 32) == -1) { fprintf(stderr, "UniqueID(): Failed to get host name.\n"); exit(1); } time(tp); sprintf(tstr, ":%d:%ld", cnt, *tp); if ((ll = strlen(tstr)) > 31) { strncpy(vname, tstr, 31); vname[31] = '\0'; } else { ll = strlen(hname) - (31 - ll); if (ll < 0) ll = 0; sprintf(vname, "%s%s", hname + ll, tstr); } cnt++; Cfree(tp); return (vname); } /* return the percentage of GCcontents. */ int GCcontent(seq) char *seq; { int l, gc = 0, j; l = strlen(seq); for (j = 0; j < l; j++) { if ((seq[j] | 32) == 'g' || (seq[j] | 32) == 'c') { gc++; } } return ((int)(gc * 100 / l)); } /****** * * HGLtoIQ() outputs a HGL format record to an ASCII file with * the Input-Queue format, the format for the synthesizer. * ******/ void HGLtoIQ(fname, tSeq) const char *fname; Sequence *tSeq; { FILE *fp; if ((fp = fopen(fname, "w")) == NULL) { fprintf(stderr, "Can't open IQ file: %s\n", fname); exit(1); } fprintf(fp, "%s %s\n", tSeq->comments, tSeq->c_elem); } Find2(string, key) char *key, *string; /* * Like find, but returns the index of the leftmost * occurence, and -1 if not found. * Note in this program, T==U, and case insensitive. */ { int i, j, len1, len2, dif, flag = FALSE; char *target; if (string == NULL || string[0] == '\0') return -1; len2 = strlen(string); target = (char *)Calloc(len2 + 1, 1); for (i = 0; i < len2; i++) { target[i] = string[i] | 32; if (target[i] == 'u') target[i] = 't'; } len1 = strlen(key); for (i = 0; i < len1; i++) { key[i] |= 32; if (key[i] == 'u') key[i] = 't'; } dif = len2 - len1 + 1; if (len1 > 0) for (j = 0; j < dif && flag == FALSE; j++) { flag = TRUE; for (i = 0; i < len1 && flag; i++) flag = (key[i] == target[i + j]) ? TRUE : FALSE; } Cfree(target); return (flag ? j - 1 : -1); } /* return -1 if end-of-file. FALSE if anything is wrong. */ int ReadGDE(fp, seq) FILE *fp; Sequence *seq; { char temp_line[1000], waste[64]; int ii, l1; while (fgets(temp_line, 1000, fp) != NULL) { if (strncmp(temp_line, "sequence-ID", 11) == 0) { sscanf(temp_line, "%s%s", waste, seq->sequence_ID); } else if (temp_line[0] == '#') { strncpy(seq->name, temp_line + 1, 31); seq->name[31] = '\0'; ii = 0; while (ii < strlen(seq->name) && seq->name[ii] != ' ' && seq->name[ii] != '\n') ii++; seq->name[ii] = '\0'; seq->seqmaxlen = 256; seq->c_elem = (char *)Calloc(seq->seqmaxlen, 1); seq->seqlen = 0; while (fgets(temp_line, 1000, fp) != NULL) { l1 = strlen(temp_line); if (temp_line[l1 - 1] == '\n') { l1--; temp_line[l1] = '\0'; } while (seq->seqmaxlen < seq->seqlen + strlen(temp_line) + 1) { seq->seqmaxlen *= 2; seq->c_elem = (char *)Realloc( seq->c_elem, seq->seqmaxlen); } strcat(seq->c_elem, temp_line); seq->seqlen += strlen(temp_line); } if (seq->seqlen == 0) { fprintf(stderr, "\n%s\n", "Sequence is empty."); return FALSE; } } } return -1; } void heapify(seq_set, seq_size, heap_size, elem, Pkey, Skey, order) int seq_size , elem, heap_size, **order; char Pkey[], Skey[]; Sequence *seq_set; { int l, r, temp, largest; l = 2 * elem + 1; r = 2 * elem + 2; if (l <= heap_size && CompKey(seq_set[(*order)[l]], seq_set[(*order)[elem]], Pkey, Skey) > 0) largest = l; else largest = elem; if (r <= heap_size && CompKey(seq_set[(*order)[r]], seq_set[(*order)[largest]], Pkey, Skey) > 0) largest = r; if (largest != elem) { temp = (*order)[elem]; (*order)[elem] = (*order)[largest]; (*order)[largest] = temp; heapify(seq_set, seq_size, heap_size, largest, Pkey, Skey, order); } } heapsort(seq_set, seq_size, Pkey, Skey, order) int seq_size, **order; char Pkey[], Skey[]; Sequence *seq_set; { int ii, temp, heap_size; /* * build_heap(seq_set, seq_size, &heap_size, order); */ heap_size = seq_size - 1; for (ii = (seq_size - 1) / 2; ii >= 0; ii--) /* (L-1)/2-1?? */ { heapify(seq_set, seq_size, heap_size, ii, Pkey, Skey, order); } for (ii = seq_size - 1; ii > 0; ii--) { temp = (*order)[0]; (*order)[0] = (*order)[ii]; (*order)[ii] = temp; heap_size--; heapify(seq_set, seq_size, heap_size, 0, Pkey, Skey, order); } } /* * Return >0, ==0, <0. */ int CompKey(seq1, seq2, Pkey, Skey) Sequence seq1, seq2; char Pkey[], Skey[]; { int ii, jj, Pret; char b1[32], b2[32]; if (strcmp(Pkey, "type") == 0) { Pret = strcmp(seq1.type, seq2.type); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "name") == 0) { Pret = strcmp(seq1.name, seq2.name); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "sequence-ID") == 0) { Pret = strcmp(seq1.sequence_ID, seq2.sequence_ID); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "creator") == 0) { Pret = strcmp(seq1.creator, seq2.creator); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "offset") == 0) { Pret = seq1.offset - seq2.offset; if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "group-ID") == 0) { Pret = seq1.group_ID - seq2.group_ID; if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "barcode") == 0) { if (seq1.barcode[0] == 'P') strcpy(b1, seq1.barcode + 2); else strcpy(b1, seq1.barcode); if (seq2.barcode[0] == 'P') strcpy(b2, seq2.barcode + 2); else strcpy(b2, seq2.barcode); Pret = strcmp(b1, b2); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "seqlen") == 0) { Pret = seq1.seqlen - seq2.seqlen; if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "creation-date") == 0) { seq1.creation_date[0] %= 100; seq2.creation_date[0] %= 100; Pret = seq1.creation_date[0] * 10000 + seq1.creation_date[1] * 100 + seq1.creation_date[2] - seq2.creation_date[0] * 10000 - seq2.creation_date[1] * 100 - seq2.creation_date[2]; if (Pret == 0) { Pret = seq1.creation_date[3] * 10000 + seq1.creation_date[4] * 100 + seq1.creation_date[5] - seq2.creation_date[3] * 10000 - seq2.creation_date[4] * 100 - seq2.creation_date[5]; } if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "probing-date") == 0) { seq1.probing_date[0] %= 100; seq2.probing_date[0] %= 100; Pret = seq1.probing_date[0] * 10000 + seq1.probing_date[1] * 100 + seq1.probing_date[2] - seq2.probing_date[0] * 10000 - seq2.probing_date[1] * 100 - seq2.probing_date[2]; if (Pret == 0) { Pret = seq1.probing_date[3] * 10000 + seq1.probing_date[4] * 100 + seq1.probing_date[5] - seq2.probing_date[3] * 10000 - seq2.probing_date[4] * 100 - seq2.probing_date[5]; } if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "autorad_date") == 0) { seq1.autorad_date[0] %= 100; seq2.autorad_date[0] %= 100; Pret = seq1.autorad_date[0] * 10000 + seq1.autorad_date[1] * 100 + seq1.autorad_date[2] - seq2.autorad_date[0] * 10000 - seq2.autorad_date[1] * 100 - seq2.autorad_date[2]; if (Pret == 0) { Pret = seq1.autorad_date[3] * 10000 + seq1.autorad_date[4] * 100 + seq1.autorad_date[5] - seq2.autorad_date[3] * 10000 - seq2.autorad_date[4] * 100 - seq2.autorad_date[5]; } if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "film") == 0) { Pret = strcmp(seq1.film, seq2.film); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "membrane") == 0) { Pret = strcmp(seq1.membrane, seq2.membrane); if (Pret != 0 || Skey[0] == '\0') return Pret; } else if (strcmp(Pkey, "contig") == 0) { Pret = strcmp(seq1.contig, seq2.contig); if (Pret != 0 || Skey[0] == '\0') return Pret; } else { fprintf(stderr, "CompKey(): Invalid primary key %s.\n", Pkey); exit(1); } if (strcmp(Skey, "type") == 0) { return (strcmp(seq1.type, seq2.type)); } else if (strcmp(Skey, "name") == 0) { return (strcmp(seq1.name, seq2.name)); } else if (strcmp(Skey, "sequence-ID") == 0) { return (strcmp(seq1.sequence_ID, seq2.sequence_ID)); } else if (strcmp(Skey, "creator") == 0) { return (strcmp(seq1.creator, seq2.creator)); } else if (strcmp(Skey, "offset") == 0) { return (seq1.offset - seq2.offset); } else if (strcmp(Skey, "group-ID") == 0) { return (seq1.group_ID - seq2.group_ID); } else if (strcmp(Skey, "barcode") == 0) { if (seq1.barcode[0] == 'P') strcpy(b1, seq1.barcode + 2); else strcpy(b1, seq1.barcode); if (seq2.barcode[0] == 'P') strcpy(b2, seq2.barcode + 2); else strcpy(b2, seq2.barcode); return (strcmp(b1, b2)); } else if (strcmp(Skey, "seqlen") == 0) { return (seq1.seqlen - seq2.seqlen); } else if (strcmp(Skey, "creation-date") == 0) { seq1.creation_date[0] %= 100; seq2.creation_date[0] %= 100; Pret = seq1.creation_date[0] * 10000 + seq1.creation_date[1] * 100 + seq1.creation_date[2] - seq2.creation_date[0] * 10000 - seq2.creation_date[1] * 100 - seq2.creation_date[2]; if (Pret != 0) return Pret; return (seq1.creation_date[3] * 10000 + seq1.creation_date[4] * 100 + seq1.creation_date[5] - seq2.creation_date[3] * 10000 - seq2.creation_date[4] * 100 - seq2.creation_date[5]); } else if (strcmp(Skey, "probing-date") == 0) { seq1.probing_date[0] %= 100; seq2.probing_date[0] %= 100; Pret = seq1.probing_date[0] * 10000 + seq1.probing_date[1] * 100 + seq1.probing_date[2] - seq2.probing_date[0] * 10000 - seq2.probing_date[1] * 100 - seq2.probing_date[2]; if (Pret != 0) return Pret; return (seq1.probing_date[3] * 10000 + seq1.probing_date[4] * 100 + seq1.probing_date[5] - seq2.probing_date[3] * 10000 - seq2.probing_date[4] * 100 - seq2.probing_date[5]); } else if (strcmp(Skey, "autorad_date") == 0) { seq1.autorad_date[0] %= 100; seq2.autorad_date[0] %= 100; Pret = seq1.autorad_date[0] * 10000 + seq1.autorad_date[1] * 100 + seq1.autorad_date[2] - seq2.autorad_date[0] * 10000 - seq2.autorad_date[1] * 100 - seq2.autorad_date[2]; if (Pret != 0) return Pret; return (seq1.autorad_date[3] * 10000 + seq1.autorad_date[4] * 100 + seq1.autorad_date[5] - seq2.autorad_date[3] * 10000 - seq2.autorad_date[4] * 100 - seq2.autorad_date[5]); } else if (strcmp(Skey, "film") == 0) { return (strcmp(seq1.film, seq2.film)); } else if (strcmp(Skey, "membrane") == 0) { return (strcmp(seq1.membrane, seq2.membrane)); } else if (strcmp(Skey, "contig") == 0) { return (strcmp(seq1.contig, seq2.contig)); } else { fprintf(stderr, "CompKey(): Invalid secondary key %s.\n", Skey); exit(1); } } int Lock(fname) char *fname; { char buffer[1024]; FILE *fp; int wait = 0; while ((fp = fopen(fname, "r")) == NULL) { sleep(1); if (++wait == 30) { fprintf(stderr, "File %s not available, Try later.\n\n", fname); return FALSE; } } fclose(fp); sprintf(buffer, "mv %s %s.locked", fname, fname); system(buffer); return TRUE; } void Unlock(fname) char *fname; { char buffer[1024]; sprintf(buffer, "mv %s.locked %s", fname, fname); system(buffer); } AppendComments(seq, str) Sequence *seq; char *str; { int ii, jj, kk; kk = strlen(str); if (seq->commentsmaxlen == 0) { seq->comments = (char *)Calloc(kk + 1, 1); seq->commentsmaxlen = kk + 1; seq->commentslen = 0; } else if (seq->commentslen + kk + 1 > seq->commentsmaxlen) { seq->commentsmaxlen += 2 * (kk + 1); seq->comments = (char *)Realloc(seq->comments, seq->commentsmaxlen); } seq->comments[seq->commentslen] = '\0'; seq->comments[seq->commentslen] = '\0'; strcat(seq->comments, str); seq->commentslen = strlen(seq->comments); }