#include #include #include #include #include #include #include "defines.h" #include "menudefs.h" /* Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. All rights reserved. */ ReadGDE(filename, dataset, type) char *filename; NA_Alignment *dataset; int type; { register int done = FALSE, len = 0, j = 0; int success, count, temp = 0; char Inline[GBUFSIZ], c; char *buffer, *line, *gencomments = NULL, fields[8][GBUFSIZ]; int buflen = GBUFSIZ, genclen = 0, curelem = 0, n = 0, flag = 0; NA_Sequence *this_elem = NULL, temp_elem; FILE *file; extern int Default_DNA_Trans[], Default_RNA_Trans[]; extern int OVERWRITE, Default_NA_RTrans[], Default_PROColor_LKUP[]; ErrorOut("No such file", file = fopen(filename, "r")); for (; fgets(Inline, GBUFSIZ, file) != 0;) { for (line = Inline; line[0] == ' ' || line[0] == '\t'; line++) ; if (Find2(line, "{") == 0) { this_elem = &temp_elem; InitNASeq(this_elem, DNA); this_elem->offset = -(dataset->rel_offset); } else if (Find2(line, "type") == 0) { if (Find(line, "DNA")) { this_elem->elementtype = DNA; this_elem->tmatrix = Default_DNA_Trans; this_elem->rmatrix = Default_NA_RTrans; } else if (Find(line, "RNA")) { this_elem->elementtype = RNA; this_elem->tmatrix = Default_RNA_Trans; this_elem->rmatrix = Default_NA_RTrans; } else if (Find(line, "MASK")) { this_elem->elementtype = MASK; this_elem->rmatrix = NULL; this_elem->tmatrix = NULL; this_elem->col_lut = NULL; } else if (Find(line, "TEXT")) { this_elem->elementtype = TEXT; this_elem->rmatrix = NULL; this_elem->tmatrix = NULL; this_elem->col_lut = NULL; } else if (Find(line, "PROT")) { this_elem->elementtype = PROTEIN; this_elem->rmatrix = NULL; this_elem->tmatrix = NULL; this_elem->col_lut = Default_PROColor_LKUP; } /* this_elem->attr = DEFAULT_X_ATTR; */ } else if (Find2(line, "circular") == 0) { sscanf(line, "%*s %d", &temp); if (temp == 1) { this_elem->attr |= IS_CIRCULAR; } else { this_elem->attr &= ~IS_CIRCULAR; } } else if (Find2(line, "orig_direction") == 0) { sscanf(line, "%*s %d", &temp); if (temp == 1) { this_elem->attr |= IS_ORIG_5_TO_3; this_elem->attr &= ~IS_ORIG_3_TO_5; } else { this_elem->attr |= IS_ORIG_3_TO_5; this_elem->attr &= ~IS_ORIG_5_TO_3; } } else if (Find2(line, "direction") == 0) { sscanf(line, "%*s %d", &temp); if (temp == 1) { this_elem->attr |= IS_5_TO_3; this_elem->attr &= ~IS_3_TO_5; } else { this_elem->attr |= IS_3_TO_5; this_elem->attr &= ~IS_5_TO_3; } } else if (Find2(line, "orig_strand") == 0) { sscanf(line, "%*s %d", &temp); if (temp == 1) { this_elem->attr |= IS_ORIG_PRIMARY; this_elem->attr &= ~IS_ORIG_SECONDARY; } else { this_elem->attr |= IS_ORIG_SECONDARY; this_elem->attr &= ~IS_ORIG_PRIMARY; } } else if (Find2(line, "strandedness") == 0) { sscanf(line, "%*s %d", &temp); if (temp == 1) { this_elem->attr |= IS_PRIMARY; this_elem->attr &= ~IS_SECONDARY; } else { this_elem->attr |= IS_SECONDARY; this_elem->attr &= ~IS_PRIMARY; } } else if (Find2(line, "creator") == 0) { sscanf(line, "%*s %[^\n]", this_elem->authority); RemoveQuotes(this_elem->authority); } else if (Find2(line, "longname") == 0) { sscanf(line, "%*s %[^\n]", this_elem->seq_name); RemoveQuotes(this_elem->seq_name); } else if (Find2(line, "descrip") == 0) { sscanf(line, "%*s %[^\n]", this_elem->description); RemoveQuotes(this_elem->description); } else if (Find2(line, "name") == 0) { sscanf(line, "%*s %[^\n]", this_elem->short_name); RemoveQuotes(this_elem->short_name); } else if (Find2(line, "group-ID") == 0) { sscanf(line, "%*s %d", &(this_elem->groupid)); dataset->numgroups = MAX(this_elem->groupid, dataset->numgroups); } else if (Find2(line, "sequence-ID") == 0) { sscanf(line, "%*s %[^\n]", this_elem->id); RemoveQuotes(this_elem->id); } else if (Find2(line, "barcode") == 0) { sscanf(line, "%*s %[^\n]", this_elem->barcode); RemoveQuotes(this_elem->barcode); } else if (Find2(line, "membrane") == 0) { sscanf(line, "%*s %[^\n]", this_elem->membrane); RemoveQuotes(this_elem->membrane); } else if (Find2(line, "contig") == 0) { sscanf(line, "%*s %[^\n]", this_elem->contig); RemoveQuotes(this_elem->contig); } else if (Find2(line, "creation-date") == 0) { sscanf(line, "%*s %2d%*c%2d%*c%2d%*c%2d%*c%2d%*c%2d\n", &(this_elem->t_stamp.origin.mm), &(this_elem->t_stamp.origin.dd), &(this_elem->t_stamp.origin.yy), &(this_elem->t_stamp.origin.hr), &(this_elem->t_stamp.origin.mn), &(this_elem->t_stamp.origin.sc)); } else if (Find2(line, "offset") == 0) { sscanf(line, "%*s %d", &(this_elem->offset)); this_elem->offset -= dataset->rel_offset; } else if (Find2(line, "comments") == 0) { if (this_elem->comments_maxlen == 0) buflen = 2048; else buflen = this_elem->comments_maxlen; done = FALSE; len = this_elem->comments_len; for (; line[0] != '"'; line++) if (line[0] == '\0') ErrorOut(0, "Error in input file"); line++; buffer = Calloc(buflen, sizeof(char)); for (; !done;) { for (j = 0; j < strlen(line); j++) { if (len + strlen(line) >= buflen) { buflen *= 2; buffer = Realloc( buffer, buflen * sizeof(char)); } if (line[j] == '"') done = TRUE; else buffer[len++] = line[j]; } /* * Check pad with null */ buffer[len] = '\0'; if (!done) { if (fgets(Inline, GBUFSIZ, file) == 0) done = TRUE; line = Inline; } } this_elem->comments = buffer; this_elem->comments_len = strlen(buffer); this_elem->comments_maxlen = buflen; RemoveQuotes(this_elem->comments); } else if (Find2(line, "sequence") == 0) { buflen = GBUFSIZ; done = FALSE; len = 0; buffer = Calloc(buflen, sizeof(char)); for (; line[0] != '"'; line++) if (line[0] == '\0') ErrorOut(0, "Error in input file"); line++; for (; !done;) { for (j = 0; j < strlen(line); j++) { if (len + strlen(line) >= buflen) { buflen *= 2; buffer = Realloc( buffer, buflen * sizeof(char)); } if (line[j] == '"') done = TRUE; else { /* * If not *text, ignore spaces... */ if (this_elem->elementtype != TEXT) { if (line[j] != ' ' && line[j] != '\t' && line[j] != '\n') buffer[len++] = line[j]; } else if (line[j] != '\t' && line[j] != '\n') buffer[len++] = line[j]; } } if (!done) { if (fgets(Inline, GBUFSIZ, file) == 0) done = TRUE; line = Inline; } } if (this_elem->rmatrix) for (j = 0; j < len; j++) buffer[j] = this_elem->rmatrix[buffer[j]]; this_elem->sequence = (NA_Base *)buffer; this_elem->seqlen = len; this_elem->seqmaxlen = buflen; } else if (Find2(line, "}") == 0) { if (this_elem->id[0] == '\0') strncpy(this_elem->id, uniqueID(), 79); if (this_elem->short_name[0] == '\0') strncpy(this_elem->short_name, this_elem->id, 79); if (this_elem->seqlen == 0) this_elem->protect = PROT_BASE_CHANGES + PROT_GREY_SPACE + PROT_WHITE_SPACE + PROT_TRANSLATION; genclen = 0; /* * Make a new sequence entry... */ success = -1; if (OVERWRITE) success = OverWrite(this_elem, dataset); if (success == -1) { curelem = dataset->numelements++; if (curelem == 0) { dataset->element = (NA_Sequence *)Calloc( 5, sizeof(NA_Sequence)); dataset->maxnumelements = 5; } else if (curelem == dataset->maxnumelements) { (dataset->maxnumelements) *= 2; dataset->element = (NA_Sequence *)Realloc( dataset->element, dataset->maxnumelements * sizeof(NA_Sequence)); } dataset->element[curelem] = *this_elem; } } else if (this_elem != NULL) { if (this_elem->baggage == NULL) { this_elem->baggage = String(line); this_elem->baggage_maxlen = this_elem->baggage_len = strlen(this_elem->baggage) + 1; } else { this_elem->baggage_len += strlen(line) + 1; this_elem->baggage = Realloc( this_elem->baggage, this_elem->baggage_len * sizeof(char)); this_elem->baggage_maxlen = this_elem->baggage_len; strncat(this_elem->baggage, line, GBUFSIZ); } } } fclose(file); NormalizeOffset(dataset); Regroup(dataset); AdjustGroups(dataset); return; } WriteGDE(aln, filename, method, maskable) NA_Alignment *aln; char *filename; int method, maskable; { int i, j, k, mask = -1; FILE *file; NA_Sequence *this_elem; extern char month[12][6]; if (aln == NULL) return; if (aln->na_ddata == NULL) return; file = fopen(filename, "w"); if (file == NULL) { Warning("Cannot open file for output"); return (1); } if (maskable && method != SELECT_REGION) for (j = 0; j < aln->numelements; j++) if (aln->element[j].elementtype == MASK && aln->element[j].selected) mask = j; for (j = 0; j < aln->numelements; j++) { if ((aln->element[j].selected && j != mask && method != SELECT_REGION) || (method == ALL) || (aln->element[j].subselected && method == SELECT_REGION)) { this_elem = &(aln->element[j]); SeqNorm(this_elem); fprintf(file, "{\n"); if (this_elem->short_name[0]) fprintf(file, "name \"%s\"\n", this_elem->short_name); switch (this_elem->elementtype) { case DNA: fprintf(file, "type \"DNA\"\n"); break; case RNA: fprintf(file, "type \"RNA\"\n"); break; case PROTEIN: fprintf(file, "type \"PROTEIN\"\n"); break; case MASK: fprintf(file, "type \"MASK\"\n"); break; case TEXT: fprintf(file, "type \"TEXT\"\n"); break; } if (this_elem->seq_name[0]) fprintf(file, "longname %s\n", this_elem->seq_name); if (this_elem->id[0]) fprintf(file, "sequence-ID \"%s\"\n", this_elem->id); RemoveQuotes(this_elem->barcode); RemoveQuotes(this_elem->contig); if (this_elem->barcode[0]) fprintf(file, "barcode \"%s\"\n", this_elem->barcode); if (this_elem->membrane[0]) fprintf(file, "membrane \"%s\"\n", this_elem->membrane); if (this_elem->contig[0]) fprintf(file, "contig \"%s\"\n", this_elem->contig); if (this_elem->description[0]) fprintf(file, "descrip \"%s\"\n", this_elem->description); if (this_elem->authority[0]) fprintf(file, "creator \"%s\"\n", this_elem->authority); if (this_elem->groupid) fprintf(file, "group-ID %d\n", this_elem->groupid); if (this_elem->offset + aln->rel_offset && method != SELECT_REGION) fprintf(file, "offset %d\n", this_elem->offset + aln->rel_offset); if (method == SELECT_REGION) { /* * If selecting a region, the offset should *be moved to the first non-'0' space in the *mask. */ for (k = this_elem->offset; k < aln->selection_mask_len && aln->selection_mask[k] == '0'; k++) ; fprintf(file, "offset %d\n", aln->rel_offset + k); } if (this_elem->t_stamp.origin.mm != 0) fprintf( file, "creation-date %2d/%2d/%2d " "%2d:%2d:%2d\n", this_elem->t_stamp.origin.mm, this_elem->t_stamp.origin.dd, (this_elem->t_stamp.origin.yy) > 1900 ? (this_elem->t_stamp.origin.yy - 1900) : (this_elem->t_stamp.origin.yy), this_elem->t_stamp.origin.hr, this_elem->t_stamp.origin.mn, this_elem->t_stamp.origin.sc); if ((this_elem->attr & IS_ORIG_5_TO_3) && ((this_elem->attr & IS_ORIG_3_TO_5) == 0)) fprintf(file, "orig_direction 1\n"); if ((this_elem->attr & IS_CIRCULAR)) fprintf(file, "circular 1\n"); if ((this_elem->attr & IS_5_TO_3) && ((this_elem->attr & IS_3_TO_5) == 0)) fprintf(file, "direction 1\n"); if ((this_elem->attr & IS_ORIG_3_TO_5) && ((this_elem->attr & IS_ORIG_5_TO_3) == 0)) fprintf(file, "orig_direction -1\n"); if ((this_elem->attr & IS_3_TO_5) && ((this_elem->attr & IS_5_TO_3) == 0)) fprintf(file, "direction -1\n"); if ((this_elem->attr & IS_ORIG_PRIMARY) && ((this_elem->attr & IS_ORIG_SECONDARY) == 0)) fprintf(file, "orig_strand 1\n"); if ((this_elem->attr & IS_PRIMARY) && ((this_elem->attr & IS_SECONDARY) == 0)) fprintf(file, "strandedness 1\n"); if (((this_elem->attr & IS_ORIG_PRIMARY) == 0) && (this_elem->attr & IS_ORIG_SECONDARY)) fprintf(file, "orig_strand 2\n"); if (((this_elem->attr & IS_PRIMARY) == 0) && (this_elem->attr & IS_SECONDARY)) fprintf(file, "strandedness 2\n"); if (this_elem->comments != NULL) { StripSpecial(this_elem->comments); fprintf(file, "comments \"%s\"\n", this_elem->comments); } if (this_elem->baggage != NULL) { if (this_elem ->baggage[strlen(this_elem->baggage) - 1] == '\n') fprintf(file, "%s", this_elem->baggage); else fprintf(file, "%s\n", this_elem->baggage); } fprintf(file, "sequence \""); if (this_elem->tmatrix) { if (mask == -1) { for (k = this_elem->offset; k < this_elem->seqlen + this_elem->offset; k++) { if (k % 60 == 0) putc('\n', file); if (method == SELECT_REGION) { if (aln->selection_mask [k] == '1') putc( this_elem->tmatrix [getelem( this_elem, k)], file); } else putc(this_elem->tmatrix [getelem( this_elem, k)], file); } } else { for (i = 0, k = this_elem->offset; k < this_elem->seqlen + this_elem->offset; k++) if (aln->element[mask].seqlen + this_elem->offset > k) if ((char)getelem( &(aln->element [mask]), k) != '0' && ((char)getelem( &(aln->element [mask]), k) != '-')) { if (i % 60 == 0) putc( '\n', file); putc( this_elem->tmatrix [getelem( this_elem, k)], file); i++; } } fprintf(file, "\"\n"); } else { if (mask == -1) { for (k = this_elem->offset; k < this_elem->seqlen + this_elem->offset; k++) { if (k % 60 == 0) putc('\n', file); if (method == SELECT_REGION) { if (aln->selection_mask [k] == '1') putc( getelem( this_elem, k), file); } else putc(getelem(this_elem, k), file); } } else { for (i = 0, k = this_elem->offset; k < this_elem->seqlen + this_elem->offset; k++) if (((aln->element[mask] .seqlen) + (aln->element[mask] .offset)) > k) if ((char)getelem( &(aln->element [mask]), k) == '1') { if (i % 60 == 0) putc( '\n', file); putc( getelem( this_elem, k), file); i++; } } fprintf(file, "\"\n"); } fprintf(file, "}\n"); } } fclose(file); return; } StripSpecial(string) char *string; { register int i, j, len; len = strlen(string); for (j = 0; j < len; j++) { if (string[j] == '"') string[j] = '`'; else if (string[j] == '{') string[j] = '('; else if (string[j] == '}') string[j] = ')'; } return; } RemoveQuotes(string) char *string; { register int i, j, len; len = strlen(string); for (j = 0; j < len; j++) if (string[j] == '"') string[j] = ' '; for (j = 0; string[j] == ' ' && j < strlen(string); j++) ; len = strlen(string); for (i = 0; i < len - j; i++) string[i] = string[i + j]; for (j = strlen(string) - 1; j >= 0 && (string[j] == '\n' || string[j] == ' '); j--) string[j] = '\0'; return; } /* * Normalize seq (remove leading indels in the sequence; */ void SeqNorm(seq) NA_Sequence *seq; { int len, j, shift_width, trailer; char *sequence; len = seq->seqlen; sequence = (char *)seq->sequence; if (len == 0) return; if (seq->tmatrix) for (shift_width = 0; (shift_width < len) && ((sequence[shift_width] & 15) == '\0'); shift_width++) ; else for (shift_width = 0; (shift_width < len) && (sequence[shift_width] == '-'); shift_width++) ; for (j = 0; j < len - shift_width; j++) sequence[j] = sequence[j + shift_width]; seq->seqlen -= shift_width; seq->offset += shift_width; for (trailer = seq->seqlen - 1; (sequence[trailer] == '-' || sequence[trailer] == '\0') && trailer >= 0; trailer--) sequence[trailer] = '\0'; seq->seqlen = trailer + 1; return; } /* ALWAYS COPY the result from uniqueID() to a char[32], * (strlen(hostname)+1+10). Memory is lost when the function * is finished. */ char vname[32]; char *uniqueID() { char hname[32]; /* ,vname[32]; rtm 18.III.98 */ int hnamelen = 32; time_t *tp; static cnt = 0; tp = (time_t *)Calloc(1, sizeof(time_t)); if (gethostname(hname, 10) == -1) { fprintf(stderr, "UniqueID(): Failed to get host name.\n"); exit(1); } time(tp); sprintf(vname, "%s:%d:%ld", hname, cnt, *tp); cnt++; Cfree(tp); return (vname); } /* * OverWrite(), overwrite all non-default data from a sequence entry * onto any entry with the same ID or short name. */ OverWrite(this, aln) NA_Sequence *this; NA_Alignment *aln; { int j, indx = -1; NA_Sequence *that; for (j = 0; j < aln->numelements; j++) { if (Find2(this->id, aln->element[j].id) != -1) if (Find2(aln->element[j].id, this->id) != -1) indx = j; } if (indx == -1) for (j = 0; j < aln->numelements; j++) { if (Find2(this->short_name, aln->element[j].short_name) != -1) if (Find2(aln->element[j].short_name, this->short_name) != -1) indx = j; } if (indx != -1) { that = &(aln->element[indx]); if (this->seq_name[0]) strcpy(that->seq_name, this->seq_name); if (this->barcode[0]) strcpy(that->barcode, this->barcode); if (this->contig[0]) strcpy(that->contig, this->contig); if (this->membrane[0]) strcpy(that->membrane, this->membrane); if (this->authority[0]) strcpy(that->authority, this->authority); if (this->short_name[0]) strcpy(that->short_name, this->short_name); if (this->description[0]) strcpy(that->description, this->description); if (this->sequence) { free(that->sequence); that->sequence = this->sequence; that->seqlen = this->seqlen; that->seqmaxlen = this->seqmaxlen; } if (this->baggage) { that->baggage_len += this->baggage_len; that->baggage_maxlen += this->baggage_maxlen; if (that->baggage) that->baggage = Realloc( that->baggage, that->baggage_maxlen * sizeof(char)); else that->baggage = Calloc(that->baggage_maxlen, sizeof(char)); strncat(that->baggage, this->baggage, that->baggage_maxlen); } if (this->comments) { that->comments_len += this->comments_len; that->comments_maxlen += this->comments_maxlen; if (that->comments) that->comments = Realloc( that->comments, that->comments_maxlen * sizeof(char)); else that->comments = Calloc(that->comments_maxlen, sizeof(char)); strncat(that->comments, this->comments, that->comments_maxlen); } if (this->cmask) { free(that->cmask); that->cmask = this->cmask; } if (this->offset != that->offset) that->offset = this->offset; if (this->attr != 0) that->attr = this->attr; if (this->groupid != 0) { that->groupid = this->groupid; } that->groupb = NULL; that->groupf = NULL; } /* NormalizeOffset(aln); Regroup(aln); AdjustGroups(aln); */ return (indx); }