#include #include #include #include #include #include "defines.h" #include "menudefs.h" /* LoadData(): Load a data set from the command line argument. Copyright (c) 1989, University of Illinois board of trustees. All rights reserved. Written by Steven Smith at the Center for Prokaryote Genome Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. Carl Woese. Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. All rights reserved. */ LoadData(filename) char *filename; { extern NA_Alignment *DataSet; extern int DataType, FileFormat, Default_DNA_Trans[], Default_RNA_Trans[]; extern int Default_NA_RTrans[], Default_PROColor_LKUP[], Default_NAColor_LKUP[]; extern Frame frame; extern Canvas EditCan, EditNameCan; extern char FileName[]; FILE *file; NA_Alignment *DataNaAln; char temp[1024]; /* * Get file name, determine the file type, and away we go.. */ if (Find2(filename, "gde") != 0) strcpy(FileName, filename); if ((file = fopen(filename, "r")) != 0) { FindType(filename, &DataType, &FileFormat); switch (DataType) { case NASEQ_ALIGN: if (DataSet == NULL) { DataSet = (NA_Alignment *)Calloc( 1, sizeof(NA_Alignment)); DataNaAln = (NA_Alignment *)DataSet; DataSet->rel_offset = 0; } else DataNaAln = (NA_Alignment *)DataSet; LoadFile(filename, DataNaAln, DataType, FileFormat); break; default: break; } } fclose(file); sprintf(temp, "Genetic Data Environment 2.2.1"); xv_set(frame, FRAME_LABEL, temp, 0); return; } /* LoadFile(): Load the given filename into the given dataset. Handle any type conversion needed to get the data into the specified data type. This routine is used in situations where the format and datatype is known. Copyright (c) 1989-1990, University of Illinois board of trustees. All rights reserved. Written by Steven Smith at the Center for Prokaryote Genome Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. Carl Woese. Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. All rights reserved. */ LoadFile(filename, dataset, type, format) char *filename; char *dataset; int type, format; { extern int DataType; if (DataType != type) fprintf(stderr, "Warning, datatypes do not match.\n"); /* Handle the overwrite/create/merge dialog here. */ switch (format) { case NA_FLAT: ReadNA_Flat(filename, dataset, type); ((NA_Alignment *)dataset)->format = GDE; break; case GENBANK: ReadGen(filename, dataset, type); ((NA_Alignment *)dataset)->format = GENBANK; break; case GDE: ReadGDE(filename, dataset, type); ((NA_Alignment *)dataset)->format = GDE; break; case COLORMASK: ReadCMask(filename); default: break; } return; } /* * Print error message, and die */ ErrorOut(code, string) int code; char *string; { if (code == 0) { fprintf(stderr, "Error:%s\n", string); exit(1); } return; } /* * More robust memory management routines */ char *Calloc(count, size) int count, size; { char *temp; #ifdef SeeAlloc extern int TotalCalloc; TotalCalloc += count * size; fprintf(stderr, "Calloc %d %d\n", count * size, TotalCalloc); #endif temp = calloc(count, size); ErrorOut(temp, "Cannot allocate memory"); return (temp); } char *Realloc(block, size) char *block; int size; { char *temp; #ifdef SeeAlloc extern int TotalRealloc; TotalRealloc += size; fprintf(stderr, "Realloc %d\n", TotalRealloc); #endif temp = realloc(block, size); ErrorOut(temp, "Cannot change memory size"); return (temp); } Cfree(block) char *block; { if (block) { /* rtm 18.III.98 FileIO.c: In function `Cfree': FileIO.c:181: void value not ignored as it ought to be if(free(block) == 0) Warning("Error in Cfree..."); */ free(block); } else Warning("Error in Cfree, NULL block"); return; } /* * same as strdup */ char *String(string) char *string; { char *temp; temp = Calloc(strlen(string) + 1, sizeof(char)); strcpy(temp, string); return (temp); } FindType(name, dtype, ftype) char *name; int *dtype, *ftype; { FILE *file; char Inline[GBUFSIZ]; file = fopen(name, "r"); *dtype = 0; *ftype = 0; if (file == NULL) return (1); /* * Is this a flat file? * Get the first non blank line, see if a type marker shows up. */ fgets(Inline, GBUFSIZ, file); for (; strlen(Inline) < 2 && fgets(Inline, GBUFSIZ, file) != NULL;) ; if (Inline[0] == '#' || Inline[0] == '%' || Inline[0] == '"' || Inline[0] == '@') { *dtype = NASEQ_ALIGN; *ftype = NA_FLAT; } /* * Else, try genbank */ else { fclose(file); file = fopen(name, "r"); *dtype = 0; *ftype = 0; if (file == NULL) return (1); for (; fgets(Inline, GBUFSIZ, file) != NULL;) if (Find(Inline, "LOCUS")) { *dtype = NASEQ_ALIGN; *ftype = GENBANK; fclose(file); return (0); } /* * and last, try GDE */ else if (Find(Inline, "sequence")) { *dtype = NASEQ_ALIGN; *ftype = GDE; fclose(file); return (0); } else if (Find(Inline, "start:")) { *dtype = NASEQ_ALIGN; *ftype = COLORMASK; fclose(file); return (0); } } fclose(file); return (0); } AppendNA(buffer, len, seq) NA_Base *buffer; int len; NA_Sequence *seq; { int curlen = 0, j; NA_Base *temp; if (seq->seqlen + len >= seq->seqmaxlen) { if (seq->seqlen > 0) seq->sequence = (NA_Base *)Realloc( seq->sequence, (seq->seqlen + len + GBUFSIZ) * sizeof(NA_Base)); else seq->sequence = (NA_Base *)Calloc( 1, (seq->seqlen + len + GBUFSIZ) * sizeof(NA_Base)); seq->seqmaxlen = seq->seqlen + len + GBUFSIZ; } /* * seqlen is the length, and the index of the next free * base */ curlen = seq->seqlen + seq->offset; for (j = 0; j < len; j++) putelem(seq, j + curlen, buffer[j]); seq->seqlen += len; return; } Ascii2NA(buffer, len, matrix) char *buffer; int len; int matrix[16]; { /* * if the translation matrix exists, use it to * encode the buffer. */ register i; if (matrix != NULL) for (i = 0; i < len; i++) buffer[i] = matrix[buffer[i]]; return; } WriteNA_Flat(aln, filename, method, maskable) NA_Alignment *aln; char *filename; int method, maskable; { int j, kk, mask = -1, k, offset, min_offset = -999999; char offset_str[100], buf[100]; NA_Sequence *seqs; FILE *file; if (aln == (NA_Alignment *)NULL) return; if (aln->numelements == (int)NULL) return; seqs = aln->element; file = fopen(filename, "w"); if (file == NULL) { Warning("Cannot open file for output"); return (1); } if (maskable && (method != SELECT_REGION)) { for (j = 0; j < aln->numelements; j++) if (seqs[j].elementtype == MASK && seqs[j].selected) mask = j; } for (j = 0; j < aln->numelements; j++) { SeqNorm(&(seqs[j])); } for (j = 0; j < aln->numelements; j++) { if (method != SELECT_REGION) offset = seqs[j].offset; else for (offset = seqs[j].offset; aln->selection_mask[offset] == '0'; offset++) ; if (offset + aln->rel_offset != 0) sprintf(offset_str, "(%d)", offset + aln->rel_offset); else offset_str[0] = '\0'; if (((j != mask) && (seqs[j].selected) && method != SELECT_REGION) || (method == SELECT_REGION && seqs[j].subselected) || method == ALL) { fprintf( file, "%c%s%s\n", seqs[j].elementtype == DNA ? '#' : seqs[j].elementtype == RNA ? '#' : seqs[j].elementtype == PROTEIN ? '%' : seqs[j].elementtype == TEXT ? '"' : seqs[j].elementtype == MASK ? '@' : '"', seqs[j].short_name, (offset + aln->rel_offset == 0) ? "" : offset_str); if (seqs[j].tmatrix) { if (mask == -1) for (k = 0, kk = 0; kk < seqs[j].seqlen; kk++) { if ((k) % 60 == 0 && k > 0) { buf[60] = '\0'; fputs(buf, file); putc('\n', file); } if (method == SELECT_REGION) { if (aln->selection_mask [kk + offset] == '1') { buf[k % 60] = ((char)seqs[j] .tmatrix[(int)getelem( &(seqs[j]), kk + offset)]); k++; } } else { buf[k % 60] = ((char)seqs[j].tmatrix [(int)getelem( &(seqs[j]), kk + offset)]); k++; } } else for (k = 0, kk = 0; kk < seqs[j].seqlen; kk++) { if (getelem(&(seqs[mask]), kk + seqs[mask] .offset) != '0' && (getelem( &(seqs[mask]), kk + seqs[mask] .offset) != '-')) { if ((k++) % 60 == 0 && k > 1) { buf[60] = '\0'; fputs(buf, file); putc('\n', file); } buf[k % 60] = ((char)seqs[j].tmatrix [getelem( &(seqs[j]), kk + offset)]); } } } else { if (mask == -1) for (k = 0, kk = 0; kk < seqs[j].seqlen; kk++) { if ((k) % 60 == 0 && k > 0) { buf[60] = '\0'; fputs(buf, file); putc('\n', file); } if (method == SELECT_REGION) { if (aln->selection_mask [kk + offset] == '1') { buf[k % 60] = (getelem( &(seqs[j]), kk + offset)); k++; } } else { buf[k % 60] = (getelem( &(seqs[j]), kk + offset)); k++; } } else for (k = 0, kk = 0; kk < seqs[j].seqlen; kk++) { if (getelem(&(seqs[mask]), kk + offset) == '1') { if ((k++) % 60 == 0 && k > 1) { buf[60] = '\0'; fputs(buf, file); putc('\n', file); } buf[k % 60] = ((char)getelem( &(seqs[j]), kk + offset)); } } } buf[(k % 60) > 0 ? (k % 60) : 60] = '\0'; fputs(buf, file); putc('\n', file); } } fclose(file); return (0); } Warning(s) char *s; { extern Frame frame; extern Panel_item left_foot, right_foot; Beep(); xv_set(frame, FRAME_RIGHT_FOOTER, s, 0); xv_set(right_foot, PANEL_LABEL_STRING, s, 0); } InitNASeq(seq, type) NA_Sequence *seq; int type; { extern int Default_RNA_Trans[]; /* rtm 18.III.98 */ extern int Default_DNA_Trans[], Default_NA_RTrans[]; extern int Default_NA_RTrans[], Default_PROColor_LKUP[], Default_NAColor_LKUP[]; SetTime(&(seq->t_stamp.origin)); SetTime(&(seq->t_stamp.modify)); strncpy(seq->id, uniqueID(), 79); seq->seq_name[0] = '\0'; seq->barcode[0] = '\0'; seq->contig[0] = '\0'; seq->membrane[0] = '\0'; seq->authority[0] = '\0'; seq->short_name[0] = '\0'; seq->sequence = NULL; seq->offset = 0; seq->baggage = NULL; seq->baggage_len = 0; seq->baggage_maxlen = 0; seq->comments = NULL; seq->comments_len = 0; seq->comments_maxlen = 0; seq->description[0] = '\0'; seq->mask = NULL; seq->seqlen = 0; seq->seqmaxlen = 0; seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION; #ifdef HGL seq->attr = 0; #else seq->attr = IS_5_TO_3 + IS_PRIMARY; #endif seq->elementtype = type; seq->groupid = 0; seq->groupb = NULL; seq->groupf = NULL; seq->cmask = NULL; seq->selected = 0; seq->subselected = 0; switch (type) { case DNA: seq->tmatrix = Default_DNA_Trans; seq->rmatrix = Default_NA_RTrans; seq->col_lut = Default_NAColor_LKUP; break; case RNA: seq->tmatrix = Default_RNA_Trans; seq->rmatrix = Default_NA_RTrans; seq->col_lut = Default_NAColor_LKUP; break; case PROTEIN: seq->tmatrix = NULL; seq->rmatrix = NULL; seq->col_lut = Default_PROColor_LKUP; break; case MASK: case TEXT: default: seq->tmatrix = NULL; seq->rmatrix = NULL; seq->col_lut = NULL; break; } return; } ReadCMask(filename) char *filename; { extern Frame frame; extern NA_Alignment *DataSet; char Inline[GBUFSIZ], head[GBUFSIZ], curname[GBUFSIZ], temp[GBUFSIZ]; int IGNORE_DASH = FALSE, offset; NA_DisplayData *NAdd; NA_Alignment *aln; int i, j, k, curlen = 0, *colors, orig_ctype, jj, indx = 0; FILE *file; if (DataSet == NULL) return; NAdd = (NA_DisplayData *)((NA_Alignment *)DataSet)->na_ddata; if (NAdd == NULL) return; aln = (NA_Alignment *)DataSet; curname[0] = '\0'; orig_ctype = NAdd->color_type; file = fopen(filename, "r"); if (file == NULL) { Warning("File not found"); Warning(filename); return; } NAdd->color_type = COLOR_ALN_MASK; for (; fgets(Inline, GBUFSIZ, file) != 0;) { if (Find(Inline, "offset:")) { crop(Inline, head, temp); sscanf(temp, "%d", &(aln->cmask_offset)); } else if (Find(Inline, "nodash:")) IGNORE_DASH = TRUE; else if (Find(Inline, "dash:")) IGNORE_DASH = TRUE; else if (Find(Inline, "name:")) { crop(Inline, head, curname); curname[strlen(curname) - 1] = '\0'; for (j = 0; j < strlen(curname); j++) if (curname[j] == '(') curname[j] = '\0'; } else if (Find(Inline, "length:")) { crop(Inline, head, temp); sscanf(temp, "%d", &curlen); } else if (Find(Inline, "start:")) { indx = -1; if (curlen == 0) { Warning("illegal format in colormask"); NAdd->color_type = orig_ctype; return; } if (strlen(curname) != 0) { indx = -1; for (j = 0; j < aln->numelements; j++) if (Find(aln->element[j].short_name, curname) || Find(aln->element[j].id, curname)) { if (aln->element[j].cmask != NULL) Cfree(aln->element[j] .cmask); colors = (int *)Calloc( aln->element[j].seqmaxlen + 1 + aln->element[j].offset, sizeof(int)); aln->element[j].cmask = colors; NAdd->color_type = COLOR_SEQ_MASK; indx = j; j = aln->numelements; } if (indx == -1) colors = NULL; } else { if (aln->cmask != NULL) Cfree(aln->cmask); colors = (int *)Calloc(curlen, sizeof(int)); aln->cmask = colors; aln->cmask_len = curlen; NAdd->color_type = COLOR_ALN_MASK; for (j = 0; j < curlen; j++) colors[j] = 12; } if (IGNORE_DASH && (indx != -1)) { for (jj = 0, j = 0; (j < curlen) && (jj < aln->element[indx].seqlen); j++, jj++) { offset = aln->element[indx].offset; if (fgets(Inline, GBUFSIZ, file) == NULL) { Warning( "illegal format in " "colormask"); NAdd->color_type = orig_ctype; return; } /* * Fixed so that the keyword nodash *causes the colormask to be mapped to *the sequence, not the alignment. * * The allocated space is equal the *seqlen of the matched sequence. * */ if (aln->element[indx].tmatrix) for (; (getelem( &(aln->element[indx]), jj + offset) == (aln->element[indx] .tmatrix['-']) || (getelem(&(aln->element [indx]), jj + offset) == aln->element[indx] .tmatrix['~'])) && jj < aln->element[indx] .seqlen;) colors[jj++] = 12; else for (; getelem( &(aln->element[indx]), jj + offset) == '-' && jj < aln->element[indx] .seqlen;) colors[jj++] = 12; sscanf(Inline, "%d", &(colors[jj])); } } else if ((indx == -1) && (strlen(curname) != 0)) for (j = 0; j < curlen; j++) fgets(Inline, GBUFSIZ, file); else for (j = 0; j < curlen; j++) { if (fgets(Inline, GBUFSIZ, file) == NULL) { Warning( "illegal format in " "colormask"); NAdd->color_type = orig_ctype; return; } sscanf(Inline, "%d", &(colors[j])); } IGNORE_DASH = FALSE; curname[0] = '\0'; } } RepaintAll(TRUE); return; } ReadNA_Flat(filename, dataset, type) char *filename; char *dataset; int type; { int i, j, jj, c, curelem, offset; char name[GBUFSIZ]; char buffer[GBUFSIZ]; char origin[GBUFSIZ], ref[GBUFSIZ]; char Inline[GBUFSIZ], head[GBUFSIZ], tail[GBUFSIZ], temp[GBUFSIZ]; char curname[GBUFSIZ]; NA_Sequence *this_elem; NA_Alignment *data; extern int Default_DNA_Trans[], Default_RNA_Trans[], Default_NA_RTrans[]; FILE *file; curname[0] = '\0'; data = (NA_Alignment *)dataset; file = fopen(filename, "r"); if (file == NULL) { fprintf(stderr, "Cannot open %s.\n", filename); return; } for (; fgets(Inline, GBUFSIZ, file) != 0;) { if (Inline[0] == '#' || Inline[0] == '%' || Inline[0] == '"' || Inline[0] == '@') { offset = 0; for (j = 0; j < strlen(Inline); j++) { if (Inline[j] == '(') { sscanf((char *)&(Inline[j + 1]), "%d", &offset); Inline[j] = '\0'; } } curelem = data->numelements++; if (curelem == 0) { data->element = (NA_Sequence *)Calloc( 5, sizeof(NA_Sequence)); data->maxnumelements = 5; } else if (curelem == data->maxnumelements) { (data->maxnumelements) *= 2; data->element = (NA_Sequence *)Realloc( data->element, data->maxnumelements * sizeof(NA_Sequence)); } InitNASeq(&(data->element[curelem]), Inline[0] == '#' ? DNA : Inline[0] == '%' ? PROTEIN : Inline[0] == '"' ? TEXT : Inline[0] == '@' ? MASK : TEXT); this_elem = &(data->element[curelem]); if (Inline[strlen(Inline) - 1] == '\n') Inline[strlen(Inline) - 1] = '\0'; strncpy(this_elem->short_name, (char *)&(Inline[1]), 31); this_elem->offset = offset; } else if (Inline[0] != '\n') { for (j = 0, jj = 0; j < strlen(Inline); j++) if (Inline[j] != ' ' && Inline[j] != '\n' && Inline[j] != '\t') buffer[jj++] = Inline[j]; if (data->element[curelem].rmatrix) Ascii2NA(buffer, jj, data->element[curelem].rmatrix); AppendNA(buffer, jj, &(data->element[curelem])); } } for (j = 0; j < data->numelements; j++) data->maxlen = MAX(data->maxlen, data->element[j].seqlen + data->element[j].offset); for (j = 0; j < data->numelements; j++) if (data->element[j].seqlen == 0) data->element[j].protect = PROT_BASE_CHANGES + PROT_GREY_SPACE + PROT_WHITE_SPACE + PROT_TRANSLATION; NormalizeOffset(data); Regroup(data); return; } WriteStatus(aln, filename, method) NA_Alignment *aln; char *filename; int method; { extern int EditMode, FileFormat; extern NA_Alignment *DataSet; NA_DisplayData *NAdd; NA_Sequence *this_seq; int j; FILE *file; if (DataSet == NULL) return; NAdd = (NA_DisplayData *)((NA_Alignment *)DataSet)->na_ddata; if (NAdd == NULL) return; file = fopen(filename, "w"); if (file == NULL) { Warning("Cannot open status file."); return (1); } fprintf(file, "File_format: %s\n", FileFormat == GENBANK ? "genbank" : "flat"); /* fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert": "check"); */ this_seq = &(aln->element[NAdd->cursor_y]); if (this_seq->id != NULL) fprintf(file, "sequence-ID %s\n", this_seq->id); fprintf(file, "Column: %d\nPos:%d\n", NAdd->cursor_x, NAdd->position); switch (this_seq->elementtype) { case DNA: case RNA: fprintf(file, "#%s\n", this_seq->short_name); break; case PROTEIN: fprintf(file, "%%%s\n", this_seq->short_name); break; case MASK: fprintf(file, "@%s\n", this_seq->short_name); break; case TEXT: fprintf(file, "%c%s\n", '"', this_seq->short_name); break; default: break; } if (this_seq->tmatrix) for (j = 0; j < this_seq->seqlen; j++) putc(this_seq->tmatrix[getelem(this_seq, j)], file); else for (j = 0; j < this_seq->seqlen; j++) putc(getelem(this_seq, j), file); fclose(file); return; } ReadStatus(filename) char *filename; { /* int i,j; FILE *file; char Inline[GBUFSIZ],head[GBUFSIZ]; file = fopen(filename,"r"); for(;!DONE;) { fgets(Inline,GBUFSIZ,file); if(strlen(Inline) == 0) DONE = TRUE; else { sscanf(Inline,"%s",head); if(strncmp(head,"Col",3) != 0) { sscanf(Inline,"%*s %d",head,&(DataSet->nadd-> cursor_x),&(DataSet->nadd->cursory); } else if(strncmp(head,"Pos",3) != 0) { } } } */ } NormalizeOffset(aln) NA_Alignment *aln; { int i, j, offset = 99999999; for (j = 0; j < aln->numelements; j++) offset = MIN(offset, aln->element[j].offset); for (j = 0; j < aln->numelements; j++) aln->element[j].offset -= offset; aln->maxlen = -999999999; for (j = 0; j < aln->numelements; j++) aln->maxlen = MAX(aln->element[j].seqlen + aln->element[j].offset, aln->maxlen); aln->rel_offset += offset; if (aln->numelements == 0) aln->rel_offset = 0; return; } WriteCMask(aln, filename, method, maskable) NA_Alignment *aln; char *filename; int method, maskable; { int j, kk, mask = -1, k, offset, min_offset = -999999; char offset_str[100]; int *buf; NA_Sequence *seqs; FILE *file; if (aln == NULL) return; if (aln->numelements == (int)NULL) return; seqs = aln->element; file = fopen(filename, "w"); if (file == NULL) { Warning("Cannot open file for output"); return (1); } if (maskable && (method != SELECT_REGION)) { for (j = 0; j < aln->numelements; j++) if (seqs[j].elementtype == MASK && seqs[j].selected) mask = j; } for (j = 0; j < aln->numelements; j++) { SeqNorm(&(seqs[j])); } for (j = 0; j < aln->numelements; j++) { if (method != SELECT_REGION) offset = seqs[j].offset; else for (offset = seqs[j].offset; aln->selection_mask[offset] == '0'; offset++) ; if (offset + aln->rel_offset != 0) sprintf(offset_str, "(%d)", offset + aln->rel_offset); else offset_str[0] = '\0'; if (((j != mask) && (seqs[j].selected) && method != SELECT_REGION) || (method == SELECT_REGION && seqs[j].subselected) || method == ALL) { fprintf( file, "%c%s%s\n", seqs[j].elementtype == DNA ? '#' : seqs[j].elementtype == RNA ? '#' : seqs[j].elementtype == PROTEIN ? '%' : seqs[j].elementtype == TEXT ? '"' : seqs[j].elementtype == MASK ? '@' : '"', seqs[j].short_name, (offset + aln->rel_offset == 0) ? "" : offset_str); if (seqs[j].cmask != NULL) { buf = (int *)Calloc(seqs[j].seqlen, sizeof(int)); if (mask == -1) { for (k = 0, kk = 0; kk < seqs[j].seqlen; kk++) { if (method == SELECT_REGION) { if (aln->selection_mask [kk + offset] == '1') buf[k++] = (getcmask( &(seqs[j]), kk + offset)); } else buf[k++] = (getcmask( &(seqs[j]), kk + offset)); } } else { for (k = 0, kk = 0; kk < seqs[j].seqlen; kk++) if (getelem(&(seqs[mask]), kk + offset) == '1') buf[k++] = (getcmask( &(seqs[j]), kk + offset)); /* * Looks like k *might be one behind? */ } fprintf( file, "name:%s\noffset:%d\nlength:%d\nstart:\n", seqs[j].short_name, seqs[j].offset, k); for (kk = 0; kk < k; kk++) fprintf(file, "%d\n", buf[kk]); Cfree(buf); } } } fclose(file); return (0); }