/*
 * Nucleic-acid to amino-acid translation: codon tables, a (partly missing)
 * driver, the translation routine and a three-letter to one-letter helper.
 *
 * NOTE(review): this copy of the file appears to be damaged.  Wherever the
 * code would be expected to contain a less-than sign followed later by a
 * greater-than sign, the text in between seems to have been removed: the
 * header names after the #include directives are gone, the end of main()
 * runs straight into the middle of the translation routine, and several
 * loop conditions are incomplete.  The code is left exactly as found and
 * the affected spots are marked; restore them from a pristine copy before
 * attempting to build.
 */
#include #include #include #include "global_defs.h"

/*
 * Codon tables.  Each table is a flat list of strings that alternates
 * between a three-base codon and the three-letter residue code it maps to
 * ("Ter" marks a stop).  The translation loop below compares a codon against
 * row i and reads the residue from row i+1, stepping i by two.  Only the
 * first 128 of the 512 rows have initializers; the remaining rows are
 * zero-filled, as C does for partially initialized arrays.
 *
 * vert_mito differs from the universal table in AGA and AGG (Ter),
 * ATA (Met) and TGA (Trp).
 */
char vert_mito[512][4] = {
    "AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn",
    "ACA","Thr", "ACC","Thr", "ACG","Thr", "ACT","Thr",
    "AGA","Ter", "AGC","Ser", "AGG","Ter", "AGT","Ser",
    "ATA","Met", "ATC","Ile", "ATG","Met", "ATT","Ile",
    "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His",
    "CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro",
    "CGA","Arg", "CGC","Arg", "CGG","Arg", "CGT","Arg",
    "CTA","Leu", "CTC","Leu", "CTG","Leu", "CTT","Leu",
    "GAA","Glu", "GAC","Asp", "GAG","Glu", "GAT","Asp",
    "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala",
    "GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly",
    "GTA","Val", "GTC","Val", "GTG","Val", "GTT","Val",
    "TAA","Ter", "TAC","Tyr", "TAG","Ter", "TAT","Tyr",
    "TCA","Ser", "TCC","Ser", "TCG","Ser", "TCT","Ser",
    "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys",
    "TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe"
},
/* mycoplasma differs from the universal table only in TGA (Trp). */
mycoplasma[512][4] = {
    "AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn",
    "ACA","Thr", "ACC","Thr", "ACG","Thr", "ACT","Thr",
    "AGA","Arg", "AGC","Ser", "AGG","Arg", "AGT","Ser",
    "ATA","Ile", "ATC","Ile", "ATG","Met", "ATT","Ile",
    "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His",
    "CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro",
    "CGA","Arg", "CGC","Arg", "CGG","Arg", "CGT","Arg",
    "CTA","Leu", "CTC","Leu", "CTG","Leu", "CTT","Leu",
    "GAA","Glu", "GAC","Asp", "GAG","Glu", "GAT","Asp",
    "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala",
    "GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly",
    "GTA","Val", "GTC","Val", "GTG","Val", "GTT","Val",
    "TAA","Ter", "TAC","Tyr", "TAG","Ter", "TAT","Tyr",
    "TCA","Ser", "TCC","Ser", "TCG","Ser", "TCT","Ser",
    "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys",
    "TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe"
},
/* universal: the reference table the other three are described against. */
universal[512][4] = {
    "AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn",
    "ACA","Thr", "ACC","Thr", "ACG","Thr", "ACT","Thr",
    "AGA","Arg", "AGC","Ser", "AGG","Arg", "AGT","Ser",
    "ATA","Ile", "ATC","Ile", "ATG","Met", "ATT","Ile",
    "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His",
    "CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro",
    "CGA","Arg", "CGC","Arg", "CGG","Arg", "CGT","Arg",
    "CTA","Leu", "CTC","Leu", "CTG","Leu", "CTT","Leu",
    "GAA","Glu", "GAC","Asp", "GAG","Glu", "GAT","Asp",
    "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala",
    "GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly",
    "GTA","Val", "GTC","Val", "GTG","Val", "GTT","Val",
    "TAA","Ter", "TAC","Tyr", "TAG","Ter", "TAT","Tyr",
    "TCA","Ser", "TCC","Ser", "TCG","Ser", "TCT","Ser",
    "TGA","Ter", "TGC","Cys", "TGG","Trp", "TGT","Cys",
    "TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe"
},
/*
 * yeast differs from the universal table in ATA (Met), the four CTx codons
 * (Thr) and TGA (Trp).
 */
yeast[512][4] = {
    "AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn",
    "ACA","Thr", "ACC","Thr", "ACG","Thr", "ACT","Thr",
    "AGA","Arg", "AGC","Ser", "AGG","Arg", "AGT","Ser",
    "ATA","Met", "ATC","Ile", "ATG","Met", "ATT","Ile",
    "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His",
    "CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro",
    "CGA","Arg", "CGC","Arg", "CGG","Arg", "CGT","Arg",
    "CTA","Thr", "CTC","Thr", "CTG","Thr", "CTT","Thr",
    "GAA","Glu", "GAC","Asp", "GAG","Glu", "GAT","Asp",
    "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala",
    "GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly",
    "GTA","Val", "GTC","Val", "GTG","Val", "GTT","Val",
    "TAA","Ter", "TAC","Tyr", "TAG","Ter", "TAT","Tyr",
    "TCA","Ser", "TCC","Ser", "TCG","Ser", "TCT","Ser",
    "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys",
    "TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe"
};

/*
 * Three-letter to one-letter residue codes.  Each entry is the three-letter
 * code immediately followed by its one-letter code (index 3), which is the
 * layout ThreeToOne() relies on.  "Ter" maps to '*'.
 */
char three_to_one[23][5] = {
    "AlaA", "ArgR", "AsnN", "AspD", "AsxB", "CysC", "GlnQ", "GluE",
    "GlxZ", "GlyG", "HisH", "IleI", "LeuL", "LysK", "MetM", "PheF",
    "ProP", "SerS", "ThrT", "TrpW", "TyrY", "ValV", "Ter*"
};

/*
 * Program entry point (K&R-style definition, implicit int return type).
 *
 * ac, av: argument count and argument vector.
 *
 * When run with no arguments (ac == 1) a usage summary is written to stderr
 * and the program exits with status 0.  Everything after that point is
 * missing from this copy, so the option handling and the use of most of the
 * locals declared here cannot be described.
 */
main(ac,av)
int ac;
char **av;
{
    int Success = TRUE,cursize,tbl,i,j,k,maxsize = 10,frame=1,
        min_frame = 0,ltrs=0, sep=0, tmp_num_frame, print_comp=FALSE;
    Sequence temp,*seqs;
    FILE *file;
    char number[5],tr_tbl[512][4];
    extern char *Realloc();
    extern Translate_NA_AA();

    if(ac == 1)
    {
        fprintf(stderr,
"Options:[-tbl codon_table] [-frame #] [-min_frame #] [-3] [-sep] GDEfile\n");
        fprintf(stderr," 1=universal 1=first frame Shortest AA Three letter don't seperate\n");
        fprintf(stderr," 2=mycoplasma 2=second frame sequence codes groups\n");
        fprintf(stderr," 3=yeast 3=third frame to translate\n");
        fprintf(stderr," 4=Vert. mito. 6=All six\n");
        exit(0);
    }
    /*
     * NOTE(review): the next statement is not valid C.  It looks like the
     * start of main()'s argument loop fused with the tail of a
     * strncpy(..., seq->name, 80) call from the translation routine; the
     * rest of main(), the header of the translation routine (presumably
     * Translate_NA_AA, going by the extern declaration above -- confirm) and
     * its local declarations are all missing.
     */
    for(j=1;jname, 80);

    /*
     * From here on this is the body of the translation routine.  It builds
     * the translated text in temp, a buffer of seq->seqlen+1 characters
     * that is first filled with '-' (gap) characters.
     *
     * NOTE(review): the loop condition below (and the other "iseqlen" /
     * "posseqlen" conditions further down) is damaged; it presumably
     * compared the index against seq->seqlen -- confirm.
     */
    temp=(char*)Calloc(seq->seqlen+1,sizeof(char));
    for(i=0;iseqlen;i++)
        temp[i] = '-';
    if(letter_code == 1)
    {
        /* Three-letter codes: the record type is set to "TEXT". */
        strcpy(seq->type,"TEXT");
    }
    else
    {
        /* Single-letter codes: the record type is set to "PROT". */
        strcpy(seq->type , "PROT");
    }
    /*
     * Skip over r_frame valid characters (' ', '-' and '~' do not count as
     * valid) to find the position where translation starts.
     */
    for(true_start=0,i=0; iseqlen;true_start++)
    {
        c=seq->c_elem[true_start];
        if(index(" -~",c) == NULL)
            i++;
    }
    for(pos=true_start;posseqlen;pos++)
    {
        c=seq->c_elem[pos];
        /* Characters in " -~" are ignored and do not enter a codon. */
        if(index(" -~",c) == NULL)
        {
            /* Clearing bit 32 maps ASCII lower-case letters to upper case. */
            c &= (255-32); /*upper case*/
            /* Tables are written with T, so U is treated as T. */
            if(c == 'U')
                c = 'T';
            /*
             * We have a valid character.  Remember where the codon began so
             * the translation is written at the position of its first base.
             */
            if(fptr == 0)
                start = pos;
            codon[fptr++] = c;
            /*
             * Translate the codon once three bases have been collected.
             * NOTE(review): strcmp() needs codon to be NUL-terminated after
             * the third character; its declaration is not visible here.
             */
            if(fptr == 3)
            {
                /* Place default code 'X' in case translation fails. */
                temp[start] = 'X';
                for(i=0;i<512;i+=2)
                    if(strcmp(codon,table[i]) == 0)
                    {
                        if(letter_code == 1)
                        {
                            temp[start] = table[i+1][0];
                            temp[start+1] = table[i+1][1];
                            temp[start+2] = table[i+1][2];
                        }
                        else
                            temp[start] = ThreeToOne (table[i+1]);
                        /* Setting i past the limit ends the table search. */
                        i = 512;
                    }
                fptr = 0;
                /*
                 * A stop codon ("Ter" or '*') closes the current open
                 * reading frame; check whether that frame is worth keeping.
                 */
                if((strncmp("Ter",&(temp[start]),3) == 0) || (temp[start] == '*'))
                {
                    /* If the ORF is too small, clear it out... */
                    if(codon_count < min_frame)
                    {
                        /* Should we separate the groups out, or not? */
                        if (!sep)
                        {
                            /*
                             * If reading from stop to stop, leave the
                             * leading stop codon.
                             * NOTE(review): the inner loop condition is
                             * damaged, so the exact range that is reset to
                             * '-' cannot be determined from this copy.
                             */
                            if(temp[last_start]=='*' || strncmp("Ter",&(temp[last_start]),3) == 0)
                                for(i=last_start+3;iseqlen;i++)
                                    temp[i] = '-';
                        }
                    }
                    else
                    {
                        if (sep)
                        {
                            /*
                             * Emit this frame as its own record: append
                             * "_<grp>" to the name, temporarily point the
                             * record at temp, write it to stdout, then
                             * restore the name and the original data and
                             * reset temp to gaps for the next frame.
                             */
                            sprintf(strtmp, "_%d", grp++);
                            strncat(seq->name, strtmp, 20);
                            save_c_elem = seq->c_elem;
                            seq->c_elem = temp;
                            WriteRecord(stdout,seq,NULL,0);
                            strncpy(seq->name, save_name, 80);
                            seq->c_elem = save_c_elem;
                            for(i=0;iseqlen;i++)
                                temp[i] = '-';
                        }
                    }
                    codon_count = 0;
                    last_start = start;
                    /*
                     * Don't bother continuing if too close to the end to
                     * meet the min_frame requirement.
                     */
                    if ((pos + (3 * min_frame)) > seq->seqlen)
                        break;
                }
                else
                    codon_count++;
            }
        }
    }
    Cfree(seq->c_elem);
    /*
     * Without -sep style output the whole translation is written as a
     * single record.
     * NOTE(review): seq->c_elem still points at temp when Cfree(temp) runs;
     * if Cfree releases the buffer the caller is left holding a dangling
     * pointer -- confirm against the callers.
     */
    if (!sep)
    {
        seq->c_elem = temp;
        WriteRecord(stdout,seq,NULL,0);
    }
    Cfree(temp);
    return;
}

/*
 * Convert a three-letter residue code to its one-letter code.
 *
 * s: string whose first three characters are compared against the entries
 *    of three_to_one.
 *
 * Returns the fourth character of the first matching entry.  When nothing
 * matches, Warning() is called and '*' is returned.
 */
char ThreeToOne(s)
char s[];
{
    extern char three_to_one[][5];
    int j;
    for(j=0;j<23;j++)
        if(strncmp(s,three_to_one[j],3) == 0)
            return(three_to_one[j][3]);
    Warning("ThreeToOne, code not found");
    return('*');
}