/*
 * Copyright (C) 2008 Pablo A. Goloboff
 * Copyright (C) 2023 Guoyi Zhang;
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Email: pablogolo@csnat.edu.ar
 * Mail: Pablo A. Goloboff, INSUE, Instituto Miguel Lillo, Miguel Lillo 205,
 *       4000 S.M. de Tucuman, Argentina.
 * Email: GuoyiZhang@malacology.net
 */

/*
 * Reads a flat file made of records with ACCESSION / ORGANISM / FEATURES /
 * ORIGIN sections, selects records whose /gene= or /product= qualifier
 * matches a name given in an options file, and writes the selected sequence
 * (or its /translation= text) to stdout under a ">name" header line.
 * Accessions that are not selected are listed in the file gb2tnt.not.
 *
 * NOTE(review): the header names of the seven #include directives were
 * missing from the reviewed text.  The first six below are required by the
 * code in this file; <unistd.h> is a guess for the seventh -- confirm
 * against the original source.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int verb = 0;
int use_translation = 0; /* set by the "protein" option */
int dargc;               /* copy of argc */
int laquiero;            /* 0: undecided, 1: record wanted, -1: record rejected */
int showedskipped = 0;   /* the "accession " keyword was written to gb2tnt.not */
char **dargv;            /* copy of argv */
int fileargs;            // args 1 to fileargs-1 are file names!
FILE *inpf, *opsf, *curinput, *notsfile;
int laschar;             /* last character returned by rd() */

#define NUCLEAR 1
#define MITOCH 2
#define PLASTID 3
#define CHLORO 4
#define TRUE 1
#define FALSE 0
#define MAXUSERACC 50000

int genometype = 0;          /* genome filter from the options file; 0 = none */
int use_string_matching = 0; /* compare names by similarity instead of prefix */
double string_similarity;    /* minimum similarity accepted by stringis() */

int stringis(char *, char *);   // are both strings the same, case aside ???
int rd(void);                   // return next char. from input file; save it in laschar
void outer(int, char *txt);     // if first arg. is TRUE, then output txt and exit
void gonln(void);               // read all the way to the ENTER
void gotostring(char *, int);   // find string of first arg, with margin of sec arg characters
void rdto(int, char *);         // saves to string every byte it finds before first arg
void stornexline(void);         // puts in stringsp everything to the ENTER
void *mymalloc(unsigned long int); // allocs mem or exits
void process(void);
void save_to_not(void);
void setopts(void);
void makelower(char *txt);
void parsit(void);
void parse_translation(void);
void spew_name(void);
void output_translation(void);
void effect_complementation(void);

#define MAXNUMCHUNKS 200
typedef struct {
    int from;
    int to;
} Chunktyp;
Chunktyp chunk[MAXNUMCHUNKS]; /* 1-based inclusive ranges parsed from a location */
int numchunks = 0;

#define MAXSEQLENGTH 25000
char bytestring[MAXSEQLENGTH];        /* sequence being assembled for output */
char complement_string[MAXSEQLENGTH]; /* scratch copy used while complementing */
unsigned long int accepted = 0, rejected = 0;
char stringsp[160], headerline[160];
int prodnumber = 0, genenumber = 0;

#define MAX_USER_DEFINITIONS 60
char *prodname[MAX_USER_DEFINITIONS];
char *genename[MAX_USER_DEFINITIONS];
char accnumber[100];
char taxname[100];
char taxonomy[1000];
char **useraccname;
int useraccnumber = 0;
unsigned long int dafsize, bytesread = 0;

#ifdef LINUX
int getch(void)
{
    return (getc(stdin));
}
#endif

/* Allocates `size` bytes; terminates the program through outer() on failure. */
void *mymalloc(unsigned long int size)
{
    void *pt = malloc(size);

    outer(pt == NULL, "Not enough memory!");
    return pt;
}

/*
 * Counts one more byte of the input file and refreshes the percentage shown
 * on stderr whenever its integer value changes.
 */
void dildit(void)
{
    static unsigned long prv = 0;
    unsigned long ifract;

    ++bytesread;
    if (dafsize == 0) /* size unknown or empty file: nothing to divide by */
        return;
    ifract = (unsigned long)(((double)bytesread / dafsize) * 100);
    if (ifract == prv)
        return;
    prv = ifract;
    fprintf(stderr, "\rParsing ... %lu%%", ifract);
}

/* Returns the cursor to the start of the progress line on stderr. */
void undild(void)
{
    fprintf(stderr, "\r \r");
}

/*
 * Returns the next character of `curinput` and stores it in `laschar`.
 * A CR LF pair is delivered as a single LF.
 *
 * When called with the main input already at end of file, prints the
 * summary, waits for a key on stdin and terminates the program: this is the
 * normal way the program ends, so it exits with EXIT_SUCCESS.
 * When called with the options file already at end of file, prints a notice
 * and returns EOF; the stream is left open (setopts() closes it) so that it
 * is never read after being closed.
 */
int rd(void)
{
    int c;

    if (feof(curinput)) {
        if (curinput == inpf) {
            fclose(curinput);
            undild();
            if (!useraccnumber)
                fprintf(stderr, "\nAccepted %lu accessions, rejected %lu", accepted, rejected);
            fprintf(stderr, "\nFinished parsing %s (input)\n", dargv[1]);
            getc(stdin);
            exit(EXIT_SUCCESS);
        }
        fprintf(stderr, "\nFinished parsing %s (options)\n", dargv[2]);
        return (laschar = EOF);
    }
    if (curinput == inpf)
        dildit();
    c = getc(curinput);
    if (c == 13) {
        int next = getc(curinput);

        if (next == 10)
            c = next;
        else if (next != EOF)
            ungetc(next, curinput); /* lone CR: deliver it, keep the next byte for later */
    }
    return (laschar = c);
}

/*
 * Opens the input file named by the first command-line argument, makes it
 * the current input, and records its size for the progress display.
 */
void openit(void)
{
    struct stat buf;

    curinput = inpf = fopen(dargv[1], "rb");
    if (inpf == NULL) {
        fprintf(stderr, "Error trying to open input file %s: ", dargv[1]);
        outer(1, "cannot open file");
    }
    /* The size only drives the percentage display; 0 disables it.  stat() on
       the path is used because fileno() is not declared in strict ISO mode. */
    if (stat(dargv[1], &buf) == 0 && buf.st_size > 0)
        dafsize = (unsigned long)buf.st_size;
    else
        dafsize = 0;
}

/*
 * If `doit` is non-zero, prints `txt` on stderr, waits for a key on stdin
 * and terminates the program with a failure status.
 */
void outer(int doit, char *txt)
{
    if (!doit)
        return;
    fprintf(stderr, "%s\n", txt);
    getc(stdin);
    exit(EXIT_FAILURE);
}

/* Prints the usage text, waits for a key on stdin and exits. */
void dohelp(void)
{
    fprintf(stderr,
            "\n\n"
            "Usage is: \n\n"
            " Give input file name (1st arg) and options file (2nd arg)\n"
            " Output can be redirected with \">\"\n\n"
            " Inside options file: \n\n"
            " gene \"name(s)\"\n"
            " product \"name(s)\"\n"
            " protein\n"
            " genome \"type\"\n"
            " stringmatch S (S=similarity=1-(E/L)\n"
            " where E=edit cost, and L=length)\n\n"
            " List of skipped sequences goes to file gb2tnt.not\n\n"
            " To see details of skipped sequences, use gb2tnt.not as\n"
            " options file:\n\n"
            " \"gb2tnt input gb2tnt.not\"\n\n"
            " Alternatively, string similarity can be given as 3d and\n"
            " 4th argument (which overrides values in options file)\n\n");
    getc(stdin);
    exit(0);
}

/* Define GB2TNT_NO_MAIN to compile the parser without its entry point,
   e.g. to link the helper functions into a test harness. */
#ifndef GB2TNT_NO_MAIN
int main(int argc, char **argv)
{
    dargc = argc;
    dargv = argv;
    setopts();
    process();
    return 0;
}
#endif

/* Consumes input until the last character read is a line feed. */
void gonln(void)
{
    while (laschar != 10)
        rd();
}

/*
 * Skips input lines until one whose first word equals `string`.
 * With shift > 0 the word must be preceded by exactly `shift` blanks;
 * otherwise any amount of leading white space is skipped.  On return
 * `stringsp` holds the word and `laschar` the white-space character that
 * ended it.  Terminates the program when the input runs out.
 */
void gotostring(char *string, int shift)
{
    char *cp;
    int i;
    int somenonwhite;

    strcpy(stringsp, " ");
    while (strcmp(stringsp, string) && !feof(inpf)) {
        gonln();
        somenonwhite = 0;
        if (shift > 0) {
            for (i = 0; i < shift && !somenonwhite; ++i)
                if (rd() != 32)
                    somenonwhite = 1;
            if (somenonwhite)
                continue;
        } else {
            while (isspace(rd()))
                ;
            ungetc(laschar, inpf);
        }
        cp = stringsp;
        *cp = rd();
        if (*cp++ != *string) /* cheap reject on the first letter */
            continue;
        while (!isspace((unsigned char)(*cp = rd()))) {
            cp++;
            if (cp - stringsp > 98)
                break;
        }
        *cp = '\0';
    }
    if (feof(inpf)) {
        fprintf(stderr, "\nDone!");
        exit(EXIT_SUCCESS); /* running out of input is the normal end of a run */
    }
}

/*
 * Reads the next white-space delimited word into `accnumber`.
 * Characters that do not fit are consumed and dropped.
 */
void rdaccnumber(void)
{
    size_t used = 0;

    while (isspace(rd()))
        ;
    while (!isspace(laschar)) {
        if (used + 1 < sizeof accnumber)
            accnumber[used++] = (char)laschar;
        rd();
    }
    accnumber[used] = '\0';
}

/*
 * Worker for rdto(): copies input up to (not including) the character
 * `donde` into `stor`, which has room for `cap` bytes (cap >= 1).
 * Blanks become '_' (so do line feeds when `donde` is not a line feed) and
 * runs of '_' collapse into one.  Text beyond the capacity is consumed and
 * dropped.
 */
static void rdto_bounded(int donde, char *stor, size_t cap)
{
    size_t used = 0;
    int prev = 0;

    while (isspace(laschar))
        rd();
    while (laschar != donde) {
        if (prev != '_' || laschar != '_') {
            if (used + 1 < cap)
                stor[used++] = (char)laschar;
        }
        prev = laschar;
        rd();
        if (laschar == 32)
            laschar = '_';
        if (laschar == 10 && donde != 10)
            laschar = '_';
    }
    stor[used] = '\0';
}

/*
 * Same as rdto_bounded() without a size limit: the caller must guarantee
 * that `stor` is large enough.  Kept for callers that rely on the original
 * signature.
 */
void rdto(int donde, char *stor)
{
    rdto_bounded(donde, stor, (size_t)-1);
}

/*
 * Reads the next non-blank line into `stringsp`, without its leading white
 * space and without the line terminator.  Over-long lines are truncated to
 * fit the buffer.
 */
void stornexline(void)
{
    size_t used = 0;

    rd();
    while (isspace(laschar))
        rd();
    while (laschar != 10 && laschar != 13) {
        if (used + 1 < sizeof stringsp)
            stringsp[used++] = (char)laschar;
        rd();
    }
    stringsp[used] = '\0';
}

/* One cell of the alignment matrix used by doneedwunsch(). */
typedef struct {
    int up, diag, lef; /* cost of reaching the cell from each direction */
    int min;           /* cheapest of the three */
} Stringcomptyp;

Stringcomptyp **cellcost;
int gapcost = 1, gapextcost = 1;
int suscost = 1;
int mademem = 0; /* cellcost has been allocated */

/*
 * Allocates an array of `wid` pointers, each pointing to `hei` elements of
 * `size` bytes.  Terminates the program when memory runs out.
 */
void **loray(int wid, int hei, int size)
{
    int a;
    void **pp = malloc(wid * sizeof(void *));

    outer(pp == NULL, "Not enough RAM");
    for (a = 0; a < wid; ++a) {
        pp[a] = malloc((size_t)hei * (size_t)size);
        outer(pp[a] == NULL, "Not enough RAM");
    }
    return pp;
}

/*
 * Similarity of two strings: 1 - cost / strlen(bp), where cost is the
 * cheapest alignment found by dynamic programming with the global gap and
 * substitution costs.  The first characters of both strings are always
 * treated as aligned at no cost (cell [0][0] starts at 0); the callers in
 * this file pass strings that start with a double quote.
 *
 * Two empty strings are fully similar; an empty string and a non-empty one
 * are not similar at all.  Strings of 99 characters or more are a fatal
 * error.
 */
double doneedwunsch(char *ap, char *bp)
{
    const int HIGH = 10000000;
    int wid = (int)strlen(ap);
    int hei = (int)strlen(bp);
    int i, j, dacos;

    if (wid == 0 || hei == 0) /* no cell to read and nothing to divide by */
        return (wid == hei) ? 1.0 : 0.0;
    if (!mademem) {
        cellcost = (Stringcomptyp **)loray(100, 100, sizeof(Stringcomptyp));
        mademem = 1;
    }
    outer((hei >= 99 || wid >= 99), "String is too long to use string-matching");
    cellcost[0][0].min = cellcost[0][0].diag = 0;
    cellcost[0][0].up = cellcost[0][0].lef = HIGH;
    for (j = 0; j < hei; ++j) {
        for (i = 0; i < wid; ++i) {
            Stringcomptyp *cell;

            if (!i && !j)
                continue;
            cell = &cellcost[i][j];
            if (j) {
                const Stringcomptyp *above = &cellcost[i][j - 1];

                /* continuing a gap costs gapextcost, opening one gapcost */
                cell->up = above->min + (above->min == above->up ? gapextcost : gapcost);
            } else
                cell->up = cell->diag = HIGH;
            if (i) {
                const Stringcomptyp *left = &cellcost[i - 1][j];

                cell->lef = left->min + (left->min == left->lef ? gapextcost : gapcost);
            } else
                cell->lef = cell->diag = HIGH;
            if (i && j)
                cell->diag = cellcost[i - 1][j - 1].min + (ap[i] != bp[j] ? suscost : 0);
            dacos = cell->diag;
            if (dacos > cell->up)
                dacos = cell->up;
            if (dacos > cell->lef)
                dacos = cell->lef;
            cell->min = dacos;
        }
    }
    dacos = cellcost[wid - 1][hei - 1].min;
    return 1 - (double)dacos / (double)hei;
}

/*
 * Decides whether the text `a` found in the input matches the user-supplied
 * name `b`.  The name "?" (with its quotes) matches everything.  With string
 * matching enabled the similarity must reach `string_similarity`; otherwise
 * the strings are compared case-insensitively up to the end of the shorter
 * one.
 */
int stringis(char *a, char *b)
{
    if (!strcmp(b, "\"?\""))
        return 1;
    if (use_string_matching)
        return doneedwunsch(a, b) >= string_similarity ? 1 : 0;
    while (*a && *b)
        if (tolower((unsigned char)*a++) != tolower((unsigned char)*b++))
            return 0;
    return 1;
}

/*
 * Reads the next token of `curinput` into `stringsp`.
 * Returns 1 for a double-quoted literal (stored with both quotes) and 0 for
 * a bare word (stored up to the next white space).  Tokens longer than the
 * buffer are truncated.
 */
int rdliteral(void)
{
    const size_t cap = sizeof stringsp - 1; /* keep room for the terminator */
    size_t used = 0;

    rd();
    while (isspace(laschar) && !feof(curinput))
        rd();
    if (laschar == '\"') {
        stringsp[used++] = (char)laschar;
        while (rd() != '\"' && !feof(curinput))
            if (used < cap - 1) /* keep one slot for the closing quote */
                stringsp[used++] = (char)laschar;
        stringsp[used++] = (char)laschar;
        stringsp[used] = '\0';
        return 1;
    }
    while (!isspace(laschar) && !feof(curinput)) {
        if (used < cap)
            stringsp[used++] = (char)laschar;
        rd();
    }
    stringsp[used] = '\0';
    return 0;
}

/* Returns 1 when `stringsp` starts with `a`, 0 otherwise. */
int istrunc(char *a) // is a a truncation of stringsp ??
{
    return isamatch(a, stringsp);
}

/* Returns 1 when `b` starts with `a`, 0 otherwise. */
int isamatch(char *a, char *b)
{
    for (; *a; ++a, ++b)
        if (*a != *b)
            return 0;
    return 1;
}

/*
 * Main loop: for every record of the input, reads accession, organism and
 * taxonomy, scans the feature table for a /gene= or /product= qualifier
 * that matches a user-supplied name, and then either writes the sequence
 * (parsit), writes the translation (output_translation) or lists the
 * accession as skipped (save_to_not).  When the options file lists
 * accessions, only a description of those records is printed.
 * The loop ends inside rd()/gotostring(), which terminate the program at
 * end of input.
 */
void process(void)
{
    char *cp;
    int i, mygenometype;
    int showed_headerline = 0;
    int showacc_only;
    int found_translation;

    while (!feof(inpf)) {
        gotostring("ACCESSION", 0);
        rdaccnumber();
        showacc_only = 0;
        if (useraccnumber) {
            for (i = 0; i < useraccnumber && !showacc_only; ++i)
                if (!strcmp(accnumber, useraccname[i]))
                    showacc_only = 1;
        }
        laquiero = found_translation = 0;
        gotostring("ORGANISM", -1);
        rdto_bounded(10, taxname, sizeof taxname);
        rdto_bounded('.', taxonomy, sizeof taxonomy);
        gotostring("FEATURES", 0);
        mygenometype = NUCLEAR;

        /**** Accession starts: advance to the first tRNA/rRNA/CDS feature ****/
        while (1) {
            stornexline();
            if (istrunc("ORIGIN")) {
                laquiero = -1;
                break;
            }
            if (istrunc("/organelle=")) {
                if (istrunc("/organelle=\"mitoc"))
                    mygenometype = MITOCH;
                else if (istrunc("/organelle=\"chlorop"))
                    mygenometype = CHLORO;
                else if (istrunc("/organelle=\"plastid"))
                    mygenometype = PLASTID;
            }
            if (istrunc("tRNA"))
                break;
            if (istrunc("rRNA"))
                break;
            if (istrunc("CDS")) {
                /*** line added for Marcos's case ***/
                if (use_translation)
                    laquiero = FALSE;
                break;
            }
        }
        /***** End initial parsing *****/

        if (genometype && mygenometype != genometype)
            laquiero = -1;

        while (laquiero == FALSE || (use_translation && !found_translation)) {
            if (istrunc("gene")) {
                stornexline();
                continue;
            }
            if (istrunc("ORIGIN"))
                break;
            if (istrunc("tRNA") || istrunc("rRNA") || istrunc("CDS")) {
                size_t len = strlen(stringsp);

                showed_headerline = 0;
                strcpy(headerline, stringsp);
                cp = stringsp + (len < 10 ? len : 10); /* never step past the terminator */
                while (isspace((unsigned char)*cp))
                    ++cp;
                if (isamatch("join(", cp)) {
                    while (*cp != ')' && *cp)
                        ++cp;
                    if (!*cp) {
                        /* location continues on the next line */
                        stornexline();
                        cp = stringsp;
                        while (isspace((unsigned char)*cp))
                            ++cp;
                    }
                    strncat(headerline, cp, sizeof headerline - strlen(headerline) - 1);
                }
            }
            stornexline();
            if (showacc_only) {
                if (showacc_only++ == 1) {
                    fprintf(stdout, "\n%s , %s ", accnumber, taxname);
                    if (mygenometype == NUCLEAR)
                        fprintf(stdout, ", nuclear ");
                    if (mygenometype == CHLORO)
                        fprintf(stdout, ", chloro ");
                    if (mygenometype == MITOCH)
                        fprintf(stdout, ", mitoch ");
                    if (mygenometype == PLASTID)
                        fprintf(stdout, ", plastid ");
                }
                if (!showed_headerline)
                    if (isamatch("tRNA", headerline) || isamatch("rRNA", headerline) ||
                        isamatch("CDS", headerline)) {
                        fprintf(stdout, "\n %s , ", headerline);
                        ++showed_headerline;
                    }
                if (istrunc("/product=") || istrunc("/gene=")) {
                    if (istrunc("/product="))
                        cp = stringsp + 9;
                    else
                        cp = stringsp + 6;
                    fprintf(stdout, ", %s", cp);
                }
            }
            /*** line added for Marcos's problem ***/
            if (laquiero && istrunc("/translation=") && use_translation) {
                parse_translation();
                found_translation = 1;
            }
            if (istrunc("/product=") && prodnumber) {
                cp = stringsp + 9;
                makelower(cp);
                for (i = 0; i < prodnumber && !laquiero; ++i)
                    if (stringis(cp, prodname[i]))
                        laquiero = 1;
            }
            if (istrunc("/gene=") && genenumber) {
                cp = stringsp + 6;
                makelower(cp);
                for (i = 0; i < genenumber && !laquiero; ++i)
                    if (stringis(cp, genename[i]))
                        laquiero = 1;
            }
        }

        if (useraccnumber)
            continue;
        if (laquiero == TRUE && (!use_translation || found_translation)) {
            if (use_translation)
                output_translation();
            else
                parsit();
        } else
            save_to_not();
    } // --- while ( !feof ( inpf ) )
}

/*
 * Counts the current accession as rejected and appends it, quoted, to the
 * skipped-accessions file (preceded once by the "accession" keyword, so the
 * file can be used as an options file).  Only the count is updated when
 * that file is not open.
 */
void save_to_not(void)
{
    ++rejected;
    if (notsfile == NULL)
        return;
    if (!showedskipped)
        fprintf(notsfile, "accession ");
    showedskipped = 1;
    fprintf(notsfile, "\"%s\" ", accnumber);
}

/*
 * Skips white space in the options file and reports whether the next token
 * starts with a double quote.  The character looked at is pushed back.
 */
static int next_is_literal(void)
{
    while (isspace(rd()) && !feof(curinput))
        ;
    ungetc(laschar, opsf);
    return laschar == '\"';
}

/*
 * Reads the run of quoted literals that follows a "gene" or "product"
 * keyword, storing a lower-cased copy of each (quotes included) in `names`
 * and updating `*count`.  Exceeding MAX_USER_DEFINITIONS is fatal, reported
 * with `too_many`.
 */
static void read_name_list(char **names, int *count, char *too_many)
{
    while (!feof(curinput)) {
        if (!next_is_literal())
            break;
        outer(*count == MAX_USER_DEFINITIONS, too_many);
        rdliteral();
        makelower(stringsp);
        names[*count] = mymalloc(strlen(stringsp) + 1);
        strcpy(names[(*count)++], stringsp);
    }
}

/*
 * Handles the command line and the options file (second argument), opens
 * gb2tnt.not unless a list of accessions was given, and finally opens the
 * input file (first argument).
 */
void setopts(void)
{
    if (dargc > 1 && !strcmp(dargv[1], "--help"))
        dohelp();
    outer(dargc < 3, "Specify input file and option file (\"--help\" to get help)");
    outer((curinput = opsf = fopen(dargv[2], "rb")) == NULL, "Cannot open file with options");
    if (dargc > 4 && !strcmp(dargv[3], "stringmatch")) {
        use_string_matching = 1;
        string_similarity = atof(dargv[4]);
    }
    while (!feof(opsf)) {
        outer(rdliteral(), "Unexpected literal string in options file");
        if (!strcmp(stringsp, "protein"))
            use_translation = 1;
        else if (!strcmp(stringsp, "stringmatch")) {
            rdliteral();
            if (!use_string_matching) { /* the command line takes precedence */
                use_string_matching = 1;
                string_similarity = atof(stringsp);
            }
        } else if (!strcmp(stringsp, "gene"))
            read_name_list(genename, &genenumber, "Cannot define so many gene names");
        else if (!strcmp(stringsp, "accession")) {
            /* allocate once, so a repeated keyword keeps the names already read */
            if (useraccname == NULL)
                useraccname = mymalloc(MAXUSERACC * sizeof *useraccname);
            while (!feof(curinput)) {
                if (!next_is_literal())
                    break;
                rdliteral();
                if (useraccnumber == MAXUSERACC)
                    fprintf(stderr, "Cannot define so many accessions, will skip from %s on", stringsp);
                else {
                    char *name = mymalloc(strlen(stringsp) + 1);
                    size_t len;

                    /* store the literal without its surrounding quotes */
                    strcpy(name, stringsp + 1);
                    len = strlen(name);
                    if (len > 0)
                        name[len - 1] = '\0';
                    useraccname[useraccnumber++] = name;
                }
            }
        } else if (!strcmp(stringsp, "product"))
            read_name_list(prodname, &prodnumber, "Cannot define so many product names");
        else if (!strcmp(stringsp, "genome")) {
            outer(!rdliteral(), "Syntax error after \"genome\"");
            if (!strcmp(stringsp, "\"mitochondrial\""))
                genometype = MITOCH;
            else if (!strcmp(stringsp, "\"nuclear\""))
                genometype = NUCLEAR;
            else if (!strcmp(stringsp, "\"plastid\""))
                genometype = PLASTID;
            else if (!strcmp(stringsp, "\"chloroplast\""))
                genometype = CHLORO;
            else
                outer(1, "Unrecognized genome option");
        }
    }
    /* Closed before gb2tnt.not is opened for writing: the options file may
       be gb2tnt.not itself. */
    fclose(opsf);
    if (use_string_matching)
        fprintf(stderr, "\nUsing string similarity of %.3f\n", string_similarity);
    if (!useraccnumber)
        outer((notsfile = fopen("gb2tnt.not", "wb")) == NULL, "Cannot open file for skipped accessions");
    openit();
}

/* Converts `txt` to lower case in place. */
void makelower(char *txt)
{
    char *cp;

    for (cp = txt; *cp; ++cp)
        *cp = (char)tolower((unsigned char)*cp);
}

int wrong_location; /* set by storchunk() when a location cannot be parsed */

/*
 * Flags the location being parsed as invalid and returns `cp`.  The
 * offending character is replaced by '0', except when it is the string
 * terminator, which must stay in place.
 */
static char *bad_location(char *cp)
{
    wrong_location = 1;
    if (*cp != '\0')
        *cp = '0';
    return cp;
}

/*
 * Parses one "from..to" range (optionally written "<from..>to") starting at
 * `cp` and appends it to `chunk`.  Returns a pointer to the first character
 * after the range.  On a malformed range, or when `chunk` is full, sets
 * `wrong_location` and stores nothing.
 */
char *storchunk(char *cp)
{
    if (numchunks >= MAXNUMCHUNKS) {
        wrong_location = 1;
        return cp;
    }
    if (*cp == '<')
        ++cp;
    if (!isdigit((unsigned char)*cp))
        return bad_location(cp);
    chunk[numchunks].from = atoi(cp);
    /* stop just after the first '.', or on the terminator */
    while (*cp++ != '.' && *cp)
        ;
    if (*cp != '.')
        return bad_location(cp);
    ++cp;
    if (*cp == '>')
        ++cp;
    if (!isdigit((unsigned char)*cp))
        return bad_location(cp);
    chunk[numchunks].to = atoi(cp);
    while (isdigit((unsigned char)*cp))
        ++cp;
    ++numchunks;
    return cp;
}

int species_read = 0;

/*
 * Writes the nucleotide sequence of the current record: parses the location
 * stored in `headerline` (a plain range, "join(...)" or "complement(...)"),
 * reads the bases of those ranges from the ORIGIN section into `bytestring`
 * and prints name line and sequence, reverse-complemented when the location
 * says so.  Records with a location that cannot be parsed go to the skipped
 * list.
 */
void parsit(void)
{
    size_t hlen = strlen(headerline);
    char *cp = headerline + (hlen < 15 ? hlen : 15); /* never step past the terminator */
    int atchunk, atpos;
    char *bytept = bytestring, now;
    int complement_it = 0;

    numchunks = 0;
    wrong_location = 0;
    while (isspace((unsigned char)*cp))
        ++cp;
    if (*cp == '<' || isdigit((unsigned char)*cp))
        storchunk(cp);
    else {
        int is_join = strncmp(cp, "join", 4) == 0;
        int is_complement = strncmp(cp, "complement", 10) == 0;

        if (!is_join && !is_complement) {
            char shown[11]; /* the message shows the first 10 characters */
            size_t n = strlen(cp);

            if (n > 10)
                n = 10;
            memcpy(shown, cp, n);
            shown[n] = '\0';
            fprintf(stderr, "OOPS!!\nFound unrecognized location specifier: %s\nFor accession %s\n",
                    shown, accnumber);
            save_to_not();
            return;
        }
        if (is_complement) {
            complement_it = 1;
            cp += 10;
        } else
            cp += 4;
        if (*cp) /* step over the opening parenthesis */
            ++cp;
        while (*cp != ')' && !wrong_location) {
            cp = storchunk(cp);
            while (*cp == ',' || isspace((unsigned char)*cp))
                ++cp;
        }
    }
    /* no chunk at all would make the loop below use ranges left over from
       an earlier record */
    if (wrong_location || numchunks == 0) {
        save_to_not();
        return;
    }
    gotostring("ORIGIN", 0);
    rdliteral();
    atchunk = atpos = 0;
    ++species_read;
    while (1) {
        now = laschar;
        /* skip line ends, blanks and the position numbers */
        while (laschar == 10 || laschar == 13 || laschar == 32 ||
               (laschar >= '0' && laschar <= '9')) {
            now = laschar;
            rd();
        }
        /* a '/' here means the record ended before the last range was
           complete; the record is then dropped without being counted */
        if (now == '/' || laschar == '/')
            return;
        ++atpos;
        if (chunk[atchunk].from <= atpos && chunk[atchunk].to >= atpos) {
            *bytept++ = laschar;
            outer((bytept - bytestring >= MAXSEQLENGTH),
                  "OOPS -- sequence is too long!\nChange MAXSEQLENGTH and re-compile");
        }
        laschar = 32; /* forces the next base to be read */
        if (atpos == chunk[atchunk].to)
            if (++atchunk == numchunks)
                break;
    }
    *bytept = '\0';
    ++accepted;
    spew_name();
    if (complement_it)
        effect_complementation();
    fprintf(stdout, "%s", bytestring);
    fprintf(stdout, "\n");
    fflush(stdout);
}

/*
 * Copies the text of a /translation="..." qualifier into `bytestring`,
 * starting from the line held in `stringsp` and following it over as many
 * input lines as needed, up to the closing quote.  White space is dropped.
 */
void parse_translation(void)
{
    size_t len = strlen(stringsp);
    char *cp = stringsp + (len < 14 ? len : 14); /* just after /translation=" */
    char *bytept = bytestring;

    while (*cp != '\"' && *cp) {
        if (!isspace((unsigned char)*cp)) {
            outer((bytept - bytestring >= MAXSEQLENGTH - 1),
                  "OOPS -- sequence is too long!\nChange MAXSEQLENGTH and re-compile");
            *bytept++ = *cp;
        }
        ++cp; /* blanks are stepped over as well, so the loop always advances */
        if (*cp == '\0') {
            stornexline();
            cp = stringsp;
            while (isspace((unsigned char)*cp))
                ++cp;
        }
    }
    *bytept = '\0';
}

/*
 * Prints the name line of a record: ">taxname____accession_@" followed by
 * the second and third ';'-separated entries of `taxonomy`.
 * `taxonomy` is compacted in place while doing so.
 */
void spew_name(void)
{
    char *cp = taxonomy, *here, *begg;
    int numsemicols = 0;

    while (*cp != ';' && *cp)
        ++cp;
    /* step over the ';' and the separator after it, if they are there */
    if (*cp)
        ++cp;
    if (*cp)
        ++cp;
    begg = here = cp;
    while (numsemicols < 2 && *cp) {
        if (*cp == ';') {
            ++cp;
            ++numsemicols;
            if (!*cp) /* ';' was the last character */
                break;
        }
        *here++ = *cp++;
    }
    *here = '\0';
    fprintf(stdout, ">%s____%s_@%s\n", taxname, accnumber, begg);
}

/* Prints name line and translation (held in `bytestring`) of the current record. */
void output_translation(void)
{
    ++accepted;
    spew_name();
    fprintf(stdout, "%s", bytestring);
    fprintf(stdout, "\n");
    fflush(stdout);
}

char tmpmask[256]; /* character -> bit set of bases (a=1, g=2, c=4, t=8) */
char makeit[9];    /* single base bit -> bit of the complementary base */
char antimask[16]; /* bit set of bases -> character */

/*
 * Replaces `bytestring` by its reverse complement.  Ambiguity codes are
 * complemented base by base; letters come out in lower case.  Characters
 * that are not nucleotide codes are kept as they are.
 */
void effect_complementation(void)
{
    int i, j, k, l;
    int bit;
    int x, y;

    for (i = 0; i < 256; ++i)
        tmpmask[i] = 0;
    tmpmask['a'] = tmpmask['A'] = 1;
    tmpmask['g'] = tmpmask['G'] = 2;
    tmpmask['c'] = tmpmask['C'] = 4;
    tmpmask['t'] = tmpmask['T'] = 8;
    tmpmask['R'] = tmpmask['r'] = tmpmask['a'] | tmpmask['g'];
    tmpmask['Y'] = tmpmask['y'] = tmpmask['t'] | tmpmask['c'];
    tmpmask['W'] = tmpmask['w'] = tmpmask['a'] | tmpmask['t'];
    tmpmask['S'] = tmpmask['s'] = tmpmask['c'] | tmpmask['g'];
    tmpmask['M'] = tmpmask['m'] = tmpmask['a'] | tmpmask['c'];
    tmpmask['K'] = tmpmask['k'] = tmpmask['g'] | tmpmask['t'];
    tmpmask['B'] = tmpmask['b'] = tmpmask['c'] | tmpmask['g'] | tmpmask['t'];
    tmpmask['D'] = tmpmask['d'] = tmpmask['a'] | tmpmask['g'] | tmpmask['t'];
    tmpmask['H'] = tmpmask['h'] = tmpmask['a'] | tmpmask['c'] | tmpmask['t'];
    tmpmask['V'] = tmpmask['v'] = tmpmask['a'] | tmpmask['c'] | tmpmask['g'];
    tmpmask['N'] = tmpmask['n'] = 1 | 2 | 4 | 8;
    makeit[tmpmask['a']] = tmpmask['t'];
    makeit[tmpmask['c']] = tmpmask['g'];
    makeit[tmpmask['g']] = tmpmask['c'];
    makeit[tmpmask['t']] = tmpmask['a'];
    /* the lower-case letter is visited last, so it is the one that stays */
    for (i = 0; i < 256; ++i)
        if (tmpmask[i])
            antimask[(int)tmpmask[i]] = (char)i;

    j = (int)strlen(bytestring);
    memcpy(complement_string, bytestring, (size_t)j);
    for (i = 0, k = j - 1; i < j; ++i, --k) {
        unsigned char src = (unsigned char)complement_string[i];

        x = tmpmask[src];
        if (x == 0) {
            /* not a nucleotide code: antimask[0] is NUL and would cut the
               sequence short, so keep the character */
            bytestring[k] = (char)src;
            continue;
        }
        y = 0;
        for (l = 0, bit = 1; l < 4; ++l, bit <<= 1)
            if ((bit & x))
                y |= makeit[bit];
        bytestring[k] = antimask[y];
    }
}