commit a25316aa9e23e23a1bb77cd7a8818247edcb4362 Author: Kuoi Date: Mon Mar 7 20:43:05 2022 +0000 2006 version init diff --git a/CORE/#lixoalan# b/CORE/#lixoalan# new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/CORE/#lixoalan# @@ -0,0 +1 @@ + diff --git a/CORE/.GDEmenus b/CORE/.GDEmenus new file mode 100644 index 0000000..77486c8 --- /dev/null +++ b/CORE/.GDEmenus @@ -0,0 +1,764 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? 
+ +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. 
mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text 
+arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code + +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/bin/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + + +#Sequence dataset +menu:seq. datasets + + +item:------------- +item:add a new dataset +itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file + +arg:name +argtype:text +arglabel:Enter the dataset name ? + +arg:file +argtype:text +arglabel:Enter the dataset file (in FASTA) ? 
+ + +#Menu for Protein +menu:protein +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDBPROT +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta + +arg:Matrix +barglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + 
+item:-------------------------- +item:Add a new Protein blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: Enter the file (in FASTA) + +arg:menuname +argtype:text +arglabel: Enter the name of the DB + +#Phylogenetic Menu + +menu:Phylogeny + +item:Phylip help +itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? 
+argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? 
+argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + +#Online menu +menu:On-Line Res. + +item:GDE for Linux resources at Bioafrica.net +itemmethod:netscape http://www.bioafrica.net & + +item:------------------------- +item:add a new website +itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenus.bak b/CORE/.GDEmenus.bak new file mode 100644 index 0000000..5a3f56f --- /dev/null +++ b/CORE/.GDEmenus.bak @@ -0,0 +1,761 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? 
+ +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? 
+argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + +menu:seq. datasets + +item:------------- +item:add a new dataset +itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file + +arg:name +argtype:text +arglabel:Enter the dataset name ? + +arg:file +argtype:text +arglabel:Enter the dataset file (in FASTA) ? 
+ + +#Menu for Protein +menu:protein +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta + +arg:Matrix +barglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + 
+item:-------------------------- +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: Enter the file (in FASTA) + +arg:menuname +argtype:text +arglabel: Enter the name of the DB + +menu:Phylogeny + + +item:Phylip help +itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? 
+argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? 
+argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + + + +menu:On-Line Res. + +item:GDE for Linux resources at Bioafrica.net +itemmethod:netscape http://www.bioafrica.net & + +item:------------------------- +item:add a new website +itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenus.copy b/CORE/.GDEmenus.copy new file mode 100755 index 0000000..f3dbf5a --- /dev/null +++ b/CORE/.GDEmenus.copy @@ -0,0 +1,2049 @@ +1menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? 
+ +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? 
+ +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; textedit backg*; textedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; textedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? 
+argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA +item:codeml +itemmethod:(readseq -a -f11 in1 | sed "s/ YF//1" > test.phy; /usr/bin/X11/xterm -e codeml $METHOD)& + + +arg:METHOD +arglabel:Which method ? +argtype:chooser +argchoice:0:/home/tulio/biotools/paml/method0.ctl +argchoice:1:/home/tulio/biotools/paml/method1.ctl +argchoice:2:/home/tulio/biotools/paml/method2.ctl +argchoice:3:/home/tulio/biotools/paml/method3.ctl +argchoice:4:/home/tulio/biotools/paml/method4.ctl +argchoice:5:/home/tulio/biotools/paml/method5.ctl +argchoice:6:/home/tulio/biotools/paml/method6.ctl + + +in:in1 +informat:genbank +inmask: +insave: + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/var/www/cgi-bin/db/hiv17-08-01.fasta2 +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numbering:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HCV Numbering:/var/www/cgi-bin/db/HCV.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank 
verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:HIV Seq. Db. +item:Ref. Seq. for Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:Prot function:/database/AA/protease.mutations.fasta +argchoice:rt function:/database/AA/rtmutations.fasta +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta +argchoice:data1.fasta:/database/DNA/data1.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype Db. 
+itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/database/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/database/DNA/subtyperef/subpol-p15RNAase.fasta +argchoice:subpol-p31integrase.fasta:/database/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/database/DNA/subtyperef/subtatexon2.fasta 
+argchoice:subtatintron.fasta:/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/database/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/database/DNA/subtyperef/subnef.fasta + +out:OUTPUTFILE +outformat:genbank + + +item:HIV-1 Subtype reduz. +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/sub-reference/subC.CG.fasta +argchoice:HIV-1 GAG sub:/database/DNA/sub-reference/gagsub-reference.fasta +argchoice:HIV-1 POL sub:/database/DNA/sub-reference/polsub-reference.fasta +argchoice:HIV-1 VIF sub:/database/DNA/sub-reference/vifsub-reference.fasta +argchoice:HIV-1 VPR sub:/database/DNA/sub-reference/vprsub-reference.fasta +argchoice:HIV-1 TAT sub:/database/DNA/sub-reference/tatsub-reference.fasta +argchoice:HIV-1 REV sub:/database/DNA/sub-reference/revsub-reference.fasta +argchoice:HIV-1 VPU sub:/database/DNA/sub-reference/vpusub-reference.fasta +argchoice:HIV-1 ENV sub:/database/DNA/sub-reference/envsub-reference.fasta +argchoice:HIV-1 NEF sub:/database/DNA/sub-reference/nefsub-reference.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype B & C Gen. 
regions +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/subB/$format.fasta +argchoice:HIV-1 GAG sub:/database/DNA/subB/$format.gag$type.fasta +argchoice:HIV-1 POL sub:/database/DNA/subB/$format.pol$type.fasta +argchoice:HIV-1 PROTEASE sub:/database/DNA/subB/$format.pol-prot$type.fasta +argchoice:HIV-1 RT sub:/database/DNA/subB/$format.pol-RT$type.fasta +argchoice:HIV-1 INTEGRASE sub:/database/DNA/subB/$format.pol-INT$type.fasta +argchoice:HIV-1 VIF sub:/database/DNA/subB/$format.vif$type.fasta +argchoice:HIV-1 VPR sub:/database/DNA/subB/$format.vpr$type.fasta +argchoice:HIV-1 TAT sub:/database/DNA/subB/$format.tat$type.fasta +argchoice:HIV-1 REV sub:/database/DNA/subB/$format.rev$type.fasta +argchoice:HIV-1 VPU sub:/database/DNA/subB/$format.vpu$type.fasta +argchoice:HIV-1 ENV sub:/database/DNA/subB/$format.env$type.fasta +argchoice:HIV-1 NEF sub:/database/DNA/subB/$format.nef$type.fasta + +arg:format +argtype:chooser +arglabel:Format +argchoice:Subtype B:subB +argchoice:Subtype C:subC + +arg:type +argtype:chooser +arglabel:type +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + + +item:Find Beginning of Genome regions +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:choice_list +argchoice:Protease:ATCACTCTTTGG +argchoice:Protease:ATC +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T?
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat +out:out1 +outformat:colormask + + +item:hivHXB2 genome regions aln +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/hivHXB2regions.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins Tulio:/var/www/cgi-bin/db/HIV-PROTEINS-tulio.fasta +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:HIV-1 Structures at PDB:/var/www/cgi-bin/db/Prot.3d.fasta +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:VESPA PROTEASE +itemmethod:cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f
;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:/database/AA/subB.prot.aa.vespa +argchoice:C:/database/AA/subC.prot.aa.vespa +argchoice:D:/database/AA/subD.prot.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:VESPA Reverse Transcriptase +itemmethod: cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f ;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:/database/AA/subB.rt.aa.vespa +argchoice:C:/database/AA/subC.rt.aa.vespa +argchoice:D:/database/AA/subD.rt.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? 
+argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:Coalescence +itemmethod: (readseq -a -f11 in1 | sed "s/ YF//1" > infile2;cat tt infile2> infile; /usr/bin/X11/xterm -e /usr/local/biotools/lamarc/coalesce/coalesce;)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +item:tree-puzzle +itemmethod: (readseq -a -f12 in1 > infile; /usr/bin/X11/xterm -e; (echo b; echo y) | puzzle; textedit outfile; gv outlm.eps)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? 
+argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM 
+argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:PAUP +itemmethod:(readseq -a -f17 in1 > work.nxs; /usr/bin/X11/xterm -e paup work.nxs;/bin/rm -f gde*)& + +in:in1 +informat:genbank +inmask: +insave: + +item:MrBaynes +itemmethod:(readseq -a -f17 in1 | sed "s/interleave /interleave=yes /" > work.nxs;cat work.nxs /home/tulio/biotools/mbaynes/mbcommant.txt > workmb.nxs; /usr/bin/X11/xterm -e; (echo execute workmb.nxs) | mb workmb.nxs;treetool workmb.nxs.out.t; /bin/rm in1)& + +in:in1 +informat:genbank +inmask: +insave: + +item:codeml +itemmethod:(readseq -a -f11 in1 | sed "s/ YF//1" > test.phy; /usr/bin/X11/xterm -e codeml $METHOD)& + + +arg:METHOD +arglabel:Which method ? +argtype:chooser +argchoice:0:/home/tulio/biotools/paml/method0.ctl +argchoice:1:/home/tulio/biotools/paml/method1.ctl +argchoice:2:/home/tulio/biotools/paml/method2.ctl +argchoice:3:/home/tulio/biotools/paml/method3.ctl +argchoice:4:/home/tulio/biotools/paml/method4.ctl +argchoice:5:/home/tulio/biotools/paml/method5.ctl +argchoice:6:/home/tulio/biotools/paml/method6.ctl + + +in:in1 +informat:genbank +inmask: +insave: + + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM textedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? 
+argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:Fitch: /usr/bin/X11/xterm -e fitch; +argchoice:Kitsch: /usr/bin/X11/xterm -e kitsch; +argchoice:Neighbor: /usr/bin/X11/xterm -e neighbor; +argchoice:Bootstrap+consense: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:On-Line Res. +item:Search Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic) +arg:term +argtype:text +arglabel:Search sequence accession +argtext: +item:Search Entrez +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=Entrez&term=$search)& + +arg:search +argtype:text +arglabel:Search Entrez +argtext: + +item:Search PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=PubMed&term=$term)& + +arg:term +argtype:text +arglabel:Search PubMed for Literature: +argtext: + +item:Online Resources at Retroviruses in NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& + +item:SNAP Analysis online (dn/ds) +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& + +item:RIP Analysis online (Recombination) +itemmethod:(netscape http://hiv-web.lanl.gov/RIP/RIP.html)& + +item:Search Stanford for Resistance mutations +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +inmask: +insave: + +menu:Tree Viewer +item:TreeTool +itemmethod:(treetool &); +item:TreeView +itemmethod:(tv &); +item:Xsplit-tree +itemmethod:(/usr/local/biotools/splitstree3.1/xsplits &); + + + + + + + + + + + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> 
in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? 
+argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? 
+argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO: + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) & + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? 
+argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? 
+argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + + +menu:Xylem +#------------ Open XYLEM Dataset (GenBank) ( 9/ 6/94) --------------- +item:Open_XYLEM (GenBank) +itemlabel:Open XYLEM Dataset (GenBank) +itemmethod:getloc $DBNAME.ind $DBNAME.ano $DBNAME.wrp $DBNAME.ind out1 + +arg:DBNAME +arglabel:XYLEM-format GenBank Dataset +argtype:text + +out:out1 +outformat:genbank + +#-------------- Open XYLEM Database (PIR) ( 9/ 6/94) -------------- +item:Open_XYLEM (PIR) +itemlabel:Open XYLEM Dataset (PIR) +itemmethod:getloc -p $DBNAME.ind $DBNAME.ano $DBNAME.wrp $DBNAME.ind out1.pir; readseq -a -f2 out1.pir > out1 + +arg:DBNAME +arglabel:XYLEM-format PIR Dataset +argtype:text +#argtype:file_chooser + +out:out1 +outformat:genbank + + +################################# PROTEIN ################################ +#--------------- PROT2NUC - Reverse Translation ( 8/10/94) ----------------- +item:PROT2NUC - reverse translation +itemmethod: sed "s/[#%]/>/" < in1 | prot2nuc -l$LINLEN -g$GROUP > in1.out; (textedit in1.out; rm in1*)& +itemhelp: xylem/prot2nuc.doc + +arg:LINLEN +arglabel:CODONS PER LINE +argtype:slider +argmin:5 +argmax:100 +argvalue:25 + +arg:GROUP +arglabel:NUMBERING INTERVAL (amino acids/codons) +argtype:slider +argmin:5 +argmax:100 +argvalue:5 + +in:in1 +informat:flat + +########################### DATABASE MENU ############################### +#------------------- FINDKEY (3/13/97)----------------------- +item:FINDKEY - Keyword Search +itemmethod: $KEYWORDS; (findkey $DATABASE in1.kw in1.nam in1.fnd; rm in1.kw; (textedit in1.fnd; rm in1.fnd)& (textedit in1.nam -Ws 150 628; rm in1.nam)& )& +itemhelp:xylem/findkey.asc + +arg:KEYWORDS +arglabel:KEYWORDS +argtype:chooser +argchoice:Single keyword:echo $KEY > in1.kw +argchoice:Create list of keywords:cat $GDE_HELP_DIR/xylem/GDE/keyfile.template > in1.tmp; textedit in1.tmp; egrep -v -e \; in1.tmp >in1.kw;rm in1.tmp* +argvalue:0 + +arg:KEY +arglabel:Single keyword +argtype:text + +arg:DATABASE +arglabel:DATABASE 
+argtype:choice_menu +argchoice:HIV GB:-G /database/DNA/hiv1entries +argchoice:PIR:-p +argchoice:PIR Dataset:-P $DBFILE +argchoice:GB bacterial:-b +argchoice:GB mamalian:-m +argchoice:GB phage:-g +argchoice:GB primate:-r +argchoice:GB rodent:-d +argchoice:GB unannotated:-u +argchoice:GB vertebrate:-t +argchoice:GB invertebrate:-i +argchoice:GB plant:-l +argchoice:GB rna:-n +argchoice:GB synthetic:-s +argchoice:GB viral:-a +argchoice:GB patented:-x +argchoice:GB Seq. Tagged Sites:-z +argchoice:GB expressed seq. tag:-e +argchoice:GB Genome Survey Seq.:-S +argchoice:GB High Throughput Genomic:-h +argchoice:GenBank Dataset:-G $DBFILE +argchoice:VecBase:-v +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +in:in1 +informat:flat + +#------------------- FETCH ( 2/ 7/94) -------------------------- +# Note: This menu requires that the shell script 'GBfilter' be +# in your bin directory. +item:FETCH +itemmethod: $NAMES; (fetch $WHATTOGET $DATABASE in1.nam in1.tmp; rm in1.nam; $WHERE) & +itemhelp:xylem/fetch.doc + +arg:NAMES +arglabel:NAMES/ACCESSION #'S +argtype:chooser +argchoice:Single name/acc:echo $NAMEFILE > in1.nam +argchoice:Create list of names/acc#'s:cat $GDE_HELP_DIR/xylem/GDE/namefile.template > in1.tmpname; textedit in1.tmpname; egrep -v -e \; in1.tmpname >in1.nam;rm in1.tmpname* +argchoice:File of Names/Acc.#'s:cat $NAMEFILE >in1.nam +argvalue:0 + +arg:NAMEFILE +arglabel:Single name, accession # or file of names/acc. 
#'s +argtype:text + +arg:WHATTOGET +arglabel:WHAT TO GET +argtype:chooser +argchoice:annotation:-a +argchoice:sequence:-s +argchoice:both:-b +argvalue:2 + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-G $DBFILE +argchoice:HIV Dataset:-G /database/DNA/hiventries +argchoice:PIR:-p +argchoice:PIR Dataset:-P $DBFILE +argchoice:VecBase:-v +argvalue:0 + +arg:DBFILE +arglabel:Dataset +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +# If GenBank file, read directly, otherwise, convert to GenBank. +argchoice:GDE:(GBfilter in1.tmp in1.gen; gde in1.gen; rm in1.*)& +argchoice:Textedit window:(textedit in1.tmp;rm in1.tmp) & +argchoice:Output file:mv in1.tmp $OUTFILE; echo ' Fetch completed' +argchoice:GenBank Dataset:splitdb -g in1.tmp $OUTFILE.ano $OUTFILE.wrp $OUTFILE.ind; rm in1.*; echo ' Fetch completed' +argchoice:PIR Dataset:splitdb -p in1.tmp $OUTFILE.ano $OUTFILE.wrp $OUTFILE.ind; rm in1.*; echo ' Fetch completed' +argvalue:0 + +arg:OUTFILE +arglabel:Output file or Dataset name +argtype:text + +in:in1 +informat:genbank + +out:OUTPUT +outformat:genbank + +#---------------- FEATURES - by feature key ( 3/14/94) ------------------- +item:FEATURES - Extract by feature keys +# sed corrects errors in GDE-generated genbank output +itemmethod: sed -e "s/^LOCUS */LOCUS /" -e "s/^ ACCESSION/ACCESSION /" -e "/^$/d" in1 > in1.gen; $FEALIST; ($FCOMMAND -F in1.feafile $DATABASE; rm in1.feafile in1.efile in1.gen; $WHERE) & +itemhelp:xylem/features.doc + +arg:FEALIST +arglabel:FEATURES TO EXTRACT +argtype:chooser +argchoice:Single feature:echo $FEAKEY > in1.feafile +argchoice:Create list of features:cat $GDE_HELP_DIR/xylem/GDE/feafile.template > in1.tmpfeafile; textedit in1.tmpfeafile; egrep -v -e \; in1.tmpfeafile >in1.feafile; rm in1.tmpfeafile +argvalue:0 + +arg:FEAKEY +arglabel:Single feature key +argtype:choice_list +argchoice:allele:allele +argchoice:attenuator:attenuator +argchoice:binding:binding 
+argchoice:CAAT_signal:CAAT_signal +argchoice:CDS:CDS +argchoice:chromosome:chromosome +argchoice:conflict:conflict +argchoice:contig:contig +argchoice:C_region:C_region +argchoice:D_loop:D_loop +argchoice:D_region:D_region +argchoice:D_segment:D_segment +argchoice:enhancer:enhancer +argchoice:exon:exon +argchoice:GC_signal:GC_signal +argchoice:iDNA:iDNA +argchoice:intron:intron +argchoice:J_region:J_region +argchoice:J_segment:J_segment +argchoice:LTR:LTR +argchoice:mat_peptide:mat_peptide +argchoice:misc_binding:misc_binding +argchoice:misc_difference:misc_difference +argchoice:misc_feature:misc_feature +argchoice:misc_recomb:misc_recomb +argchoice:misc_RNA:misc_RNA +argchoice:misc_signal:misc_signal +argchoice:misc_structure:misc_structure +argchoice:modified_base:modified_base +argchoice:mRNA:mRNA +argchoice:mutation:mutation +argchoice:N_region:N_region +argchoice:old_sequence:old_sequence +argchoice:polyA_signal:polyA_signal +argchoice:polyA_site:polyA_site +argchoice:precursor_RNA:precursor_RNA +argchoice:primer_bind:primer_bind +argchoice:prim_transcript:prim_transcript +argchoice:promoter:promoter +argchoice:protein_bind:protein_bind +argchoice:RBS:RBS +argchoice:repeat_region:repeat_region +argchoice:repeat_unit:repeat_unit +argchoice:rep_origin:rep_origin +argchoice:rRNA:rRNA +argchoice:satellite:satellite +argchoice:scRNA:scRNA +argchoice:sig_peptide:sig_peptide +argchoice:snRNA:snRNA +argchoice:source:source +argchoice:S_region:S_region +argchoice:stem_loop:stem_loop +argchoice:STS:STS +argchoice:TATA_signal:TATA_signal +argchoice:terminator:terminator +argchoice:transit_peptide:transit_peptide +argchoice:tRNA:tRNA +argchoice:unsure:unsure +argchoice:variation:variation +argchoice:virion:virion +argchoice:V_region:V_region +argchoice:V_segment:V_segment +argchoice:3'clip:3\'clip +argchoice:3'UTR:3\'UTR +argchoice:5'UTR:5\'UTR +argchoice:5'clip:5\'clip +argchoice:-10_signal:-10_signal +argchoice:-35_signal:-35_signal +argvalue:5 + +arg:FCOMMAND 
+arglabel:NAMES/ACCESSION #'S OF ENTRIES +argtype:choice_menu +argchoice:Single name:echo $EFILE > in1.efile; features -N in1.efile +argchoice:Create list of names:cat $GDE_HELP_DIR/xylem/GDE/names.template > in1.tmpefile; textedit in1.tmpefile; egrep -v -e \; in1.tmpefile >in1.efile; rm in1.tmpefile; features -N in1.efile +argchoice:File of names:cat $EFILE >in1.efile; features -N in1.efile +argchoice:Single Acc#:echo $EFILE > in1.efile; features -A in1.efile +argchoice:Create list of Acc#s:cat $GDE_HELP_DIR/xylem/GDE/acc.template > in1.tmpefile; textedit in1.tmpefile; egrep -v -e \; in1.tmpefile >in1.efile; rm in1.tmpefile; features -A in1.efile +argchoice:File of Acc#s:cat $EFILE >in1.efile; features -A in1.efile +argvalue:0 + +arg:EFILE +arglabel:Name, Accession # or filename +argtype:text + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-u $DBFILE +argchoice:Selected sequences:-U in1.gen +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +argchoice:GDE:readseq -a -f2 in1.out >in1.result; (gde in1.result;rm in1.*)& +argchoice:Textedit windows:(textedit in1.msg -Ws 450 350;rm in1.msg)& (textedit in1.exp -Ws 350 350; rm in1.exp) & (textedit in1.out -Ws 400 350;rm in1.out)& +argchoice:Output file:mv in1.msg $OUTNAME.msg; mv in1.out $OUTNAME.out; mv in1.exp $OUTNAME.exp; echo 'Features completed' +argvalue:0 + +arg:OUTNAME +arglabel:Output file name +argtype:text + +in:in1 +informat:genbank + +out:RESULT +outformat:genbank + +#-------------------- FEATURES - by expression ( 3/14/94)--------------------- +item:FEATURES - Extract using expressions +itemmethod: $CHOOSEEXP; sed -e "s/^LOCUS */LOCUS /" -e "s/^ ACCESSION/ACCESSION /" -e "/^$/d" in1 > in1.gen; (features -E in1.efile $DATABASE; rm in1.gen in1.efile; $WHERE)& +itemhelp:xylem/features.doc + +arg:CHOOSEEXP +arglabel:EXPRESSION(S) +argtype:chooser +argchoice:Single expression:echo 
'$EXPRESSION'|cut -f1 -d":" > in1.accfile; echo \>`cat in1.accfile` >in1.efile; echo '@$EXPRESSION' >> in1.efile; rm in1.accfile +argchoice:Expression file:cat $EFILE >in1.efile +argchoice:Create list of expressions:cat $GDE_HELP_DIR/xylem/GDE/expfile.template > in1.tmpexpfile; textedit in1.tmpexpfile; egrep -v -e \; in1.tmpexpfile >in1.efile; rm in1.tmpexpfile +argvalue:0 + +arg:EXPRESSION +arglabel:Feature expression +argtype:text + +arg:EFILE +arglabel:Expression file +argtype:text + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-u $DBFILE +argchoice:Selected sequences:-U in1.gen +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +argchoice:GDE:readseq -a -f2 in1.out >in1.result; (gde in1.result;rm in1.*)& +argchoice:Textedit windows:(textedit in1.msg -Ws 450 350;rm in1.msg)& (textedit in1.out -Ws 400 350;rm in1.out)& +argchoice:Output file:mv in1.msg $OUTNAME.msg; mv in1.out $OUTNAME.out;echo 'Features completed' +argvalue:0 + +arg:OUTNAME +arglabel:Output file name +argtype:text + +in:in1 +informat:genbank + +out:RESULT +outformat:genbank + +######################## ALIGNMENT MENU ############################### +#--------------- REFORM - print multiple alignment (2/ 2/95) ----------------- +item:REFORM - print mult. align. +#Note: do not use flat or gde . +itemmethod:(cat in1 |readseq -pipe -a -f8 | reform $TYPE -fp $GAPS $CAPS $DOTS -l$LINESIZE -s$START > in1.out; textedit in1.out;rm in1*) & +itemhelp: xylem/reform.doc + +arg:TYPE +argtype:chooser +arglabel:Type: +argchoice:Protein: +argchoice:Nucleic acid:-n +argvalue:0 + +arg:GAPS +argtype:chooser +arglabel:Print gaps as +argchoice:Dashes:-g +argchoice:Spaces: +argvalue:0 + +arg:CAPS +argtype:chooser +arglabel:Capitalize conserved sites in consensus seq. 
+argchoice:Yes:-c +argchoice:No: +argvalue:0 + +arg:DOTS +argtype:chooser +arglabel:Print conserved sites in alignment as dots +argchoice:Yes:-p +argchoice:No: +argvalue:0 + +arg:LINESIZE +arglabel:# residues per line +argtype:slider +argmin:40 +argmax:150 +argvalue:70 + +arg:START +arglabel:Begin numbering at +argtype:slider +argmin:-500000 +argmax:500000 +argvalue:1 + +in:in1 +#informat:flat +informat:genbank +insave: + + +############################## SIMILARITY MENU ############################ +#--------------- SHUFFLE - randomize sequences (11/10/93) ----------------- +item:SHUFFLE - randomize sequences +itemmethod: sed "s/[#%]/>/" < in1 > in1.tmp; shuffle -s$SEED -w$WINDOW -o$OVERLAP < in1.tmp > in1.shuf; readseq -a -f2 in1.shuf >out1; rm in1* +itemhelp: xylem/shuffle.doc + +arg:SEED +arglabel:RANDOM SEED +argtype:slider +argmin:1 +argmax:32767 +argvalue:7777 + +arg:WINDOW +arglabel:WINDOW +argtype:slider +argmin:5 +argmax:500000 +argvalue:10 + +arg:OVERLAP +arglabel:OVERLAP BETWEEN ADJACENT WINDOWS +argtype:slider +argmin:0 +argmax:100 +argvalue:0 + +in:in1 +informat:flat + +out:out1 +outformat:genbank + + diff --git a/CORE/.GDEmenus.noPAUP b/CORE/.GDEmenus.noPAUP new file mode 100755 index 0000000..0cd16f1 --- /dev/null +++ b/CORE/.GDEmenus.noPAUP @@ -0,0 +1,1461 @@ +1menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? 
+ +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:NEXUS:17 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? 
+ +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>' < in1 > clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; shelltool /home/tulio/biotools/SNAP/SNAP.pl outfile; textedit backg*; textedit summ*; shelltool /home/tulio/biotools/codons-xyplot.pl codons.*; textedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? 
+argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. Db.:/var/www/cgi-bin/db/hiv17-08-01.fasta2 +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numbering:/var/www/cgi-bin/db/HXB2.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 
+argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/" < in1 > in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:HIV Seq. Db. +item:Ref. Seq. 
for Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta +argchoice:data1.fasta:/database/DNA/data1.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype Db. +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/database/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/database/DNA/subtyperef/subpol-p15RNAase.fasta +argchoice:subpol-p31integrase.fasta:/database/DNA/subtyperef/subpol-p31integrase.fasta 
+argchoice:subpol-p51RT.fasta:/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/database/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/database/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/database/DNA/subtyperef/subnef.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype Protein Db. +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV1ENVPRO.fasta:/database/AA/HIV1ENVPRO.fasta +argchoice:HIV1GAGPRO.fasta:/database/AA/HIV1GAGPRO.fasta +argchoice:HIV1POLPRO.fasta:/database/AA/HIV1POLPRO.fasta +argchoice:HIV1REVPRO.fasta:/database/AA/HIV1REVPRO.fasta +argchoice:HIV1TARPRO.fasta:/database/AA/HIV1TARPRO.fasta +argchoice:HIV1VIFPRO.fasta:/database/AA/HIV1VIFPRO.fasta +argchoice:HIV1VPRPRO.fasta:/database/AA/HIV1VPRPRO.fasta +argchoice:HIV1VPUPRO.fasta:/database/AA/HIV1VPUPRO.fasta +argchoice:HIV1NEFPRO.fasta:/database/AA/subNEFPRO.fasta +out:OUTPUTFILE +outformat:genbank + +item:HXB2 Ref. Seq. 
+itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/HXB2.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>' < in1 > clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM30 .; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM??? 
.; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:VESPA PROTEASE +itemmethod: cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f ;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:subB.prot.aa.vespa +argchoice:C:subC.prot.aa.vespa +argchoice:D:subD.prot.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:VESPA Reverse Transcriptase +itemmethod: cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f ;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:subB.rt.aa.vespa +argchoice:C:subC.rt.aa.vespa +argchoice:D:subD.rt.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF 
PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? +argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? 
+argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny +item:Coalescence +itemmethod: (readseq -a -f11 in1 | sed "s/ YF//1" > infile2;cat tt infile2> infile;shelltool /usr/local/biotools/lamarc/coalesce/coalesce;)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +item:tree-puzzle +itemmethod: (readseq -a -f12 in1 > infile;shelltool puzzle; textedit outfile; gv outlm.eps)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? +argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? 
+argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT shelltool $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:PAUP +itemmethod:(readseq -a -f17 in1 > work.nxs; /usr/bin/X11/xterm -e paup work.nxs;/bin/rm -f gde*)& + +in:in1 +informat:genbank +inmask: +insave: + +item:MrBaynes +itemmethod:(readseq -a -f17 in1 | sed "s/interleave /interleave=yes /" > work.nxs;cat work.nxs /home/tulio/biotools/mbaynes/mbcommant.txt > workmb.nxs; /usr/bin/X11/xterm -e mb workmb.nxs;treetool workmb.nxs.out.t; /bin/rm in1)& + +in:in1 +informat:genbank +inmask: +insave: + +item:codeml +itemmethod:(readseq -a -f11 in1 | sed "s/ YF//1" > test.phy; shelltool codeml $METHOD)& + + +arg:METHOD +arglabel:Which method ? +argtype:chooser +argchoice:1:method1.ctl +argchoice:2:method2.ctl +argchoice:3:method3.ctl +argchoice:4:method4.ctl +argchoice:5:method5.ctl +argchoice:6:method6.ctl + + +in:in1 +informat:genbank +inmask: +insave: + + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; shelltool dnadist;mv -f outfile infile; shelltool neighbor; cp outtree intree; $PROGRAM textedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:Fitch:shelltool fitch; +argchoice:Kitsch:shelltool kitsch; +argchoice:Neighbor:shelltool neighbor; +argchoice:Bootstrap+consense:shelltool consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap:shelltool seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:On-Line Res. 
+item:Search Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic) +arg:term +argtype:text +arglabel:Search sequence accession +argtext: +item:Search Entrez +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=Entrez&term=$search)& + +arg:search +argtype:text +arglabel:Search Entrez +argtext: + +item:Search PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=PubMed&term=$term)& + +arg:term +argtype:text +arglabel:Search PubMed for Literature: +argtext: + +item:Online Resources at Retroviruses in NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& + +item:SNAP Analysis online (dn/ds) +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& + +item:RIP Analysis online (Recombination) +itemmethod:(netscape http://hiv-web.lanl.gov/RIP/RIP.html)& + +item:Search Stanford for Resistance mutations +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +inmask: +insave: + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? 
+argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? 
+argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? +argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? 
+ +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? +argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? 
+argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + diff --git a/CORE/.GDEmenus.safe b/CORE/.GDEmenus.safe new file mode 100755 index 0000000..a49f15e --- /dev/null +++ b/CORE/.GDEmenus.safe @@ -0,0 +1,2029 @@ +1menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? 
+ +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. 
mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>' < in1 > clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; textedit backg*; textedit summ*; shelltool /home/tulio/biotools/codons-xyplot.pl codons.*; textedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + 
+arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser 
+arglabel:Use template file ./loop.temp? +argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. Db.:/var/www/cgi-bin/db/hiv17-08-01.fasta2 +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numbering:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HCV Numbering:/var/www/cgi-bin/db/HCV.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate 
Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:HIV Seq. Db. +item:Ref. Seq. 
for Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:Prot function:/database/AA/protease.mutations.fasta +argchoice:rt function:/database/AA/rtmutations.fasta +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta +argchoice:data1.fasta:/database/DNA/data1.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype Db. +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/database/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/database/DNA/subtyperef/subpol-p15RNAase.fasta 
+argchoice:subpol-p31integrase.fasta:/database/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/database/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/database/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/database/DNA/subtyperef/subnef.fasta + +out:OUTPUTFILE +outformat:genbank + + +item:HIV-1 Subtype reduz. 
+itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/sub-reference/subC.CG.fasta +argchoice:HIV-1 GAG sub:/database/DNA/sub-reference/gagsub-reference.fasta +argchoice:HIV-1 POL sub:/database/DNA/sub-reference/polsub-reference.fasta +argchoice:HIV-1 VIF sub:/database/DNA/sub-reference/vifsub-reference.fasta +argchoice:HIV-1 VPR sub:/database/DNA/sub-reference/vprsub-reference.fasta +argchoice:HIV-1 TAT sub:/database/DNA/sub-reference/tatsub-reference.fasta +argchoice:HIV-1 REV sub:/database/DNA/sub-reference/revsub-reference.fasta +argchoice:HIV-1 VPU sub:/database/DNA/sub-reference/vpusub-reference.fasta +argchoice:HIV-1 ENV sub:/database/DNA/sub-reference/envsub-reference.fasta +argchoice:HIV-1 NEF sub:/database/DNA/sub-reference/nefsub-reference.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype B & C Gen. 
regions +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/subB/$format.fasta +argchoice:HIV-1 GAG sub:/database/DNA/subB/$format.gag$type.fasta +argchoice:HIV-1 POL sub:/database/DNA/subB/$format.pol$type.fasta +argchoice:HIV-1 PROTEASE sub:/database/DNA/subB/$format.pol-prot$type.fasta +argchoice:HIV-1 RT sub:/database/DNA/subB/$format.pol-RT$type.fasta +argchoice:HIV-1 INTEGRASE sub:/database/DNA/subB/$format.pol-INT$type.fasta +argchoice:HIV-1 VIF sub:/database/DNA/subB/$format.vif$type.fasta +argchoice:HIV-1 VPR sub:/database/DNA/subB/$format.vpr$type.fasta +argchoice:HIV-1 TAT sub:/database/DNA/subB/$format.tat$type.fasta +argchoice:HIV-1 REV sub:/database/DNA/subB/$format.rev$type.fasta +argchoice:HIV-1 VPU sub:/database/DNA/subB/$format.vpu$type.fasta +argchoice:HIV-1 ENV sub:/database/DNA/subB/$format.env$type.fasta +argchoice:HIV-1 NEF sub:/database/DNA/subB/$format.nef$type.fasta + +arg:format +argtype:chooser +arglabel:Format +argchoice:Subtype B:subB +argchoice:Subtype C:subC + +arg:type +argtype:chooser +arglabel:type +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + + +item:Find Beginning of Genome regions +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:choice_list +argchoice:Protease:ATCACTCTTTGG +argchoice:Protease:ATC +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat +out:out1 +outformat:colormask + + +item:hivHXB2 genome regions aln +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/hivHXB2regions.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins Tulio:/var/www/cgi-bin/db/HIV-PROTEINS-tulio.fasta +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:HIV-1 Structures at PDB:/var/www/cgi-bin/db/Prot.3d.fasta +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:VESPA PROTEASE +itemmethod:cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f 
;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:/database/AA/subB.prot.aa.vespa +argchoice:C:/database/AA/subC.prot.aa.vespa +argchoice:D:/database/AA/subD.prot.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:VESPA Reverse Transcriptase +itemmethod: cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f ;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:/database/AA/subB.rt.aa.vespa +argchoice:C:/database/AA/subC.rt.aa.vespa +argchoice:D:/database/AA/subD.rt.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? 
+argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:Coalescence +itemmethod: (readseq -a -f11 in1 | sed "s/ YF//1" > infile2;cat tt infile2> infile; /usr/bin/X11/xterm -e /usr/local/biotools/lamarc/coalesce/coalesce;)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +item:tree-puzzle +itemmethod: (readseq -a -f12 in1 > infile; /usr/bin/X11/xterm -e; (echo b; echo y) | puzzle; textedit outfile; gv outlm.eps)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? 
+argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM 
+argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:PAUP +itemmethod:(readseq -a -f17 in1 > work.nxs; /usr/bin/X11/xterm -e paup work.nxs;/bin/rm -f gde*)& + +in:in1 +informat:genbank +inmask: +insave: + +item:MrBaynes +itemmethod:(readseq -a -f17 in1 | sed "s/interleave /interleave=yes /" > work.nxs;cat work.nxs /home/tulio/biotools/mbaynes/mbcommant.txt > workmb.nxs; /usr/bin/X11/xterm -e; (echo execute workmb.nxs) | mb workmb.nxs;treetool workmb.nxs.out.t; /bin/rm in1)& + +in:in1 +informat:genbank +inmask: +insave: + +item:codeml +itemmethod:(readseq -a -f11 in1 | sed "s/ YF//1" > test.phy; /usr/bin/X11/xterm -e codeml $METHOD)& + + +arg:METHOD +arglabel:Which method ? +argtype:chooser +argchoice:0:/home/tulio/biotools/paml/method0.ctl +argchoice:1:/home/tulio/biotools/paml/method1.ctl +argchoice:2:/home/tulio/biotools/paml/method2.ctl +argchoice:3:/home/tulio/biotools/paml/method3.ctl +argchoice:4:/home/tulio/biotools/paml/method4.ctl +argchoice:5:/home/tulio/biotools/paml/method5.ctl +argchoice:6:/home/tulio/biotools/paml/method6.ctl + + +in:in1 +informat:genbank +inmask: +insave: + + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM textedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? 
+argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:Fitch: /usr/bin/X11/xterm -e fitch; +argchoice:Kitsch: /usr/bin/X11/xterm -e kitsch; +argchoice:Neighbor: /usr/bin/X11/xterm -e neighbor; +argchoice:Bootstrap+consense: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:On-Line Res. +item:Search Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic) +arg:term +argtype:text +arglabel:Search sequence accession +argtext: +item:Search Entrez +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=Entrez&term=$search)& + +arg:search +argtype:text +arglabel:Search Entrez +argtext: + +item:Search PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=PubMed&term=$term)& + +arg:term +argtype:text +arglabel:Search PubMed for Literature: +argtext: + +item:Online Resources at Retroviruses in NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& + +item:SNAP Analysis online (dn/ds) +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& + +item:RIP Analysis online (Recombination) +itemmethod:(netscape http://hiv-web.lanl.gov/RIP/RIP.html)& + +item:Search Stanford for Resistance mutations +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +inmask: +insave: + +menu:Tree Viewer +item:TreeTool +itemmethod:(treetool &); +item:TreeView +itemmethod:(tv &); +item:Xsplit-tree +itemmethod:(/usr/local/biotools/splitstree3.1/xsplits &); + + + + + + + + + + + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> 
in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? 
+argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? 
+argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? 
+argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? 
+argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + + +menu:Xylem +#------------ Open XYLEM Dataset (GenBank) ( 9/ 6/94) --------------- +item:Open_XYLEM (GenBank) +itemlabel:Open XYLEM Dataset (GenBank) +itemmethod:getloc $DBNAME.ind $DBNAME.ano $DBNAME.wrp $DBNAME.ind out1 + +arg:DBNAME +arglabel:XYLEM-format GenBank Dataset +argtype:text + +out:out1 +outformat:genbank + +#-------------- Open XYLEM Database (PIR) ( 9/ 6/94) -------------- +item:Open_XYLEM (PIR) +itemlabel:Open XYLEM Dataset (PIR) +itemmethod:getloc -p $DBNAME.ind $DBNAME.ano $DBNAME.wrp $DBNAME.ind out1.pir; readseq -a -f2 out1.pir > out1 + +arg:DBNAME +arglabel:XYLEM-format PIR Dataset +argtype:text +#argtype:file_chooser + +out:out1 +outformat:genbank + + +################################# PROTEIN ################################ +#--------------- PROT2NUC - Reverse Translation ( 8/10/94) ----------------- +item:PROT2NUC - reverse translation +itemmethod: sed "s/[#%]/>/" in1.out; (textedit in1.out; rm in1*)& +itemhelp: xylem/prot2nuc.doc + +arg:LINLEN +arglabel:CODONS PER LINE +argtype:slider +argmin:5 +argmax:100 +argvalue:25 + +arg:GROUP +arglabel:NUMBERING INTERVAL (amino acids/codons) +argtype:slider +argmin:5 +argmax:100 +argvalue:5 + +in:in1 +informat:flat + +########################### DATABASE MENU ############################### +#------------------- FINDKEY (3/13/97)----------------------- +item:FINDKEY - Keyword Search +itemmethod: $KEYWORDS; (findkey $DATABASE in1.kw in1.nam in1.fnd; rm in1.kw; (textedit in1.fnd; rm in1.fnd)& (textedit in1.nam -Ws 150 628; rm in1.nam)& )& +itemhelp:xylem/findkey.asc + +arg:KEYWORDS +arglabel:KEYWORDS +argtype:chooser +argchoice:Single keyword:echo $KEY > in1.kw +argchoice:Create list of keywords:cat $GDE_HELP_DIR/xylem/GDE/keyfile.template > in1.tmp; textedit in1.tmp; egrep -v -e \; in1.tmp >in1.kw;rm in1.tmp* +argvalue:0 + +arg:KEY +arglabel:Single keyword +argtype:text + +arg:DATABASE +arglabel:DATABASE 
+argtype:choice_menu +argchoice:HIV GB:-G /database/DNA/hiv1entries +argchoice:PIR:-p +argchoice:PIR Dataset:-P $DBFILE +argchoice:GB bacterial:-b +argchoice:GB mammalian:-m +argchoice:GB phage:-g +argchoice:GB primate:-r +argchoice:GB rodent:-d +argchoice:GB unannotated:-u +argchoice:GB vertebrate:-t +argchoice:GB invertebrate:-i +argchoice:GB plant:-l +argchoice:GB rna:-n +argchoice:GB synthetic:-s +argchoice:GB viral:-a +argchoice:GB patented:-x +argchoice:GB Seq. Tagged Sites:-z +argchoice:GB expressed seq. tag:-e +argchoice:GB Genome Survey Seq.:-S +argchoice:GB High Throughput Genomic:-h +argchoice:GenBank Dataset:-G $DBFILE +argchoice:VecBase:-v +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +in:in1 +informat:flat + +#------------------- FETCH ( 2/ 7/94) -------------------------- +# Note: This menu requires that the shell script 'GBfilter' be +# in your bin directory. +item:FETCH +itemmethod: $NAMES; (fetch $WHATTOGET $DATABASE in1.nam in1.tmp; rm in1.nam; $WHERE) & +itemhelp:xylem/fetch.doc + +arg:NAMES +arglabel:NAMES/ACCESSION #'S +argtype:chooser +argchoice:Single name/acc:echo $NAMEFILE > in1.nam +argchoice:Create list of names/acc#'s:cat $GDE_HELP_DIR/xylem/GDE/namefile.template > in1.tmpname; textedit in1.tmpname; egrep -v -e \; in1.tmpname >in1.nam;rm in1.tmpname* +argchoice:File of Names/Acc.#'s:cat $NAMEFILE >in1.nam +argvalue:0 + +arg:NAMEFILE +arglabel:Single name, accession # or file of names/acc. 
#'s +argtype:text + +arg:WHATTOGET +arglabel:WHAT TO GET +argtype:chooser +argchoice:annotation:-a +argchoice:sequence:-s +argchoice:both:-b +argvalue:2 + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-G $DBFILE +argchoice:HIV Dataset:-G /database/DNA/hiventries +argchoice:PIR:-p +argchoice:PIR Dataset:-P $DBFILE +argchoice:VecBase:-v +argvalue:0 + +arg:DBFILE +arglabel:Dataset +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +# If GenBank file, read directly, otherwise, convert to GenBank. +argchoice:GDE:(GBfilter in1.tmp in1.gen; gde in1.gen; rm in1.*)& +argchoice:Textedit window:(textedit in1.tmp;rm in1.tmp) & +argchoice:Output file:mv in1.tmp $OUTFILE; echo ' Fetch completed' +argchoice:GenBank Dataset:splitdb -g in1.tmp $OUTFILE.ano $OUTFILE.wrp $OUTFILE.ind; rm in1.*; echo ' Fetch completed' +argchoice:PIR Dataset:splitdb -p in1.tmp $OUTFILE.ano $OUTFILE.wrp $OUTFILE.ind; rm in1.*; echo ' Fetch completed' +argvalue:0 + +arg:OUTFILE +arglabel:Output file or Dataset name +argtype:text + +in:in1 +informat:genbank + +out:OUTPUT +outformat:genbank + +#---------------- FEATURES - by feature key ( 3/14/94) ------------------- +item:FEATURES - Extract by feature keys +# sed corrects errors in GDE-generated genbank output +itemmethod: sed -e "s/^LOCUS */LOCUS /" -e "s/^ ACCESSION/ACCESSION /" -e "/^$/d" in1 > in1.gen; $FEALIST; ($FCOMMAND -F in1.feafile $DATABASE; rm in1.feafile in1.efile in1.gen; $WHERE) & +itemhelp:xylem/features.doc + +arg:FEALIST +arglabel:FEATURES TO EXTRACT +argtype:chooser +argchoice:Single feature:echo $FEAKEY > in1.feafile +argchoice:Create list of features:cat $GDE_HELP_DIR/xylem/GDE/feafile.template > in1.tmpfeafile; textedit in1.tmpfeafile; egrep -v -e \; in1.tmpfeafile >in1.feafile; rm in1.tmpfeafile +argvalue:0 + +arg:FEAKEY +arglabel:Single feature key +argtype:choice_list +argchoice:allele:allele +argchoice:attenuator:attenuator +argchoice:binding:binding 
+argchoice:CAAT_signal:CAAT_signal +argchoice:CDS:CDS +argchoice:chromosome:chromosome +argchoice:conflict:conflict +argchoice:contig:contig +argchoice:C_region:C_region +argchoice:D_loop:D_loop +argchoice:D_region:D_region +argchoice:D_segment:D_segment +argchoice:enhancer:enhancer +argchoice:exon:exon +argchoice:GC_signal:GC_signal +argchoice:iDNA:iDNA +argchoice:intron:intron +argchoice:J_region:J_region +argchoice:J_segment:J_segment +argchoice:LTR:LTR +argchoice:mat_peptide:mat_peptide +argchoice:misc_binding:misc_binding +argchoice:misc_difference:misc_difference +argchoice:misc_feature:misc_feature +argchoice:misc_recomb:misc_recomb +argchoice:misc_RNA:misc_RNA +argchoice:misc_signal:misc_signal +argchoice:misc_structure:misc_structure +argchoice:modified_base:modified_base +argchoice:mRNA:mRNA +argchoice:mutation:mutation +argchoice:N_region:N_region +argchoice:old_sequence:old_sequence +argchoice:polyA_signal:polyA_signal +argchoice:polyA_site:polyA_site +argchoice:precursor_RNA:precursor_RNA +argchoice:primer_bind:primer_bind +argchoice:prim_transcript:prim_transcript +argchoice:promoter:promoter +argchoice:protein_bind:protein_bind +argchoice:RBS:RBS +argchoice:repeat_region:repeat_region +argchoice:repeat_unit:repeat_unit +argchoice:rep_origin:rep_origin +argchoice:rRNA:rRNA +argchoice:satellite:satellite +argchoice:scRNA:scRNA +argchoice:sig_peptide:sig_peptide +argchoice:snRNA:snRNA +argchoice:source:source +argchoice:S_region:S_region +argchoice:stem_loop:stem_loop +argchoice:STS:STS +argchoice:TATA_signal:TATA_signal +argchoice:terminator:terminator +argchoice:transit_peptide:transit_peptide +argchoice:tRNA:tRNA +argchoice:unsure:unsure +argchoice:variation:variation +argchoice:virion:virion +argchoice:V_region:V_region +argchoice:V_segment:V_segment +argchoice:3'clip:3\'clip +argchoice:3'UTR:3\'UTR +argchoice:5'UTR:5\'UTR +argchoice:5'clip:5\'clip +argchoice:-10_signal:-10_signal +argchoice:-35 signal:-35 signal +argvalue:5 + +arg:FCOMMAND 
+arglabel:NAMES/ACCESSION #'S OF ENTRIES +argtype:choice_menu +argchoice:Single name:echo $EFILE > in1.efile; features -N in1.efile +argchoice:Create list of names:cat $GDE_HELP_DIR/xylem/GDE/names.template > in1.tmpefile; textedit in1.tmpefile; egrep -v -e \; in1.tmpefile >in1.efile; rm in1.tmpefile; features -N in1.efile +argchoice:File of names:cat $EFILE >in1.efile; features -N in1.efile +argchoice:Single Acc#:echo $EFILE > in1.efile; features -A in1.efile +argchoice:Create list of Acc#s:cat $GDE_HELP_DIR/xylem/GDE/acc.template > in1.tmpefile; textedit in1.tmpefile; egrep -v -e \; in1.tmpefile >in1.efile; rm in1.tmpefile; features -A in1.efile +argchoice:File of Acc#s:cat $EFILE >in1.efile; features -A in1.efile +argvalue:0 + +arg:EFILE +arglabel:Name, Accession # or filename +argtype:text + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-u $DBFILE +argchoice:Selected sequences:-U in1.gen +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +argchoice:GDE:readseq -a -f2 in1.out >in1.result; (gde in1.result;rm in1.*)& +argchoice:Textedit windows:(textedit in1.msg -Ws 450 350;rm in1.msg)& (textedit in1.exp -Ws 350 350; rm in1.exp) & (textedit in1.out -Ws 400 350;rm in1.out)& +argchoice:Output file:mv in1.msg $OUTNAME.msg; mv in1.out $OUTNAME.out; mv in1.exp $OUTNAME.exp; echo 'Features completed' +argvalue:0 + +arg:OUTNAME +arglabel:Output file name +argtype:text + +in:in1 +informat:genbank + +out:RESULT +outformat:genbank + +#-------------------- FEATURES - by expression ( 3/14/94)--------------------- +item:FEATURES - Extract using expressions +itemmethod: $CHOOSEEXP; sed -e "s/^LOCUS */LOCUS /" -e "s/^ ACCESSION/ACCESSION /" -e "/^$/d" in1 > in1.gen; (features -E in1.efile $DATABASE; rm in1.gen in1.efile; $WHERE)& +itemhelp:xylem/features.doc + +arg:CHOOSEEXP +arglabel:EXPRESSION(S) +argtype:chooser +argchoice:Single expression:echo 
'$EXPRESSION'|cut -f1 -d":" > in1.accfile; echo \>`cat in1.accfile` >in1.efile; echo '@$EXPRESSION' >> in1.efile; rm in1.accfile +argchoice:Expression file:cat $EFILE >in1.efile +argchoice:Create list of expressions:cat $GDE_HELP_DIR/xylem/GDE/expfile.template > in1.tmpexpfile; textedit in1.tmpexpfile; egrep -v -e \; in1.tmpexpfile >in1.efile; rm in1.tmpexpfile +argvalue:0 + +arg:EXPRESSION +arglabel:Feature expression +argtype:text + +arg:EFILE +arglabel:Expression file +argtype:text + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-u $DBFILE +argchoice:Selected sequences:-u in1.gen +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +argchoice:GDE:readseq -a -f2 in1.out >in1.result; (gde in1.result;rm in1.*)& +argchoice:Textedit windows:(textedit in1.msg -Ws 450 350;rm in1.msg)& (textedit in1.out -Ws 400 350;rm in1.out)& +argchoice:Output file:mv in1.msg $OUTNAME.msg; mv in1.out $OUTNAME.out;echo 'Features completed' +argvalue:0 + +arg:OUTNAME +arglabel:Output file name +argtype:text + +in:in1 +informat:genbank + +out:RESULT +outformat:genbank + +######################## ALIGNMENT MENU ############################### +#--------------- REFORM - print multiple alignment (2/ 2/95) ----------------- +item:REFORM - print mult. align. +#Note: do not use flat or gde . +itemmethod:(cat in1 |readseq -pipe -a -f8 | reform $TYPE -fp $GAPS $CAPS $DOTS -l$LINESIZE -s$START > in1.out; textedit in1.out;rm in1*) & +itemhelp: xylem/reform.doc + +arg:TYPE +argtype:chooser +arglabel:Type: +argchoice:Protein: +argchoice:Nucleic acid:-n +argvalue:0 + +arg:GAPS +argtype:chooser +arglabel:Print gaps as +argchoice:Dashes:-g +argchoice:Spaces: +argvalue:0 + +arg:CAPS +argtype:chooser +arglabel:Capitalize conserved sites in consensus seq. 
+argchoice:Yes:-c +argchoice:No: +argvalue:0 + +arg:DOTS +argtype:chooser +arglabel:Print conserved sites in alignment as dots +argchoice:Yes:-p +argchoice:No: +argvalue:0 + +arg:LINESIZE +arglabel:# residues per line +argtype:slider +argmin:40 +argmax:150 +argvalue:70 + +arg:START +arglabel:Begin numbering at +argtype:slider +argmin:-500000 +argmax:500000 +argvalue:1 + +in:in1 +#informat:flat +informat:genbank +insave: + + +############################## SIMILARITY MENU ############################ +#--------------- SHUFFLE - randomize sequences (11/10/93) ----------------- +item:SHUFFLE - randomize sequences +itemmethod: sed "s/[#%]/>/" in1.tmp; shuffle -s$SEED -w$WINDOW -o$OVERLAP in1.shuf; readseq -a -f2 in1.shuf >out1; rm in1* +itemhelp: xylem/shuffle.doc + +arg:SEED +arglabel:RANDOM SEED +argtype:slider +argmin:1 +argmax:32767 +argvalue:7777 + +arg:WINDOW +arglabel:WINDOW +argtype:slider +argmin:5 +argmax:500000 +argvalue:10 + +arg:OVERLAP +arglabel:OVERLAP BETWEEN ADJACENT WINDOWS +argtype:slider +argmin:0 +argmax:100 +argvalue:0 + +in:in1 +informat:flat + +out:out1 +outformat:genbank + + diff --git a/CORE/.GDEmenus.safe2 b/CORE/.GDEmenus.safe2 new file mode 100755 index 0000000..d1e1581 --- /dev/null +++ b/CORE/.GDEmenus.safe2 @@ -0,0 +1,1340 @@ +menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? 
+ +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? 
+ +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + 
+out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? +argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:Genome HIV position:/var/www/cgi-bin/db/genomeHIV.fasta +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate +argchoice:HIV-1:/var/www/cgi-bin/db/hivallsequencesGB-31-10.fasta +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed 
"s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? .; blastx $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE> in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir1:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. 
Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:DB +item:DB X8873 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Databases +argchoice:subtype:/home/database/DNA/subtyperef/subcomplete.fasta +argchoice:SIVENVDNA.fasta:/home/database/DNA/SIVENVDNA.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/home/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/home/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/home/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/home/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/home/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/home/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/home/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/home/database/DNA/subtyperef/subenvv3.fasta 
+argchoice:subgag.fasta:/home/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/home/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/home/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/home/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/home/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/home/database/DNA/subtyperef/subpol-p15RNAase.fasta +argchoice:subpol-p31integrase.fasta:/home/database/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/home/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/home/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/home/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/home/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/home/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/home/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/home/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/home/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/home/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/home/database/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/home/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/home/database/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/home/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/home/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/home/database/DNA/subtyperef/subnef.fasta +out:OUTPUTFILE +outformat:genbank + + + +item:HXB2 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/home/database/DNA/HXB2.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + 
+item:Clustal Protein Alignment +itemmethod:(tr '%#"' '>' clus_in;clustalw /output=PIR /infile=clus_in /align /ktup=$KTUP /window=$WIN $Matrx /fixedgap=$FIXED /floatgap=$FLOAT > in1.rpt;sed "s/>P1;/%/g" < clus_in.pir > in1;$REPORT gde in1;/bin/rm -f in1* clus_in* gde* )& +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? .; blastp $BLASTDB in1.f W=$WORDLEN M=$Matrix > in1.tmp;textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? 
.; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:blast3 +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? .; blast3 $BLASTDB in1.f W=$WORDLEN M=$Matrix > in1.tmp;textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir1:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX 
+arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? +argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? 
+argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT shelltool $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM 
+argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ; $PREEDIT shelltool seqboot; mv -f outfile infile; shelltool dnadist;mv -f outfile infile; shelltool neighbor; cp outtree intree; $PROGRAM textedit outfile;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST:mv -f infile outfile; +argchoice:Fitch:shelltool fitch; +argchoice:Kitsch:shelltool kitsch; +argchoice:Neighbor:shelltool neighbor; +argchoice:Full:shelltool consense; + + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:W +item:Stanford +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +item:Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic)& +item:Retroviruses NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& +item:SNAP (sy/nosy)& +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& +item:PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/Entrez/) + + +in:in1 +informat:genbank + + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? 
+argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? 
+argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? 
+argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + diff --git a/CORE/.GDEmenus.safeCOOL b/CORE/.GDEmenus.safeCOOL new file mode 100755 index 0000000..f862468 --- /dev/null +++ b/CORE/.GDEmenus.safeCOOL @@ -0,0 +1,1383 @@ +1menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? 
+argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? 
+argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. 
mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod:readseq in1 -a -f8 > infile; shelltool /usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; shelltool /home/tulio/biotools/SNAP/SNAP.pl outfile; textedit backg*; textedit summ*;/bin/rm -rf back* codo* summ*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT 
+argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? 
+argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:Genome HIV position:/var/www/cgi-bin/db/genomeHIV.fasta +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate +argchoice:HIV-1:/var/www/cgi-bin/db/hivallsequencesGB-31-10.fasta +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM30; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hivallsequencesGB-PROT2-31-10.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 
+argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. 
Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:DB +item:Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:DB X8873 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Databases +argchoice:subtype:/database/DNA/subtyperef/subcomplete.fasta +argchoice:SIVENVDNA.fasta:/database/DNA/SIVENVDNA.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + 
+arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/database/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/database/DNA/subtyperef/subpol-p15RNAase.fasta +argchoice:subpol-p31integrase.fasta:/database/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/database/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/database/DNA/subtyperef/subvif.fasta 
+argchoice:subvpr.fasta:/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/database/DNA/subtyperef/subnef.fasta +out:OUTPUTFILE +outformat:genbank + + + +item:HXB2 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/HXB2.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hivallsequencesGB-PROT2-31-10.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /var/www/cgi-bin/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:blast3 +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? 
.; blast3 $BLASTDB in1.f W=$WORDLEN M=$Matrix > in1.tmp;textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir1:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? 
+argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? +argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? 
+argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT shelltool $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ; $PREEDIT shelltool seqboot; mv -f outfile infile; shelltool dnadist;mv -f outfile infile; shelltool neighbor; cp outtree intree; $PROGRAM textedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST:mv -f infile outfile; +argchoice:Fitch:shelltool fitch; +argchoice:Kitsch:shelltool kitsch; +argchoice:Neighbor:shelltool neighbor; +argchoice:Full:shelltool consense; + + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:W +item:Stanford +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +item:Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic)& +item:Retroviruses NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& +item:SNAP (sy/nosy)& +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& +item:PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=PubMed&term=hiv+africa)& + +arg:term +argtype:text +arglabel:Search? +argtext:New + + +item:test seqname +itemmethod:(textedit in1;netscape http://www.ncbi.nlm.nih.gov/Entrez/in1)& + +in:in1 +informat:genbank +out:out1 +outformat:flat + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? 
+argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? 
+argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? 
+argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? 
+argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + diff --git a/CORE/.GDEmenus.tulio b/CORE/.GDEmenus.tulio new file mode 100755 index 0000000..a49f15e --- /dev/null +++ b/CORE/.GDEmenus.tulio @@ -0,0 +1,2029 @@ +1menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? 
+argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? 
+argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. 
mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; textedit backg*; textedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; textedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + 
+arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser 
+arglabel:Use template file ./loop.temp? +argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. Db.:/var/www/cgi-bin/db/hiv17-08-01.fasta2 +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numbering:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HCV Numbering:/var/www/cgi-bin/db/HCV.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate 
Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:HIV Seq. Db. +item:Ref. Seq. 
for Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:Prot function:/database/AA/protease.mutations.fasta +argchoice:rt function:/database/AA/rtmutations.fasta +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta +argchoice:data1.fasta:/database/DNA/data1.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype Db. +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/database/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/database/DNA/subtyperef/subpol-p15RNAase.fasta 
+argchoice:subpol-p31integrase.fasta:/database/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/database/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/database/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/database/DNA/subtyperef/subnef.fasta + +out:OUTPUTFILE +outformat:genbank + + +item:HIV-1 Subtype reduz. 
+itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/sub-reference/subC.CG.fasta +argchoice:HIV-1 GAG sub:/database/DNA/sub-reference/gagsub-reference.fasta +argchoice:HIV-1 POL sub:/database/DNA/sub-reference/polsub-reference.fasta +argchoice:HIV-1 VIF sub:/database/DNA/sub-reference/vifsub-reference.fasta +argchoice:HIV-1 VPR sub:/database/DNA/sub-reference/vprsub-reference.fasta +argchoice:HIV-1 TAT sub:/database/DNA/sub-reference/tatsub-reference.fasta +argchoice:HIV-1 REV sub:/database/DNA/sub-reference/revsub-reference.fasta +argchoice:HIV-1 VPU sub:/database/DNA/sub-reference/vpusub-reference.fasta +argchoice:HIV-1 ENV sub:/database/DNA/sub-reference/envsub-reference.fasta +argchoice:HIV-1 NEF sub:/database/DNA/sub-reference/nefsub-reference.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype B & C Gen. 
regions +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/subB/$format.fasta +argchoice:HIV-1 GAG sub:/database/DNA/subB/$format.gag$type.fasta +argchoice:HIV-1 POL sub:/database/DNA/subB/$format.pol$type.fasta +argchoice:HIV-1 PROTEASE sub:/database/DNA/subB/$format.pol-prot$type.fasta +argchoice:HIV-1 RT sub:/database/DNA/subB/$format.pol-RT$type.fasta +argchoice:HIV-1 INTEGRASE sub:/database/DNA/subB/$format.pol-INT$type.fasta +argchoice:HIV-1 VIF sub:/database/DNA/subB/$format.vif$type.fasta +argchoice:HIV-1 VPR sub:/database/DNA/subB/$format.vpr$type.fasta +argchoice:HIV-1 TAT sub:/database/DNA/subB/$format.tat$type.fasta +argchoice:HIV-1 REV sub:/database/DNA/subB/$format.rev$type.fasta +argchoice:HIV-1 VPU sub:/database/DNA/subB/$format.vpu$type.fasta +argchoice:HIV-1 ENV sub:/database/DNA/subB/$format.env$type.fasta +argchoice:HIV-1 NEF sub:/database/DNA/subB/$format.nef$type.fasta + +arg:format +argtype:chooser +arglabel:Format +argchoice:Subtype B:subB +argchoice:Subtype C:subC + +arg:type +argtype:chooser +arglabel:type +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + + +item:Find Beginning of Genome regions +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:choice_list +argchoice:Protease:ATCACTCTTTGG +argchoice:Protease:ATC +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T?
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat +out:out1 +outformat:colormask + + +item:hivHXB2 genome regions aln +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/hivHXB2regions.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM30 .; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins Tulio:/var/www/cgi-bin/db/HIV-PROTEINS-tulio.fasta +argchoice:HIV Proteins:/var/www/cgi-bin/db/hiv17-08-01.PROT.fasta +argchoice:HIV-1 Structures at PDB:/var/www/cgi-bin/db/Prot.3d.fasta +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:VESPA PROTEASE +itemmethod:cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f
;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:/database/AA/subB.prot.aa.vespa +argchoice:C:/database/AA/subC.prot.aa.vespa +argchoice:D:/database/AA/subD.prot.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:VESPA Reverse Transcriptase +itemmethod: cat in1 > infile ;/usr/local/biotools/GDE/bin/fasta2VESPA.pl > outfile; sed "s/[%]/ /" outfile.f ;/home/tulio/biotools/VESPA/VESPA -b $SUBTYPE -q outfile.f > outVESPA; textedit outVESPA; + +arg:SUBTYPE +argtype:chooser +arglabel:Subtype +argchoice:B:/database/AA/subB.rt.aa.vespa +argchoice:C:/database/AA/subC.rt.aa.vespa +argchoice:D:/database/AA/subD.rt.aa.vespa +in:in1 +informat:flat +out:out1 +outformat:text + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? 
+argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:Coalescence +itemmethod: (readseq -a -f11 in1 | sed "s/ YF//1" > infile2;cat tt infile2> infile; /usr/bin/X11/xterm -e /usr/local/biotools/lamarc/coalesce/coalesce;)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +item:tree-puzzle +itemmethod: (readseq -a -f12 in1 > infile; /usr/bin/X11/xterm -e; (echo b; echo y) | puzzle; textedit outfile; gv outlm.eps)& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? 
+argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM 
+argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:PAUP +itemmethod:(readseq -a -f17 in1 > work.nxs; /usr/bin/X11/xterm -e paup work.nxs;/bin/rm -f gde*)& + +in:in1 +informat:genbank +inmask: +insave: + +item:MrBaynes +itemmethod:(readseq -a -f17 in1 | sed "s/interleave /interleave=yes /" > work.nxs;cat work.nxs /home/tulio/biotools/mbaynes/mbcommant.txt > workmb.nxs; /usr/bin/X11/xterm -e; (echo execute workmb.nxs) | mb workmb.nxs;treetool workmb.nxs.out.t; /bin/rm in1)& + +in:in1 +informat:genbank +inmask: +insave: + +item:codeml +itemmethod:(readseq -a -f11 in1 | sed "s/ YF//1" > test.phy; /usr/bin/X11/xterm -e codeml $METHOD)& + + +arg:METHOD +arglabel:Which method ? +argtype:chooser +argchoice:0:/home/tulio/biotools/paml/method0.ctl +argchoice:1:/home/tulio/biotools/paml/method1.ctl +argchoice:2:/home/tulio/biotools/paml/method2.ctl +argchoice:3:/home/tulio/biotools/paml/method3.ctl +argchoice:4:/home/tulio/biotools/paml/method4.ctl +argchoice:5:/home/tulio/biotools/paml/method5.ctl +argchoice:6:/home/tulio/biotools/paml/method6.ctl + + +in:in1 +informat:genbank +inmask: +insave: + + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM textedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? 
+argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:Fitch: /usr/bin/X11/xterm -e fitch; +argchoice:Kitsch: /usr/bin/X11/xterm -e kitsch; +argchoice:Neighbor: /usr/bin/X11/xterm -e neighbor; +argchoice:Bootstrap+consense: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:On-Line Res. +item:Search Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic) +arg:term +argtype:text +arglabel:Search sequence accession +argtext: +item:Search Entrez +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=Entrez&term=$search)& + +arg:search +argtype:text +arglabel:Search Entrez +argtext: + +item:Search PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=PubMed&term=$term)& + +arg:term +argtype:text +arglabel:Search PubMed for Literature: +argtext: + +item:Online Resources at Retroviruses in NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& + +item:SNAP Analysis online (dn/ds) +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& + +item:RIP Analysis online (Recombination) +itemmethod:(netscape http://hiv-web.lanl.gov/RIP/RIP.html)& + +item:Search Stanford for Resistance mutations +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +inmask: +insave: + +menu:Tree Viewer +item:TreeTool +itemmethod:(treetool &); +item:TreeView +itemmethod:(tv &); +item:Xsplit-tree +itemmethod:(/usr/local/biotools/splitstree3.1/xsplits &); + + + + + + + + + + + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> 
in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? 
+argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? 
+argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO: + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ?
+argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? 
+argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + + +menu:Xylem +#------------ Open XYLEM Dataset (GenBank) ( 9/ 6/94) --------------- +item:Open_XYLEM (GenBank) +itemlabel:Open XYLEM Dataset (GenBank) +itemmethod:getloc $DBNAME.ind $DBNAME.ano $DBNAME.wrp $DBNAME.ind out1 + +arg:DBNAME +arglabel:XYLEM-format GenBank Dataset +argtype:text + +out:out1 +outformat:genbank + +#-------------- Open XYLEM Database (PIR) ( 9/ 6/94) -------------- +item:Open_XYLEM (PIR) +itemlabel:Open XYLEM Dataset (PIR) +itemmethod:getloc -p $DBNAME.ind $DBNAME.ano $DBNAME.wrp $DBNAME.ind out1.pir; readseq -a -f2 out1.pir > out1 + +arg:DBNAME +arglabel:XYLEM-format PIR Dataset +argtype:text +#argtype:file_chooser + +out:out1 +outformat:genbank + + +################################# PROTEIN ################################ +#--------------- PROT2NUC - Reverse Translation ( 8/10/94) ----------------- +item:PROT2NUC - reverse translation +itemmethod: sed "s/[#%]/>/" in1.out; (textedit in1.out; rm in1*)& +itemhelp: xylem/prot2nuc.doc + +arg:LINLEN +arglabel:CODONS PER LINE +argtype:slider +argmin:5 +argmax:100 +argvalue:25 + +arg:GROUP +arglabel:NUMBERING INTERVAL (amino acids/codons) +argtype:slider +argmin:5 +argmax:100 +argvalue:5 + +in:in1 +informat:flat + +########################### DATABASE MENU ############################### +#------------------- FINDKEY (3/13/97)----------------------- +item:FINDKEY - Keyword Search +itemmethod: $KEYWORDS; (findkey $DATABASE in1.kw in1.nam in1.fnd; rm in1.kw; (textedit in1.fnd; rm in1.fnd)& (textedit in1.nam -Ws 150 628; rm in1.nam)& )& +itemhelp:xylem/findkey.asc + +arg:KEYWORDS +arglabel:KEYWORDS +argtype:chooser +argchoice:Single keyword:echo $KEY > in1.kw +argchoice:Create list of keywords:cat $GDE_HELP_DIR/xylem/GDE/keyfile.template > in1.tmp; textedit in1.tmp; egrep -v -e \; in1.tmp >in1.kw;rm in1.tmp* +argvalue:0 + +arg:KEY +arglabel:Single keyword +argtype:text + +arg:DATABASE +arglabel:DATABASE
+argtype:choice_menu +argchoice:HIV GB:-G /database/DNA/hiv1entries +argchoice:PIR:-p +argchoice:PIR Dataset:-P $DBFILE +argchoice:GB bacterial:-b +argchoice:GB mammalian:-m +argchoice:GB phage:-g +argchoice:GB primate:-r +argchoice:GB rodent:-d +argchoice:GB unannotated:-u +argchoice:GB vertebrate:-t +argchoice:GB invertebrate:-i +argchoice:GB plant:-l +argchoice:GB rna:-n +argchoice:GB synthetic:-s +argchoice:GB viral:-a +argchoice:GB patented:-x +argchoice:GB Seq. Tagged Sites:-z +argchoice:GB expressed seq. tag:-e +argchoice:GB Genome Survey Seq.:-S +argchoice:GB High Throughput Genomic:-h +argchoice:GenBank Dataset:-G $DBFILE +argchoice:VecBase:-v +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +in:in1 +informat:flat + +#------------------- FETCH ( 2/ 7/94) -------------------------- +# Note: This menu requires that the shell script 'GBfilter' be +# in your bin directory. +item:FETCH +itemmethod: $NAMES; (fetch $WHATTOGET $DATABASE in1.nam in1.tmp; rm in1.nam; $WHERE) & +itemhelp:xylem/fetch.doc + +arg:NAMES +arglabel:NAMES/ACCESSION #'S +argtype:chooser +argchoice:Single name/acc:echo $NAMEFILE > in1.nam +argchoice:Create list of names/acc#'s:cat $GDE_HELP_DIR/xylem/GDE/namefile.template > in1.tmpname; textedit in1.tmpname; egrep -v -e \; in1.tmpname >in1.nam;rm in1.tmpname* +argchoice:File of Names/Acc.#'s:cat $NAMEFILE >in1.nam +argvalue:0 + +arg:NAMEFILE +arglabel:Single name, accession # or file of names/acc.
#'s +argtype:text + +arg:WHATTOGET +arglabel:WHAT TO GET +argtype:chooser +argchoice:annotation:-a +argchoice:sequence:-s +argchoice:both:-b +argvalue:2 + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-G $DBFILE +argchoice:HIV Dataset:-G /database/DNA/hiventries +argchoice:PIR:-p +argchoice:PIR Dataset:-P $DBFILE +argchoice:VecBase:-v +argvalue:0 + +arg:DBFILE +arglabel:Dataset +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +# If GenBank file, read directly, otherwise, convert to GenBank. +argchoice:GDE:(GBfilter in1.tmp in1.gen; gde in1.gen; rm in1.*)& +argchoice:Textedit window:(textedit in1.tmp;rm in1.tmp) & +argchoice:Output file:mv in1.tmp $OUTFILE; echo ' Fetch completed' +argchoice:GenBank Dataset:splitdb -g in1.tmp $OUTFILE.ano $OUTFILE.wrp $OUTFILE.ind; rm in1.*; echo ' Fetch completed' +argchoice:PIR Dataset:splitdb -p in1.tmp $OUTFILE.ano $OUTFILE.wrp $OUTFILE.ind; rm in1.*; echo ' Fetch completed' +argvalue:0 + +arg:OUTFILE +arglabel:Output file or Dataset name +argtype:text + +in:in1 +informat:genbank + +out:OUTPUT +outformat:genbank + +#---------------- FEATURES - by feature key ( 3/14/94) ------------------- +item:FEATURES - Extract by feature keys +# sed corrects errors in GDE-generated genbank output +itemmethod: sed -e "s/^LOCUS */LOCUS /" -e "s/^ ACCESSION/ACCESSION /" -e "/^$/d" in1 > in1.gen; $FEALIST; ($FCOMMAND -F in1.feafile $DATABASE; rm in1.feafile in1.efile in1.gen; $WHERE) & +itemhelp:xylem/features.doc + +arg:FEALIST +arglabel:FEATURES TO EXTRACT +argtype:chooser +argchoice:Single feature:echo $FEAKEY > in1.feafile +argchoice:Create list of features:cat $GDE_HELP_DIR/xylem/GDE/feafile.template > in1.tmpfeafile; textedit in1.tmpfeafile; egrep -v -e \; in1.tmpfeafile >in1.feafile; rm in1.tmpfeafile +argvalue:0 + +arg:FEAKEY +arglabel:Single feature key +argtype:choice_list +argchoice:allele:allele +argchoice:attenuator:attenuator +argchoice:binding:binding 
+argchoice:CAAT_signal:CAAT_signal +argchoice:CDS:CDS +argchoice:chromosome:chromosome +argchoice:conflict:conflict +argchoice:contig:contig +argchoice:C_region:C_region +argchoice:D_loop:D_loop +argchoice:D_region:D_region +argchoice:D_segment:D_segment +argchoice:enhancer:enhancer +argchoice:exon:exon +argchoice:GC_signal:GC_signal +argchoice:iDNA:iDNA +argchoice:intron:intron +argchoice:J_region:J_region +argchoice:J_segment:J_segment +argchoice:LTR:LTR +argchoice:mat_peptide:mat_peptide +argchoice:misc_binding:misc_binding +argchoice:misc_difference:misc_difference +argchoice:misc_feature:misc_feature +argchoice:misc_recomb:misc_recomb +argchoice:misc_RNA:misc_RNA +argchoice:misc_signal:misc_signal +argchoice:misc_structure:misc_structure +argchoice:modified_base:modified_base +argchoice:mRNA:mRNA +argchoice:mutation:mutation +argchoice:N_region:N_region +argchoice:old_sequence:old_sequence +argchoice:polyA_signal:polyA_signal +argchoice:polyA_site:polyA_site +argchoice:precursor_RNA:precursor_RNA +argchoice:primer_bind:primer_bind +argchoice:prim_transcript:prim_transcript +argchoice:promoter:promoter +argchoice:protein_bind:protein_bind +argchoice:RBS:RBS +argchoice:repeat_region:repeat_region +argchoice:repeat_unit:repeat_unit +argchoice:rep_origin:rep_origin +argchoice:rRNA:rRNA +argchoice:satellite:satellite +argchoice:scRNA:scRNA +argchoice:sig_peptide:sig_peptide +argchoice:snRNA:snRNA +argchoice:source:source +argchoice:S_region:S_region +argchoice:stem_loop:stem_loop +argchoice:STS:STS +argchoice:TATA_signal:TATA_signal +argchoice:terminator:terminator +argchoice:transit_peptide:transit_peptide +argchoice:tRNA:tRNA +argchoice:unsure:unsure +argchoice:variation:variation +argchoice:virion:virion +argchoice:V_region:V_region +argchoice:V_segment:V_segment +argchoice:3'clip:3\'clip +argchoice:3'UTR:3\'UTR +argchoice:5'UTR:5\'UTR +argchoice:5'clip:5\'clip +argchoice:-10_signal:-10_signal +argchoice:-35 signal:-35 signal +argvalue:5 + +arg:FCOMMAND 
+arglabel:NAMES/ACCESSION #'S OF ENTRIES +argtype:choice_menu +argchoice:Single name:echo $EFILE > in1.efile; features -N in1.efile +argchoice:Create list of names:cat $GDE_HELP_DIR/xylem/GDE/names.template > in1.tmpefile; textedit in1.tmpefile; egrep -v -e \; in1.tmpefile >in1.efile; rm in1.tmpefile; features -N in1.efile +argchoice:File of names:cat $EFILE >in1.efile; features -N in1.efile +argchoice:Single Acc#:echo $EFILE > in1.efile; features -A in1.efile +argchoice:Create list of Acc#s:cat $GDE_HELP_DIR/xylem/GDE/acc.template > in1.tmpefile; textedit in1.tmpefile; egrep -v -e \; in1.tmpefile >in1.efile; rm in1.tmpefile; features -A in1.efile +argchoice:File of Acc#s:cat $EFILE >in1.efile; features -A in1.efile +argvalue:0 + +arg:EFILE +arglabel:Name, Accession # or filename +argtype:text + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-u $DBFILE +argchoice:Selected sequences:-U in1.gen +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +argchoice:GDE:readseq -a -f2 in1.out >in1.result; (gde in1.result;rm in1.*)& +argchoice:Textedit windows:(textedit in1.msg -Ws 450 350;rm in1.msg)& (textedit in1.exp -Ws 350 350; rm in1.exp) & (textedit in1.out -Ws 400 350;rm in1.out)& +argchoice:Output file:mv in1.msg $OUTNAME.msg; mv in1.out $OUTNAME.out; mv in1.exp $OUTNAME.exp; echo 'Features completed' +argvalue:0 + +arg:OUTNAME +arglabel:Output file name +argtype:text + +in:in1 +informat:genbank + +out:RESULT +outformat:genbank + +#-------------------- FEATURES - by expression ( 3/14/94)--------------------- +item:FEATURES - Extract using expressions +itemmethod: $CHOOSEEXP; sed -e "s/^LOCUS */LOCUS /" -e "s/^ ACCESSION/ACCESSION /" -e "/^$/d" in1 > in1.gen; (features -E in1.efile $DATABASE; rm in1.gen in1.efile; $WHERE)& +itemhelp:xylem/features.doc + +arg:CHOOSEEXP +arglabel:EXPRESSION(S) +argtype:chooser +argchoice:Single expression:echo 
'$EXPRESSION'|cut -f1 -d":" > in1.accfile; echo \>`cat in1.accfile` >in1.efile; echo '@$EXPRESSION' >> in1.efile; rm in1.accfile +argchoice:Expression file:cat $EFILE >in1.efile +argchoice:Create list of expressions:cat $GDE_HELP_DIR/xylem/GDE/expfile.template > in1.tmpexpfile; textedit in1.tmpexpfile; egrep -v -e \; in1.tmpexpfile >in1.efile; rm in1.tmpexpfile +argvalue:0 + +arg:EXPRESSION +arglabel:Feature expression +argtype:text + +arg:EFILE +arglabel:Expression file +argtype:text + +arg:DATABASE +arglabel:DATABASE +argtype:chooser +argchoice:GenBank:-g +argchoice:GenBank Dataset:-u $DBFILE +argchoice:Selected sequences:-U in1.gen +argvalue:0 + +arg:DBFILE +arglabel:Dataset name +argtype:text + +arg:WHERE +arglabel:WHERE TO SEND OUTPUT +argtype:chooser +argchoice:GDE:readseq -a -f2 in1.out >in1.result; (gde in1.result;rm in1.*)& +argchoice:Textedit windows:(textedit in1.msg -Ws 450 350;rm in1.msg)& (textedit in1.out -Ws 400 350;rm in1.out)& +argchoice:Output file:mv in1.msg $OUTNAME.msg; mv in1.out $OUTNAME.out;echo 'Features completed' +argvalue:0 + +arg:OUTNAME +arglabel:Output file name +argtype:text + +in:in1 +informat:genbank + +out:RESULT +outformat:genbank + +######################## ALIGNMENT MENU ############################### +#--------------- REFORM - print multiple alignment (2/ 2/95) ----------------- +item:REFORM - print mult. align. +#Note: do not use flat or gde . +itemmethod:(cat in1 |readseq -pipe -a -f8 | reform $TYPE -fp $GAPS $CAPS $DOTS -l$LINESIZE -s$START > in1.out; textedit in1.out;rm in1*) & +itemhelp: xylem/reform.doc + +arg:TYPE +argtype:chooser +arglabel:Type: +argchoice:Protein: +argchoice:Nucleic acid:-n +argvalue:0 + +arg:GAPS +argtype:chooser +arglabel:Print gaps as +argchoice:Dashes:-g +argchoice:Spaces: +argvalue:0 + +arg:CAPS +argtype:chooser +arglabel:Capitalize conserved sites in consensus seq.
+argchoice:Yes:-c +argchoice:No: +argvalue:0 + +arg:DOTS +argtype:chooser +arglabel:Print conserved sites in alignment as dots +argchoice:Yes:-p +argchoice:No: +argvalue:0 + +arg:LINESIZE +arglabel:# residues per line +argtype:slider +argmin:40 +argmax:150 +argvalue:70 + +arg:START +arglabel:Begin numbering at +argtype:slider +argmin:-500000 +argmax:500000 +argvalue:1 + +in:in1 +#informat:flat +informat:genbank +insave: + + +############################## SIMILARITY MENU ############################ +#--------------- SHUFFLE - randomize sequences (11/10/93) ----------------- +item:SHUFFLE - randomize sequences +itemmethod: sed "s/[#%]/>/" in1.tmp; shuffle -s$SEED -w$WINDOW -o$OVERLAP in1.shuf; readseq -a -f2 in1.shuf >out1; rm in1* +itemhelp: xylem/shuffle.doc + +arg:SEED +arglabel:RANDOM SEED +argtype:slider +argmin:1 +argmax:32767 +argvalue:7777 + +arg:WINDOW +arglabel:WINDOW +argtype:slider +argmin:5 +argmax:500000 +argvalue:10 + +arg:OVERLAP +arglabel:OVERLAP BETWEEN ADJACENT WINDOWS +argtype:slider +argmin:0 +argmax:100 +argvalue:0 + +in:in1 +informat:flat + +out:out1 +outformat:genbank + + diff --git a/CORE/.GDEmenus.web b/CORE/.GDEmenus.web new file mode 100644 index 0000000..f70072e --- /dev/null +++ b/CORE/.GDEmenus.web @@ -0,0 +1,1016 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? 
+ +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? 
+ +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T?
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 +argchoice:NEW:/usr/local/bio/db/newblast.fasta +argchoice:TEST:/usr/local/bio/db/test_na_db +argchoice:testDB:/usr/local/bio/db/test_na_db +argchoice:HIV-1 Subtype:/usr/local/bio/db/DNA/subcomplete.fasta +argchoice:HIV-1 HXB2 Numbering:/usr/local/bio/db/HXB2.fasta +argchoice:HCV Numbering:/usr/local/bio/db/HCV.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + + + + +menu:HIV Seq. Db. +item:Ref. Seq. 
for Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:Prot function:/database/AA/protease.mutations.fasta +argchoice:rt function:/database/AA/rtmutations.fasta +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta +argchoice:data1.fasta:/database/DNA/data1.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype Db. +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/usr/local/bio/db/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/usr/local/bio/db/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/usr/local/bio/db/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/usr/local/bio/db/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/usr/local/bio/db/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/usr/local/bio/db/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/usr/local/bio/db/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/usr/local/bio/db/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/usr/local/bio/db/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/usr/local/bio/db/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/usr/local/bio/db/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/usr/local/bio/db/DNA/subtyperef/subgag-pol.fasta 
+argchoice:subpol.fasta:/usr/local/bio/db/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/usr/local/bio/db/DNA/subtyperef/subpol-p15RNAase.fasta +argchoice:subpol-p31integrase.fasta:/usr/local/bio/db/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/usr/local/bio/db/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/usr/local/bio/db/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/usr/local/bio/db/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/usr/local/bio/db/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/usr/local/bio/db/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/usr/local/bio/db/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/usr/local/bio/db/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/usr/local/bio/db/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/usr/local/bio/db/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/usr/local/bio/db/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/usr/local/bio/db/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/usr/local/bio/db/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/usr/local/bio/db/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/usr/local/bio/db/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/usr/local/bio/db/DNA/subtyperef/subnef.fasta + +out:OUTPUTFILE +outformat:genbank + + +item:HIV-1 Subtype reduz. 
+itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/sub-reference/subC.CG.fasta +argchoice:HIV-1 GAG sub:/database/DNA/sub-reference/gagsub-reference.fasta +argchoice:HIV-1 POL sub:/database/DNA/sub-reference/polsub-reference.fasta +argchoice:HIV-1 VIF sub:/database/DNA/sub-reference/vifsub-reference.fasta +argchoice:HIV-1 VPR sub:/database/DNA/sub-reference/vprsub-reference.fasta +argchoice:HIV-1 TAT sub:/database/DNA/sub-reference/tatsub-reference.fasta +argchoice:HIV-1 REV sub:/database/DNA/sub-reference/revsub-reference.fasta +argchoice:HIV-1 VPU sub:/database/DNA/sub-reference/vpusub-reference.fasta +argchoice:HIV-1 ENV sub:/database/DNA/sub-reference/envsub-reference.fasta +argchoice:HIV-1 NEF sub:/database/DNA/sub-reference/nefsub-reference.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype B & C Gen. 
regions +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:HIV Subtype C CG:/database/DNA/subB/$format.fasta +argchoice:HIV-1 GAG sub:/database/DNA/subB/$format.gag$type.fasta +argchoice:HIV-1 POL sub:/database/DNA/subB/$format.pol$type.fasta +argchoice:HIV-1 PROTEASE sub:/database/DNA/subB/$format.pol-prot$type.fasta +argchoice:HIV-1 RT sub:/database/DNA/subB/$format.pol-RT$type.fasta +argchoice:HIV-1 INTEGRASE sub:/database/DNA/subB/$format.pol-INT$type.fasta +argchoice:HIV-1 VIF sub:/database/DNA/subB/$format.vif$type.fasta +argchoice:HIV-1 VPR sub:/database/DNA/subB/$format.vpr$type.fasta +argchoice:HIV-1 TAT sub:/database/DNA/subB/$format.tat$type.fasta +argchoice:HIV-1 REV sub:/database/DNA/subB/$format.rev$type.fasta +argchoice:HIV-1 VPU sub:/database/DNA/subB/$format.vpu$type.fasta +argchoice:HIV-1 ENV sub:/database/DNA/subB/$format.env$type.fasta +argchoice:HIV-1 NEF sub:/database/DNA/subB/$format.nef$type.fasta + +arg:format +argtype:chooser +arglabel:Format +argchoice:Subtype B:subB +argchoice:Subtype C:subC + +arg:type +argtype:chooser +arglabel:type +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + + +item:Find Beginning of Genome regions +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:choice_list +argchoice:Protease:ATCACTCTTTGG +argchoice:Protease:ATC +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T?
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat +out:out1 +outformat:colormask + + +item:hivHXB2 genome regions aln +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/hivHXB2regions.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:kedit in1.rpt& + +in:in1 +informat:flat +insave: + + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /usr/local/bio/db/PAM30 .; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins Tulio:/usr/local/bio/db/HIV-PROTEINS-tulio.fasta +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta +argchoice:HIV-1 Structures at PDB:/usr/local/bio/db/Prot.3d.fasta +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel
Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + +item:-------------------------- +item:Add a new protein blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + +menu:Phylogeny + + +item:Phylip help +itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit
outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method?
+argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + + + +menu:On-Line Res. +item:test +itemmethod:netscape http://test.com +item:test +itemmethod:netscape http://test.com +item:test +itemmethod:netscape http://test.com + +item:add a new website +itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenus1 b/CORE/.GDEmenus1 new file mode 100755 index 0000000..09481c5 --- /dev/null +++ b/CORE/.GDEmenus1 @@ -0,0 +1,1339 @@ +menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? 
+ +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? 
+ +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + 
+out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? +argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:Contamination:/var/www/cgi-bin/db/UniVec +argchoice:HIV-1:/var/www/cgi-bin/db/hivallsequencesGB-31-10.fasta +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HIV-1 Genome Location:/var/www/cgi-bin/db/genomeHXB2.fasta + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/$Matrix .; 
blastall -p blastx -d $BLASTDB -i in1.f> in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:test_aa_db:$GDE_HELP_DIR/BLAST/test_aa_db +argchoice:HIVallPRO.fasta:$GDE_HELP_DIR/BLAST/HIVallPRO.fasta +argchoice:ALL prot:$GDE_HELP_DIR/BLAST/hivallsequencesGB-PROT2-31-10.fasta + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. 
Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:DB +item:Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:pNL4-3 CG :/home/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/home/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/home/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/home/database/DR/HXB2.fasta +argchoice:HXB2 protease :/home/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/home/database/DR/HXB2.RT$format.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/home/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/home/database/DNA/subtyperef/sub5ltr.fasta +argchoice:subenv.fasta:/home/database/DNA/subtyperef/subenv.fasta 
+argchoice:subgag.fasta:/home/database/DNA/subtyperef/subgag.fasta +argchoice:subnef.fasta:/home/database/DNA/subtyperef/subnef.fasta +argchoice:subpol.fasta:/home/database/DNA/subtyperef/subpol.fasta +argchoice:subrevCDS.fasta:/home/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subTAR.fasta:/home/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/home/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subvif.fasta:/home/database/DNA/subtyperef/subvif.fasta +argchoice:subvpr.fasta:/home/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/home/database/DNA/subtyperef/subvpu.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HXB2 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/home/database/DNA/HXB2.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + +item:Clustal Protein Alignment +itemmethod:(tr '%#"' '>' clus_in;clustalw /output=PIR /infile=clus_in /align /ktup=$KTUP /window=$WIN $Matrx /fixedgap=$FIXED /floatgap=$FLOAT > in1.rpt;sed "s/>P1;/%/g" < clus_in.pir > in1;$REPORT gde in1;/bin/rm -f in1* clus_in* gde* )& +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? 
.; blastp $BLASTDB in1.f W=$WORDLEN M=$Matrix > in1.tmp;textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:blast3 +itemmethod:(sed "s/[#%]/>/" in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? 
.; blast3 $BLASTDB in1.f W=$WORDLEN M=$Matrix > in1.tmp;textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir1:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/"in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/"in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? 
+argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? +argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? 
+argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT shelltool $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ; $PREEDIT shelltool seqboot; mv -f outfile infile; shelltool dnadist;mv -f outfile infile; shelltool neighbor; cp outtree intree; $PROGRAM textedit outfile;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST:mv -f infile outfile; +argchoice:Fitch:shelltool fitch; +argchoice:Kitsch:shelltool kitsch; +argchoice:Neighbor:shelltool neighbor; +argchoice:Full:shelltool consense; + + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:W +item:Stanford +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl)& +in:in1 +informat:genbank +insave: +inmask: + +item:Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic)& +item:Retroviruses NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& +item:SNAP (sy/nosy)& +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& +item:PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/Entrez/)& +item:HIV-1 DR mutations +itemmethod:(xloadimage $GDE_HELP_DIR/DR/drugresistancemut.gif)& + +in:in1 +informat:genbank + + + + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? 
+argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? 
+argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? 
+argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? 
+argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + diff --git a/CORE/.GDEmenusILOVED b/CORE/.GDEmenusILOVED new file mode 100755 index 0000000..98f062c --- /dev/null +++ b/CORE/.GDEmenusILOVED @@ -0,0 +1,1415 @@ +1menu:File + +item:test cmask output +itemmethod: textedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? 
+argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:NEXUS:17 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? 
+argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. 
mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>' < in1 > clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:textedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod:readseq in1 -a -f11 > infile; shelltool /usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; shelltool /home/tulio/biotools/SNAP/SNAP.pl outfile; textedit backg*; textedit summ*;/bin/rm -rf back* codo* summ*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT 
+argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +item:MFOLD +itemmethod:(tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)& +itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:Pairing(ct) File Name +argtext:mfold_out + + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? 
+argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + +item:Highlight helix +itemmethod:readseq -a -f8 in1 | sed "s/>HELIX/\"HELIX/" > in1.flat; sho_helix < in1.flat > out1;rm in1.flat +itemhelp:sho_helix.help + +in:in1 +informat:genbank + +out:out1 +outformat:colormask + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDB -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:Genome HIV position:/var/www/cgi-bin/db/genomeHIV.fasta +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate +argchoice:HIV-1:/var/www/cgi-bin/db/hivallsequencesGB-31-10.fasta +argchoice:HIV-1 Subtype:/var/www/cgi-bin/db/subcomplete.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +argchoice:HIV-1 HXB2 Numb:/var/www/cgi-bin/db/HXB2.fasta +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM30; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hivallsequencesGB-PROT2-31-10.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 
+argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:FASTA (DNA/RNA) +itemmethod:(sed "s/[#%]/>/" < in1 > in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:GenBank Primate:$GDE_HELP_DIR/FASTA/GENBANK/gbpri.seq\ 1 +argchoice:GenBank Rodent:$GDE_HELP_DIR/FASTA/GENBANK/gbrod.seq\ 1 +argchoice:GenBank all Mammal:$GDE_HELP_DIR/FASTA/GENBANK/gbmam.seq\ 1 +argchoice:GenBank verteBrates:$GDE_HELP_DIR/FASTA/GENBANK/gbvrt.seq\ 1 +argchoice:GenBank Inverts:$GDE_HELP_DIR/FASTA/GENBANK/gbinv.seq\ 1 +argchoice:GenBank pLants:$GDE_HELP_DIR/FASTA/GENBANK/gbpln.seq\ 1 +argchoice:GenBank Struct RNA:$GDE_HELP_DIR/FASTA/GENBANK/gbrna.seq\ 1 +argchoice:GenBank euk. 
Organelles:$GDE_HELP_DIR/FASTA/GENBANK/gborg.seq\ 1 +argchoice:GenBank phaGe:$GDE_HELP_DIR/FASTA/GENBANK/gbphg.seq\ 1 +argchoice:GenBank bacTeria:$GDE_HELP_DIR/FASTA/GENBANK/gbbct.seq\ 1 +argchoice:GenBank sYnthetic:$GDE_HELP_DIR/FASTA/GENBANK/gbsyn.seq\ 1 +argchoice:GenBank Viral:$GDE_HELP_DIR/FASTA/GENBANK/gbvrl.seq\ 1 +argchoice:GenBank Unannotated:$GDE_HELP_DIR/FASTA/GENBANK/gbuna.seq\ 1 + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:altdiag.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altdiag.mat +argchoice:altprot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/altprot.mat +argchoice:dna.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/dna.mat +argchoice:prot.mat:-s $GDE_HELP_DIR/FASTA/MATRIX/prot.mat + +menu:DB +item:Drug Resistance +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Sequence +argchoice:pNL4-3 CG :/database/DR/pNL4-3.CG.fasta +argchoice:pNL4-3 protease :/database/DR/pNL4-3.prot$format.fasta +argchoice:pNL4-3 RT :/database/DR/pNL4-3.RT$format.fasta +argchoice:HXB2 CG :/database/DR/HXB2.fasta +argchoice:HXB2 protease :/database/DR/HXB2.prot$format.fasta +argchoice:HXB2 RT :/database/DR/HXB2.RT$format.fasta + + +arg:format +argtype:chooser +arglabel:Format +argchoice:DNA: +argchoice:AA:AA + +out:OUTPUTFILE +outformat:genbank + +item:DB X8873 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:Databases +argchoice:subtype:/database/DNA/subtyperef/subcomplete.fasta +argchoice:SIVENVDNA.fasta:/database/DNA/SIVENVDNA.fasta + +out:OUTPUTFILE +outformat:genbank + +item:HIV-1 Subtype +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + 
+arg:INPUTFILE +argtype:choice_list +arglabel:HIV-1 Subtype genome regions +argchoice:CG:/database/DNA/subtyperef/subcomplete.fasta +argchoice:sub5ltr.fasta:/database/DNA/subtyperef/sub5ltr.fasta +argchoice:sub5ltrU3.fasta:/database/DNA/subtyperef/sub5ltrU3.fasta +argchoice:sub5ltrU5.fasta:/database/DNA/subtyperef/sub5ltrU5.fasta +argchoice:subenv.fasta:/database/DNA/subtyperef/subenv.fasta +argchoice:subenv-gp120.fasta:/database/DNA/subtyperef/subenv-gp120.fasta +argchoice:subenv-gp41.fasta:/database/DNA/subtyperef/subenv-gp41.fasta +argchoice:subenvv3.fasta:/database/DNA/subtyperef/subenvv3.fasta +argchoice:subgag.fasta:/database/DNA/subtyperef/subgag.fasta +argchoice:subgag-p17.fasta:/database/DNA/subtyperef/subgag-p17.fasta +argchoice:subgag-p24.fasta:/database/DNA/subtyperef/subgag-p24.fasta +argchoice:subgag-pol.fasta:/database/DNA/subtyperef/subgag-pol.fasta +argchoice:subpol.fasta:/database/DNA/subtyperef/subpol.fasta +argchoice:subpol-p15RNAase.fasta:/database/DNA/subtyperef/subpol-p15RNAase.fasta +argchoice:subpol-p31integrase.fasta:/database/DNA/subtyperef/subpol-p31integrase.fasta +argchoice:subpol-p51RT.fasta:/database/DNA/subtyperef/subpol-p51RT.fasta +argchoice:subpol-protease.fasta:/database/DNA/subtyperef/subpol-protease.fasta +argchoice:subrevCDS.fasta:/database/DNA/subtyperef/subrevCDS.fasta +argchoice:subrevexon1.fasta:/database/DNA/subtyperef/subrevexon1.fasta +argchoice:subrevexon2.fasta:/database/DNA/subtyperef/subrevexon2.fasta +argchoice:subrevintron.fasta:/database/DNA/subtyperef/subrevintron.fasta +argchoice:subTAR.fasta:/database/DNA/subtyperef/subTAR.fasta +argchoice:subtatCDS.fasta:/database/DNA/subtyperef/subtatCDS.fasta +argchoice:subtatexon1.fasta:/database/DNA/subtyperef/subtatexon1.fasta +argchoice:subtatexon2.fasta:/database/DNA/subtyperef/subtatexon2.fasta +argchoice:subtatintron.fasta:/database/DNA/subtyperef/subtatintron.fasta +argchoice:subvif.fasta:/database/DNA/subtyperef/subvif.fasta 
+argchoice:subvpr.fasta:/database/DNA/subtyperef/subvpr.fasta +argchoice:subvpu.fasta:/database/DNA/subtyperef/subvpu.fasta +argchoice:subnef.fasta:/database/DNA/subtyperef/subnef.fasta +out:OUTPUTFILE +outformat:genbank + + + +item:HXB2 +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:chooser +arglabel:HXB2 Reference Seq +argchoice:CG:/database/DNA/HXB2.fasta + +out:OUTPUTFILE +outformat:genbank + +menu:Protein + +item:Clustal Protein Alignment +itemmethod:(tr '%#' '>' < in1 > clus_in;clustalw -output=GDE -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.gde> in1;$REPORT gde clus_in.gde;/bin/rm -f clus_in* in1* )& + + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Matrx +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 100:PAM100 +argchoice:Identity:ID + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:textedit in1.rpt& + +in:in1 +informat:flat +insave: + +#Menu for Protein + +item:blastp +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/textedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/var/www/cgi-bin/db/hivallsequencesGB-PROT2-31-10.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept +argchoice:local:$GDE_HELP_DIR/BLAST/local_db + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp /var/www/cgi-bin/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:blast3 +itemmethod:(sed "s/[#%]/>/" < in1 > in1.f; cp $GDE_HELP_DIR/BLAST/PAM??? 
.; blast3 $BLASTDB in1.f W=$WORDLEN M=$Matrix > in1.tmp;textedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:pir1:$GDE_HELP_DIR/BLAST/pir +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM120:PAM120 +argchoice:PAM250:PAM250 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:FASTA (Protein) +itemmethod:(sed "s/[#%]/>/" < in1 > in1.fasta;fasta -Q -d $NUMOFALN $MATRIX in1.fasta $DBASE > in1.out; textedit in1.out;\rm in1*) & +itemhelp:FASTA.help + +in:in1 +informat:flat + +arg:DBASE +argtype:choice_list +arglabel:Database +argchoice:NBRF PIR1:$GDE_HELP_DIR/FASTA/PIR/pir1.dat\ 2 +argchoice:NBRF PIR2:$GDE_HELP_DIR/FASTA/PIR/pir2.dat\ 2 +argchoice:NBRF PIR3:$GDE_HELP_DIR/FASTA/PIR/pir3.dat\ 2 + + +arg:NUMOFALN +argtype:slider +arglabel:Number of Alignment to Report +argmin:1 +argmax:100 +argvalue:20 + +arg:MATRIX +arglabel:Which SMATRIX +argtype:choice_list +argchoice:Default: +argchoice:Minimum mutation matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/codaa.mat +argchoice:Identity matrix:-s $GDE_HELP_DIR/FASTA/MATRIX/idnaa.mat +argchoice:Identity matrix for mismatches:-s $GDE_HELP_DIR/FASTA/MATRIX/idpaa.mat +argchoice:PAM250:-s $GDE_HELP_DIR/FASTA/MATRIX/pam250.mat +argchoice:PAM120:-s $GDE_HELP_DIR/FASTA/MATRIX/pam120.mat + +menu:Seq management + +item:Assemble Contigs +itemmethod:(sed "s/#/>/" < in1 > in1.tmp; CAP2 in1.tmp $OVERLAP $PMATCH > out1;/bin/rm -f in1.tmp) +itemhelp:CAP2.help + +arg:OVERLAP +argtype:slider +arglabel:Minimum overlap? 
+argmin:5 +argmax:100 +argvalue:20 + +arg:PMATCH +argtype:slider +arglabel:Percent match required within overlap +argmin:25 +argmax:100 +argvalue:90 + +in:in1 +informat:flat + +out:out1 +outformat:gde +outoverwrite: + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + + +item:Restriction sites +itemmethod:(cp $ENZ in1.tmp ; $PRE_EDIT Restriction in1.tmp in1 > out1 ; rm in1.tmp); +itemhelp:Restriction.help + +arg:ENZ +argtype:text +arglabel:Enzyme file +argtext:$GDE_HELP_DIR/DATA_FILES/enzymes + +arg:PRE_EDIT +argtype:chooser +arglabel:Edit enzyme file first? +argchoice:Yes:textedit in1.tmp; +argchoice:No: + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +menu:Phylogeny + +item:DeSoete Tree fit +itemmethod: (readseq -a -f8 in1>in1.flat;count -t $CORR in1.flat> in1.tmp ; lsadt in1.out ; $DISPLAY_FUNC in1.out;/bin/rm -f in1* )& +itemhelp:lsadt.help + +in:in1 +informat:genbank +insave: +inmask: + +arg:CORR +arglabel:Distance correction? +argtype:chooser +argchoice:Olsen:-c=olsen +argchoice:Jukes/Cantor:-c=jukes +argchoice:None:-c=none + +arg:INIT +arglabel:Initial parameter estimate +argtype:choice_list +argchoice:uniformly distributed random numbers:1 +argchoice:error-perturbed data:2 +argchoice:original distance data from input matrix:3 + +arg:SEED +argtype:slider +arglabel:Random number seed +argmin:0 +argmax:65535 +argvalue:12345 + +arg:DISPLAY_FUNC +argtype:chooser +arglabel:View tree using +argchoice:TextEdit:textedit +argchoice:Treetool:treetool < + +item:Phylip help +itemmethod:(textedit $GDE_HELP_DIR/PHYLIP/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? 
+argchoice:boot:boot.doc +argchoice:clique:clique.doc +argchoice:consense:consense.doc +argchoice:contchar:contchar.doc +argchoice:contml:contml.doc +argchoice:contrast:contrast.doc +argchoice:discrete:discrete.doc +argchoice:distance:distance.doc +argchoice:dnaboot:dnaboot.doc +argchoice:dnacomp:dnacomp.doc +argchoice:dnadist:dnadist.doc +argchoice:dnainvar:dnainvar.doc +argchoice:dnaml:dnaml.doc +argchoice:dnamlk:dnamlk.doc +argchoice:dnamove:dnamove.doc +argchoice:dnapars:dnapars.doc +argchoice:dnapenny:dnapenny.doc +argchoice:dolboot:dolboot.doc +argchoice:dollop:dollop.doc +argchoice:dolmove:dolmove.doc +argchoice:dolpenny:dolpenny.doc +argchoice:draw:draw.doc +argchoice:drawgram:drawgram.doc +argchoice:drawtree:drawtree.doc +argchoice:factor:factor.doc +argchoice:fitch:fitch.doc +argchoice:gendist:gendist.doc +argchoice:kitsch:kitsch.doc +argchoice:main:main.doc +argchoice:mix:mix.doc +argchoice:move:move.doc +argchoice:neighbor:neighbor.doc +argchoice:penny:penny.doc +argchoice:protpars:protpars.doc +argchoice:read.me.general:read.me.general.doc +argchoice:restml:restml.doc +argchoice:seqboot:seqboot.doc +argchoice:sequence:sequence.doc + + +item:Phylip 3.4 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT shelltool $PROGRAM;textedit outfile;rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:PAUP +itemmethod:(readseq -a -f17 in1 > work.nxs; shelltool paup work.nxs;/bin/rm -f gde*)& + +in:in1 +informat:genbank +inmask: +insave: + +item:MrBaynes +itemmethod:(readseq -a -f17 in1 | sed "s/interleave /interleave=yes /" > work.nxs; shelltool mb work.nxs;/bin/rm -rf gde1*;/bin/rm in1)& + +item:codeml +itemmethod:(readseq -a -f11 in1 | sed "s/ YF//1" > test.phy; shelltool codeml $METHOD)& + +arg:METHOD +arglabel:Which method ? +argtype:chooser +argchoice:1:method1.ctl +argchoice:2:method2.ctl +argchoice:3:method3.ctl +argchoice:4:method4.ctl +argchoice:5:method5.ctl +argchoice:6:method6.ctl + + +in:in1 +informat:genbank +inmask: +insave: + + +item:Phylip Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ; $PREEDIT shelltool seqboot; mv -f outfile infile; shelltool dnadist;mv -f outfile infile; shelltool neighbor; cp outtree intree; $PROGRAM textedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST:mv -f infile outfile; +argchoice:Fitch:shelltool fitch; +argchoice:Kitsch:shelltool kitsch; +argchoice:Neighbor:shelltool neighbor; +argchoice:Full:shelltool consense; + + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:textedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +menu:W +item:Stanford +itemmethod:(readseq in1 -a -f8 > infile.fasta; netscape http://hiv-4.stanford.edu/cgi-bin/hivseqweb.pl?uploaded_file=infile.fasta)& +in:in1 +informat:genbank +item:Los Alamos DB Search +itemmethod:(netscape http://hiv-web.lanl.gov/cgi-bin/hivDB3/public/wdb/ssampublic)& +item:Retroviruses NCBI +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/retroviruses/)& +item:SNAP (sy/nosy)& +itemmethod:(netscape http://hiv-web.lanl.gov/SNAP/WEBSNAP/SNAP.html)& +item:PubMed +itemmethod:(netscape http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=PubMed&term=hiv+africa)& + +arg:term +argtype:text +arglabel:Searc? +argtext:New + + +item:test seqname +itemmethod:(textedit in1;netscape http://www.ncbi.nlm.nih.gov/Entrez/in1)& + +in:in1 +informat:genbank +out:out1 +outformat:flat + + + +menu:Email + +item:BLASTN +itemmethod:(echo BLASTPROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo MATCH $MSCORE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? +argchoice:GenBank Qtrly & Updates:GenBank +argchoice:EMBL:embl + +arg:MSCORE +argtype:slider +arglabel:Match Score +argmin:3 +argmax:7 +argvalue:5 + +in:in1 +informat:flat +insave: + +item:BLASTP +itemmethod:(echo BLASTPROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail BLAST@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Database? 
+argchoice:Swiss-Prot:swiss-prot +argchoice:PIR:pir + +in:in1 +informat:flat +insave: + +item:Fasta-(DNA) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $KPL >> in1.tmp; echo SCORES $TOP >> in1.tmp; echo ALIGNMENTS $ALNG >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which GenBank Database? +argchoice:Qrtly & Updates:GenBank/all +argchoice:Updates:GenBank/new +argchoice:Primate:GenBank/primate +argchoice:Rodent:GenBank/rodent +argchoice:Other-Mammalian:GenBank/other_mammalian +argchoice:Other-Vertebrate:GenBank/other_vertebrate +argchoice:Invertebrate:GenBank/invertebrate +argchoice:Plant:GenBank/plant +argchoice:Organelle:GenBank/organelle +argchoice:Bacterial:GenBank/bacterial +argchoice:Structural-RNA:GenBank/structural_rna +argchoice:Viral:GenBank/viral +argchoice:Phage:GenBank/phage +argchoice:Synthetic:GenBank/synthetic +argchoice:Unannotated:GenBank/unannotated + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta-(PROTEIN) +itemmethod:(echo DATALIB $DBASE > in1.tmp; echo KTUP $TPL >> in1.tmp; echo SCORES $SCRS >> in1.tmp; echo ALIGNMENTS $ALNMNTS >> in1.tmp; echo BEGIN >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail SEARCH@GENBANK.BIO.NET < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_list +arglabel:Which Protein Database? +argchoice:Trans GenBank Qrtly:GenPept/all +argchoice:Trans GenBank Daily:GenPept/new +argchoice:Swiss-Protein:SWISS-PROT/all + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? 
+argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? +argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + +item:Sequence Retrieval +itemmethod:(echo $REGEXP > in1.tmp; Mail RETRIEVE@GENBANK.BIO.NET < in1.tmp; rm in1.tmp) & + +arg:REGEXP +argtype:text +arglabel:Accession # or LOCUS name of sequence to retrieve + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + +# +# dgg added new readseq formats, 29 dec 92 +# + +item:Export Foreign Format +itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:GenBank:genbank +argchoice:IG/Stanford:ig +argchoice:NBRF:nbrf +argchoice:EMBL:embl +argchoice:GCG:gcg +argchoice:DNA Strider:strider +argchoice:Fitch:fitch +argchoice:Pearson/Fasta:pearson +argchoice:Zuker:zuker +argchoice:Olsen:olsen +argchoice:Phylip:phylip +#argchoice:Phylip v3.2:phylip3.2 +argchoice:Plain text:raw +argchoice:ASN.1:asn +argchoice:PIR:pir +argchoice:MSF:msf +argchoice:PAUP:paup +argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? 
+ +in:in1 +informat:genbank + + +# +#dgg addition for new readseq, 24 dec 92 +# + +item:Pretty Print +itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)& +itemhelp:readseq.help + +#nametop is bad !? + +in:in1 +informat:genbank + +arg:NAMETOP +argtype:chooser +arglabel:Names at top ? +argchoice:No: +argchoice:Yes:-nametop + +arg:NAMELEFT +argtype:chooser +arglabel:Names at left ? +argchoice:No: +argchoice:Yes:-nameleft + +arg:NAMERIGHT +argtype:chooser +arglabel:Names at right? +argchoice:Yes:-nameright +argchoice:No: + +arg:NUMTOP +argtype:chooser +arglabel:Numbers at top ? +argchoice:Yes:-numtop +argchoice:No: + +arg:NUMBOT +argtype:chooser +arglabel:Numbers at tail ? +argchoice:No: +argchoice:Yes:-numbot + +arg:NUMLEFT +argtype:chooser +arglabel:Numbers at left ? +argchoice:Yes:-numleft +argchoice:No: + +arg:NUMRIGHT +argtype:chooser +arglabel:Numbers at right? +argchoice:Yes:-numright +argchoice:No: + +arg:MATCH +argtype:chooser +arglabel:Use match '.' for 2..n species? +argchoice:No: +argchoice:Yes:-match + +arg:GAPC +argtype:chooser +arglabel:Count gap symbols? +argchoice:No: +argchoice:Yes:-gap + +arg:WIDTH +argtype:slider +arglabel:Sequence width? +argmin:10 +argmax:200 +argvalue:50 + +arg:COLS +argtype:slider +arglabel:Column spacers? +argmin:0 +argmax:50 +argvalue:10 + + +### pretty print insert end +# + +item:Wally's test function +itemmethod:run__wally $ONE $TWO $THREE < $FILE + +arg:ONE +argtype:chooser +arglabel:How? +argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:TWO +argtype:slider +argmin:0 +argmax:100 +argvalue:50 +arglabel:how many? + +arg:THREE +argtype:choice_list +arglabel:Which one? 
+argchoice:Fast:-fast +argchoice:Slow:-slow + +arg:FILE +argtype:text +arglabel:Which file + diff --git a/CORE/.GDEmenusNew b/CORE/.GDEmenusNew new file mode 100644 index 0000000..fa1cff0 --- /dev/null +++ b/CORE/.GDEmenusNew @@ -0,0 +1,791 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? 
+ +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. 
mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>' < in1 > clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? +argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH 
+argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? +argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2 +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code + +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + +menu:seq. 
datasets +item:tttt +itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +out:OUTPUTFILE +outformat:genbank + +item:HIV1POLDNA.fasta +itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +out:OUTPUTFILE +outformat:genbank + +item:structure +itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +out:OUTPUTFILE +outformat:genbank + +item:------------- +item:add a new dataset +itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file + +arg:name +argtype:text +arglabel:Enter the dataset name ? + +arg:file +argtype:text +arglabel:Enter the dataset file (in FASTA) ? + + +#Menu for Protein +menu:protein +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDBPROT +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta +argchoice:ttttt:/usr/local/biotools/db/tttt +argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp +argchoice:yyyy:/usr/local/biotools/db/test + +arg:Matrix +barglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM??? 
.; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + +item:-------------------------- +item:Add a new Protein blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: Enter the file (in FASTA) + +arg:menuname +argtype:text +arglabel: Enter the name of the DB + +menu:Phylogeny + + +item:Phylip help +itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? 
+argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + + + +menu:On-Line Res. 
+item:tytyt +itemmethod:netscape hnu[phoph & +item:SANBI +itemmethod:netscape again & +item:PlasmoDB +itemmethod:netscape http://www.plasmodb.org & +item:NCBI +itemmethod:netscape http://www.ncbi.nlm.nih.gov & +item:sanbi +itemmethod:netscape http://www.sanbi.ac.za & +item:SANBI +itemmethod:netscape http://www.sanbi.ac.za & + +item:GDE for Linux resources at Bioafrica.net +itemmethod:netscape http://www.bioafrica.net & + +item:------------------------- +item:add a new website +itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenusthat b/CORE/.GDEmenusthat new file mode 100644 index 0000000..5a3f56f --- /dev/null +++ b/CORE/.GDEmenusthat @@ -0,0 +1,761 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? 
+ +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? + +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? 
+ +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? +argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + +menu:seq. datasets + +item:------------- +item:add a new dataset +itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file + +arg:name +argtype:text +arglabel:Enter the dataset name ? + +arg:file +argtype:text +arglabel:Enter the dataset file (in FASTA) ? 
+ + +#Menu for Protein +menu:protein +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta + +arg:Matrix +barglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + 
+item:-------------------------- +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: Enter the file (in FASTA) + +arg:menuname +argtype:text +arglabel: Enter the name of the DB + +menu:Phylogeny + + +item:Phylip help +itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? 
+argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? 
+argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + + + +menu:On-Line Res. + +item:GDE for Linux resources at Bioafrica.net +itemmethod:netscape http://www.bioafrica.net & + +item:------------------------- +item:add a new website +itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenusthat~ b/CORE/.GDEmenusthat~ new file mode 100644 index 0000000..ca925b9 --- /dev/null +++ b/CORE/.GDEmenusthat~ @@ -0,0 +1,761 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? 
+ +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? 
+argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + +menu:seq. datasets + +item:------------- +item:add a new dataset +itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file + +arg:name +argtype:text +arglabel:Enter the dataset name ? + +arg:file +argtype:text +arglabel:Enter the dataset file (in FASTA) ? 
+ + +#Menu for Protein +menu:protein +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta + +arg:Matrix +barglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + 
+item:-------------------------- +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: Enter the file (in FASTA) + +arg:menuname +argtype:text +arglabel: Enter the name of the DB + +menu:Phylogeny + + +item:Phylip help +itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? +argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? 
+argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? 
+argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + + + +menu:On-Line Res. + +item:GDE for Linux resources at Bioafrica.net +itemmethod:netscape http://www.bioafrica.net & + +item:------------------------- +item:add a new website +itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenus~ b/CORE/.GDEmenus~ new file mode 100644 index 0000000..fa1cff0 --- /dev/null +++ b/CORE/.GDEmenus~ @@ -0,0 +1,791 @@ +1menu:File + +item:test cmask output +itemmethod: kedit in1 + +in:in1 +informat:colormask + +item:New sequence +itemmethod:echo "$Type$Name" > out1 +itemmeta:n +itemhelp:new_sequence.help + +arg:Name +argtype:text +arglabel:New Sequence name? +argtext:New + +arg:Type +argtype:choice_list +arglabel:Type? +argchoice:DNA/RNA:# +argchoice:Amino Acid:% +argchoice:Text:\" +argchoice:Mask:@ + +out:out1 +outformat:flat + +item:Import Foreign Format +itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +itemhelp:readseq.help + +arg:INPUTFILE +argtype:text +arglabel:Name of foreign file? + +out:OUTPUTFILE +outformat:genbank + +item:Export Foreign Format +itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE +itemhelp:readseq.help + +arg:FORMAT +argtype:choice_list +argchoice:FASTA:8 +argchoice:NEXUS:17 +argchoice:Phylip v3.3:12 +argchoice:IG/Stanford:1 +argchoice:GenBank:2 +argchoice:NBRF:3 +argchoice:EMBL:4 +argchoice:GCG:5 +argchoice:DNA Strider:6 +argchoice:Fitch:7 +argchoice:Pearson:8 +argchoice:Zuker:9 +argchoice:Olsen:10 +argchoice:Phylip v3.2:11 +argchoice:Phylip v3.3:12 +argchoice:Plain text:13 + +arg:OUTPUTFILE +argtype:text +arglabel:Save as? 
+ +in:INPUTFILE +informat:genbank + + +item:Save Selection +itemmethod: cat $SAVE_FUNC > $Name +itemhelp:save_selection.help + +arg:SAVE_FUNC +argtype:chooser +arglabel:File format +argchoice:Flat:in1 +argchoice:Genbank:in2 +argchoice:GDE/HGL:in3 + +arg:Name +argtype:text +arglabel:File name? + +in:in1 +informat:flat + +in:in2 +informat:genbank + +in:in3 +informat:gde + +item:Print Selection +itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& +itemhelp:print_alignment.help + +arg:SCALE +argtype:slider +arglabel:Reduce printout by? +argmin:1 +argmax:20 +argvalue:1 + +arg:CMD +argtype:chooser +argchoice:Lpr:lpr +argchoice:Enscript Gaudy:enscript -G -q +argchoice:Enscript Two column:enscript -2rG + +arg:PRINTER +argtype:text +arglabel:Which printer? +argtext:lp + +in:in1 +informat:gde +insave: + +menu:Edit + +item:Sort +itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& +itemhelp:heapsortHGL.help + +arg:PRIM_KEY +argtype:choice_list +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Primary sort field? + +arg:SEC_KEY +argtype:choice_list +argchoice:None: +argchoice:Group:group-ID +argchoice:type:type +argchoice:name:name +argchoice:Sequence ID:sequence-ID +argchoice:creator:creator +argchoice:offset:offset +arglabel:Secondary sort field? + +in:in1 +informat:gde +insave: + +item:extract +itemmethod:(gde in1;/bin/rm -f in1)& + +in:in1 +informat:gde +inmask: +insave: + +menu:DNA/RNA + +item:Translate... +itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 + +arg:FRAME +argtype:chooser +arglabel:Which reading frame? +argchoice:First:1 +argchoice:Second:2 +argchoice:Third:3 +argchoice:All six:6 + +arg:MNFRM +arglabel:Minimum length of AA sequence to translate? 
+argtype:slider +argmin:0 +argmax:100 +argvalue:20 + +arg:LTRCODE +argtype:chooser +arglabel:Translate to: +argchoice:Single letter codes: +argchoice:Triple letter codes:-3 + +arg:TBL +arglabel:Codon table? +argtype:chooser +argchoice:universal:1 +argchoice:mycoplasma:2 +argchoice:yeast:3 +argchoice:Vert. mito.:4 +in:in1 +informat:gde + +out:out1 +outformat:gde + +item:Dot plot +itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& +itemhelp:DotPlotTool.help + +in:in1 +informat:gde +insave: + +item:Clustal alignment +itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& + +itemhelp:clustal_help + +arg:KTUP +argtype:slider +arglabel:K-tuple size for pairwise search +argmin:1 +argmax:10 +argvalue:2 + +arg:WIN +argtype:slider +arglabel:Window size +argmin:1 +argmax:10 +argvalue:4 + +arg:Trans +argtype:chooser +arglabel:Transitions weighted? +argchoice:Yes:/TRANSIT +argchoice:No: + +arg:FIXED +argtype:slider +arglabel:Fixed gap penalty +argmin:1 +argmax:100 +argvalue:10 + +arg:FLOAT +arglabel:Floating gap penalty +argtype:slider +argmin:1 +argmax:100 +argvalue:10 + +arg:REPORT +argtype:chooser +arglabel:View assembly report? 
+argchoice:No: +argchoice:Yes:kedit in1.rpt& + + +in:in1 +informat:flat +insave: + +item:Variable Positions +itemmethod:varpos $REV < in1 > out1 + +arg:REV +argtype:chooser +arglabel:Highlight (darken) +argchoice:Conserved positions: +argchoice:variable positions:-rev + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Phrap +itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; + +in:in1 +informat:genbank + +out:out1 +outformat:genbank + +item:SNAP +itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; + +in:in1 +informat:flat +out:out1 +outformat:text + + + + +item:Find all +itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; +itemhelp:findall.help +itemmeta:f + +arg:SEARCH +argtype:text +arglabel:Search String + +arg:PRCNT +argtype:slider +arglabel:Percent mismatch +argmin:0 +argmax:75 +argvalue:10 + +arg:CASE +argtype:chooser +arglabel:Case +argchoice:Upper equals lower: +argchoice:Upper not equal lower:-case + +arg:UT +argtype:chooser +arglabel:U equal T? 
+argchoice:Yes:-u=t +argchoice:No: +argvalue:0 + +arg:MAT +arglabel:Match color +argtype:choice_list +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:2 + +arg:MIS +argtype:choice_list +arglabel:Mismatch color +argchoice:yellow:1 +argchoice:violet:2 +argchoice:red:3 +argchoice:aqua:4 +argchoice:green:5 +argchoice:blue:6 +argchoice:grey:11 +argchoice:black:8 +argvalue:7 + +in:in1 +informat:flat + +out:out1 +outformat:colormask + +item:Sequence Consensus +itemmethod:(MakeCons in1 $METHOD $MASK > out1) +itemhelp:MakeCons.help + +arg:METHOD +arglabel:Method +argtype:chooser +argchoice:IUPAC:-iupac +argchoice:Majority:-majority $PERCENT + +arg:MASK +argtype:chooser +arglabel:Create a new: +argchoice:Sequence: +argchoice:Selection Mask: | Consto01mask + +arg:PERCENT +arglabel:Minimum Percentage for Majority +argtype:slider +argmin:50 +argmax:100 +argvalue:75 + +in:in1 +informat:gde + +out:out1 +outformat:gde + + +#Menu for DNA/RNA + +item:blastn +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV-1 Seq. 
Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2 +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:MATCH +argtype:slider +arglabel:Match Score +argmin:1 +argmax:10 +argvalue:5 + +arg:MMSCORE +argtype:slider +arglabel:Mismatch Score +argmin:-10 +argmax:-1 +argvalue:-5 + +item:blastx +itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& + + + +in:in1 +informat:flat +insave: + +arg:BLASTDBDNA +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta +argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code + +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + +item:------------------------ + +item:Add a new DNA blast db +itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: enter the file name + +arg:menuname +argtype:text +arglabel: enter the name of the DB + +menu:seq. 
datasets +item:tttt +itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +out:OUTPUTFILE +outformat:genbank + +item:HIV1POLDNA.fasta +itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +out:OUTPUTFILE +outformat:genbank + +item:structure +itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp +out:OUTPUTFILE +outformat:genbank + +item:------------- +item:add a new dataset +itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file + +arg:name +argtype:text +arglabel:Enter the dataset name ? + +arg:file +argtype:text +arglabel:Enter the dataset file (in FASTA) ? + + +#Menu for Protein +menu:protein +item:blastp +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& + + +in:in1 +informat:flat +insave: + +arg:BLASTDBPROT +argtype:choice_list +arglabel:Which Database +argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta +argchoice:ttttt:/usr/local/biotools/db/tttt +argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp +argchoice:yyyy:/usr/local/biotools/db/test + +arg:Matrix +barglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:1 +argmax:5 +argvalue:3 + +item:tblastn +itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM??? 
.; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& + +in:in1 +informat:flat +insave: + +arg:BLASTDB +argtype:choice_list +arglabel:Which Database +argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank +argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate + +arg:Matrix +arglabel:Substitution Matrix: +argtype:choice_list +argchoice:PAM30:PAM30 +argchoice:PAM70:PAM70 + +arg:WORDLEN +argtype:slider +arglabel:Word Size +argmin:4 +argmax:18 +argvalue:12 + +arg:CODE +argtype:choice_list +arglabel:Genetic Code +argchoice:Standard or Universal:0 +argchoice:Vertebrate Mitochondrial:1 +argchoice:Yeast Mitochondrial:2 +argchoice:Mold Mitochondrial and Mycoplasma:3 +argchoice:Invertebrate Mitochondrial:4 +argchoice:Ciliate Macronuclear:5 +argchoice:Protozoan Mitochondrial:6 +argchoice:Plant Mitochondrial:7 +argchoice:Echinodermate Mitochondrial:8 + + +item:Map View +itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& +itemhelp:mapview.help + +in:in1 +informat:gde +insave: + +arg:PBL +arglabel:Pixel Between Lines +argtype:slider +argvalue:10 +argmin:1 +argmax:15 + +arg:NPP +arglabel:Nucleotides Per Pixel +argtype:slider +argvalue:1 +argmin:1 +argmax:20 + +arg:LWIDTH +arglabel:Line Thickness +argtype:slider +argvalue:2 +argmin:1 +argmax:5 + +item:-------------------------- +item:Add a new Protein blast db +itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname; + +arg:sourcefile +argtype:text +arglabel: Enter the file (in FASTA) + +arg:menuname +argtype:text +arglabel: Enter the name of the DB + +menu:Phylogeny + + +item:Phylip help +itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)& + +arg:FILE +argtype:choice_list +arglabel:Which program? 
+argchoice:clique:clique.html +argchoice:consense:consense.html +argchoice:contchar:contchar.html +argchoice:contml:contml.html +argchoice:contrast:contrast.html +argchoice:discrete:discrete.html +argchoice:distance:distance.html +argchoice:dnaboot:dnaboot.html +argchoice:dnacomp:dnacomp.html +argchoice:dnadist:dnadist.html +argchoice:dnainvar:dnainvar.html +argchoice:dnaml:dnaml.html +argchoice:dnamlk:dnamlk.html +argchoice:dnamove:dnamove.html +argchoice:dnapars:dnapars.html +argchoice:dnapenny:dnapenny.html +argchoice:dollop:dollop.html +argchoice:dolmove:dolmove.html +argchoice:dolpenny:dolpenny.html +argchoice:draw:draw.html +argchoice:drawgram:drawgram.html +argchoice:drawtree:drawtree.html +argchoice:factor:factor.html +argchoice:fitch:fitch.html +argchoice:gendist:gendist.html +argchoice:kitsch:kitsch.html +argchoice:main:main.html +argchoice:mix:mix.html +argchoice:move:move.html +argchoice:neighbor:neighbor.html +argchoice:penny:penny.html +argchoice:protpars:protpars.html +argchoice:read.me.general:read.me.general.html +argchoice:restml:restml.html +argchoice:seqboot:seqboot.html +argchoice:sequence:sequence.html + + + +item:Phylip 3.5 +itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& + +arg:PROGRAM +argtype:choice_list +arglabel:Which program to run? +argchoice:DNAPARS:dnapars +argchoice:DNABOOT:dnaboot +argchoice:DNAPENNY:dnapenny +argchoice:DNAML:dnaml +argchoice:DNAMLK:dnamlk +argchoice:DNACOMP:dnacomp +argchoice:DNAMOVE:dnamove +argchoice:DNAINVAR:dnainvar +argchoice:PROTPARS:protpars + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? 
+argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + +item:Phylip DNA Distance methods +itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& + +arg:EXPLAIN +argtype:text +arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE + + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:DNADIST+NEIGHBOR: +argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Run ? +argtype:chooser +argchoice:Run without Bootstrap: +argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; + +arg:DNA +argtype:text +arglabel:Name of DNADIST outfile? + +arg:NEI +argtype:text +arglabel:Name of NEIGHBOR outfile? + +arg:TREE +argtype:text +arglabel:Name of TREEFILE ? + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + +item:Phylip PROTEIN Distance methods +itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& + +arg:PROGRAM +arglabel:Which method? +argtype:chooser +argchoice:PROTDIST+NEIGHBOR: +argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; + +arg:PROG +arglabel:Which method? +argtype:chooser +argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; +argchoice:No Bootstrap: + +arg:PREEDIT +argtype:chooser +arglabel:Edit input before running? +argchoice:No: +argchoice:Yes:kedit infile; + +in:in1 +informat:genbank +inmask: +insave: + + + + + +menu:On-Line Res. 
+item:tytyt +itemmethod:netscape hnu[phoph & +item:SANBI +itemmethod:netscape again & +item:PlasmoDB +itemmethod:netscape http://www.plasmodb.org & +item:NCBI +itemmethod:netscape http://www.ncbi.nlm.nih.gov & +item:sanbi +itemmethod:netscape http://www.sanbi.ac.za & +item:SANBI +itemmethod:netscape http://www.sanbi.ac.za & + +item:GDE for Linux resources at Bioafrica.net +itemmethod:netscape http://www.bioafrica.net & + +item:------------------------- +item:add a new website +itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url + +arg:name +argtype:text +arglabel:Enter the site name + +arg:url +argtype:text +arglabel:Enter the URL (including http://) diff --git a/CORE/BasicDisplay.c b/CORE/BasicDisplay.c new file mode 100755 index 0000000..1ef09db --- /dev/null +++ b/CORE/BasicDisplay.c @@ -0,0 +1,890 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + + +Panel menubar = (Panel)NULL; + +/* +BasicDisplay(): +Set up menus and primary display. + +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + + +Panel BasicDisplay(DataSet) +NA_Alignment *DataSet; +{ + int i,j,k; + extern Panel menubar; + extern int DisplayType; + extern Gmenu menu[]; + extern Frame frame; + extern int num_menus; + + if(menubar == (Panel) NULL) + { + menubar = xv_create(frame,PANEL, + 0); + /* +* For all menus defined in the .GDEmenu file, create a corresponding +* menu on the menu bar, and tie its XView object to the internal +* menu structure. +*/ + for(j=0;jX = xv_create((Cms/* ??? 
rtm 18.III.98*/)NULL,MENU,0); + if(strcmp(thismenu->label,"File")==0) + { + xv_set(thismenu->X, + MENU_ITEM, + MENU_STRING,"Open...", + MENU_NOTIFY_PROC,Open, + 0, + MENU_ITEM, + MENU_STRING,"Save as...", + MENU_NOTIFY_PROC,SaveAs, + 0, + MENU_ITEM, + MENU_STRING,"Properties...", + MENU_NOTIFY_PROC,ChangeDisplay, + 0, + MENU_ITEM, + MENU_STRING,"Protections...", + MENU_NOTIFY_PROC,SetProtection, + 0, + MENU_ITEM, + MENU_STRING,"Get info... ", + MENU_NOTIFY_PROC,ModAttr, + 0, + 0); + } + else if(strcmp(thismenu->label,"Edit")==0) + { + xv_set(thismenu->X, + MENU_ITEM, + MENU_STRING,"Select All", + MENU_NOTIFY_PROC,SelectAll, + 0, + MENU_ITEM, + MENU_STRING,"Select by name...", + MENU_NOTIFY_PROC,SelectBy, + 0, + MENU_ITEM, + MENU_STRING,"Cut", + MENU_NOTIFY_PROC,EditCut, + 0, + MENU_ITEM, + MENU_STRING,"Copy", + MENU_NOTIFY_PROC,EditCopy, + 0, + MENU_ITEM, + MENU_STRING,"Paste", + MENU_NOTIFY_PROC,EditPaste, + 0, + MENU_ITEM, + MENU_STRING,"Group ", + MENU_NOTIFY_PROC,Group, + 0, + MENU_ITEM, + MENU_STRING,"Ungroup ", + MENU_NOTIFY_PROC,Ungroup, + 0, + MENU_ITEM, + MENU_STRING,"Compress", + MENU_NOTIFY_PROC,CompressAlign, + 0, + MENU_ITEM, + MENU_STRING,"Reverse", + MENU_NOTIFY_PROC,RevSeqs, + 0, + MENU_ITEM, + MENU_STRING,"Change case", + MENU_NOTIFY_PROC,CaseChange, + 0, + 0); + } + else if(strcmp(thismenu->label,"DNA/RNA")==0) + { + + xv_set(thismenu->X, + MENU_ITEM, + MENU_STRING,"Complement", + MENU_NOTIFY_PROC,CompSeqs, + 0, + 0); + } + + /* +* For all menu items of the current menu... 
+*/ + for(curitem = 0;curitemnumitems;curitem++) + { + thisitem = &(thismenu->item[curitem]); + xv_set(thismenu->X, + MENU_ITEM, + MENU_STRING,thismenu->item[curitem].label, + MENU_NOTIFY_PROC,HandleMenus, + 0, + 0); + } + /* +* Make the menu "pin"able +*/ + xv_set(thismenu->X, + MENU_GEN_PIN_WINDOW,frame,thismenu->label, + 0); + } + xv_set(menu[0].X, + MENU_ITEM, + MENU_STRING,"Quit", + MENU_NOTIFY_PROC,QuitGDE, + 0, + 0); + return; +} + + +/* +MakeNAADisplay(): + Set up the generic display rectangle to be a DNA/RNA display. + +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + +MakeNAADisplay() +{ + extern Panel menubar; + extern Canvas EditNameCan; + extern Frame frame; + extern Canvas EditCan; + extern NA_Alignment *DataSet; + extern Xv_singlecolor Default_Colors[]; + Scrollbar hscroll,vscroll; + + GC gc; + Cms colmap; + XGCValues gcv; + Display *dpy; + Xv_font font; + int j,fnt_siz,fnt_style,depth; + + extern unsigned char *greys[]; + extern Pixmap grey_pm[]; + /* +* The window will be scrollable in both X and Y +*/ + xv_set(menubar,WIN_FIT_HEIGHT,0,0); + /* +* set up a window for the organism names on the left side of +* the screen. 
+*/ + + + EditNameCan = xv_create(frame,CANVAS, + WIN_BELOW,menubar, + WIN_WIDTH,150, + CANVAS_AUTO_EXPAND,TRUE, + CANVAS_AUTO_SHRINK,TRUE, + CANVAS_RETAINED,FALSE, + CANVAS_X_PAINT_WINDOW,TRUE, + OPENWIN_ADJUST_FOR_HORIZONTAL_SCROLLBAR,TRUE, + CANVAS_AUTO_CLEAR,FALSE, + CANVAS_REPAINT_PROC,DummyRepaint, + CANVAS_MIN_PAINT_WIDTH,150, +#ifndef SGI + WIN_INHERIT_COLORS,TRUE, +#endif + 0); + + (void)xv_set(canvas_paint_window(EditNameCan), + WIN_EVENT_PROC,NANameEvents, + WIN_CONSUME_EVENTS, + WIN_MOUSE_BUTTONS, + /* + LOC_DRAG, +*/ + LOC_WINENTER, + WIN_ASCII_EVENTS, + WIN_META_EVENTS, + 0, + 0); + + + /* +* Set up a window to hold the NA sequences. +*/ + + EditCan=xv_create(frame,CANVAS, + WIN_RIGHT_OF,EditNameCan, + CANVAS_AUTO_SHRINK,TRUE, + CANVAS_AUTO_EXPAND,TRUE, +/* + CANVAS_CMS_REPAINT,TRUE, +*/ + CANVAS_X_PAINT_WINDOW,TRUE, + CANVAS_AUTO_CLEAR,FALSE, + CANVAS_RETAINED,FALSE, + CANVAS_MIN_PAINT_WIDTH,150, + OPENWIN_SPLIT, + OPENWIN_SPLIT_INIT_PROC,InitEditSplit, + OPENWIN_SPLIT_DESTROY_PROC,DestroySplit, + NULL, + WIN_INHERIT_COLORS,FALSE, + WIN_BELOW,menubar, + CANVAS_REPAINT_PROC,RepaintNACan, + 0); + +/* +* This causes resize events to occur even if the screen shrinks +* in size. 
+*/ + xv_set(canvas_paint_window(EditCan), + WIN_BIT_GRAVITY,ForgetGravity, + 0); + + + hscroll = xv_create(EditCan,SCROLLBAR, + SCROLLBAR_DIRECTION,SCROLLBAR_HORIZONTAL, + SCROLLBAR_SPLITTABLE,TRUE, + SCROLLBAR_OVERSCROLL,0, + 0); + + vscroll = xv_create(EditCan,SCROLLBAR, + SCROLLBAR_DIRECTION,SCROLLBAR_VERTICAL, + SCROLLBAR_SPLITTABLE,FALSE, + SCROLLBAR_OVERSCROLL,0, + 0); + + notify_interpose_event_func( + xv_get(hscroll,SCROLLBAR_NOTIFY_CLIENT), + EditCanScroll,NOTIFY_SAFE); + + dpy = (Display *)xv_get(EditNameCan, XV_DISPLAY); + + gc = DefaultGC(dpy,DefaultScreen(dpy)); + depth = xv_get(frame,WIN_DEPTH); + if(depth>3) + { + colmap = (Cms)xv_find(frame,CMS, + CMS_NAME,"GDE Palette", + XV_AUTO_CREATE,FALSE, + 0); + + + if(colmap == (Cms) NULL) + colmap = (Cms)xv_create((Cms)NULL,CMS, + CMS_TYPE,XV_STATIC_CMS, + CMS_SIZE,16, + CMS_COLORS,Default_Colors, +#ifdef SGI /* a hack to try and keep the frame colored in split canvas */ + CMS_FRAME_CMS,TRUE, +#endif + + 0); + + xv_set(EditCan, + WIN_CMS_NAME,"GDE Palette", + WIN_CMS, colmap, + WIN_FOREGROUND_COLOR,8, + WIN_BACKGROUND_COLOR,15, +#ifndef SGI + WIN_INHERIT_COLORS,FALSE, +#endif + 0); + } + + (void)xv_set(canvas_paint_window(EditCan), + WIN_EVENT_PROC,NAEvents, + WIN_CONSUME_EVENTS, + WIN_MOUSE_BUTTONS, + LOC_WINENTER, + WIN_ASCII_EVENTS, + WIN_META_EVENTS, + 0, + 0); + + font = (Xv_font)xv_get(frame,XV_FONT); + fnt_siz = (int)xv_get(font,FONT_SIZE); + fnt_style = (int)xv_get(font,FONT_STYLE); + font = (Xv_font)xv_find(frame,FONT, + FONT_FAMILY,FONT_FAMILY_DEFAULT_FIXEDWIDTH, + FONT_STYLE,fnt_style, + FONT_SIZE,fnt_siz, + 0); + + xv_set(frame,XV_FONT,font,0); + + gcv.font = (Font)xv_get(font,XV_XID); + + if(gcv.font != (Font)NULL) + XChangeGC(dpy,gc,GCFont,&gcv); + + for(j=0;j<16;j++) + { + grey_pm[j] = XCreatePixmapFromBitmapData(dpy, + DefaultRootWindow(dpy), greys[j], grey_width, + grey_height, 1, 0, 1); + } + + return; +} + + +/* +SetNADData() +Fills in the display data structure for an initial monochrome 
display. +All settings are simple defaults, and will need to be modified externally +if otherwise. This routine passes back a new NA_DisplayData structure, which +can be destroyed after use with a call to cfree(). + +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + +NA_DisplayData *SetNADData(aln,Can,NamCan) +NA_Alignment *aln; +Canvas Can,NamCan; +{ + NA_DisplayData *ddata; + Scrollbar hscroll,vscroll; + Xv_window view; + int j; + + extern Frame frame; + extern int Default_Color_LKUP[]; + extern DisplayAttr; + + int reset_all; + + if(aln->na_ddata == NULL) + { + ddata = (NA_DisplayData*)Calloc(1,sizeof(NA_DisplayData)); + reset_all = TRUE; + } + else + { + ddata =(NA_DisplayData*)(aln->na_ddata); + reset_all = FALSE; + } + + ddata -> font = (Xv_font)xv_get(frame,XV_FONT); + ddata -> font_dx = xv_get(ddata->font,FONT_DEFAULT_CHAR_WIDTH); + ddata -> font_dy = xv_get(ddata->font,FONT_DEFAULT_CHAR_HEIGHT); + if(reset_all) + { + ddata -> wid = 0; + ddata -> ht = 0; + ddata -> position = 0; + ddata -> depth = xv_get(frame,WIN_DEPTH); + if(ddata -> depth >= 4) + { + ddata -> color_type = COLOR_LOOKUP; + ddata -> num_colors = 16; + ddata -> white = 15; + ddata -> black = 8; + } + else + { + ddata -> color_type = COLOR_MONO;; + ddata -> num_colors = 2; + ddata -> white = 0; + ddata -> black = 1; + } + ddata -> jtsize = 0; + ddata -> aln = aln; + ddata -> seq_x = xv_get(Can,XV_XID); + ddata -> nam_x = xv_get(NamCan, XV_XID); + ddata -> use_repeat = TRUE; + } + ddata -> seq_can = Can; + ddata -> nam_can = NamCan; + + for(j=0;jfont_dx,0); + xv_set(vscroll,SCROLLBAR_PIXELS_PER_UNIT, + ddata->font_dy,0); + } + + /* +* Set the length and height of the alignment +*/ + 
xv_set(hscroll,SCROLLBAR_OBJECT_LENGTH,aln->maxlen,0); + xv_set(vscroll,SCROLLBAR_OBJECT_LENGTH,aln->numelements,0); + + scrollbar_paint(vscroll); + scrollbar_paint(hscroll); + } + + if(aln->numelements !=0) + { + xv_set(Can, + WIN_HEIGHT,MIN(MAX_STARTUP_CANVAS_HEIGHT, + ddata->font_dy * (aln->numelements+2)), + CANVAS_RETAINED,FALSE, + 0); + + xv_set(NamCan, + WIN_HEIGHT,MIN(MAX_STARTUP_CANVAS_HEIGHT, + ddata->font_dy * (aln->numelements+2)), + 0); + } + + (void)window_fit(NamCan); + (void)window_fit(Can); + (void)window_fit(frame); + + return (ddata); +} + + +DummyRepaint(can,win,dpy,xwin,area) +Canvas can; +Xv_window win; +Display *dpy; +Window xwin; +Xv_xrectlist *area; +{ + DrawNANames(dpy,xwin); + return XV_OK; +} + +DrawNANames(dpy,xwin) +Display *dpy; +Window xwin; +{ + extern NA_Alignment *DataSet; + extern Canvas EditCan,EditNameCan; + NA_DisplayData *NAdd; + NA_Alignment *aln; + NA_Sequence *element; + int maxseq,minseq,maxnoseq,i,j; + unsigned long *pixels; + char buffer[GBUFSIZ]; + int scrn = DefaultScreen(dpy); + GC gc; + + aln = DataSet; + if(DataSet == NULL) + return XV_OK; + NAdd = (NA_DisplayData*)(DataSet)->na_ddata; + gc = DefaultGC(dpy,DefaultScreen(dpy)); + + pixels = (unsigned long*)xv_get(EditCan,WIN_X_COLOR_INDICES); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + XSetForeground(dpy,gc,BlackPixel(dpy,scrn)); + + minseq = NAdd->top_seq; + maxseq = minseq + NAdd->ht; + maxseq = MIN(maxseq+1,aln->numelements); + + for(j=minseq;jelement[j]); + if(element->groupid != 0) + sprintf(buffer,"%d %s ", + element->groupid,element->short_name); + else + sprintf(buffer,"%s ", + element->short_name); + + if(aln->element[j].selected) + { + XSetForeground(dpy,gc,WhitePixel(dpy,scrn)); + XSetBackground(dpy,gc,BlackPixel(dpy,scrn)); + } + + XDrawImageString(dpy,xwin,gc,5, + NAdd->font_dy*(j-minseq+1),buffer,40); + + if(aln->element[j].selected) + { + XSetForeground(dpy,gc,BlackPixel(dpy,scrn)); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + 
maxnoseq = xv_get(EditNameCan,XV_HEIGHT)/NAdd->font_dy; + for(j=maxseq;jfont_dy*(j-minseq+1), + " ",40); + return XV_OK; +} + + +RepaintNACan(can,win,dpy,xwin,area) +Canvas can; +Xv_window win; +Display *dpy; +Window xwin; +Xv_xrectlist *area; +{ + extern NA_Alignment *DataSet; + extern Frame frame; /* rtm 18.III.98 */ + extern Canvas EditCan,EditNameCan; + extern int SCALE; + Scrollbar hscroll,vscroll; + NA_DisplayData *NAdd; + Xv_window view; + int maxseq,minseq,i,j,lpos,rpos,nviews; + int start,end,top,bottom; + GC gc; + + int scrn = DefaultScreen(dpy); + gc = DefaultGC(dpy,scrn); + + if(DataSet == (NA_Alignment *) NULL || can == (Canvas) NULL) + return XV_OK; + + NAdd = (NA_DisplayData*)(DataSet)->na_ddata; + if(NAdd == NULL) + return XV_OK; + for(;xv_get(can,CANVAS_RETAINED)==TRUE;) + xv_set(can,CANVAS_RETAINED,FALSE,0); + + XSetForeground(dpy,gc,BlackPixel(dpy,scrn)); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + + nviews = (int)xv_get(EditCan,OPENWIN_NVIEWS); + for(j=0;j numelements,0); + minseq = (int)xv_get(vscroll,SCROLLBAR_VIEW_START); + maxseq = (int)xv_get(vscroll,SCROLLBAR_VIEW_LENGTH); + + if( NAdd->top_seq != minseq || NAdd->ht != maxseq) + { + NAdd->top_seq = minseq; + NAdd->ht = maxseq; + DrawNANames(dpy,xv_get(canvas_paint_window(EditNameCan), + XV_XID)); + } + + maxseq += minseq; + maxseq = MIN(maxseq+1,DataSet->numelements); + + top =(int)xv_get(vscroll,SCROLLBAR_VIEW_START); + bottom = top +(int)xv_get(vscroll,SCROLLBAR_VIEW_LENGTH); + for(;bottom-top>MAX_NA_DISPLAY_HEIGHT;) + { + top =(int)xv_get(vscroll,SCROLLBAR_VIEW_START); + bottom= top +(int)xv_get(vscroll,SCROLLBAR_VIEW_LENGTH); + } + } + + if(hscroll) + { + xv_set(hscroll,SCROLLBAR_OBJECT_LENGTH, (DataSet)->maxlen,0); + start =(int)xv_get(hscroll,SCROLLBAR_VIEW_START); + end = start +(int)xv_get(hscroll,SCROLLBAR_VIEW_LENGTH); + for(;end-start>MAX_NA_DISPLAY_WIDTH;) + { + start =(int)xv_get(hscroll,SCROLLBAR_VIEW_START); + end = start +(int)xv_get(hscroll,SCROLLBAR_VIEW_LENGTH); + } 
+ } + + + for(i=0;(icount) && hscroll && vscroll;i++) + { + lpos = start+((int)area->rect_array[i].x/NAdd->font_dx)*SCALE; + rpos = (((int)area->rect_array[i].width/NAdd->font_dx)*SCALE + + lpos); + +/* + rpos = MIN(NAdd->aln->maxlen,rpos + 1); +*/ + rpos += 1; + + minseq = top+(int)area->rect_array[i].y/NAdd->font_dy; + maxseq = (int)area->rect_array[i].height/NAdd->font_dy+minseq; + maxseq = MIN(DataSet->numelements-1,maxseq+1); + + /* + for(;rpos-lpos>MAX_NA_DISPLAY_WIDTH;) + { + lpos =(int)xv_get(hscroll,SCROLLBAR_VIEW_START)/SCALE; + rpos = lpos+(int)xv_get(hscroll,SCROLLBAR_VIEW_LENGTH)*SCALE; + } +*/ + + for(j=minseq;j<=maxseq;j++) + DrawNAColor(can,NAdd,xwin,start,top,j,lpos,rpos,dpy,gc, + NAdd->color_type,FALSE); + } + SetNACursor(NAdd,can,win,xwin,dpy,gc); + (void)window_fit(EditCan); + (void)window_fit(EditNameCan); + (void)window_fit(frame); + return; +} + + +SetNACursor(NAdd,can,win,xwin,dpy,gc) +NA_DisplayData *NAdd; +Canvas can; +Xv_window win; +Window xwin; +Display *dpy; +GC gc; +{ + extern int repeat_cnt,EditMode,SCALE; + extern Panel_item left_foot,right_foot; + extern Frame frame; + extern NA_Alignment *DataSet; + + Scrollbar hscroll,vscroll; + NA_Sequence *this_elem; + + int xx,yy,j,dir=0,SubSel = FALSE; + Xv_window view; + + char buffer[GBUFSIZ]; + int x = ((NA_DisplayData*)(DataSet)-> + na_ddata)->cursor_x; + int y = ((NA_DisplayData*)(DataSet)-> + na_ddata)->cursor_y; + int position = ((NA_DisplayData*)(DataSet)-> + na_ddata)->position; + + this_elem = &(DataSet->element[y]); + dir = OrigDir(this_elem); + + if(repeat_cnt > 0) + sprintf(buffer,"[%s] pos:%d col:%d %s %s (repeat:%d)", + EditMode==0?"Insert": "Check", position,((NA_DisplayData*)( + DataSet)->na_ddata)->cursor_x+1+DataSet->rel_offset,(DataSet)-> + element[y].short_name,(dir == 1)?" -->": + (dir == -1)?" 
<--":" ",MAX(repeat_cnt,1)); + else + sprintf(buffer,"[%s] pos:%d col:%d %s %s", + EditMode==0?"Insert": "Check",position,((NA_DisplayData*)( + DataSet)->na_ddata)->cursor_x+1+DataSet->rel_offset,(DataSet)-> + element[y].short_name,(dir == 1)?" -->": + (dir == -1)?" <--":" "); + + xv_set(frame,FRAME_LEFT_FOOTER,buffer,0); + xv_set(left_foot,PANEL_LABEL_STRING,buffer,0); + + for(j=0;jnumelements;j++) + if(DataSet->element[j].subselected) + SubSel = TRUE; + + for(j=0;jna_ddata)->cursor_x; + y = ((NA_DisplayData*)(DataSet)->na_ddata)->cursor_y; + + for(j=0;jcolor_type, + FALSE); + } + return; +} + + + +ResizeNACan(canvas,wd,ht) +Canvas canvas; +int wd,ht; +{ + extern NA_Alignment *DataSet; /* rtm 18.III.98 */ + int dy; + if(DataSet == NULL) + return(XV_OK); + if(DataSet->na_ddata == NULL) + return(XV_OK); + + dy = (int)((NA_DisplayData*)(DataSet->na_ddata))->font_dy; + if(ht > dy * (DataSet->numelements+2)) + { + xv_set(canvas,XV_HEIGHT,dy * (DataSet->numelements+2),0); + } + return(XV_OK); +} + +QuitGDE() +{ + extern Frame frame; +if( notice_prompt(frame,NULL,NOTICE_MESSAGE_STRINGS, + "Are you sure you want to Quit?",NULL, + NOTICE_BUTTON,"Confirm",1, + NOTICE_BUTTON,"Cancel",2, + 0) == 1) + { + xv_destroy_safe(frame); + exit(0); + } + else + return(XV_OK); +} diff --git a/CORE/BasicDisplay.o b/CORE/BasicDisplay.o new file mode 100644 index 0000000..5ac3fee Binary files /dev/null and b/CORE/BasicDisplay.o differ diff --git a/CORE/BuiltIn.c b/CORE/BuiltIn.c new file mode 100755 index 0000000..6fcc470 --- /dev/null +++ b/CORE/BuiltIn.c @@ -0,0 +1,2283 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. 
+ +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +* Global comments window +*/ + +Textsw comments_tsw; +NA_Sequence *this_elem; + + + +Open(mnu,mnuitm) +Menu mnu; +Menu_item mnuitm; +{ + extern Frame frame,pframe; + extern Panel popup; + /* + extern char FileName[]; + + if(pframe) + xv_destroy_safe(pframe); + + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + FRAME_LABEL,"Open...", + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + + popup = xv_find(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); + popup = xv_get(pframe,FRAME_CMD_PANEL); + popup = xv_get(pframe,FRAME_CMD_PANEL); + + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,OpenFileName, + 0); + + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Cancel", + PANEL_NOTIFY_PROC,DONT, + 0); + + (void)xv_set(popup, + PANEL_LAYOUT,PANEL_VERTICAL, + 0); + + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,20, + PANEL_LABEL_STRING,"File name?", + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_VALUE,FileName, + PANEL_NOTIFY_PROC,SetFilename, + 0); + + window_fit(popup); + window_fit(pframe); + + (void)xv_set(pframe,XV_SHOW,TRUE,0); +*/ + (void)load_file(frame,300,150,NULL); + return(XV_OK); +} + + +SaveAs(mnu,mnuitm) +Menu mnu; +Menu_item mnuitm; +{ + extern Frame frame,pframe; + extern Panel popup; + extern char FileName[]; + extern NA_Alignment *DataSet; + NA_Alignment *aln; + + if(pframe) + xv_destroy_safe(pframe); + + if(DataSet == NULL) + return(XV_OK); + + aln = (NA_Alignment*)DataSet; + + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + FRAME_LABEL,"Save alignment as...", + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + +/* + popup = 
xv_find(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); +*/ + + popup = xv_get(pframe,FRAME_CMD_PANEL); + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,SaveAsFileName, + 0); + + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Cancel", + PANEL_NOTIFY_PROC,DONT, + 0); + + (void)xv_set(popup, + PANEL_LAYOUT,PANEL_VERTICAL, + 0); + + (void)xv_create(popup,PANEL_CHOICE, + PANEL_LAYOUT,PANEL_HORIZONTAL, + PANEL_NOTIFY_PROC,SaveFormat, + PANEL_LABEL_STRING,"Format:", + PANEL_CHOICE_STRING,0,"Genbank", + PANEL_CHOICE_STRING,1,"Flat file", + PANEL_CHOICE_STRING,2,"GDE", + PANEL_VALUE,aln->format == GENBANK?0: + (aln->format == GDE)?2:1, + 0); + + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,20, + PANEL_LABEL_STRING,"File name?", + PANEL_NOTIFY_PROC,SetFilename, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_VALUE,FileName, + 0); + + window_fit(popup); + window_fit(pframe); + + (void)xv_set(pframe,XV_SHOW,TRUE,0); + + return(XV_OK); +} + + +SaveFormat(item,event) +Panel_item item; +Event *event; +{ + extern NA_Alignment *DataSet; + NA_Alignment *aln; + int format; + + if(DataSet == NULL) + return(XV_OK); + + format = xv_get(item,PANEL_VALUE); + DataSet->format = (format == 0)? + GENBANK:(format == 1)? 
NA_FLAT:GDE; + return(XV_OK); +} + + +SaveAsFileName(item,event) +Panel_item item; +Event *event; +{ + extern NA_Alignment *DataSet; + extern char FileName[]; /* rtm 18.III.98 */ + + char *file; + int j; + + file = FileName; + + DONT(); + if(DataSet == NULL) + return(XV_OK); + + switch( ((NA_Alignment*)DataSet)->format ) + { + case GENBANK: + WriteGen(DataSet,file,ALL,FALSE); + break; + case NA_FLAT: + WriteNA_Flat(DataSet,file,ALL,FALSE); + break; + case GDE: + WriteGDE(DataSet,file,ALL,FALSE); + break; + default: + fprintf(stderr,"Unknown file type for write\n"); + break; + } + return(XV_OK); +} + + + +act_on_it_lf(filename,data) +char filename[]; +Xv_opaque data; +{ + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + Xv_window view; + Scrollbar hscroll,vscroll; + int j; + + if(filename == NULL) + return(XV_OK); + + LoadData(filename); + + for(j=0;jna_ddata = (char*)SetNADData + ((NA_Alignment*)DataSet,EditCan,EditNameCan); + return(XV_OK); +} + +act_on_it_sf(){ +} + +OpenFileName(item,event) +Panel_item item; +Event *event; +{ + extern char FileName[]; /* rtm 18.III.98 */ + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + Xv_window view; + + char *file; + int j; + NA_DisplayData *ddata; + Scrollbar hscroll,vscroll; + + /* +* major kluge in progress, if event is NULL, then item is +* really a pointer to the name of a file to be read in +*/ + + if(event != NULL) + file = FileName; + else + file = (char*)item; + + DONT(); + LoadData(file); + + for(j=0;jna_ddata = (char*)SetNADData + ((NA_Alignment*)DataSet,EditCan,EditNameCan); + return(XV_OK); +} + + +/* +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. 
+*/ + + +ChangeDisplay(item,event) +Panel_item item; +Event *event; +{ + extern Canvas EditCan; + extern Frame pframe,frame; + extern Panel popup; + extern NA_Alignment *DataSet; + extern EditMode,EditDir,DisplayAttr; + extern int SCALE; + NA_DisplayData *na_dd; + int color,font_size; + GC gc; + Display *dpy; + Xv_font font; + + if(DataSet == NULL) + { + Warning("Must load a dataset first"); + return(XV_OK); + } + + na_dd = (NA_DisplayData*)(((NA_Alignment*)DataSet)->na_ddata); + if(na_dd == NULL) + { + Warning("Must load a dataset first"); + return(XV_OK); + } + switch(na_dd->color_type) + { + case COLOR_MONO: + color = 0; + break; + case COLOR_LOOKUP: + color = 1; + break; + case COLOR_ALN_MASK: + color = 2; + break; + case COLOR_SEQ_MASK: + color = 3; + break; + case COLOR_STRAND: + color = 4; + break; + default: + break; + } + + xv_destroy_safe(pframe); + + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + FRAME_LABEL,"Properties", + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + +/* + popup = xv_find(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); +*/ + + popup = xv_get(pframe,FRAME_CMD_PANEL); + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,ChDisplayDone, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL,0); + + (void)xv_create(popup,PANEL_CHOICE_STACK, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_LABEL_STRING,"Color type", + PANEL_NOTIFY_PROC,ChColor, + PANEL_CHOICE_STRING,0,"Monochrome", + PANEL_CHOICE_STRING,1,"Character->color", + PANEL_CHOICE_STRING,2,"Alignment color mask", + PANEL_CHOICE_STRING,3,"Sequence color mask", + PANEL_CHOICE_STRING,4,"Strand->color", + PANEL_CHOOSE_NONE,FALSE, + PANEL_VALUE,color, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_HORIZONTAL,0); + dpy = (Display *)xv_get(EditCan, XV_DISPLAY); + gc = DefaultGC(dpy,DefaultScreen(dpy)); + + font = xv_get(frame,XV_FONT); + switch(xv_get(font,FONT_SCALE)) + { + 
case WIN_SCALE_EXTRALARGE: + font_size = 0; + break; + case WIN_SCALE_LARGE: + font_size = 1; + break; + case WIN_SCALE_MEDIUM: + font_size = 2; + break; + case WIN_SCALE_SMALL: + font_size = 3; + break; + default: + font_size = 2; + break; + } + + (void)xv_create(popup,PANEL_CHOICE_STACK, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_NOTIFY_PROC,ChFontSize, + PANEL_LABEL_STRING,"Font Size", + PANEL_CHOICE_STRING,0,"Extra large", + PANEL_CHOICE_STRING,1,"Large", + PANEL_CHOICE_STRING,2,"Medium", + PANEL_CHOICE_STRING,3,"Small", + PANEL_CHOOSE_NONE,FALSE, + PANEL_VALUE,font_size, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL,0); + + (void)xv_create(popup,PANEL_CHOICE, + PANEL_LAYOUT,PANEL_HORIZONTAL, + PANEL_NOTIFY_PROC,ChEditMode, + PANEL_LABEL_STRING,"Editing mode", + PANEL_CHOICE_STRING,0,"Insert", + PANEL_CHOICE_STRING,1,"Check", + PANEL_VALUE,EditMode, + 0); + + (void)xv_create(popup,PANEL_CHECK_BOX, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_CHOICE_STRINGS, + "Inverted","Lock vertical scroll","Key clicks","Message panel", + 0, + PANEL_NOTIFY_PROC,ChDisAttr, + PANEL_VALUE,DisplayAttr, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_HORIZONTAL,0); + + (void)xv_create(popup,PANEL_CHOICE, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_NOTIFY_PROC,ChEditDir, + PANEL_LABEL_STRING,"Insertion", + PANEL_CHOICE_STRING,0,"Right of cursor", + PANEL_CHOICE_STRING,1,"Left of cursor", + PANEL_VALUE,EditDir, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL,0); + (void)xv_create(popup,PANEL_SLIDER, + PANEL_LABEL_STRING,"Scale:", + PANEL_MIN_VALUE,1, + PANEL_MAX_VALUE,20, + PANEL_VALUE,SCALE, + PANEL_NOTIFY_PROC,SetScale, + 0); + + + window_fit(popup); + window_fit(pframe); + + (void)xv_set(pframe,XV_SHOW,TRUE,0); + + return(XV_OK); +} + +SetScale(item,event) +Panel_item item; +Event *event; +{ + extern int SCALE; + SCALE = xv_get(item,PANEL_VALUE); + return (XV_OK); +} + + +ChColor(item,event) +Panel_item item; +Event *event; +{ + int i,j; + NA_DisplayData *ddata; + extern 
NA_Alignment *DataSet; + + if(DataSet == NULL) + return(XV_OK); + ddata = (NA_DisplayData*)((NA_Alignment*)DataSet) ->na_ddata; + + switch(xv_get(item,PANEL_VALUE)) + { + case 0: + ddata->color_type = COLOR_MONO; + break; + case 1: + ddata->color_type = COLOR_LOOKUP; + break; + case 2: + ddata->color_type = COLOR_ALN_MASK; + break; + case 3: + ddata->color_type = COLOR_SEQ_MASK; + break; + case 4: + ddata->color_type = COLOR_STRAND; + break; + default: + break; + } + return(XV_OK); +} + + +ChFontSize(item,event) +Panel_item item; +Event *event; +{ + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + int i,j,fnt_style; + GC gc; + Display *dpy; + XGCValues gcv; + Xv_font font; + extern Frame frame; + + font = xv_get(frame,XV_FONT); + fnt_style = (int)xv_get(font,FONT_STYLE); + + dpy = (Display *)xv_get(EditCan, XV_DISPLAY); + gc = DefaultGC(dpy,DefaultScreen(dpy)); + + switch(xv_get(item,PANEL_VALUE)) + { + case 0: + font = (Xv_font)xv_find(frame,FONT, + FONT_FAMILY,FONT_FAMILY_DEFAULT_FIXEDWIDTH, + FONT_STYLE,fnt_style, + FONT_SCALE,WIN_SCALE_EXTRALARGE, + 0); + break; + case 1: + font = (Xv_font)xv_find(frame,FONT, + FONT_FAMILY,FONT_FAMILY_DEFAULT_FIXEDWIDTH, + FONT_STYLE,fnt_style, + FONT_SCALE,WIN_SCALE_LARGE, + 0); + break; + case 2: + font = (Xv_font)xv_find(frame,FONT, + FONT_FAMILY,FONT_FAMILY_DEFAULT_FIXEDWIDTH, + FONT_STYLE,fnt_style, + FONT_SCALE,WIN_SCALE_MEDIUM, + 0); + break; + case 3: + font = (Xv_font)xv_find(frame,FONT, + FONT_FAMILY,FONT_FAMILY_DEFAULT_FIXEDWIDTH, + FONT_STYLE,fnt_style, + FONT_SCALE,WIN_SCALE_SMALL, + 0); + break; + default: + break; + } + (void)xv_set(frame,XV_FONT,font,0); + gcv.font = (Font)xv_get(font,XV_XID); + + if(gcv.font != (Font) NULL) + XChangeGC(dpy,gc,GCFont,&gcv); + + (void)SetNADData(DataSet,EditCan,EditNameCan); + return(XV_OK); +} + + +ChDisplayDone() +{ + extern Frame frame; + + DONT(); + + RepaintAll(FALSE); + return(XV_OK); +} + +SetProtection(item,event) +Panel_item item; +Event *event; +{ + 
int j; + unsigned int current_prot; + NA_Alignment *aln; + int mismatch_prot = FALSE,num_selected = 0; + + extern Frame pframe,frame; + extern Panel popup; + extern NA_Alignment *DataSet; + + + if(DataSet == NULL) + return(XV_OK); + + aln = (NA_Alignment*)DataSet; + if(aln->numelements == 0) + return(XV_OK); + + for(j=0;jnumelements;j++) + if(aln->element[j].selected) + { + current_prot = aln->element[j].protect; + num_selected++; + } + + for(j=0;jnumelements;j++) + if(aln->element[j].selected && aln->element[j].protect + != current_prot) + { + current_prot=0; + mismatch_prot = TRUE; + } + + xv_destroy_safe(pframe); + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + XV_X,300, + XV_Y,150, + XV_SHOW,FALSE, + 0); + + +/* + popup = xv_find(pframe,PANEL, + PANEL_LAYOUT,PANEL_VERTICAL, + 0); +*/ + popup = xv_get(pframe,FRAME_CMD_PANEL); + + (void)xv_create(popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Done", + PANEL_NOTIFY_PROC,DONT, + 0); + if(mismatch_prot) + (void)xv_create(popup,PANEL_MESSAGE, + PANEL_LABEL_STRING, + "Warning: Current protections differ",NULL); + if(num_selected == 0) + (void)xv_create(popup,PANEL_MESSAGE, + PANEL_LABEL_STRING,"Warning: No sequences selected", + NULL); + + + (void)xv_create(popup,PANEL_CHECK_BOX, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_CHOOSE_ONE,FALSE, + PANEL_LABEL_STRING,"Allowed modifications:", + PANEL_CHOICE_STRINGS, + "unambiguous characters", + "ambiguous characters", + "alignment gaps", + "translations", + NULL, + PANEL_NOTIFY_PROC,Prot, + PANEL_VALUE,current_prot, + 0); + window_fit(popup); + window_fit(pframe); + (void)xv_set(pframe,XV_SHOW,TRUE, + FRAME_LABEL,"Set Protections",0); + + return(XV_OK); +} + + +Prot(item,event) +Panel_item item; +Event *event; +{ + int j; + unsigned int current_prot; + NA_Alignment *aln; + extern NA_Alignment *DataSet; + + + if(DataSet == NULL) + return(XV_OK); + + 
aln = (NA_Alignment*)DataSet; + if(aln->numelements == 0) + return(XV_OK); + + current_prot = xv_get(item,PANEL_VALUE); + for(j=0;jnumelements;j++) + if(aln->element[j].selected) + aln->element[j].protect = current_prot; + + return(XV_OK); +} + + +SelectAll(item,event) +Panel_item item; +Event *event; +{ + int i; + extern NA_Alignment *DataSet; + extern Canvas EditNameCan; + Display *dpy; + NA_Alignment *aln = (NA_Alignment*)DataSet; + + if(DataSet == NULL) + return(XV_OK); + + for(i=0;inumelements;i++) + aln->element[i].selected = TRUE; + + dpy = (Display*)xv_get(EditNameCan, XV_DISPLAY); + DrawNANames(dpy,xv_get(canvas_paint_window(EditNameCan),XV_XID)); + return(XV_OK); +} + +SelectBy(item,event) +Panel_item item; +Event *event; +{ + extern Panel popup; + int i; + extern NA_Alignment *DataSet; + extern Frame pframe,frame; + Display *dpy; + NA_Alignment *aln = (NA_Alignment*)DataSet; + xv_destroy_safe(pframe); + + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + FRAME_LABEL,"Select sequences by name", + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + +/* + popup = xv_find(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); +*/ + popup = xv_get(pframe,FRAME_CMD_PANEL); + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Done", + PANEL_NOTIFY_PROC,DONT, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL, + 0); + + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,20, + PANEL_LABEL_STRING,"Search for?", + PANEL_NOTIFY_PROC,SelectByName, + 0); + + window_fit(popup); + window_fit(pframe); + + (void)xv_set(pframe,XV_SHOW,TRUE,0); + + return(XV_OK); +} + +SelectByName(item,event) +Panel_item item; +Event *event; +{ + extern NA_Alignment *DataSet; + extern Canvas EditCan,EditNameCan; + char search[80]; + Display *dpy; + Xv_window view; + int i,lastselected; + + if(DataSet == NULL) + return(XV_OK); + + 
strncpy(search,(char*)(xv_get(item,PANEL_VALUE)),79); + + for(i=0;inumelements;i++) + if(Find(DataSet->element[i].short_name,search)) + { + DataSet->element[i].selected = TRUE; + lastselected = i; + } + + dpy = (Display*)xv_get(EditNameCan, XV_DISPLAY); + DrawNANames(dpy,xv_get(canvas_paint_window(EditNameCan),XV_XID)); + view = (Xv_window)xv_get(EditCan,OPENWIN_NTH_VIEW,0); + + OPENWIN_EACH_VIEW(EditCan,(view)) + JumpTo(view, 0,lastselected); + OPENWIN_END_EACH; + + (void)xv_set(item,PANEL_VALUE,"",0); + + return(XV_OK); + +} + + +Group(item,event) +Panel_item item; +Event *event; +{ + int j,old_groups = FALSE,result; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + extern Frame frame; + Display *dpy; + NA_Alignment *aln; + NA_Sequence *temp = NULL,*element; + + if(DataSet == NULL) + return(XV_OK); + aln = (NA_Alignment*)DataSet; + if(aln == NULL) + return(XV_OK); + element = aln->element; + + for(j=0;jnumelements;j++) + if((element[j].groupid !=0 ) && element[j].selected) + old_groups = TRUE; + + if(old_groups) + { + result = notice_prompt(frame,NULL, + NOTICE_MESSAGE_STRINGS, + "Groups already exist. 
Do you wish to", + "Merge these groups, create a new group,", + "or cancel?", + NULL, + NOTICE_BUTTON,"Merge groups",1, + NOTICE_BUTTON,"Create new group",2, + NOTICE_BUTTON,"Cancel",3, + 0); + switch(result) + { + case 3: + break; + case 2: + for(j=0;jnumelements;j++) + if(element[j].selected) + RemoveFromGroup(&(element[j])); + for(j=0;jnumelements;j++) + { + if(element[j].selected) + { + element[j].groupid = + aln->numgroups+1; + element[j].groupb = temp; + if(temp != NULL) + temp->groupf = + &(element[j]); + temp = &(element[j]); + } + } + if(temp != NULL) + temp->groupf = NULL; + if(temp != NULL) + if(temp->groupb !=NULL) + { + aln->numgroups++; + AdjustGroups(aln); + DrawNANames(xv_get(EditNameCan, + XV_DISPLAY),xv_get + (canvas_paint_window( + EditNameCan),XV_XID)); + } + break; + case 1: + temp = NULL; + for(j=0;jnumelements;j++) + { + if(element[j].selected) + { + if(temp != NULL) + MergeGroups(temp,&(element[j])); + temp = &(element[j]); + } + } + AdjustGroups(aln); + DrawNANames(xv_get(EditNameCan,XV_DISPLAY), + xv_get(canvas_paint_window(EditNameCan), + XV_XID)); + break; + } + } + else + { + temp = NULL; + for(j=0;jnumelements;j++) + { + if(element[j].selected) + { + element[j].groupid = aln->numgroups+1; + element[j].groupb = temp; + if(temp != NULL) + temp->groupf = &(element[j]); + temp = &(element[j]); + } + } + if(temp != NULL) + { + temp->groupf = NULL; + if(temp->groupb !=NULL) + { + aln->numgroups++; + DrawNANames(xv_get(EditNameCan, XV_DISPLAY), + xv_get(canvas_paint_window(EditNameCan), + XV_XID)); + } + } + } + return(XV_OK); +} + +RemoveFromGroup(element) +NA_Sequence *element; +{ + if(element == NULL) + return(XV_OK); + + if(element->groupb) + (NA_Sequence*)(element->groupb)->groupf = element->groupf; + + if(element->groupf) + (NA_Sequence*)(element->groupf)->groupb = element->groupb; + + element->groupf = NULL; + element->groupb = NULL; + element->groupid = 0; + + return(XV_OK); +} + + +AdjustGroups(aln) +NA_Alignment *aln; +{ + int 
i,j,c,done=FALSE; + +#ifdef HGL + return; +#else + for(c=0;c<200 && !done;c++) + { + for(j=1;j<=aln->numgroups;j++) + { + done = FALSE; + for(i=0;inumelements;i++) + { + if(aln->element[i].groupid == j) + { + if(aln->element[i].groupf!=NULL || + aln->element[i].groupb!=NULL) + done = TRUE; + else + aln->element[i].groupid = 0; + } + } + if(done == FALSE) + { + for(i=0;inumelements;i++) + if(aln->element[i].groupid == + aln->numgroups) + aln->element[i].groupid = j; + aln->numgroups--; + } + } + if(aln->numgroups == 0) + done = TRUE; + } + return; +#endif +} + +Ungroup(item,event) +Panel_item item; +Event *event; +{ + int j; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + Display *dpy = (Display *)xv_get(EditNameCan, XV_DISPLAY); + NA_Alignment *aln; + NA_Sequence *temp = NULL,*element; + + if(DataSet == NULL) + return(XV_OK); + aln = (NA_Alignment*)DataSet; + if(aln == NULL) + return(XV_OK); + element = aln->element; + + for(j=0;jnumelements;j++) + if(element[j].selected && element[j].groupid != 0) + RemoveFromGroup(&(element[j])); + AdjustGroups(aln); + DrawNANames(dpy,xv_get(canvas_paint_window(EditNameCan),XV_XID)); + return(XV_OK); +} + + +MergeGroups(el1,el2) +NA_Sequence *el1,*el2; +{ + int i,j,newid; + NA_Sequence *last,*first,*temp; + newid = MAX(el1->groupid,el2->groupid); + if( el1->groupid == el2->groupid && el1->groupid != 0) return; + last = el1; + + for(;last->groupf != NULL;) last = last->groupf; + first = el1; + + for(;first->groupb != NULL;) first = first->groupb; + for(;el2->groupf != NULL;) el2 = el2->groupf; + + el2->groupf = first; + first->groupb = el2; + + el2->groupid = newid; + for(;last != NULL; last=last->groupb) last->groupid = newid; + return; +} + + +New() +{ + extern NA_Alignment *DataSet; +} + + +ModAttr(mnu,mnuitm) +Menu mnu; +Menu_item mnuitm; +{ + extern NA_Alignment *DataSet; + extern Frame frame,pframe; + extern Panel popup; + extern int BlockInput; + /* extern Notify_value; rtm 18.III.98 */ + + int 
cur_type = 0,direction = 0,j,sel_count; + extern Textsw comments_tsw; + Textsw baggage_tsw; + char temp[80]; + NA_Alignment *aln = (NA_Alignment*)DataSet; + + if(DataSet == NULL) + return(XV_OK); + + if(aln->na_ddata == NULL) + return(XV_OK); + + for(j=0,sel_count = 0;jnumelements;j++) + if(aln->element[j].selected) + { + this_elem = &(aln->element[j]); + sel_count++; + } + + if(sel_count == 0) + { + Warning("Must select sequence(s) first"); + return(XV_OK); + } + if(this_elem->elementtype == RNA) cur_type = 0; + if(this_elem->elementtype == DNA) cur_type = 1; + if(this_elem->elementtype == TEXT) cur_type = 2; + if(this_elem->elementtype == MASK) cur_type = 3; + if(this_elem->elementtype == PROTEIN) cur_type = 4; + + xv_destroy_safe(pframe); + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + FRAME_LABEL,"Sequence Information", + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + +/* + popup = xv_find(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); +*/ + popup = xv_get(pframe,FRAME_CMD_PANEL); + + (void)xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,ModAttrDone, + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL, 0); + + if(sel_count == 1) + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,20, + PANEL_LABEL_STRING,"Short name", + PANEL_VALUE,this_elem->short_name, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + (void)xv_create(popup,PANEL_CHOICE_STACK, + PANEL_NOTIFY_PROC,ChAttrType, + PANEL_LABEL_STRING,"Type:", + PANEL_CHOICE_STRINGS, + "RNA", + "DNA", + "TEXT", + "MASK", + "PROTEIN", + 0, + PANEL_VALUE,cur_type, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + if(sel_count == 1) + (void)xv_set(popup,PANEL_LAYOUT,PANEL_HORIZONTAL, 0); + else + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL, 0); + + + 
(void)xv_create(popup,PANEL_CHOICE_STACK, + PANEL_NOTIFY_PROC,ChAttrType, + PANEL_LABEL_STRING,"Strand", + PANEL_CHOICE_STRINGS, + "Primary", + "Secondary", + "Undefined", + 0, + PANEL_VALUE,(this_elem->attr & IS_SECONDARY)?1: + (this_elem->attr & IS_PRIMARY)?0:2, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + if(sel_count == 1) + (void)xv_set(popup,PANEL_LAYOUT,PANEL_HORIZONTAL, 0); + else + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL, 0); + + + (void)xv_create(popup,PANEL_CHOICE_STACK, + PANEL_LAYOUT,PANEL_HORIZONTAL, + PANEL_NOTIFY_PROC,ChAttrType, + PANEL_LABEL_STRING,"Direction", + PANEL_CHOICE_STRINGS, + "5' to 3'", + "3' to 5'", + "Undefined", + 0, + PANEL_VALUE,(this_elem->attr & IS_3_TO_5)?1: + (this_elem->attr & IS_5_TO_3)?0:2, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL, 0); + + if(sel_count == 1) + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,40, + PANEL_LABEL_STRING,"Full name ", + PANEL_VALUE,this_elem->seq_name, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + if(sel_count == 1) + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,40, + PANEL_LABEL_STRING,"ID Number ", + PANEL_VALUE,this_elem->id, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + if(sel_count == 1) + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,40, + PANEL_LABEL_STRING,"Description", + PANEL_VALUE,this_elem->description, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + +#ifdef HGL + if(sel_count == 1) + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,40, + PANEL_LABEL_STRING,"Membrane ", + PANEL_VALUE,this_elem->membrane, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); +#endif + + if(sel_count == 1) + 
(void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,40, + PANEL_LABEL_STRING,"Author ", + PANEL_VALUE,this_elem->authority, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + +#ifdef HGL + if(sel_count == 1) + (void)xv_create(popup,PANEL_TEXT, + PANEL_VALUE_DISPLAY_LENGTH,40, + PANEL_LABEL_STRING,"Barcode ", + PANEL_VALUE,this_elem->barcode, + PANEL_NOTIFY_PROC,ChAttr, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); +#endif + + direction = OrigDir(this_elem); + if(sel_count == 1) + { +#ifdef HGL + sprintf(temp,"Created on %d/%d/%d %d:%d:%d (%s %s) %s", + this_elem->t_stamp.origin.mm, + this_elem->t_stamp.origin.dd, + this_elem->t_stamp.origin.yy, + this_elem->t_stamp.origin.hr, + this_elem->t_stamp.origin.mn, + this_elem->t_stamp.origin.sc, + (this_elem->attr & IS_ORIG_PRIMARY)?"Primary": + (this_elem->attr & IS_ORIG_SECONDARY)?"Secondary":"Strand ?", + (direction == 1)?"-->": + (direction == -1)?"<--":"<-?->", + this_elem->attr & IS_CIRCULAR? "Circular":""); +#else + sprintf(temp,"Created on %d/%d/%d %d:%d:%d (%s) %s", + this_elem->t_stamp.origin.mm, + this_elem->t_stamp.origin.dd, + this_elem->t_stamp.origin.yy, + this_elem->t_stamp.origin.hr, + this_elem->t_stamp.origin.mn, + this_elem->t_stamp.origin.sc, + (direction == 1)?"-->": + (direction == -1)?"<--":"<-?->", + this_elem->attr & IS_CIRCULAR? 
"Circular":""); +#endif + + xv_create(popup,PANEL_MESSAGE, + PANEL_LABEL_STRING,temp, + PANEL_ITEM_X_GAP,5, + PANEL_ITEM_Y_GAP,3, + 0); + + } + (void)xv_set(popup,PANEL_LAYOUT,PANEL_HORIZONTAL, 0); + + if(sel_count == 1) + (void)xv_create(popup,PANEL_MESSAGE, + PANEL_LABEL_STRING, + " Comments:", + 0); + + (void)xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL, 0); + if(sel_count == 1) + window_fit_height(popup); + else + window_fit(popup); + + if(sel_count == 1) + { + comments_tsw = xv_create(pframe,TEXTSW, + WIN_INHERIT_COLORS,TRUE, + WIN_BELOW,popup, + XV_X,0, + XV_HEIGHT,(this_elem->baggage)?90:180, + TEXTSW_CONTENTS,this_elem->comments? + this_elem->comments:"", + TEXTSW_READ_ONLY,FALSE, + 0); + + window_fit(comments_tsw); + if(this_elem->baggage) + { + baggage_tsw = xv_create(pframe,TEXTSW, + WIN_INHERIT_COLORS,TRUE, + WIN_BELOW,comments_tsw, + XV_X,0, XV_HEIGHT,90, + TEXTSW_CONTENTS,this_elem->baggage? + this_elem->baggage:"", + TEXTSW_READ_ONLY,TRUE, + 0); + window_fit(baggage_tsw); + } + window_fit(pframe); + + notify_interpose_destroy_func(comments_tsw,SaveComments); + } + window_fit(pframe); + + (void)xv_set(pframe,XV_SHOW,TRUE,0); + BlockInput = TRUE; + return(XV_OK); +} + + +Notify_value SaveComments(client,status) +Notify_client client; +Destroy_status status; +{ + int j,numselected = 0,lastselected = 0; + extern NA_Alignment *DataSet; + + for(j=0;jnumelements;j++) + if(DataSet->element[j].selected) + { + numselected ++; + lastselected = j; + } + + if(numselected == 1) + { + Cfree(DataSet->element[lastselected].comments); + + DataSet->element[lastselected].comments = + Calloc(xv_get(client,TEXTSW_LENGTH)+1,sizeof(char)); + + DataSet->element[lastselected].comments_len = + strlen(DataSet->element[lastselected].comments); + + DataSet->element[lastselected].comments_maxlen = + xv_get(client,TEXTSW_LENGTH); + + (void)xv_get(client,TEXTSW_CONTENTS,0, + DataSet->element[lastselected].comments, + xv_get(client,TEXTSW_LENGTH)); + + 
DataSet->element[lastselected].comments[ + xv_get(client,TEXTSW_LENGTH)] = '\0'; + + } + return(notify_next_destroy_func(client,status)); +} + + + +ChAttr(item,event) +Panel_item item; +Event *event; +{ + int j; + extern NA_Alignment *DataSet; + NA_Sequence *this_element; + NA_Alignment *aln; + Panel_setting ps; + + if(DataSet == NULL) + return; + + aln = (NA_Alignment*)DataSet; + + for(j=0;jnumelements;j++) + if(aln->element[j].selected) + this_element = &(aln->element[j]); + + ps = panel_text_notify(item,event); + + if(Find(xv_get(item,PANEL_LABEL_STRING),"Short name")) + { + strncpy(this_element->short_name,(char *) xv_get(item,PANEL_VALUE),31); + for(j=0;jshort_name);j++) + if(this_element->short_name[j] == ' ') + this_element->short_name[j] = '_'; + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"Full name ")) + { + strncpy(this_element->seq_name,(char *) xv_get(item,PANEL_VALUE),79); + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"Description")) + { + strncpy(this_element->description,(char *) xv_get(item,PANEL_VALUE),79); + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"Author ")) + { + strncpy(this_element->authority,(char *) xv_get(item,PANEL_VALUE),79); + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"ID Number ")) + { + strncpy(this_element->id,(char *) xv_get(item,PANEL_VALUE),79); + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"Membrane ")) + { + strncpy(this_element->membrane,(char *) xv_get(item,PANEL_VALUE),79); + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"Contig ")) + { + strncpy(this_element->contig,(char *) xv_get(item,PANEL_VALUE),79); + } + + else if(Find(xv_get(item,PANEL_LABEL_STRING),"Barcode ")) + { + strncpy(this_element->barcode,(char *) xv_get(item,PANEL_VALUE),79); + } + + + return(ps); +} + +ModAttrDone() +{ + FILE *file; + extern Textsw comments_tsw; + int j,maxlen = 20,numselected = 0; + char c,*tempstring; + extern NA_Alignment *DataSet; + + for(j=0;jnumelements;j++) + 
if(DataSet->element[j].selected) + { + this_elem = &(DataSet->element[j]); + numselected++; + } + + if(numselected == 1) + { + if(this_elem->comments) + maxlen = strlen(this_elem->comments)+10; + + tempstring =(char*)Calloc(maxlen,sizeof(char)); + textsw_store_file(comments_tsw,"/tmp/gde_tmp",300,100); + + file = fopen("/tmp/gde_tmp","r"); + if(file == NULL) + { + Warning("Comments could not be saved"); + return XV_OK; + } + + for(j=0;(c=getc(file))!=EOF;j++) + { + if(j==maxlen-1) + { + maxlen *=2; + tempstring =(char *)Realloc(tempstring,maxlen); + } + tempstring[j] = c; + } + tempstring[j] = '\0'; + + fclose(file); + + unlink("/tmp/gde_tmp"); + + if(this_elem->comments) + Cfree(this_elem->comments); + + this_elem->comments = tempstring; + this_elem->comments_len = j; + StripSpecial(this_elem->comments); + + } + DONT(); + RepaintAll(TRUE); +} + + +ChEditMode(item,event) +Panel_item item; +Event *event; +{ + extern EditMode; + EditMode = xv_get(item,PANEL_VALUE); + + return(XV_OK); +} + + +ChEditDir(item,event) +Panel_item item; +Event *event; +{ + extern EditDir; + EditDir = xv_get(item,PANEL_VALUE); + + return(XV_OK); +} + +ChDisAttr(item,event) +Panel_item item; +Event *event; +{ + extern DisplayAttr; + extern Frame infoframe; + extern NA_Alignment *DataSet; + extern Canvas EditCan,EditNameCan; + + DisplayAttr = xv_get(item,PANEL_VALUE); + (void)SetNADData(DataSet,EditCan,EditNameCan); + if(DisplayAttr & GDE_MESSAGE_PANEL) + (void)xv_set(infoframe,XV_SHOW,TRUE,0); + else + (void)xv_set(infoframe,XV_SHOW,FALSE,0); + return(XV_OK); +} + + +ChAttrType(item,event) +Panel_item item; +Event *event; +{ + int j,current_insert = 0,new_type,type; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + extern int Default_DNA_Trans[],Default_NA_RTrans[],Default_RNA_Trans[]; + extern int Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[]; + NA_Alignment *aln; + NA_Sequence *temp = NULL,*element; + + if(DataSet == NULL) + return; + aln = 
(NA_Alignment*)DataSet; + if(aln == NULL) + return; + element = aln->element; + + + if(strcmp((char *) xv_get(item,PANEL_LABEL_STRING),"Type:") == 0) + { + new_type = xv_get(item,PANEL_VALUE); + type = (new_type == 0)?RNA: + (new_type == 1)?DNA: + (new_type == 2)?TEXT: + (new_type == 3)?MASK: + PROTEIN; + + + for(j=0;jnumelements;j++) + if(element[j].selected) + { + if((element[j].protect & PROT_TRANSLATION) == 0) + { + Warning("Protect violation"); + (void)xv_set(item,PANEL_VALUE,element[j].elementtype + ,0); + return(XV_ERROR); + } + if(element[j].elementtype == DNA || + element[j].elementtype == RNA) + switch(new_type) + { + case 1: + element[j].tmatrix =Default_DNA_Trans; + element[j].elementtype = type; + element[j].col_lut=Default_NAColor_LKUP; + break; + case 0: + element[j].tmatrix =Default_RNA_Trans; + element[j].elementtype = type; + element[j].col_lut=Default_NAColor_LKUP; + break; + case 4: + case 2: + case 3: + default: + /* + (void)xv_set(item,PANEL_VALUE,old_type,0); +*/ + break; + } + else if (element[j].elementtype == PROTEIN) + switch(new_type) + { + case 0: + case 1: + (void)xv_set(item,PANEL_VALUE, + element[j].elementtype ,0); + break; + case 4: + (void)xv_set(item,PANEL_VALUE, + element[j].elementtype ,0); + break; + case 2: + case 3: + element[j].elementtype = type; + element[j].col_lut=Default_PROColor_LKUP; + default: + break; + } + else if (element[j].elementtype == TEXT) + switch(new_type) + { + case 0: + case 1: + (void)xv_set(item,PANEL_VALUE, + element[j].elementtype ,0); + break; + case 4: + element[j].elementtype=type; + element[j].col_lut=Default_PROColor_LKUP; + break; + case 3: + case 2: + element[j].elementtype=type; + element[j].col_lut=NULL; + break; + default: + break; + } + else if (element[j].elementtype == MASK) + switch(new_type) + { + case 0: + case 1: + (void)xv_set(item,PANEL_VALUE, + element[j].elementtype ,0); + break; + case 4: + element[j].elementtype=type; + element[j].col_lut=Default_PROColor_LKUP; + break; + case 
3: + case 2: + element[j].elementtype = type; + element[j].col_lut = NULL; + break; + default: + break; + } + + } + } + else if(strcmp((char *) xv_get(item,PANEL_LABEL_STRING),"Direction") == 0) + { + for(j=0;jnumelements;j++) + if(element[j].selected) + switch(xv_get(item,PANEL_VALUE)) + { + case 0: + element[j].attr |= IS_5_TO_3; + element[j].attr &= (0xffff - IS_3_TO_5); + break; + case 1: + element[j].attr |= IS_3_TO_5; + element[j].attr &= (0xffff-IS_5_TO_3); + break; + default: + element[j].attr &= (0xffff-IS_5_TO_3); + element[j].attr &= (0xffff-IS_3_TO_5); + break; + } + } + else if(strcmp((char *) xv_get(item,PANEL_LABEL_STRING),"Strand") == 0) + { + for(j=0;jnumelements;j++) + if(element[j].selected) + switch(xv_get(item,PANEL_VALUE)) + { + case 0: + element[j].attr |= IS_PRIMARY; + element[j].attr &= (0xffff - IS_SECONDARY); + break; + case 1: + element[j].attr |= IS_SECONDARY; + element[j].attr &= (0xffff - IS_PRIMARY); + break; + default: + element[j].attr &= (0xffff - IS_PRIMARY); + element[j].attr &= (0xffff - IS_SECONDARY); + break; + } + } + return(XV_OK); +} + + +SwapElement(aln,e1,e2) +NA_Alignment *aln; +int e1,e2; +{ + /* +* Warning, The following code may not be compatable with other +* C compilers. The elements may need to be explicitly copied. 
+*/ + NA_Sequence temp; + register i; + + for(i=0;inumelements;i++) + { + if(aln->element[i].groupf == &(aln->element[e1])) + aln->element[i].groupf = &(aln->element[e2]); + else if(aln->element[i].groupf == &(aln->element[e2])) + aln->element[i].groupf = &(aln->element[e1]); + if(aln->element[i].groupb == &(aln->element[e1])) + aln->element[i].groupb = &(aln->element[e2]); + else if(aln->element[i].groupb == &(aln->element[e2])) + aln->element[i].groupb = &(aln->element[e1]); + } + + + temp = aln->element[e1]; + aln->element[e1] = aln->element[e2]; + aln->element[e2] = temp; + return; +} + +CompressAlign(item,event) +Panel_item item; +Event *event; +{ + int j,k,offset,pos = 0; + int max_wid = -999999; + int min_wid = 999999; + int min_offset = 99999999; + int any_selected = FALSE; + int compress_all; + + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet; + extern Frame frame; + NA_Base *rev_seq; + char *temp,*mask,*temp_c; + Display *dpy; + NA_Sequence *element; + + if(DataSet == NULL) + return; + + element = DataSet->element; + + switch(notice_prompt(frame,NULL, + NOTICE_MESSAGE_STRINGS,"Removing extra gaps, Do you want to:",NULL, + NOTICE_BUTTON,"Preserve alignment",1, + NOTICE_BUTTON,"Remove all dashes",2, + NOTICE_BUTTON,"Cancel",3,0)) + { + case 1: + compress_all=FALSE; + break; + case 2: + compress_all=TRUE; + break; + case 3: + default: + return(XV_OK); + } + + any_selected = FALSE; + for(j=0;jnumelements;j++) + if(element[j].selected) + { + max_wid = MAX(max_wid,element[j].offset + element[j].seqlen); + min_wid = MIN(min_wid,element[j].offset); + any_selected = TRUE; + } + if(any_selected == FALSE) + return(XV_OK); + + mask = Calloc(max_wid - min_wid,sizeof(char)); + temp = Calloc(max_wid - min_wid,sizeof(char)); + temp_c = Calloc(max_wid - min_wid,sizeof(int)); + + for(j=min_wid;jnumelements;k++) + if(element[k].selected) + { + if(j>=element[k].offset && j< element[k].offset + + element[k].seqlen) + { + switch (element[k].elementtype) + { 
+ case DNA: + case RNA: + if((getelem(&(element[k]),j) & 15) != 0) + mask[j-min_wid] = '1'; + break; + case PROTEIN: + if(getelem(&(element[k]),j) != '-') + mask[j-min_wid] = '1'; + break; + default: + break; + } + } + } + } + + for(j=0;jnumelements;j++) + if(element[j].selected) + if(element[j].protect & PROT_WHITE_SPACE == 0) + { + Warning("Some sequences are protected"); + return(XV_OK); + } + + if(compress_all) + { + for(j=0;jnumelements;j++) + if(element[j].selected) + { + this_elem = &(element[j]); + offset = this_elem->offset; + pos = 0; + for(k=0; kseqlen;k++) + { + if(this_elem->tmatrix && (this_elem-> + sequence[k]& 15)!='\0') + temp[pos++] = this_elem->sequence[k]; + + else if((this_elem->tmatrix == NULL) && + (this_elem->sequence[k] != '-')) + temp[pos++] = this_elem->sequence[k]; + } + this_elem->seqlen = pos; + for(k=0;ksequence[k] = temp[k]; + } + min_offset = MIN(min_offset,offset); + } + } + else + { + /* +* Use the mask to remove all positions where the mask is set to '0' +*/ + for(j=0;jnumelements;j++) + if(element[j].selected) + { + this_elem = &(element[j]); + offset = this_elem->offset; + pos = 0; + + for(k=offset; kseqlen;k++) + { + if(mask[k-min_wid] == '1') + { + temp[(pos++)] = this_elem->sequence[k-offset]; + } + } + + this_elem->seqlen = pos; + for(k=0;ksequence[k] = temp[k]; + } + min_offset = MIN(min_offset,offset); + } + } + + for(j=0;jnumelements;j++) + { + if(element[j].selected) + { + if(compress_all) + element[j].offset = -(DataSet->rel_offset); + else + element[j].offset -= min_offset; + } + } + NormalizeOffset(DataSet); + + DataSet->maxlen = 0; + + for(j=0;jnumelements;j++) + DataSet->maxlen = MAX(DataSet->maxlen,element[j].seqlen+ + element[j].offset); + + Cfree(mask); + Cfree(temp); + Cfree(temp_c); + + RepaintAll(FALSE); + return(XV_OK); +} + + +RevSeqs(item,event) +Panel_item item; +Event *event; +{ + int j,i,slen,current_insert = 0,offset,*rev_mask; + int min_range = 9999999,max_range = -9999999; + extern Canvas 
EditCan,EditNameCan; + extern NA_Alignment *DataSet; + NA_Base *rev_seq; + Display *dpy; + NA_Alignment *aln; + NA_Sequence *element; + + if(DataSet == NULL) + return; + aln = (NA_Alignment*)DataSet; + + if(aln == NULL) + return; + element = aln->element; + + + for(j=0;jnumelements;j++) + if(element[j].selected) + { + if(element[j].offsetmax_range) + max_range = element[j].offset+element[j].seqlen; + } + + for(j=0;jnumelements;j++) + if(element[j].selected && (element[j].protect & + PROT_TRANSLATION)) + { + slen = element[j].seqlen; + offset = element[j].offset; + rev_seq =(NA_Base*)Calloc(element[j].seqmaxlen, + sizeof(NA_Base)); + for(i=0;ielement; + + for(j=0;jnumelements;j++) + if(element[j].selected && (element[j].protect & + PROT_TRANSLATION)) + { + char c; + switch(element[j].elementtype) + { + case DNA: + case RNA: + slen = element[j].seqlen; + offset = element[j].offset; + for(i=0;i>k)& + (unsigned char)1; + } + putelem(&(element[j]),i+offset,(NA_Base)temp| + ((unsigned char)240 & temp2)); + } + break; + case MASK: + slen = element[j].seqlen; + offset = element[j].offset; + for(i=0;ielement; + ddata = (NA_DisplayData*)(aln->na_ddata); + tmat = element[ddata->cursor_y].tmatrix; + rmat = element[ddata->cursor_y].rmatrix; + + + for(j=0;jnumelements;j++) + if(aln->element[j].subselected && DataSet->selection_mask != NULL) + { + subselected = TRUE; + j = aln->numelements; + } + + if(subselected) + { + for(j=0;jnumelements;j++) + if(aln->element[j].subselected) + for(i=0;ielement[j].seqlen;i++) + if(aln->selection_mask[i+aln->element[j].offset - + aln->rel_offset] == '1') + { + pos = i+aln->element[j].offset; + base = (char)getelem(&(element[j]), pos); + switch(element[j].elementtype) + { + case DNA: + case RNA: + base = tmat[base]; + base = (base & 32)? (base & 223): + (base | 32); + base = rmat[base]; + putelem(&(element[j]), pos, base); + break; + case TEXT: + case PROTEIN: + base = (base & 32)? 
(base & 223): (base | 32); + putelem(&(element[j]), pos,base); + break; + case MASK: + default: + base = (base == '0')? + '1':(base == '1')? '0':base; + putelem(&(element[j]), pos,base); + break; + } + + } + /* +* Repaint the screen, names not needed +*/ + RepaintAll(FALSE); + } + else + { + base = (char)getelem(&(element[ddata->cursor_y]),ddata->cursor_x); + switch(element[ddata->cursor_y].elementtype) + { + case DNA: + case RNA: + base = tmat[base]; + base = (base & 32)? (base & 223): (base | 32); + base = rmat[base]; + putelem(&(element[ddata->cursor_y]),ddata->cursor_x, + base); + break; + case TEXT: + case PROTEIN: + base = (base & 32)? (base & 223): (base | 32); + putelem(&(element[ddata->cursor_y]), + ddata->cursor_x,base); + break; + case MASK: + default: + base = (base == '0')? '1':(base == '1')? '0':base; + putelem(&(element[ddata->cursor_y]), + ddata->cursor_x,base); + break; + } + } + return XV_OK; +} + +OrigDir(seq) +NA_Sequence *seq; +{ + int test; + test = seq->attr; + +#ifdef HGL + if(test & IS_ORIG_PRIMARY) + if(test & IS_ORIG_5_TO_3) + return(1); + else if(test & IS_ORIG_3_TO_5) + return(-1); + + if(test & IS_ORIG_SECONDARY) + if(test & IS_ORIG_5_TO_3) + return(-1); + else if(test & IS_ORIG_3_TO_5) + return(1); +#else + if(test & IS_PRIMARY) + return(1); + else if(test & IS_SECONDARY) + return(-1); +#endif + + + return(0); +} diff --git a/CORE/BuiltIn.o b/CORE/BuiltIn.o new file mode 100644 index 0000000..8714e2a Binary files /dev/null and b/CORE/BuiltIn.o differ diff --git a/CORE/ChooseFile.c b/CORE/ChooseFile.c new file mode 100755 index 0000000..7868076 --- /dev/null +++ b/CORE/ChooseFile.c @@ -0,0 +1,658 @@ +/* +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Michael Maciukenas at the Center for Prokaryote +Genome Analysis. Design and implementation guidance by Steven Smith, Carl +Woese. 
+*/ +/* File picker by Mike Maciukenas +** Allows the user to search up and down the directory tree, and choose a +** file. +** "Open" descends down into a directory, or chooses a file (depending ** on what is selected). The user may also press return after choosing +** a file or directory, to do the same thing. +** "Up Dir" ascends to the parent directory. +** "Cancel" cancels the operation. +** The user may also type a directory into the "Directory:" field. When the +** user presses return (or tab, or newline), the contents of the new directory +** will be shown. +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define GBUFSIZ 1024 /* buffer size, remove when adding to Steve's code */ + +#define FL_VIEW_H 15 /* # of files to show in one page, originally */ + + +/* structure for a linked list that allows sorting of filenames */ +typedef struct namedata {char *FileN; /* file name */ + int type; /* flag: 1 if directory '/' + ** 2 if executable '*' + ** 3 if symbolic link '@' + ** 4 if socket '=' + ** 0 if normal */ + struct namedata *Next; /* next in list */ + } NameData; + +Frame fl_getframe = XV_NULL; /* frame, is set to XV_NULL by free_mem(), + ** load_file() checks this to see if it should + ** destroy an existing frame */ +Scrollbar fl_scroll; /* the scrollbar for the file list canvas */ +Canvas fl_FileList; /* the file list canvas */ +Panel_item fl_DirText; /* the text item that displays the directory */ +Panel fl_Getpanel; /* the panel, contains buttons, and DirText */ +GC fl_gc; /* gc to use for drawing file names, just the default GC with + ** the frame's font copied in. 
*/ +int fl_current_picked, fl_current_len; /* the current item picked in the file + ** list, and the current number of items + ** in the file list */ +int fl_cell_h, fl_width, fl_ascent; /* the height of the font, the width of the + ** canvas, and the default ascent of the + ** font, all used for drawing into the file + ** list canvas */ +Xv_opaque data; + + + +NameData *fl_start; /* the root node for the linked list of filenames */ + +Frame load_file(Parentframe, x, y, passdata) +/* pick a file for loading. */ +Frame Parentframe; +int x, y; +Xv_opaque passdata; +{ + + /* callback procedures */ + int fl_open_btn_lf(), fl_up_dir_btn(), lf_cancel_btn(); + void fl_show_list_lf(); + void fl_list_select_lf(); + Panel_setting fl_dir_typed(); + /* interposed destroy function */ + Notify_value fl_free_mem(); + + char dirname[GBUFSIZ]; + Display *display; + Xv_screen screen; + int screen_no; + Xv_Font font; + XFontStruct *font_data; + + data=passdata; + + /* create the frame */ + fl_getframe = xv_create(Parentframe, FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_LABEL, "Choose File", + FRAME_SHOW_RESIZE_CORNER, FALSE, + XV_X, x, + XV_Y, y, + NULL); + notify_interpose_destroy_func(fl_getframe, fl_free_mem); + + /* get font characteristics */ + font = xv_get(fl_getframe, XV_FONT); + fl_cell_h = xv_get(font, FONT_DEFAULT_CHAR_HEIGHT); + fl_width = 50*xv_get(font, FONT_DEFAULT_CHAR_WIDTH); + font_data = (XFontStruct *)xv_get(font, FONT_INFO); + fl_ascent = font_data->ascent; + + /* create the panel and panel buttons */ +/* + fl_Getpanel = xv_create(fl_getframe, PANEL, + NULL); +*/ + fl_Getpanel = xv_get(fl_getframe, FRAME_CMD_PANEL); + (void) xv_create(fl_Getpanel, PANEL_BUTTON, + PANEL_LABEL_STRING, "Open", + PANEL_NOTIFY_PROC, fl_open_btn_lf, + NULL); + (void) xv_create(fl_Getpanel, PANEL_BUTTON, + PANEL_LABEL_STRING, "Up Dir", + PANEL_NOTIFY_PROC, fl_up_dir_btn, + NULL); + (void) xv_create(fl_Getpanel, PANEL_BUTTON, + PANEL_LABEL_STRING, "Cancel", + PANEL_NOTIFY_PROC, 
lf_cancel_btn, + NULL); + /* create the "Directory:" field, initialized to the current working dir */ + getcwd(dirname, GBUFSIZ); + fl_DirText = xv_create(fl_Getpanel, PANEL_TEXT, + PANEL_LABEL_STRING,"Directory:", + XV_X, xv_col(fl_Getpanel, 0), + XV_Y, xv_row(fl_Getpanel, 1), + PANEL_VALUE_STORED_LENGTH, GBUFSIZ, + PANEL_VALUE_DISPLAY_LENGTH, 30, + PANEL_VALUE, dirname, + PANEL_NOTIFY_LEVEL, PANEL_SPECIFIED, + PANEL_NOTIFY_STRING, "\n\r\t", + PANEL_NOTIFY_PROC, fl_dir_typed, + NULL); + + window_fit(fl_Getpanel); + + /* create the file list canvas, below the above panel */ + fl_FileList = xv_create(fl_getframe, CANVAS, + XV_X, 0, + WIN_BELOW, fl_Getpanel, + XV_WIDTH, fl_width, + XV_HEIGHT, FL_VIEW_H*fl_cell_h+7, + CANVAS_REPAINT_PROC, fl_show_list_lf, + CANVAS_AUTO_EXPAND, FALSE, + CANVAS_AUTO_SHRINK, FALSE, + CANVAS_WIDTH, fl_width, + CANVAS_HEIGHT, fl_cell_h, + CANVAS_RETAINED, FALSE, + OPENWIN_AUTO_CLEAR, FALSE, + NULL); + fl_scroll = xv_create(fl_FileList, SCROLLBAR, + SCROLLBAR_DIRECTION, SCROLLBAR_VERTICAL, + SCROLLBAR_PIXELS_PER_UNIT, fl_cell_h, + SCROLLBAR_VIEW_LENGTH, fl_view_h(), + SCROLLBAR_PAGE_LENGTH, fl_view_h(), + NULL); + xv_set(canvas_paint_window(fl_FileList), + WIN_EVENT_PROC, fl_list_select_lf, + WIN_CONSUME_EVENTS, WIN_MOUSE_BUTTONS, LOC_DRAG, WIN_ASCII_EVENTS, NULL, + NULL); + xv_set(fl_Getpanel, XV_WIDTH, xv_get(fl_FileList, XV_WIDTH), NULL); + + /* set up the gc for drawing into the file list */ + display = (Display *)xv_get(fl_getframe, XV_DISPLAY); + screen = (Xv_screen)xv_get(fl_getframe, XV_SCREEN); + screen_no = (int)xv_get(screen, SCREEN_NUMBER); + fl_gc = XCreateGC(display, RootWindow(display, screen_no), + 0, NULL); + XCopyGC(display, DefaultGC(display, DefaultScreen(display)), + 0xFFFFFFFF, fl_gc); + XSetFont(display, fl_gc, xv_get(font, XV_XID)); +/* +* Added S.Smith 2/5/91 +*/ + XSetForeground(display,fl_gc,BlackPixel(display,DefaultScreen(display))); + XSetBackground(display,fl_gc,WhitePixel(display,DefaultScreen(display))); + + 
+ /* set up the extra trailing node for the linked list, makes insertion + ** into the list easier */ + fl_start = (NameData *)calloc(1,1+sizeof(NameData)); + fl_start->FileN = (char *)NULL; + fl_start->Next = NULL; + + /* make the list, showing files in the application`s current directory + */ + (void) fl_make_list(); + + window_fit(fl_getframe); + xv_set(fl_getframe, XV_SHOW, TRUE, NULL); + return(fl_getframe); +} + + +int fl_open_btn_lf(item, event) +/* callback procedure for the open button. If it's a directory, switch to +** the new directory, otherwise return the filename +*/ +Panel_item item; +Event *event; +{ + int i, end; + char namebuf[GBUFSIZ], thestr[GBUFSIZ]; + NameData *current; + + if(fl_current_picked != -1) /* then an item is selected. Work with it */ + { + /* find item in list */ + current = fl_start; + for(i=0; iNext; + strcpy(namebuf, current->FileN); + if(current->type == 1) /* then it's a directory, so switch to it */ + { + if(fl_checkdir(namebuf)) + { + chdir(namebuf); + (void) fl_make_list(); + fl_set_dirtext(fl_DirText); + return XV_OK; + } + } + else /* it's a file name, so return it */ + { + if(fl_checkdir(xv_get(fl_DirText, PANEL_VALUE))) /* then valid dir */ + { + if(current->type != 0) /* then it's not a regular file, so strip off + ** the extra type character: *, =, /, or @ */ + namebuf[strlen(namebuf)-1]='\0'; + /* create the file string (with full directory path) */ + getcwd(thestr, GBUFSIZ); + if(thestr[strlen(thestr)-1] != '/') + strcat(thestr, "/"); + strcat(thestr, namebuf); + act_on_it_lf(thestr, data); /* give filename to application */ + xv_destroy_safe(fl_getframe); + return XV_OK; + } + else + { /* invalid directory, so show notice*/ + int result; + Panel panel = (Panel)xv_get(fl_FileList, PANEL_PARENT_PANEL); + + result = notice_prompt(panel, NULL, + NOTICE_MESSAGE_STRINGS, "Invalid Directory specified.", NULL, + NOTICE_FOCUS_XY, event_x(event), event_y(event), + NOTICE_BUTTON_YES, "Change Directory", + NULL); + } + } + } 
+} + +int fl_up_dir_btn(item, event) +/* go up one directory */ +Panel_item item; +Event *event; +{ + char dirname[GBUFSIZ]; + + /* pretty simple, just go up, show it, and change the "Directory:" field */ + (void) chdir(".."); + (void) fl_make_list(); + fl_set_dirtext(fl_DirText); + return XV_OK; +} + +Panel_setting fl_dir_typed(item, event) +/* handle when user types return, newline, or tab in the "Directory:" field. +** if it's a valid directory, it moves to it, otherwise, display a notice +*/ +Panel_item item; +Event *event; +{ + int error; + char dirname[GBUFSIZ]; + + switch (event_action(event)) + { + case '\n': + case '\r': + case '\t': + { + if(fl_checkdir(xv_get(fl_DirText, PANEL_VALUE))) + { /* valid directory, chdir to it and show it */ + chdir(xv_get(fl_DirText, PANEL_VALUE)); + fl_make_list(); + fl_set_dirtext(fl_DirText); + } + else + { /* invalid directory, so show notice */ + int result; + Panel panel = (Panel)xv_get(fl_FileList, PANEL_PARENT_PANEL); + + result = notice_prompt(panel, NULL, + NOTICE_MESSAGE_STRINGS, "Invalid Directory specified.", NULL, + NOTICE_FOCUS_XY, event_x(event), event_y(event), + NOTICE_BUTTON_YES, "Change Directory", + NULL); + } + return PANEL_NONE; + }; + /* if it wasn't \n, \t, or \r, pass event on to standard + ** panel_text handler + */ + default: + return(panel_text_notify(item, event)); + } +} + +int lf_cancel_btn(item, event) +/* handle the cancel button. Just destroys the frame and returns +*/ +Panel_item item; +Event *event; +{ + + act_on_it_lf(NULL); + xv_destroy_safe(fl_getframe); + return XV_OK; +} + +fl_readln(file, buf) +FILE *file; +char *buf; +{ + int ic; + int i = 0; + + while (((ic=getc(file)) != EOF) && ((char)ic != '\n')) + buf[i++]= (char)ic; + buf[i] = '\0'; +} + +int fl_make_list() +/* Creates a list of files, out of the current working directory. It then +** tells the file list canvas to refresh itself. The list sits attached to +** fl_start, for reading by the show_list() routine. 
+*/ +{ + FILE *dirp; /* for directory data */ + int i, list_len, cur_pos; + char dirname[GBUFSIZ], tempbuf[GBUFSIZ]; + NameData *current, *temp; /* structures for reading + ** and sorting file names */ + int notdone; + struct stat statbuf; /* for checking if a file + ** name is a directory */ + int pid = getpid(); /* for creation of temp + ** file for directory list */ + char tmpcmd[GBUFSIZ]; /* for holding ls command */ + char tmpname[GBUFSIZ]; /* for holding file names */ + + + getcwd(dirname, GBUFSIZ); + sprintf(tmpcmd, "cd %s;ls -F > /usr/tmp/.svlffil%d", dirname, pid); + sprintf(tmpname, "/usr/tmp/.svlffil%d", pid); + system(tmpcmd); + dirp = fopen(tmpname, "r"); + if (dirp == NULL) /* just a check to make sure */ + { + fprintf(stderr, "fl_make_list was passed bad directory name\n"); + return(-1); + } + else + { + /* free up the old list, to build a new one */ + for(current = fl_start; current->FileN != (char *)NULL; i++) + { + temp = current; + current = current->Next; + free(temp->FileN); + free(temp); + }; + /* set up the linked list for sorting */ + fl_start = (NameData *)calloc(1, sizeof(NameData)+1); + fl_start->FileN = (char *)NULL; + fl_start->Next = NULL; + /* read through the directory entries */ + list_len = 0; + for(fl_readln(dirp, tempbuf); tempbuf[0] != '\0'; fl_readln(dirp, tempbuf)) + { + /* don't include "." and ".." 
in the list */ + if((strcmp(tempbuf,"./")!=0)&& + (strcmp(tempbuf,"../")!=0)) + { + /* find the right spot in the list to insert the new name */ + current = fl_start; + notdone = 1; + while(notdone) + if(current->FileN == NULL) + notdone = 0; + else if(strcmp(tempbuf, current->FileN)>0) + current = current->Next; + else + notdone = 0; + /* insert the new name */ + temp = (NameData *)calloc(1, sizeof(NameData)+1); + temp->FileN = current->FileN; + temp->type = current->type; + temp->Next = current->Next; + ++list_len; + current->Next = temp; + /* set flag for file type */ + switch(tempbuf[strlen(tempbuf)-1]) + { + case '/': /* directory */ + { + current->type = 1; + break; + } + case '@': /* symbolic link */ + { + current->type = 3; + break; + } + case '=': /* socket */ + { + current->type = 4; + break; + } + case '*': /* executable */ + { + current->type = 2; + break; + } + default: + { + current->type = 0; + break; + } + } + current->FileN = (char *)calloc(1, 1+strlen(tempbuf)); + strcpy(current->FileN,tempbuf); + }; + } + fclose(dirp); + sprintf(tmpcmd, "rm %s", tmpname); + system(tmpcmd); + + /* adjust the Canvas size, and refresh it */ + fl_current_len = list_len; + cur_pos = xv_get(fl_scroll, SCROLLBAR_VIEW_START); + xv_set(fl_FileList, CANVAS_HEIGHT, + (list_len+fl_view_h()+1)*fl_cell_h, + NULL); + /* scrollbars bomb with zero-length objects */ + if(list_len == 0) ++list_len; + /* reset scrollbar */ + xv_set(fl_scroll, SCROLLBAR_VIEW_START, 0, + SCROLLBAR_OBJECT_LENGTH, list_len, + NULL); + /* refresh canvas */ + wmgr_refreshwindow(canvas_paint_window(fl_FileList)); + fl_current_picked = -1; + return(0); + } +} + +fl_set_dirtext(fl_DirText) +/* sets the "Directory:" field according to the current directory +** fl_DirText is the Xview pointer to the fl_DirText Panel Item +*/ +Panel_item fl_DirText; +{ + char dirbuf[GBUFSIZ]; + + getcwd(dirbuf, GBUFSIZ); + xv_set(fl_DirText, PANEL_VALUE, dirbuf, NULL); + +} + +int fl_checkdir(dirname) +/* check if a directory 
can be opened. directory can be specified by +** full root name or by current name. returns true if it can be opened. +*/ +char *dirname; +{ + DIR *dirp; + + dirp = opendir(dirname); + if(dirp == NULL) /* not available, user cannot enter */ + return(0); + else + { + closedir(dirp); /* must close it */ + return(1); + } +} + +void fl_show_list_lf(canvas, paint_window, repaint_area) +/* repaint procedure for the file list canvas. Repaints all file names in +** the damaged area */ +Canvas canvas; +Xv_Window paint_window; +Rectlist *repaint_area; +{ + NameData *current; + int i; + int start_draw, end_draw; + Display *dpy; + Window xwin; + + + /* make sure AUTO_CLEAR is off, this routine will do it itself */ + while(xv_get(fl_FileList, OPENWIN_AUTO_CLEAR)!=FALSE) + { + fprintf(stderr, "lf:found bug--OPENWIN_AUTO_CLEAR still TRUE"); + xv_set(fl_FileList, OPENWIN_AUTO_CLEAR, FALSE, NULL); + } + /* make sure RETAINED is off, this routine will repaint itself */ + while(xv_get(fl_FileList, CANVAS_RETAINED)!=FALSE) + { + fprintf(stderr, "lf:found bug--CANVAS_RETAINED still TRUE"); + xv_set(fl_FileList, CANVAS_RETAINED, FALSE, NULL); + } + /* get display and window */ + dpy = (Display *)xv_get(paint_window, XV_DISPLAY); + xwin = (Window)xv_get(paint_window, XV_XID); + + /* clear the area given us by Xview, for simplicity, we clear the + ** smallest rectangle that encloses all of the destroyed areas, the + ** rl_bound rectangle */ + XClearArea(dpy, xwin, + repaint_area->rl_bound.r_left, + repaint_area->rl_bound.r_top, + repaint_area->rl_bound.r_width, + repaint_area->rl_bound.r_height, + 0); + /* the next 3 lines calculate which file names must be drawn, by where the + ** top and bottom of the rl_bound rectangle lie */ + start_draw = repaint_area->rl_bound.r_top; + end_draw = (repaint_area->rl_bound.r_height + start_draw - 1) / fl_cell_h; + start_draw = (start_draw - 1) / fl_cell_h; + + /* find the first element to draw in the list */ + current = fl_start; + for(i = 0; (iNext != 
NULL); i++) + current = current->Next; + /* now start drawing them */ + for(; (i<=end_draw) && (current->Next != NULL); i++) + { + XDrawString(dpy, xwin, fl_gc, 5, i*fl_cell_h+fl_ascent, current->FileN, + strlen(current->FileN)); + /* add a box if we are drawing the currently picked one */ + if(i==fl_current_picked) + { + XDrawRectangle(dpy, xwin, fl_gc, + 2, i*fl_cell_h, + xv_get(canvas, XV_WIDTH)-11-xv_get(fl_scroll, XV_WIDTH), + fl_cell_h); + } + current = current->Next; + } +} + +void fl_list_select_lf(paint_window, event) +/* callback procedure for events that happen in the file list canvas. Checks +** mouse button press or drag, and for when the user types return */ +Xv_window paint_window; +Event *event; +{ + int picked, cur_pos; + Window xwin = (Window)xv_get(paint_window, XV_XID); + Display *dpy; + + dpy = (Display *)xv_get(paint_window, XV_DISPLAY); + /* get the current position of the scrollbar for future reference */ + cur_pos = xv_get(fl_scroll, SCROLLBAR_VIEW_START); + + /* first, check for user picking a file name */ + if((event_action(event) == ACTION_SELECT)|| + (event_action(event) == LOC_DRAG)) + { + picked = (event_y(event) - 1) / fl_cell_h; + /* make sure the file picked is on screen. if it is not, + ** we just ignore it. 
this avoids wierd stuff, like being + ** able to pick files that aren't shown on screen */ + if((picked >= cur_pos)&& + (picked < cur_pos+fl_view_h())&& + (picked < fl_current_len)) + { + /* efficiency: ignore if it is already picked */ + if(picked != fl_current_picked) + { +#ifdef SGI /* added refresh to get rid of old boxes*/ + wmgr_refreshwindow(canvas_paint_window(fl_FileList)); +#endif /sgi */ + + XSetFunction(dpy, fl_gc, GXclear); + XDrawRectangle(dpy, xwin, fl_gc, + 2, fl_current_picked*fl_cell_h, + xv_get(fl_FileList, XV_WIDTH)-11- + xv_get(fl_scroll, XV_WIDTH), + fl_cell_h); + XSetFunction(dpy, fl_gc, GXcopy); + XDrawRectangle(dpy, xwin, fl_gc, + 2, picked*fl_cell_h, + xv_get(fl_FileList, XV_WIDTH)-11- + xv_get(fl_scroll, XV_WIDTH), + fl_cell_h); + fl_current_picked = picked; + } + } + } + /* user may have pressed return, then just call the open button + ** callback procedure. PANEL_FIRST_ITEM gets the pointer to the + ** open button itself, since it happens to be the first item on + ** the panel. fl_open_btn doesn't really use this parameter, but + ** just in case it ever does, we include it. */ + else if((event_is_ascii(event))&&(event_action(event) == '\r')) + fl_open_btn_lf(xv_get(fl_Getpanel, PANEL_FIRST_ITEM), event); + else + return; +} +int fl_view_h() +/* returns the current height (in # of file names displayed) of the file list */ +{ + return (((int)xv_get(fl_FileList, XV_HEIGHT))/fl_cell_h); +} + +Notify_value +fl_free_mem(client, status) +/* clean up when the frame is destroyed. 
Frees up the memory used in the +** linked list of file names, and sets the Frame variable (getframe) to null */ +Notify_client client; +Destroy_status status; +{ + NameData *current, *temp; + int i; + +switch (status) + { + case DESTROY_CHECKING: + return NOTIFY_DONE; + case DESTROY_CLEANUP: + { + for(current = fl_start; current->FileN != (char *)NULL; i++) + { + temp = current; + current = current->Next; + free(temp->FileN); + free(temp); + }; + fl_getframe = XV_NULL; + return notify_next_destroy_func(client, status); + } + default: + return NOTIFY_DONE; + } +} diff --git a/CORE/ChooseFile.o b/CORE/ChooseFile.o new file mode 100644 index 0000000..c21acc0 Binary files /dev/null and b/CORE/ChooseFile.o differ diff --git a/CORE/CutCopyPaste.c b/CORE/CutCopyPaste.c new file mode 100755 index 0000000..0c3c9e6 --- /dev/null +++ b/CORE/CutCopyPaste.c @@ -0,0 +1,667 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. 
+*/ +EditCut(item,event) +Panel_item item; +Event *event; +{ + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern Panel_item left_foot,right_foot; + extern NA_Alignment *DataSet,*Clipboard; + char buffer[80]; + + int j,numselected=0,numshifted = 0; + + if(TestSelection() == SELECT_REGION) + return(EditSubCut(item,event)); + + for(j=0;jnumelements;j++) + { + FreeNASeq(Clipboard->element[j]); + InitNASeq(&(Clipboard->element[j]),TEXT); + } + Clipboard->numelements = 0; + + for(j=0;jnumelements;j++) + if(DataSet->element[j].selected) + { + if(numselected >= Clipboard->maxnumelements-1) + { + Clipboard->maxnumelements += 10; + Clipboard->element = (NA_Sequence*)Realloc + (Clipboard->element, + Clipboard->maxnumelements*sizeof(NA_Sequence)); + } + Clipboard->element[(Clipboard->numelements)] = + DataSet->element[j]; +/* +* Map sequences back into their global positions, as we will +* normailze the alignment after they are copied out. +*/ + Clipboard->element[(Clipboard->numelements)++].offset+= + DataSet->rel_offset; + + numselected++; + } + + for(j=0;jnumelements;j++) + if(DataSet->element[j].selected) + numshifted++; + else + DataSet->element[j-numshifted] = + DataSet->element[j]; + + DataSet->numelements -= numshifted; + + NormalizeOffset(DataSet); + + SetNADData(DataSet,EditCan,EditNameCan); + + Regroup(DataSet); + + RepaintAll(TRUE); + sprintf(buffer,"%d sequence in Sequence Clipboard",numselected); + xv_set(frame,FRAME_RIGHT_FOOTER,buffer,0); + xv_set(right_foot,PANEL_LABEL_STRING,buffer,0); + + return(XV_OK); +} + +EditCopy(item,event) +Panel_item item; +Event *event; +{ + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet,*Clipboard; + extern Panel_item left_foot,right_foot; + char buffer[80]; + + int i,j,numselected=0,numshifted = 0,this; + + if(TestSelection() == SELECT_REGION) + return(EditSubCopy(item,event)); + + for(j=0;jnumelements;j++) + { + FreeNASeq(Clipboard->element[j]); + 
InitNASeq(&(Clipboard->element[j]),TEXT); + } + Clipboard->numelements = 0; + + for(j=0;jnumelements;j++) + if(DataSet->element[j].selected) + { + this = Clipboard->numelements; + if(numselected >= Clipboard->maxnumelements-1) + { + Clipboard->maxnumelements += 10; + Clipboard->element = (NA_Sequence*)Realloc + (Clipboard->element, + Clipboard->maxnumelements*sizeof(NA_Sequence)); + InitNASeq(&(Clipboard->element + [this]), + DataSet->element[j].elementtype); + } + Clipboard->element[this] = DataSet->element[j]; +/* +* Handle comments +*/ + if(DataSet->element[j].comments) + { + Clipboard->element[this].comments = (char*) + strdup(DataSet->element[j].comments); + Clipboard->element[this].comments_maxlen = + Clipboard->element[this].comments_len; + } +/* +* And baggage +*/ + if(DataSet->element[j].baggage) + { + Clipboard->element[this].baggage = (char*) + strdup(DataSet->element[j].baggage); + Clipboard->element[this].baggage_maxlen = + Clipboard->element[this].baggage_len; + } + + Clipboard->element[this].cmask = NULL; + Clipboard->element[this].sequence + = (NA_Base*)Calloc(DataSet->element[j].seqmaxlen, + sizeof(NA_Base)); + for(i=0;ielement[j].seqlen;i++) + Clipboard->element[Clipboard->numelements]. + sequence[i] = DataSet->element[j].sequence[i]; + +/* + putelem(&(Clipboard->element[Clipboard-> + numelements]),i, + getelem(&(DataSet->element[j]),i)); +*/ +/* +* Map sequences back into their global positions, as we will +* normailze the alignment after they are copied out. 
+*/ + Clipboard->element[(Clipboard->numelements)].offset += + DataSet->rel_offset; + + (Clipboard->numelements)++; + numselected++; + } + sprintf(buffer,"%d sequence in Clipboard",numselected); + xv_set(frame,FRAME_RIGHT_FOOTER,buffer,0); + xv_set(right_foot,PANEL_LABEL_STRING,buffer,0); + + return(XV_OK); +} + +EditPaste(item,event) +Panel_item item; +Event *event; +{ + + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet,*Clipboard; + extern Panel_item left_foot,right_foot; + extern TextClipSize; + int j,last = -1; + + if(TextClipSize != 0) + { + if(Clipboard->numelements == 0) + return(EditSubPaste(item,event)); + else if(notice_prompt(frame,NULL,NOTICE_MESSAGE_STRINGS, + "You have data in both clipboards, do you", + "wish to paste from the...", + NULL, + NOTICE_BUTTON,"Sequence clipboard",1, + NOTICE_BUTTON,"Text clipboard",2, + 0) == 2) + return(EditSubPaste(item,event)); + } + + + for(j=0;jnumelements;j++) + if(DataSet->element[j].selected) + last = j; + + if(DataSet->maxnumelements <= DataSet->numelements+ + Clipboard->numelements) + { + DataSet->maxnumelements+=Clipboard->numelements; + DataSet->element =(NA_Sequence*)Realloc(DataSet->element, + DataSet->maxnumelements*sizeof(NA_Sequence)); + } + + for(j=DataSet->numelements-1;j>=last+1;j--) + DataSet->element[j+Clipboard->numelements] = + DataSet->element[j]; + + for(j=0;jnumelements;j++) + { + DataSet->element[last+1+j] = Clipboard->element[j]; +/* +* be sure to bring them back into alignment with the rest +*/ + DataSet->element[last+1+j].offset -= DataSet->rel_offset; + } + DataSet->numelements += Clipboard->numelements; + +/* + for(j=0;jnumelements;j++) + { + FreeNASeq(Clipboard->element[j]); + InitNASeq(&(Clipboard->element[j]),TEXT); + } +*/ + Clipboard->numelements = 0; + + NormalizeOffset(DataSet); + + SetNADData(DataSet,EditCan,EditNameCan); + + Regroup(DataSet); + + RepaintAll(TRUE); + xv_set(frame,FRAME_RIGHT_FOOTER,"Clipboard empty",0); + 
xv_set(right_foot,PANEL_LABEL_STRING,"Clipboard empty",0); + return(XV_OK); +} + + +Regroup(alignment) +NA_Alignment *alignment; +{ + + int j,group,last; + + for(j=0;jnumelements;j++) + { + alignment->element[j].groupf = NULL; + alignment->element[j].groupb = NULL; + } + + for(group = 1;group <= alignment->numgroups;group++) + { + last = -1; + for(j=0;jnumelements;j++) + if(alignment->element[j].groupid == group) + { + if(last != -1) + { + alignment->element[j].groupb = + &(alignment->element[last]); + alignment->element[last].groupf = + &(alignment->element[j]); + } + last = j; + } + } + return; +} + +EditSubCut(item,event) +Panel_item item; +Event *event; +{ + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet,*Clipboard; + extern Panel_item left_foot,right_foot; + extern char **TextClip; + extern int TextClipSize,TextClipLength; + int blank_space = 0; + + NA_Sequence *this_elem; + char buffer[80]; + + int j,i,k,numselected=0,numshifted = 0,columns=0; + +/* +* Check how many columns selected +*/ + + if(DataSet->selection_mask == NULL) + return; + + for(j=0;jmaxlen;j++) + if(DataSet->selection_mask[j] == '1') + columns++; + + if(columns == 0) + return; +/* +* Free old Text clipboard +*/ + if(SubCutViolate()) + { + Warning("Cut violates current protections"); + return XV_OK; + } + + for(j=0;jnumelements;j++) + if(DataSet->element[j].subselected) + TextClipSize++; + + if(TextClipSize == 0) + return; + + TextClip = (char**)Calloc(TextClipSize,sizeof(char*)); + + for(j=0;jnumelements;j++) + if(DataSet->element[j].subselected) + { + this_elem = &(DataSet->element[j]); +/* +* Need to check protections +*/ + + for(i=0,blank_space = 0;ioffset;i++) + if(DataSet->selection_mask[i] == '1') + TextClip[numselected][blank_space++] = + (char)getelem(this_elem,i); + + + for(i=0,k=0;iseqlen;i++) + { + if(DataSet->selection_mask[i+this_elem->offset] + == '1') + { + if(this_elem->tmatrix) + TextClip[numselected][k++] = + this_elem->tmatrix[ + 
(char)getelem(this_elem, + i+this_elem->offset)]; + else + TextClip[numselected][k++] = + (char)getelem(this_elem, + i+this_elem->offset); + } + + if((k!=0) && (iseqlen-1)) + { + this_elem->sequence[1+i-k] = + this_elem->sequence[1+i]; + if(this_elem->cmask) + { + this_elem->cmask[1+i-k] = + this_elem->cmask[1+i]; + } + } + } + + numselected++; + this_elem->seqlen -= k; + this_elem->offset -= blank_space; +/* +* This might cause problems later on if the selection mask is +* not cleaned...Make sure you test for subselected, not just +* for a non-0 mask. +*/ + this_elem->subselected = FALSE; + } + + TextClipLength = columns; + NormalizeOffset(DataSet); + + SetNADData(DataSet,EditCan,EditNameCan); + + Regroup(DataSet); + + RepaintAll(TRUE); + + sprintf(buffer,"%d bytes in Text Clipboard",numselected*columns); + xv_set(frame,FRAME_RIGHT_FOOTER,buffer,0); + xv_set(right_foot,PANEL_LABEL_STRING,buffer,0); + + return(XV_OK); +} + + + +SubCutViolate() +{ + int i,j; + NA_Sequence *this_elem; + extern NA_Alignment *DataSet; + char base,tbase; + int GAP=FALSE,UNAMB=FALSE,AMB=FALSE,prot; + + for(j=0;jnumelements;j++) + { + this_elem = &(DataSet->element[j]); + prot = this_elem->protect; + if((this_elem->subselected)&& (this_elem->elementtype!=TEXT)) + { + for(i=0;imaxlen;i++) + if(DataSet->selection_mask[i] == '1') + { + base = (char)getelem(this_elem,i); + if(this_elem->tmatrix) + tbase = (this_elem->tmatrix[base])|32; + switch(this_elem->elementtype) + { + case DNA: + case RNA: + if((base&15) == 0) + GAP=TRUE; + else if(tbase == 'n') + AMB=TRUE; + else + UNAMB = TRUE; + break; + case PROTEIN: + if(base == '-' || + base == '~' || + base == ' ') + GAP=TRUE; + else if(base == 'X' || + base == 'x') + AMB=TRUE; + else + UNAMB = TRUE; + break; + case MASK: + if(base == '0') + GAP = TRUE; + else + UNAMB = TRUE; + break; + default: + break; + } + if(((prot & PROT_WHITE_SPACE)==0) && GAP) + return(TRUE); + else if(((prot & PROT_GREY_SPACE)==0) && AMB) + return(TRUE); + else if(((prot 
& PROT_BASE_CHANGES)==0) && + UNAMB) + return(TRUE); + } + } + } + return FALSE; +} + + + +EditSubPaste(item,event) +Panel_item item; +Event *event; +{ + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet,*Clipboard; + extern Panel_item left_foot,right_foot; + extern char **TextClip; + extern int TextClipSize,TextClipLength; + int *temp_cmask,cursorx,cursory; + + NA_Sequence *this_elem; + char buffer[80]; + + int j,i,k,violate = FALSE; + + if(DataSet->selection_mask == NULL) + return(XV_OK); + + if(TextClipSize == 0 || TextClipLength==0) + return(XV_OK); + + cursorx = ((NA_DisplayData*)(DataSet->na_ddata))->cursor_x; + cursory = ((NA_DisplayData*)(DataSet->na_ddata))->cursor_y; + + if(cursory + TextClipSize > DataSet->numelements) + { + Warning("Can't paste a block there."); + return(XV_OK); + } + + for(j=0;jelement[j+cursory]); + violate |= InsertViolate(DataSet,this_elem, + TextClip[j],cursorx,TextClipLength); + } + if(violate == FALSE) + { + for(j=0;jelement[j+cursory]); + InsertNA(this_elem,TextClip[j],TextClipLength,cursorx); + } + } + + RepaintAll(TRUE); + sprintf(buffer,"%d bytes in Text Clipboard",TextClipLength * + TextClipSize); + xv_set(frame,FRAME_RIGHT_FOOTER,buffer,0); + xv_set(right_foot,PANEL_LABEL_STRING,buffer,0); + return(XV_OK); +} + + + +EditSubCopy(item,event) +Panel_item item; +Event *event; +{ + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern NA_Alignment *DataSet,*Clipboard; + extern Panel_item left_foot,right_foot; + extern char **TextClip; + extern int TextClipSize,TextClipLength; + int blank_space = 0; + + NA_Sequence *this_elem; + char buffer[80]; + + int j,i,k,numselected=0,numshifted = 0,columns=0; + +/* +* Check how many columns selected +*/ + + if(DataSet->selection_mask == NULL) + return; + + for(j=0;jmaxlen;j++) + if(DataSet->selection_mask[j] == '1') + columns++; + + if(columns == 0) + return; +/* +* Free old Text clipboard +*/ + for(j=0;jnumelements;j++) + 
if(DataSet->element[j].subselected) + TextClipSize++; + + if(TextClipSize == 0) + return; + + TextClip = (char**)Calloc(TextClipSize,sizeof(char*)); + + for(j=0;jnumelements;j++) + if(DataSet->element[j].subselected) + { + this_elem = &(DataSet->element[j]); +/* +* Need to check protections +*/ + + for(i=0,blank_space = 0;ioffset;i++) + if(DataSet->selection_mask[i] == '1') + TextClip[numselected][blank_space++] = + (char)getelem(this_elem,i); + + + for(i=0,k=0;iseqlen;i++) + { + if(DataSet->selection_mask[i+this_elem->offset] + == '1') + { + if(this_elem->tmatrix) + TextClip[numselected][k++] = + this_elem->tmatrix[ + (char)getelem(this_elem, + i+this_elem->offset)]; + else + TextClip[numselected][k++] = + (char)getelem(this_elem, + i+this_elem->offset); + } + + } + + numselected++; + this_elem->subselected = FALSE; + } + + TextClipLength = columns; + + SetNADData(DataSet,EditCan,EditNameCan); + + RepaintAll(TRUE); + + sprintf(buffer,"%d bytes in Text Clipboard",numselected*columns); + xv_set(frame,FRAME_RIGHT_FOOTER,buffer,0); + xv_set(right_foot,PANEL_LABEL_STRING,buffer,0); + + return(XV_OK); +} + +int TestSelection() +{ + int j,select_mode = 0,selected = 0,subselected = 0; + extern NA_Alignment *DataSet; + extern Frame frame; + + for(j=0;j<((NA_Alignment*)DataSet)->numelements;j++) + { + selected|=((NA_Alignment*)DataSet)->element[j].selected; + subselected|=((NA_Alignment*)DataSet)->element[j].subselected; + } + + if (!(selected || subselected)) + Warning("Warning, no sequences selected"); + + if(selected && !subselected) + select_mode = SELECTED; + + if(!selected && subselected) + select_mode = SELECT_REGION; + + if(selected && subselected) + select_mode = notice_prompt(frame,NULL,NOTICE_MESSAGE_STRINGS, + "Do you want to use the...",NULL, + NOTICE_BUTTON,"Selected sequences",SELECTED, + NOTICE_BUTTON,"Selected regions",SELECT_REGION, + NULL); +/* + for(j=0;j<((NA_Alignment*)DataSet)->numelements;j++) + { + if(select_mode == SELECT_REGION) + 
((NA_Alignment*)DataSet)->element[j].selected = FALSE; + else + ((NA_Alignment*)DataSet)->element[j].subselected = FALSE; + } +*/ + + return(select_mode); +} + diff --git a/CORE/CutCopyPaste.o b/CORE/CutCopyPaste.o new file mode 100644 index 0000000..37e24c2 Binary files /dev/null and b/CORE/CutCopyPaste.o differ diff --git a/CORE/DrawNA.c b/CORE/DrawNA.c new file mode 100755 index 0000000..fdf4004 --- /dev/null +++ b/CORE/DrawNA.c @@ -0,0 +1,356 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + + +/* +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + + +DrawNAColor(can,NAdd,xwin,left,top,indx,lpos,rpos,dpy,gc,mode,inverted) +Canvas can; +NA_DisplayData *NAdd; +Window xwin; +Display *dpy; +int left,top,indx,lpos,rpos,mode,inverted; +GC gc; +{ + char buffer[MAX_NA_DISPLAY_WIDTH],map_chr; + register int j,i,next_color,invrt = FALSE; + int unselected_inverted = FALSE,dir; + int pmin,wid,x,y,*tmat,fdx,fdy,pmax,first,used; + register int seqposindx; + register unsigned long *pixels; + extern int SCALE, DisplayAttr; + extern Pixmap grey_pm[]; + + int *color_mask,*colors,color,start_col,offset; + int maxlen = 0,global_offset = 0; + NA_Base base; + NA_Sequence *elem; + NA_Alignment *aln = NAdd->aln; + int scrn = DefaultScreen(dpy); + int dithered = FALSE; + + colors = aln->element[indx].col_lut; +/* +* Just in case no characters need to be drawn... 
+*/ + next_color = 13; + XSetForeground(dpy,gc,BlackPixel(dpy,scrn)); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + + + pixels = (unsigned long*)xv_get(can,WIN_X_COLOR_INDICES); + fdx = NAdd -> font_dx; + fdy = NAdd -> font_dy; + y=fdy * (indx+1-top); + + elem = &(NAdd->aln->element[indx]); + tmat = aln->element[indx].tmatrix; +#ifdef HGL + dir = OrigDir(elem); +#else + dir = (elem->attr & IS_PRIMARY)?1: + (elem->attr & IS_SECONDARY)?-1:0; +#endif + + map_chr = (dir == 0)?'+':(dir == -1)?'<':'>'; + + wid = (rpos-lpos)/SCALE+1; + + seqposindx=lpos; + pmax = MIN(lpos+wid*SCALE,aln->element[indx].seqlen+ + aln->element[indx].offset); +/* + pmax = lpos+wid*SCALE; +*/ + if(aln->element[indx].elementtype == TEXT) + mode = COLOR_MONO; + + if((inverted && ((DisplayAttr & INVERTED)==0)) || + ((inverted==FALSE) && (DisplayAttr & INVERTED))) + unselected_inverted = TRUE; + else + unselected_inverted = FALSE; + if(NAdd->num_colors <16 && mode != COLOR_MONO ) + dithered = TRUE; + + if(mode == COLOR_SEQ_MASK) + { + color_mask = elem->cmask; + if(color_mask == NULL) + mode = COLOR_LOOKUP; + } + if(mode == COLOR_ALN_MASK) + { + color_mask = NAdd->aln->cmask; + global_offset = aln->cmask_offset - aln->rel_offset; + maxlen = aln->maxlen; + if(color_mask == NULL) + mode = COLOR_LOOKUP; + } + if(mode == COLOR_LOOKUP && colors == NULL) + mode = COLOR_MONO; + + color = 9999; + for(j=0;seqposindxoffset; + + switch(mode) + { + case COLOR_SEQ_MASK: + next_color = ((seqposindx >= offset) && + (seqposindxseqlen))? + color_mask[seqposindx-offset]:13; + break; + case COLOR_LOOKUP: + next_color = colors[base]; + break; + case COLOR_ALN_MASK: + next_color = ((seqposindx >= global_offset) && + (seqposindx-global_offset < aln->cmask_len))? 
+ color_mask[seqposindx-global_offset]:13; + break; + case COLOR_STRAND: + if(((tmat?tmat[base]:base) == '-') || + ((tmat?tmat[base]:base) == '~')) + next_color = 13; +#ifdef HGL + else if(elem->attr & IS_ORIG_PRIMARY) + next_color = 3; + else if(elem->attr & IS_ORIG_SECONDARY) + next_color = 6; +#else + else if(elem->attr & IS_PRIMARY) + next_color = 3; + else if(elem->attr & IS_SECONDARY) + next_color = 6; +#endif + else + next_color = NAdd->black; + break; + case COLOR_MONO: + default: + next_color = NAdd->black; + break; + } + + if(elem->subselected) + if(aln->selection_mask[seqposindx] == '1') + next_color = 1000 + NAdd->black; +/* + Adding 1000 to a color signals that it is selected/inverted +*/ + if( next_color == color) + { + buffer[j] = tmat? + tmat[base]:base; +/* +* If in map view, set character to '>' '<' '+' or ' ' +*/ + if(SCALE > 1) + { + if(buffer[j] != '-' && buffer[j] != '~') + buffer[j] = map_chr; + else + buffer[j] = ' '; + } + } + + else if (color == 9999) + { + buffer[j] = tmat? 
tmat[base]:base; +/* +* If in map view, set character to '>' '<' '+' or ' ' +*/ + if(SCALE > 1) + { + if(buffer[j] != '-' && buffer[j] != '~') + buffer[j] = map_chr; + else + buffer[j] = ' '; + } + color = next_color; + start_col = (seqposindx - left)/SCALE * fdx; + } + else + { + if(color > 999) + { + invrt = (unselected_inverted)?FALSE:TRUE; + color -= 1000; + } + else + invrt = unselected_inverted; + if(invrt) + { + if(dithered) + { + XSetStipple(dpy,gc,grey_pm[15-color]); + } + else + { + XSetBackground(dpy,gc,pixels[color]); + XSetForeground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + else + { + if(dithered) + { + XSetStipple(dpy,gc,grey_pm[color]); + } + else + { + XSetForeground(dpy,gc,pixels[color]); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + if(dithered == FALSE) + XDrawImageString(dpy,xwin,gc, + start_col, y, buffer,j); + else + { + XSetFillStyle(dpy,gc,FillOpaqueStippled); + XFillRectangle(dpy,xwin,gc,start_col,y-fdy, + j * fdx,fdy); + XSetFillStyle(dpy,gc,FillSolid); + XDrawString(dpy,xwin,gc, + start_col, y, buffer,j); + } + + wid -= j; + j=0; + buffer[j] = tmat? 
+ tmat[base]:base; +/* +* If in map view, set character to '>' '<' '+' or ' ' +*/ + if(SCALE > 1) + { + if(buffer[j] != '-' && buffer[j] != '~') + buffer[j] = map_chr; + else + buffer[j] = ' '; + } + color = next_color; + start_col = (seqposindx - left)/SCALE * fdx; + } + } + + if(color == 9999) + color = 13; + + if(color > 999) + { + invrt = (unselected_inverted)?FALSE:TRUE; + color -= 1000; + } + else + invrt = unselected_inverted; + if(invrt) + { + if(dithered) + XSetStipple(dpy,gc,grey_pm[15-color]); + else + { + XSetBackground(dpy,gc,pixels[color]); + XSetForeground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + else + { + if(dithered) + XSetStipple(dpy,gc,grey_pm[color]); + else + { + XSetForeground(dpy,gc,pixels[color]); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + + if(dithered == FALSE) + XDrawImageString(dpy,xwin,gc, start_col,y, buffer,j); + else + { + XSetFillStyle(dpy,gc,FillOpaqueStippled); + XFillRectangle(dpy,xwin,gc,start_col,y-fdy, + j * fdx,fdy); + XSetFillStyle(dpy,gc,FillSolid); + XDrawString(dpy,xwin,gc, start_col,y, buffer,j); + } + + wid -= j; + start_col = (seqposindx - left)/SCALE * fdx; + for(j=0;jelementtype != TEXT) + buffer[j] = '~'; + else + buffer[j] = ' '; + } + invrt = unselected_inverted; + + buffer[j] = '\0'; + switch(mode) + { + case COLOR_MONO: + color = NAdd ->black; + break; + case COLOR_SEQ_MASK: + case COLOR_ALN_MASK: + case COLOR_LOOKUP: + default: + color = 13; + break; + } + if(invrt) + { + if(dithered) + XSetStipple(dpy,gc,grey_pm[15-color]); + else + { + XSetBackground(dpy,gc,pixels[color]); + XSetForeground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + else + { + if(dithered) + XSetStipple(dpy,gc,grey_pm[color]); + else + { + XSetForeground(dpy,gc,pixels[color]); + XSetBackground(dpy,gc,WhitePixel(dpy,scrn)); + } + } + + if(dithered == FALSE) + XDrawImageString(dpy,xwin,gc,start_col,y, buffer,j); + else + { + XSetFillStyle(dpy,gc,FillOpaqueStippled); + XFillRectangle(dpy,xwin,gc,start_col,y-fdy, j*fdx,fdy); + 
XSetFillStyle(dpy,gc,FillSolid); + XDrawString(dpy,xwin,gc,start_col,y, buffer,j); + } + + return; +} diff --git a/CORE/DrawNA.o b/CORE/DrawNA.o new file mode 100644 index 0000000..269a7df Binary files /dev/null and b/CORE/DrawNA.o differ diff --git a/CORE/Edit.c b/CORE/Edit.c new file mode 100755 index 0000000..3fb8542 --- /dev/null +++ b/CORE/Edit.c @@ -0,0 +1,1312 @@ +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + +NAEvents(win,event,arg) +Xv_window win; +Event *event; +Notify_arg arg; +{ + extern NA_Alignment *DataSet,*Clipboard; + extern Canvas EditCan; + extern DisplayAttr; + NA_DisplayData *ddata; + NA_Alignment *aln; + NA_Sequence *this_seq; + Display *dpy; + GC gc; + Xv_window view; + NA_Base c,this_base; + Scrollbar hsc,vsc; + char *buf; + extern int repeat_cnt,EditDir,BlockInput,SCALE; + extern Frame frame; + extern Panel_item left_foot,right_foot; + extern EditMode; + + Window xwin; + int i,j,k,x,y,cursorx,cursory,protection_violation,success=FALSE; + int startx,endx,starty,endy,test_offscreen = FALSE,eventid; + + if(DataSet == NULL) + return; + + for(j=0;jna_ddata); + if(ddata == NULL) + return; + + eventid = event_id(event); + dpy = (Display *)xv_get(EditCan, XV_DISPLAY); + xwin = (Window)xv_get(win,XV_XID); + gc = DefaultGC(dpy,DefaultScreen(dpy)); + cursorx = ddata->cursor_x; + cursory = ddata->cursor_y; + + if (eventid == WIN_RESIZE) + { + if(EditCan) + { + hsc=(Scrollbar)xv_get(EditCan, + OPENWIN_HORIZONTAL_SCROLLBAR, view); + vsc=(Scrollbar)xv_get(EditCan, + OPENWIN_VERTICAL_SCROLLBAR,view); + if(hsc) + xv_set(hsc,SCROLLBAR_VIEW_START,0,0); + 
if(vsc) + xv_set(vsc,SCROLLBAR_VIEW_START,0,0); + } + } +/* +* Highly interdependent with AUTO_SHRINK attribute +* +* The following an attempt to remove Warning messages on +* split screen and loading of new data. +*/ + if((event_is_down(event) || event_is_button(event)) && EditCan) + { + hsc=(Scrollbar)xv_get(EditCan,OPENWIN_HORIZONTAL_SCROLLBAR, + view); + vsc=(Scrollbar)xv_get(EditCan,OPENWIN_VERTICAL_SCROLLBAR,view); + if(hsc) + { + startx = (int)xv_get(hsc,SCROLLBAR_VIEW_START)/SCALE; + startx = (int)xv_get(hsc,SCROLLBAR_VIEW_START); + endx = startx + (int)xv_get(hsc,SCROLLBAR_VIEW_LENGTH) + *SCALE; + } + if(vsc) + { + starty = (int)xv_get(vsc,SCROLLBAR_VIEW_START); + endy = starty + (int)xv_get(vsc,SCROLLBAR_VIEW_LENGTH); + } + if(!(hsc || vsc)) + return; + } + if (eventid == LOC_WINENTER || + (event_action(event) == ACTION_TAKE_FOCUS)) + win_set_kbd_focus(win,xwin); + else if(event_is_down(event) && event_action(event) == ACTION_COPY) + { + (void)EditCopy(win,event); + } + else if(event_is_down(event) && event_action(event) == ACTION_CUT) + { + (void)EditCut(win,event); + } + else if(event_is_down(event) && event_action(event) == ACTION_PASTE) + { + (void)EditPaste(win,event); + } + else if(event_is_button(event) && !BlockInput) + { + x=(event_x(event)/ddata->font_dx)*SCALE + startx; + y=event_y(event)/ddata->font_dy + starty; + + y=MAX(0,MIN(y,aln->numelements - 1)); + x=MAX(0,MIN(x,aln->maxlen)); + if(event_is_down(event)&&(event_action(event)==ACTION_SELECT)) + { + repeat_cnt = 0; + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + ddata->cursor_x = x; + ddata->cursor_y = y; + ResetPos(ddata); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + if(event_is_up(event)&&(event_action(event)==ACTION_SELECT)) + { + SubSelect(aln,event_shift_is_down(event), + ddata->cursor_x,ddata->cursor_y,x,y); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else if(event_is_up(event) + &&(event_action(event)==ACTION_ADJUST)) + { + 
SubSelect(aln,TRUE,ddata->cursor_x,ddata->cursor_y,x,y); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + } + + if(event_is_ascii(event) && event_is_down(event) && + event_meta_is_down(event) && ((char)eventid == 'm' || + (char)eventid == 'M') ) + { + EditMode = (EditMode==CHECK)?INSERT:CHECK; + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else if(event_is_ascii(event) && event_is_down(event) && + event_meta_is_down(event) && (char)eventid == 'u' ) + { + (void)Ungroup(NULL,NULL); + } + else if(event_is_ascii(event) && event_is_down(event) && + event_meta_is_down(event) && (char)eventid == 'g' ) + { + (void)Group(NULL,NULL); + } + else if(event_is_ascii(event) && event_is_down(event) && + event_meta_is_down(event) && (char)eventid == 'i' ) + { + (void)ModAttr(NULL,NULL); + } + else if(event_is_ascii(event) && event_is_down(event) && + event_meta_is_down(event) && (char)eventid == 'p' ) + { + (void)SetProtection(NULL,NULL); + } + else if(event_is_down(event) && event_is_ascii(event) && + event_meta_is_down(event) && (char)eventid>'9') + DoMeta((char)eventid); + + else if(event_is_down(event) && (event_is_ascii(event) || + (char)eventid==0x7 || (char)eventid==0x7f) && !BlockInput) + { + if(DisplayAttr & KEYCLICKS) + Keyclick(); +/* +* De-select the text +*/ + SubSelect(aln,FALSE,0,0,0,0); + + if((char)eventid<='9' && (char)eventid>='0' && ddata->use_repeat && + (aln->element[cursory].elementtype != MASK && + aln->element[cursory].elementtype != TEXT || + event_meta_is_down(event))) + { + repeat_cnt = repeat_cnt*10+eventid-'0'; + if(repeat_cnt > 100000000) + repeat_cnt = 0; + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else if((eventid ==0x7f || eventid == 0x8) && EditMode != CHECK) + { + int current_id = aln->element[cursory].groupid; + protection_violation=FALSE; + repeat_cnt = MAX(1,repeat_cnt); + + for(this_seq = &(aln->element[cursory]); + this_seq != NULL; + this_seq = this_seq->groupf) + + protection_violation |= + DeleteViolate(aln,this_seq, + 
repeat_cnt,cursorx); + + for(this_seq = &(aln->element[cursory]); + + this_seq != NULL;this_seq = this_seq->groupb) + protection_violation |= + DeleteViolate(aln,this_seq,repeat_cnt,cursorx); + + if(protection_violation == FALSE) + { + if(current_id == 0) + success |= DeleteNA(aln,cursory, + repeat_cnt, cursorx); + else + for(j=0;jnumelements;j++) + if(aln->element[j].groupid + == current_id) + success|=DeleteNA(aln,j, + repeat_cnt,cursorx); + if(success) + ddata->cursor_x -=repeat_cnt; + test_offscreen = TRUE; + NormalizeOffset(aln); + } + else + { + xv_set(frame,FRAME_RIGHT_FOOTER, + "Cannot delete",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot delete",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot delete",0); + Beep(); + } + repeat_cnt = 0; + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + + /* --------------------------------------------------------- */ + /* Added by Scott Ferguson, Exxon Research & Engineering Co. */ + /* --------------------------------------------------------- */ + else if(EditMode == INSERT && (eventid == 11 || eventid == 12)) + { + /* + The FETCH key grabs the nearest repeat_cnt bases to the right + or left and moves them to where the cursor is without shifting + the other parts of the alignment + + eventid = 11 (CNTRL 'k') means Fetch from the right + eventid = 12 (CNTRL 'l') means Fetch from the left + */ + + repeat_cnt = MAX(1,repeat_cnt); + this_seq=&(aln->element[cursory]); + + for(;this_seq != NULL; + this_seq = this_seq->groupf) + if(this_seq != NULL) + success = FetchNA(this_seq,eventid, + repeat_cnt, cursorx, cursory); + + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + if (success) + if (eventid == 12) + ddata->cursor_x += repeat_cnt; + else + ddata->cursor_x -= repeat_cnt; + + test_offscreen = TRUE; + repeat_cnt = 0; + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + /* ---------------END OF SEGMENT:--------------------------- */ + /* Added by Scott Ferguson, Exxon Research & Engineering Co. 
*/ + /* --------------------------------------------------------- */ + + else if(EditMode == INSERT) + { + repeat_cnt = MAX(1,repeat_cnt); + this_seq=&(aln->element[cursory]); + c = (char)eventid; + /* +* remap "space" to "-" if AA sequence +*/ + if(this_seq->elementtype == PROTEIN) + if(c == ' ') + c = '-'; + + buf = Calloc(sizeof(NA_Base),repeat_cnt); + for(j=0;jgroupf != NULL; + this_seq = this_seq->groupf) + protection_violation |= + InsertViolate(aln,this_seq,buf,cursorx, + repeat_cnt); + + for(this_seq = &(aln->element[cursory]); + this_seq->groupb != NULL; + this_seq = this_seq->groupb) + protection_violation |= + InsertViolate(aln,this_seq,buf,cursorx, + repeat_cnt); + + if(protection_violation == FALSE) + { + for(;this_seq != NULL; + this_seq = this_seq->groupf) + if(this_seq != NULL) + success = InsertNA(this_seq,buf, + repeat_cnt, cursorx, cursory); + + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + if(success) + ddata->cursor_x +=(repeat_cnt * EditDir); + + test_offscreen = TRUE; + } + else + { + xv_set(frame,FRAME_RIGHT_FOOTER, + "Cannot insert",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot delete",0); + Beep(); + } + + repeat_cnt = 0; + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + cfree(buf); + } + /* +* Check mode +*/ + else + { + printf("Eventid = %d\n",eventid); + c = toupper((char)eventid); + this_base = getelem(&(aln->element[cursory]),cursorx); + if(aln->element[cursory].tmatrix) + { + this_base=aln->element[cursory]. 
+ tmatrix[(int)this_base]; + } + this_base = toupper(this_base); + + if(c==this_base) + { + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + ddata->cursor_x++; + test_offscreen = TRUE; + ResetPos(ddata); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else + { + Beep(); + } + } + } + else if(event_is_down(event)&& !BlockInput) + { + if (event_action(event) == ACTION_GO_COLUMN_BACKWARD) + { + if(DisplayAttr & KEYCLICKS) + Keyclick(); + repeat_cnt = MAX(1,repeat_cnt); + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + ddata->cursor_y = MAX(0,ddata->cursor_y-repeat_cnt); + test_offscreen = TRUE; + repeat_cnt = 0; + ResetPos(ddata); + SubSelect(aln,FALSE,0,0,0,0); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else if (event_action(event) == ACTION_GO_COLUMN_FORWARD) + { + if(DisplayAttr & KEYCLICKS) + Keyclick(); + repeat_cnt = MAX(1,repeat_cnt); + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + ddata->cursor_y = MAX(0,MIN(aln->numelements - 1, + ddata->cursor_y+repeat_cnt)); + repeat_cnt = 0; + test_offscreen = TRUE; + ResetPos(ddata); + SubSelect(aln,FALSE,0,0,0,0); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else if (event_action(event) == ACTION_GO_CHAR_BACKWARD) + { + if(DisplayAttr & KEYCLICKS) + Keyclick(); + repeat_cnt = MAX(SCALE,repeat_cnt); + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + ddata->cursor_x = MAX(0,ddata->cursor_x-repeat_cnt); + test_offscreen = TRUE; + repeat_cnt = 0; + ResetPos(ddata); + SubSelect(aln,FALSE,0,0,0,0); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + else if (event_action(event) == ACTION_GO_CHAR_FORWARD) + { + if(DisplayAttr & KEYCLICKS) + Keyclick(); + repeat_cnt = MAX(SCALE,repeat_cnt); + UnsetNACursor(ddata,EditCan,win,xwin,dpy,gc); + ddata->cursor_x = MAX(0,MIN(aln->maxlen,ddata-> + cursor_x + repeat_cnt)); + test_offscreen = TRUE; + repeat_cnt = 0; + ResetPos(ddata); + SubSelect(aln,FALSE,0,0,0,0); + SetNACursor(ddata,EditCan,win,xwin,dpy,gc); + } + } + if(((ddata->cursor_xcursor_x>endx-1)) + && 
test_offscreen) + { + x = ddata->cursor_x-(endx-startx)/2; + x = (MAX(0,MIN(x,aln->maxlen - (endx-startx)))); + (void)JumpTo(view,x,starty); + } + + if(((ddata->cursor_ycursor_y>endy-1)) + && test_offscreen) + { + y = ddata->cursor_y-(endy-starty)/2; + y = (MAX(0,MIN(y,aln->numelements - (endy-starty)))); + (void)JumpTo(view,startx,y); + } + return; +} + + +/* +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. + +*/ + + +InsertViolate(aln,seq,insert,cursor_x,len) +NA_Alignment *aln; +NA_Sequence *seq; +NA_Base *insert; +int cursor_x,len; +{ + int i,j,prot,violated = FALSE; + prot = seq->protect; + if(seq->rmatrix) + for(i=0;irmatrix[insert[i]]; + + + if((prot & PROT_BASE_CHANGES)==0) + { + if(seq->elementtype == DNA || seq->elementtype == RNA) + { +/* +* if character is not '-' or 'N' then +* protection is violated. +*/ + for(j=0;jelementtype == PROTEIN) + { + for(j=0;jelementtype == MASK) + { + for(j=0;jelementtype == DNA || seq->elementtype == RNA) + { + /* +* if character is '-' then +* protection is violated. 
+*/ + for(j=0;jelementtype == PROTEIN) + { + for(j=0;jelementtype == DNA || seq->elementtype == RNA) + { + for(j=0;jelementtype == PROTEIN) + { + for(j=0;jtmatrix) + for(i=0;itmatrix[insert[i]]; + + return(violated); +} + + +InsertNA(seq,insert,len,pos) +/* +* return Success +*/ +NA_Sequence *seq; +NA_Base *insert; +int len,pos; +{ + int i,j,snum,x = pos+100; + int curlen,maxlen,offset; + extern NA_Alignment *DataSet; + extern Frame frame; + extern Panel_item left_foot,right_foot; + extern Canvas EditCan; + NA_DisplayData *NAdd; + NA_Alignment *aln = (NA_Alignment*)DataSet; + Xv_window win; + Window xwin; + Display *dpy; + GC gc; + + if(seq->rmatrix) + for(i=0;irmatrix[insert[i]]; + + dpy = (Display *)xv_get(EditCan, XV_DISPLAY); + gc = DefaultGC(dpy,DefaultScreen(dpy)); + NAdd =(NA_DisplayData*)aln->na_ddata; + offset = seq->offset; + curlen = seq->seqlen; + maxlen = seq->seqmaxlen; + + if(seq->elementtype == MASK) + for(i=0;i '9') + insert[i] = '0'; + + + /* +* The current snum (sequence number) should be passed into this +* routine. This means that the index into the alignment needs +* to be included in the "sequence id." The following is a lookup +* for the snum (slow). 
+*/ + + for(j = 0;j < aln->numelements;j++) + if(&(aln->element[j]) == seq) + { + snum = j; + j = aln->numelements; + } + + if(pos > seq->seqlen+seq->offset) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Cannot insert beyond end",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot insert beyond end",0); + Beep(); + if(seq->tmatrix) + for(i=0;itmatrix[insert[i]]; + return(FALSE); + } + + if(seq->seqlen+len>=seq->seqmaxlen-1) + { + if(seq->sequence) + { + seq->sequence = (NA_Base*) + Realloc(seq->sequence,(seq->seqlen+len+100)*sizeof(NA_Base)); + seq->seqmaxlen = seq->seqlen+len+100; + } + else + { + seq->sequence = (NA_Base*) + Calloc(sizeof(NA_Base),(seq->seqlen+len+100)); + seq->seqmaxlen = seq->seqlen+len+100; + } + if(seq->cmask) + { + seq->cmask = (int*)Realloc(seq->cmask, + seq->seqlen*sizeof(int)); + } + } + + /* +* This forces space to be allocated upstream, and thus prevents +* memory thrashing. Not a wonderful fix, but it will do for now... +*/ + if(posoffset) + putelem(seq,pos,'\0'); + for(j=0;j<=seq->seqlen+seq->offset - pos+1;j++) + { + putelem(seq,seq->seqlen+len+seq->offset - j, + getelem(seq,seq->offset+seq->seqlen - j)); + if(seq->cmask) + putcmask(seq,seq->seqlen+len+seq->offset-j, + getcmask(seq,seq->seqlen+seq->offset - j)); + } + + for(j=0;jcmask) + putcmask(seq,pos+j,8); + } + + seq->seqlen = seq->seqlen + len; + aln->maxlen = MAX(aln->maxlen,seq->seqlen+1); + + RedrawAllNAViews(snum,pos); + + if(seq->tmatrix) + for(i=0;itmatrix[insert[i]]; + + return(TRUE); +} + +/*------------------------------------------------------------------------*/ +/* Added by Scott Ferguson, Exxon Research & Engineering Co. 
*/ +/* In support of the "Fetch" key operation */ +/*------------------------------------------------------------------------*/ +/* dir = CNTRL 'k'(11), fetch left; dir = CNTRL 'l'(12), fetch right */ +FetchNA(seq,dir,len,pos) +/* +* return Success +*/ +NA_Sequence *seq; +unsigned char dir; +int len,pos; +{ + extern Frame frame; + extern Panel_item left_foot,right_foot; + int i,j,snum,x = pos+100; + int curlen,maxlen,offset; + extern NA_Alignment *DataSet; + extern Canvas EditCan; + NA_DisplayData *NAdd; + NA_Alignment *aln = (NA_Alignment*)DataSet; + Xv_window win; + Window xwin; + Display *dpy; + GC gc; + NA_Base *scratch, tgap; + int incr, nearest, *cscratch, tcmask; + + dpy = (Display *)xv_get(EditCan, XV_DISPLAY); + gc = DefaultGC(dpy,DefaultScreen(dpy)); + NAdd =(NA_DisplayData*)aln->na_ddata; + offset = seq->offset; + curlen = seq->seqlen; + maxlen = seq->seqmaxlen; + + /* +* The current snum (sequence number) should be passed into this +* routine. This means that the index into the alignment needs +* to be included in the "sequence id." The following is a lookup +* for the snum (slow). 
+*/ + + for(j = 0;j < aln->numelements;j++) + if(&(aln->element[j]) == seq) + { + snum = j; + j = aln->numelements; + } + + scratch = (NA_Base *) Calloc(sizeof(NA_Base),len); + cscratch = (int *) Calloc(sizeof(int),len); + + if (dir == 12) + { + incr = 1; + if (pos < offset) nearest = offset-pos; + else nearest = 0; + while (pos+nearest < seq->seqlen+seq->offset) + { + if (isagap(seq,pos+nearest)) nearest++; + else break; + } + if (pos+nearest+len > seq->seqlen+seq->offset) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Cannot fetch beyond end",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot fetch beyond end",0); + Beep(); + return(FALSE); + } + } + else + { + if (pos >= offset + curlen) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Cannot fetch past end",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot fetch past end",0); + Beep(); + return(FALSE); + } + incr = -1; + nearest = 0; + while (pos+nearest > seq->offset) { + if (isagap(seq,pos+nearest)) nearest--; + else break; + } + if (pos+nearest-len+1 < seq->offset) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Cannot fetch beyond beginning",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot fetch beyond beginning",0); + Beep(); + return(FALSE); + } + } + + if (nearest == 0) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Base is not a Gap",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Base is not a Gap",0); + Beep(); + return(FALSE); /* this means we're not sitting on a gap */ + } + + tgap = getelem(seq,pos); + if (seq->cmask) + tcmask = getcmask(seq,pos); + for(j=0;jcmask) + cscratch[j] = getcmask(seq,pos+j*incr+nearest); + } + + for (j=0;jcmask) + putcmask(seq,pos+j*incr+nearest,tcmask); + } + + for (j=0;jcmask) + putcmask(seq,pos+j*incr,cscratch[j]); + } + + nearest = 0; + while (isagap(seq,seq->offset+nearest)) nearest++; + if (nearest != 0) { + for (j=0;jseqlen-nearest;j++) { + putelem(seq,seq->offset+j,getelem(seq,seq->offset+nearest+j)); + if(seq->cmask) + putcmask(seq,seq->offset+j, + getcmask(seq,seq->offset+nearest+j)); + } 
+ for (j=0;joffset+seq->seqlen-nearest+j,tgap); + if(seq->cmask) + putcmask(seq,seq->offset+j,tcmask); + } + seq->seqlen -= nearest; + seq->offset += nearest; + } + + if (dir == 12) + RedrawAllNAViews(snum,MIN(pos,pos+nearest+(len-1)*incr)); + else + RedrawAllNAViews(snum,0); + xv_set(frame,FRAME_RIGHT_FOOTER, "",0); + xv_set(right_foot,PANEL_LABEL_STRING, "",0); + free(cscratch); + free(scratch); + return(TRUE); +} +/*------------------------------------------------------------------------*/ +/* End of lines added by S. R. Ferguson srfergu@erenj.com */ +/* In support of the "Fetch" key operation */ +/*------------------------------------------------------------------------*/ + + +DeleteNA(aln,seqnum,len,offset) +NA_Alignment *aln; +int seqnum,len,offset; +{ + int i,j,seqlen = aln->element[seqnum].seqlen+aln->element[seqnum].offset; + extern Frame frame; + extern Panel_item left_foot,right_foot; + + + NA_Sequence *seq; + seq = &(aln->element[seqnum]); + if(offset > seq->offset+seq->seqlen) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Cannot delete beyond end",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot delete beyond end",0); + Beep(); + return(FALSE); + } + + if (len>offset) + { + xv_set(frame,FRAME_RIGHT_FOOTER, "Cannot delete beyond end",0); + xv_set(right_foot,PANEL_LABEL_STRING, "Cannot delete beyond end",0); + Beep(); + return(FALSE); + } + for(j=offset-len;jelement[seqnum].cmask) + putcmask(seq,j-seq->offset,getcmask(seq,j+seq->offset)); + } + aln->element[seqnum].seqlen = aln->element[seqnum].seqlen-len; + RedrawAllNAViews(seqnum,offset-len); + return(TRUE); +} + + +DeleteViolate(aln,this_seq,len,offset) +NA_Alignment *aln; +NA_Sequence* this_seq; +int len,offset; +{ + int i,j,prot,violated = FALSE; + prot = this_seq->protect; + + if((prot & PROT_BASE_CHANGES)==0) + { + if(this_seq->elementtype == DNA || + this_seq->elementtype == RNA) + { + for(j=offset-len;jelementtype == PROTEIN) + { + for(j=offset-len;jelementtype == MASK) + 
for(j=offset-len;jsequence[j] != '0') + violated = TRUE; + } + + if((prot & PROT_WHITE_SPACE)==0) + { + if(this_seq->elementtype == DNA || + this_seq->elementtype == RNA) + { + for(j=offset-len;jelementtype == PROTEIN) + { + for(j=offset-len;jelementtype == DNA || + this_seq->elementtype == RNA) + { + for(j=offset-len;jelementtype == PROTEIN) + { + for(j=offset-len;joffset) || (offset > this_seq->seqlen + this_seq->offset)) + violated = TRUE; +*/ + if(len>offset) + violated = TRUE; + + return(violated); +} + + +RedrawAllNAViews(seqnum,start) +int seqnum,start; +{ + + extern NA_Alignment *DataSet; + extern Canvas EditCan; + extern int SCALE; + NA_DisplayData *ddata; + NA_Alignment *aln; + NA_Sequence *this_seq; + Scrollbar hsc,vsc; + Display *dpy; + GC gc; + Xv_window win,view; + Window xwin; + int hstart,vstart,hend,j; + + if(DataSet == NULL) + return; + + aln = (NA_Alignment*)DataSet; + ddata = (NA_DisplayData*)(aln->na_ddata); + if(ddata == NULL) + return; + + dpy = (Display *)xv_get(EditCan, XV_DISPLAY); + gc = DefaultGC(dpy,DefaultScreen(dpy)); + + for(j=0;j<(int)xv_get(EditCan,OPENWIN_NVIEWS);j++) + { + view = (Xv_window)xv_get(EditCan,OPENWIN_NTH_VIEW,j,0); + win = xv_get(view,CANVAS_VIEW_PAINT_WINDOW); + xwin = (Window)xv_get(win,XV_XID); + hsc = (Scrollbar)xv_get(EditCan,OPENWIN_HORIZONTAL_SCROLLBAR, + view); + vsc = (Scrollbar)xv_get(EditCan,OPENWIN_VERTICAL_SCROLLBAR, + view); + hstart = xv_get(hsc,SCROLLBAR_VIEW_START); + vstart = xv_get(vsc,SCROLLBAR_VIEW_START); + hend = hstart + xv_get(hsc,SCROLLBAR_VIEW_LENGTH) * SCALE; + if(start < hend) + DrawNAColor(EditCan,ddata,xwin,hstart, vstart,seqnum, + MAX(start,hstart),hend, dpy,gc,ddata->color_type,FALSE); + } + return; +} + + +ResetPos(ddata) +NA_DisplayData *ddata; +{ + NA_Base *seq; + int j,total = 0,maxpos; + NA_Sequence *elem; + + if(ddata == NULL) + return; + elem = &(ddata->aln->element[ddata->cursor_y]); + if(elem->sequence == NULL) + return; + + maxpos = 
MAX(0,MIN(ddata->cursor_x,elem->seqlen+elem->offset)); + + switch (elem->elementtype) + { + case DNA: + case RNA: + for(j=elem->offset;j<=maxpos;j++) + if((getelem(elem,j) & 15) + && !(getelem(elem,j) & 128)) + total++; + break; + case PROTEIN: + for(j=0;j<=maxpos;j++) + if((getelem(elem,j) != ' ') && + (getelem(elem,j) != '-') && + (getelem(elem,j) != '~')) + total++; + break; + case MASK: + case TEXT: + default: + total = ddata->cursor_y; + break; + } + ddata->position = total; + return; +} + +Beep() +{ +#ifdef SUN4 + FILE *audio; + int j; + audio = fopen("/dev/audio","w"); + if (audio != NULL) + for(j=0;j<20;j++) + fprintf(audio,"zzzzz "); + fclose(audio); +#else + fprintf(stderr,"%c",7); + fflush(stderr); +#endif + return; +} + +Keyclick() +{ +#ifdef SUN4 + FILE *audio; + int j; + audio = fopen("/dev/audio","w"); + if (audio != NULL) + for(j=0;j<10;j++) + fprintf(audio,"zzzzzzzzzz "); + fclose(audio); +#else + fprintf(stderr,"%c",7); + fflush(stderr); +#endif + return; +} + +putelem(a,b,c) +NA_Sequence *a; +int b; +NA_Base c; +{ + int j,newsize; + NA_Base *temp; + + if(b>=(a->offset+a->seqmaxlen)) + Warning("Putelem:insert beyond end of sequence space ignored"); + else if(b >= (a->offset)) + a->sequence[b-(a->offset)] = c; + else + { + temp =(NA_Base*)Calloc(a->seqmaxlen+a->offset-b, + sizeof(NA_Base)); + switch (a->elementtype) + { + /* +* Pad out with gap characters fron the point of insertion to the offset +*/ + case MASK: + for(j=b;joffset;j++) + temp[j-b]='0'; + break; + case DNA: + case RNA: + for(j=b;joffset;j++) + temp[j-b]='\0'; + break; + case PROTEIN: + for(j=b;joffset;j++) + temp[j-b]='-'; + break; + case TEXT: + default: + for(j=b;joffset;j++) + temp[j-b]=' '; + break; + } + + for(j=0;jseqmaxlen;j++) + temp[j+a->offset-b] = a->sequence[j]; + Cfree(a->sequence); + a->sequence = temp; + a->seqlen += (a->offset - b); + a->seqmaxlen +=(a->offset - b); + a->offset = b; + a->sequence[0] = c; + } + return; +} + +putcmask(a,b,c) +NA_Sequence *a; +int b; 
+int c; +{ + int j,newsize; + int *temp; + if(b >= (a->offset) ) + a->cmask[b-(a->offset)] = c; + return; +} + + +getelem(a,b) +NA_Sequence *a; +int b; +{ + if(a->seqlen == 0) + return(-1); + if(boffset || (b>a->offset+a->seqlen)) + switch(a->elementtype) + { + case DNA: + case RNA: + return(0); + case PROTEIN: + case TEXT: + return('~'); + case MASK: + return('0'); + default: + return('-'); + } + else + return(a->sequence[b-a->offset]); +} +/*------Added by Scott Ferguson, Exxon Research & Engineering Co. ---------*/ +isagap(a,b) +NA_Sequence *a; +int b; +{ + int j,newsize; + NA_Base *temp; + + if (b < a->offset) return(1); + + /* Check to see if base at given position is a gap */ + switch (a->elementtype) { + case MASK: + if (a->sequence[b-a->offset] == '0') return(1); + else return(0); + case DNA: + case RNA: + if (a->sequence[b-a->offset] == '\0') return(1); + else return(0); + case PROTEIN: + if (a->sequence[b-a->offset] == '-') return(1); + else return(0); + case TEXT: + default: + if (a->sequence[b-a->offset] == ' ') return(1); + else return(0); + } +} +/*-END:-Added by Scott Ferguson, Exxon Research & Engineering Co. 
---------*/ + +SubSelect(aln,shift_down,x1,y1,x2,y2) +NA_Alignment *aln; +int shift_down,x1,y1,x2,y2; +{ + int j; + NA_Sequence *next_elem; + + if(aln == NULL) + return; + + if(!shift_down) + for(j=0;jnumelements;j++) + if(aln->element[j].subselected == TRUE) + { + aln->element[j].subselected = FALSE; + RedrawAllNAViews(j,aln->min_subselect); + } + + if(x1==x2 && y1==y2) + { + if(!shift_down) + { + if(aln->selection_mask) + for(j=0;jselection_mask_len;j++) + aln->selection_mask[j] = '0'; + return; + } + else + x1 = aln->min_subselect; + } + + + if(x1>x2) + { + j=x1; + x1=x2; + x2=j; + } + if(y1>y2) + { + j=y1; + y1=y2; + y2=j; + } + if(aln->maxlen > aln->selection_mask_len) + { + if(aln->selection_mask != NULL) + Cfree(aln->selection_mask); + aln->selection_mask = (char*)Calloc(aln->maxlen,sizeof(char)); + aln->selection_mask_len = aln->maxlen; + } + + if(shift_down) + { + /* +* Logical or select within the region +*/ + for(j=x1;j<=x2;j++) + aln->selection_mask[j] = '1'; + + /* +* Logical or select across selected seqeunces +*/ + for(j=y1;j<=y2;j++) + { + aln->element[j].subselected = TRUE; + /* +* Impose groups... +*/ + for(next_elem= &(aln->element[j]); + next_elem!=NULL; next_elem=next_elem->groupf) + next_elem->subselected = TRUE; + + for(next_elem= &(aln->element[j]); + next_elem!=NULL; next_elem=next_elem->groupb) + next_elem->subselected = TRUE; + } + for(j=0;jnumelements;j++) + if(aln->element[j].subselected) + RedrawAllNAViews(j,MIN(x1,aln->min_subselect)); + } + else + { + for(j=0;jselection_mask_len;j++) + if(j>x2 || jselection_mask[j] = '0'; + else + aln->selection_mask[j] = '1'; + + for(j=0;jnumelements;j++) + aln->element[j].subselected = FALSE; + + for(j=0;jnumelements;j++) + if(j<=y2 && j>=y1) + { + aln->element[j].subselected = TRUE; + /* +* Impose groups... 
+*/ + for(next_elem= &(aln->element[j]); + next_elem!=NULL; next_elem=next_elem->groupf) + next_elem->subselected = TRUE; + + for(next_elem= &(aln->element[j]); + next_elem!=NULL; next_elem=next_elem->groupb) + next_elem->subselected = TRUE; + } + + for(j=0;jnumelements;j++) + if(aln->element[j].subselected == TRUE) + RedrawAllNAViews(j,MIN(x1,aln->min_subselect)); + } + if(shift_down) + aln->min_subselect = MIN(x1,aln->min_subselect); + else + aln->min_subselect = x1; + return; +} diff --git a/CORE/Edit.o b/CORE/Edit.o new file mode 100644 index 0000000..838edab Binary files /dev/null and b/CORE/Edit.o differ diff --git a/CORE/EventHandler.c b/CORE/EventHandler.c new file mode 100755 index 0000000..a60d440 --- /dev/null +++ b/CORE/EventHandler.c @@ -0,0 +1,981 @@ +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + + +/* +HandleMenus(): + Callback routine for the menus. Determine what function was called, +and perform the desired operation. + +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. + +*/ + + +void HandleMenus(m,mi) +Menu m; +Menu_item mi; +{ + extern Gmenu menu[]; + extern GmenuItem *current_item; + extern Frame frame,pframe; + extern Panel popup; + extern int num_menus,BlockInput; + + int i,j,k,curmenu,curitem; + Gmenu *thismenu; + GmenuItem *thisitem; + Panel choice; + char *label1; + + + /* +* Find menu, and menu item by searching menu[] and menu[].item[] +* for the called menu item. +*/ + if(xv_get(pframe,WIN_SHOW)) + { + /* +* By returning after destroying the dialog box, a potential +* problem with syncronization is avoided. 
To demonstrate, compile +* without the following "return", and click on a menu item several +* times quickly. The current solution is annoying in that if one +* decides to change menu items without hitting , they must +* hit the menu button twice. +*/ + DONT(); + return; + } + +/* +* Locate menu chosen... +*/ + BlockInput = TRUE; + for(j=0;jitem[j].label) == 0) + curitem = j; + + thisitem = &(thismenu->item[curitem]); + xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL,0); + + +/* +* Create a temporary dialog popup, and set all of the calling +* arguements by dialog box returned values. +*/ + +/* +* For all needed arguments... +*/ + for(j=0;jnumargs;j++) + { +/* +* Create a prompt for the argument +*/ + switch (thisitem->arg[j].type) + { + case SLIDER: + thisitem->arg[j].X=xv_create(popup,PANEL_SLIDER, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_MIN_VALUE,thisitem->arg[j].min, + PANEL_MAX_VALUE,thisitem->arg[j].max, + PANEL_VALUE,thisitem->arg[j].value, + PANEL_NOTIFY_PROC, HandleMenuItem, + 0); + break; + + case TEXTFIELD: + thisitem->arg[j].X = xv_create(popup,PANEL_TEXT, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_VALUE_DISPLAY_LENGTH,32, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_VALUE,thisitem->arg[j].textvalue, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_NOTIFY_PROC, HandleMenuItem, + 0); + break; + + case CHOOSER: + thisitem->arg[j].X=xv_create(popup, + PANEL_CHOICE, + PANEL_LAYOUT,PANEL_HORIZONTAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_NOTIFY_PROC, HandleMenuItem, + PANEL_CHOICE_STRING, + 0,thisitem->arg[j].choice[0].label, + 0); + for(i=1;iarg[j].numchoices;i++) + xv_set(thisitem->arg[j].X, + PANEL_CHOICE_STRING, i, + thisitem->arg[j].choice[i].label, + 0); + xv_set(thisitem->arg[j].X, + PANEL_VALUE,thisitem->arg[j].value, + 0); + break; + case CHOICE_LIST: + thisitem->arg[j].X=xv_create(popup, + PANEL_LIST, + PANEL_LAYOUT, PANEL_VERTICAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_DISPLAY_ROWS,3, + 
PANEL_NOTIFY_PROC, HandleMenuItem, + PANEL_LIST_STRING, + 0,thisitem->arg[j].choice[0].label, + 0); + for(i=1;iarg[j].numchoices;i++) + xv_set(thisitem->arg[j].X, + PANEL_LIST_STRING, i, + thisitem->arg[j].choice[i].label, + 0); + xv_set(thisitem->arg[j].X, + PANEL_VALUE,thisitem->arg[j].value, + 0); + break; + case CHOICE_MENU: + thisitem->arg[j].X=xv_create(popup, + PANEL_CHOICE_STACK, + PANEL_LAYOUT,PANEL_HORIZONTAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_NOTIFY_PROC, HandleMenuItem, + PANEL_CHOICE_STRING, + 0,thisitem->arg[j].choice[0].label, + 0); + for(i=1;iarg[j].numchoices;i++) + xv_set(thisitem->arg[j].X, + PANEL_CHOICE_STRING, i, + thisitem->arg[j].choice[i].label, + 0); + xv_set(thisitem->arg[j].X, + PANEL_VALUE,thisitem->arg[j].value, + 0); + break; + + + default: + break; + }; + } + + xv_set(pframe,FRAME_LABEL,thisitem->label, + WIN_DESIRED_HEIGHT,1000, + WIN_DESIRED_WIDTH,1000, +/* + I worry about this one, but a true dialog should not + allow you to do anything other than respond to it. +*/ + + WIN_GRAB_ALL_INPUT,TRUE, + 0); + + current_item = thisitem; + +/* +* Fit it, and show it +*/ + window_fit(popup); + window_fit(pframe); + if((thisitem->numargs >0) || (thisitem->help !=NULL)) + xv_set(pframe,WIN_SHOW,TRUE,0); + else + DO(); + return; +} + + +/* +HandleMenuItem(): + Callback routine for buttons etc. in the dialog box. Store the +values returned from the dialog box so that they can be used for calling +the external function. + +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. 
+ +*/ + +HandleMenuItem(item,event) +Panel_item item; +Event *event; +{ + int i,j,thisarg; + extern GmenuItem *current_item; + + Panel_setting ps; +/* +* Find which value was modified... +*/ + for(j=0;jnumargs;j++) + if(item == current_item->arg[j].X) + thisarg = j; +/* +* and store the new value. +*/ + switch(current_item->arg[thisarg].type) + { + case CHOICE_LIST: + for(j=0;j < (int)xv_get(item,PANEL_LIST_NROWS);j++) + { + if((int)xv_get(item, PANEL_LIST_SELECTED, j) ) + current_item->arg[thisarg].value = j; + } + break; + case CHOICE_MENU: + current_item->arg[thisarg].value = + (int)xv_get(item,PANEL_VALUE); + break; + case CHOOSER: + current_item->arg[thisarg].value = + (int)xv_get(item,PANEL_VALUE); + break; + case SLIDER: + current_item->arg[thisarg].value = + (int)xv_get(item,PANEL_VALUE); + break; + case TEXTFIELD: + ps = panel_text_notify(item,event); + strcpy(current_item->arg[thisarg].textvalue, + (char*)xv_get(item,PANEL_VALUE)); + return(ps); + break; + default: + Error("Menu argument type invalid"); + } + return; +} + + +/* +DO(): + Call external function. + +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + +DO() +{ + extern GmenuItem *current_item; /* rtm 18.III.98 */ + extern int BlockInput; /* rtm 18.III.98 */ + extern Frame pframe,frame; + extern Panel popup; + extern NA_Alignment *DataSet; + extern char current_dir[]; + extern int OVERWRITE; + + int i,j,k,flag,select_mode; + static int fileindx = 0; + char *Action,buffer[GBUFSIZ],temp[80]; + + +/* +* Remove dialog..... 
+*/ + flag = FALSE; + for(j=0;jnuminputs;j++) + if(current_item->input[j].format != STATUS_FILE) + flag = TRUE; + + if(flag && DataSet) + select_mode = TestSelection(); + +/* + Make sure that we are still in a writeable directory +*/ + (void)chdir(current_dir); + for(j=0;jnuminputs;j++) + { + sprintf(buffer,"gde%d_%d",getpid(),fileindx++); + current_item->input[j].name = String(buffer); + switch(current_item->input[j].format) + { + case COLORMASK: + WriteCMask(DataSet,buffer,select_mode, + current_item->input[j].maskable); + break; + case GENBANK: + WriteGen(DataSet,buffer,select_mode, + current_item->input[j].maskable); + break; + case NA_FLAT: + WriteNA_Flat(DataSet,buffer,select_mode, + current_item->input[j].maskable); + break; + case STATUS_FILE: + WriteStatus(DataSet,buffer,select_mode); + break; + case GDE: + WriteGDE(DataSet,buffer,select_mode, + current_item->input[j].maskable); + break; + default: + break; + } + } + + for(j=0;jnumoutputs;j++) + { + sprintf(buffer,"gde%d_%d",getpid(),fileindx++); + current_item->output[j].name = String(buffer); + } + + xv_destroy_safe(pframe); + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + +/* +* Reset dialog for next call... 
+*/ + + popup = xv_get(pframe,FRAME_CMD_PANEL); + + xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"HELP", + PANEL_NOTIFY_PROC,HELP, + 0); + + xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,DO, + 0); + + xv_create(popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Cancel", + PANEL_NOTIFY_PROC,DONT, + 0); + + /* +* Create the command line for external the function call +*/ + Action = (char*)strdup(current_item->method); + if(Action == NULL) + Error("DO(): Error in duplicating method string"); + for(j=0;jnumargs;j++) + Action = ReplaceArgs(Action,current_item->arg[j]); + + for(j=0;jnuminputs;j++) + Action = ReplaceFile(Action,current_item->input[j]); + + for(j=0;jnumoutputs;j++) + Action = ReplaceFile(Action,current_item->output[j]); + + + + /* +* call and go... +*/ + + xv_set(pframe,FRAME_BUSY,TRUE,0); + xv_set(frame,FRAME_BUSY,TRUE,0); + system(Action); + cfree(Action); + xv_set(pframe,FRAME_BUSY,FALSE,0); + xv_set(frame,FRAME_BUSY,FALSE,0); + BlockInput = FALSE; + + for(j=0;jnumoutputs;j++) + { + if(current_item->output[j].overwrite) + { + if(current_item->output[j].format == GDE) + OVERWRITE = TRUE; + else + Warning("Overwrite mode only available for GDE format"); + } + switch(current_item->output[j].format) + { +/* +* The LoadData routine must be reworked so that +* OpenFileName uses it, and so I can remove the +* major kluge in OpenFileName(). 
+*/ + case GENBANK: + case NA_FLAT: + case GDE: + OpenFileName(current_item->output[j].name,NULL); + break; + case COLORMASK: + ReadCMask(current_item->output[j].name); + break; + case STATUS_FILE: + ReadStatus(current_item->output[j].name); + break; + default: + break; + } + OVERWRITE = FALSE; + } + for(j=0;jnumoutputs;j++) + { + if(!current_item->output[j].save) + { + sprintf(buffer,"/bin/rm -f %s", + current_item->output[j].name); + system(buffer); + } + } + + for(j=0;jnuminputs;j++) + { + if(!current_item->input[j].save) + { + sprintf(buffer,"/bin/rm -f %s", + current_item->input[j].name); + system(buffer); + } + } + return; +} + +
/*
ReplaceFile():
	Replace every occurrence of a file symbol in the command line with
the file name recorded for that input/output file.

ReplaceArgs():
	Replace all command line arguments with the appropriate values
stored for the chosen menu item.

Both routines take ownership of Action (heap memory), release it each time
a substitution is made, and return the string the caller must use (and
eventually free) from then on.

Copyright (c) 1989-1990, University of Illinois board of trustees.  All
rights reserved.  Written by Steven Smith at the Center for Prokaryote Genome
Analysis.  Design and implementation guidance by Dr. Gary Olsen and Dr.
Carl Woese.

Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
All rights reserved.

*/

char *ReplaceFile(Action,file)
char *Action;
GfileFormat file;
{
	char *symbol,*method,*temp;
	int i,newlen;

	symbol = file.symbol;
	method = file.name;

	if(method == NULL)
		method = "";

	/*
	*	Nothing can be substituted for a missing or empty symbol.  Bail
	*	out before strlen(NULL), and before a search for "" that could
	*	keep "matching" forever.
	*/
	if(symbol == NULL || symbol[0] == '\0')
		return(Action);

	/*
	*	Find2() is defined elsewhere; it is used here as "offset of the
	*	first match, or -1 when there is none".
	*/
	for(; (i=Find2(Action,symbol)) != -1;)
	{
		/* old text - symbol + replacement + terminating NUL */
		newlen = strlen(Action)-strlen(symbol) + strlen(method)+1;
		temp = calloc(newlen,1);
		if (temp == NULL)
			Error("ReplaceFile():Error in calloc");

		/* calloc() zero-filled temp, so it starts out as "" */
		strncat(temp,Action,i);
		strncat(temp,method,strlen(method));
		strcat( temp,&(Action[i+strlen(symbol)]) );

		/* free() replaces the obsolete cfree() */
		free(Action);
		Action = temp;
	}
	return(Action);
}


char *ReplaceArgs(Action,arg)
char *Action;
GmenuItemArg arg;
{
	/*
	*	The basic idea is to replace all of the symbols in the method
	*	string with the values picked in the dialog box.  The method
	*	is the general command line structure.  All arguments have three
	*	parts, a label, a method, and a value.  The method never changes,
	*	and is used to represent '-flag's for a given function.  Values
	*	are the associated arguments that some flags require.  All symbols
	*	that require argvalue replacement should have a '$' in front of
	*	the symbol name in the itemmethod definition.  All symbols without
	*	the '$' will be replaced by their argmethod.  There is currently
	*	no way to do a label replacement, as the label is considered to be
	*	for use in the dialog box only.  An example command line
	*	replacement would be:
	*
	*		itemmethod=>		"lpr arg1 $arg1 $arg2"
	*
	*		arglabel arg1=>		"To printer?"
	*		argmethod arg1=>	"-P"
	*		argvalue arg1=>		"lw"
	*
	*		arglabel arg2=>		"File name?"
	*		argvalue arg2=>		"foobar"
	*		argmethod arg2=>	""
	*
	*	final command line:
	*
	*		lpr -P lw foobar
	*
	*	At this point, the chooser dialog types only support the arglabel
	*	and argmethod fields: for CHOOSER, CHOICE_MENU and CHOICE_LIST
	*	arguments both "sym" and "$sym" are replaced by the method of the
	*	currently selected choice.  The format in the .GDEmenus file is
	*	slightly different as well.  A choice from a chooser field looks
	*	like:
	*
	*		argchoice:Argument_label:Argument_method
	*/
	char *symbol,*method,*textvalue,buf2[GBUFSIZ],*temp;
	int i,newlen,type;

	symbol = arg.symbol;
	method = arg.method;
	textvalue = arg.textvalue;
	type = arg.type;

	if(type == SLIDER)
	{
		/* a slider's value is its integer position, printed in decimal */
		textvalue = buf2;
		sprintf(buf2,"%d",arg.value);
	}
	else if((type == CHOOSER) || (type == CHOICE_MENU) || (type == CHOICE_LIST))
	{
		method = arg.choice[arg.value].method;
		textvalue = arg.choice[arg.value].method;
	}

	if(textvalue == NULL)
		textvalue="";

	if(method == NULL)
		method="";

	/*
	*	An argument without a symbol has nothing to replace.  Searching
	*	for "" could otherwise "match" on every pass and never terminate.
	*/
	if(symbol == NULL || symbol[0] == '\0')
		return(Action);

	for(; (i=Find2(Action,symbol)) != -1;)
	{
		if(i>0 && Action[i-1] =='$' )
		{
			/*
			*	"$symbol" -> value.  The '$' that is dropped pays
			*	for the terminating NUL, hence no "+1" here.
			*/
			newlen = strlen(Action)-strlen(symbol)
			    +strlen(textvalue);
			temp = calloc(newlen,1);
			if (temp == NULL)
				Error("ReplaceArgs():Error in calloc");
			strncat(temp,Action,i-1);
			strncat(temp,textvalue,strlen(textvalue));
			strcat( temp,&(Action[i+strlen(symbol)]) );
			free(Action);
			Action = temp;
		}
		else
		{
			/* bare "symbol" -> method (the '-flag' text) */
			newlen = strlen(Action)-strlen(symbol)
			    +strlen(method)+1;
			temp = calloc(newlen,1);
			if (temp == NULL)
				Error("ReplaceArgs():Error in calloc");
			strncat(temp,Action,i);
			strncat(temp,method,strlen(method));
			strcat( temp,&(Action[i+strlen(symbol)]) );
			free(Action);
			Action = temp;
		}
	}
	return(Action);
}




/*
DONT():
	Dont execute the command associated with the current dialog box.
This function corresponds to the button on the dialog box.

Copyright (c) 1989-1990, University of Illinois board of trustees.  All
rights reserved.  Written by Steven Smith at the Center for Prokaryote Genome
Analysis.  Design and implementation guidance by Dr. Gary Olsen and Dr.
Carl Woese.

Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
All rights reserved.
+ +*/ + +DONT(item,event) +Panel_item item; +Event *event; +{ + extern Frame pframe,frame; + extern Panel popup; + extern int BlockInput; + int i,j,k; + + /* +* Reset the dialog box, andf remove it. +*/ + xv_destroy_safe(pframe); + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + FRAME_SHOW_RESIZE_CORNER,FALSE, + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + + popup = xv_get(pframe,FRAME_CMD_PANEL); +/* + popup = xv_create(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); +*/ + + xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"HELP", + PANEL_NOTIFY_PROC,HELP, + 0); + + xv_create (popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,DO, + 0); + + xv_create(popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Cancel", + PANEL_NOTIFY_PROC,DONT, + 0); + + BlockInput = FALSE; + + return; +} + +FrameDone(this_frame) +Frame this_frame; +{ + extern Frame pframe; + if(this_frame == pframe) + DONT(NULL,NULL); + + return(XV_OK); +} +NANameEvents(win,event,arg) +Xv_window win; +Event *event; +Notify_arg arg; +{ + extern int EditMode; + extern NA_Alignment *DataSet; + NA_DisplayData *ddata; + NA_Alignment *aln; + NA_Sequence *this_seq; + extern int first_select,BlockInput; + int i,j,x,y,redraw = FALSE; + + if(DataSet == NULL || BlockInput) + return; + + aln = (NA_Alignment*)DataSet; + ddata = (NA_DisplayData*)(aln->na_ddata); + + if(ddata == NULL) + return; + + x=event_x(event)/ddata->font_dx; + y=event_y(event)/ddata->font_dy + + ddata->top_seq; + + y=MIN(y,aln->numelements - 1); + y=MAX(y,0); + + this_seq = &(aln->element[y]); + + if (event_id(event) == LOC_WINENTER) + win_set_kbd_focus(win,(Window)xv_get(win,XV_XID)); + else if(event_is_down(event) && event_is_ascii(event) && + event_meta_is_down(event)) + DoMeta(event_id(event)); + + else if(!event_is_up(event)) + { + switch (event_action(event)) + { + case ACTION_SELECT: + if(!event_shift_is_down(event)) + { + 
for(j=0;jnumelements;j++) + aln->element[j]. + selected = FALSE; + redraw = TRUE; + } + + if(x<=strlen(this_seq->short_name)) + { + redraw = TRUE; + first_select = y; + } + else + first_select = -1; + break; + default: + break; + } + } + else if(first_select != -1) + switch (event_action(event)) + { + case ACTION_SELECT: + if(!event_shift_is_down(event)) + { + for(j=0;jnumelements;j++) + aln->element[j].selected + = FALSE; + } + if(x<=strlen(this_seq->short_name)) + { + for(j=MIN(first_select,y); + j<=MAX(first_select,y);j++) + aln->element[j].selected = + aln->element[j].selected ? + FALSE:TRUE; + redraw = TRUE; + } + break; + default: + break; + } + + if(redraw) + DrawNANames(xv_get(win,XV_DISPLAY),xv_get(win,XV_XID)); + return; +} + + +DoMeta(Code) +int Code; +{ + + int k,j; + extern int num_menus; + extern Gmenu menu[]; + + for(j=0;jitem[curitem]); + xv_set(popup,PANEL_LAYOUT,PANEL_VERTICAL,0); + + for(j=0;jnumargs;j++) + { + /* +* Create a prompt for the argument +*/ + switch (thisitem->arg[j].type) + { + case SLIDER: + thisitem->arg[j].X=xv_create(popup,PANEL_SLIDER, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_MIN_VALUE,thisitem->arg[j].min, + PANEL_MAX_VALUE,thisitem->arg[j].max, + PANEL_VALUE,thisitem->arg[j].value, + PANEL_NOTIFY_PROC, HandleMenuItem, + PANEL_TICKS,10, + 0); + break; + + case TEXTFIELD: + thisitem->arg[j].X = xv_create(popup,PANEL_TEXT, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_VALUE_DISPLAY_LENGTH,32, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_VALUE,thisitem->arg[j].textvalue, + PANEL_NOTIFY_LEVEL,PANEL_ALL, + PANEL_NOTIFY_PROC, HandleMenuItem, + 0); + break; + + case CHOOSER: + thisitem->arg[j].X=xv_create(popup, + PANEL_CHOICE, + PANEL_LAYOUT,PANEL_HORIZONTAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_NOTIFY_PROC, HandleMenuItem, + PANEL_CHOICE_STRING, + 0,thisitem->arg[j].choice[0].label, + 0); + for(i=1;iarg[j].numchoices;i++) + xv_set(thisitem->arg[j].X, + PANEL_CHOICE_STRING, 
i, + thisitem->arg[j].choice[i].label, + 0); + xv_set(thisitem->arg[j].X, + PANEL_VALUE,thisitem->arg[j].value, + 0); + break; + case CHOICE_MENU: + thisitem->arg[j].X=xv_create(popup, + PANEL_CHOICE_STACK, + PANEL_LAYOUT,PANEL_VERTICAL, + PANEL_LABEL_STRING,thisitem->arg[j].label, + PANEL_NOTIFY_PROC, HandleMenuItem, + PANEL_CHOICE_STRING, + 0,thisitem->arg[j].choice[0].label, + 0); + for(i=1;iarg[j].numchoices;i++) + xv_set(thisitem->arg[j].X, + PANEL_CHOICE_STRING, i, + thisitem->arg[j].choice[i].label, + 0); + xv_set(thisitem->arg[j].X, + PANEL_VALUE,thisitem->arg[j].value, + 0); + break; + + + default: + break; + }; + } + + xv_set(pframe,FRAME_LABEL,thisitem->label, + WIN_DESIRED_HEIGHT,1000, + WIN_DESIRED_WIDTH,1000, + WIN_GRAB_ALL_INPUT,TRUE, + 0); + + current_item = thisitem; + + /* +* Fit it, and show it +*/ + window_fit(popup); + window_fit(pframe); + if(thisitem->numargs >0) + xv_set(pframe,WIN_SHOW,TRUE,0); + else + DO(); + return; +} + +HELP(item,event) +Panel_item item; +Event *event; +{ + extern GmenuItem *current_item; + extern Frame pframe; + extern Panel popup; + FILE *file; + char help_file[1024]; + + if(current_item->help == NULL) + { + Warning("Cannot open help file"); + return; + } + strncpy(help_file,current_item->help,1023); + file = fopen(help_file,"r"); + if((file == NULL) && (getenv("GDE_HELP_DIR") != NULL)) + { + strncpy(help_file,getenv("GDE_HELP_DIR"),1023); + strncat(help_file,"/",1023 - strlen(help_file)); + strncat(help_file,current_item->help,1023 - strlen(help_file)); + } + + file = fopen(help_file,"r"); + + if(file == NULL) + { + Warning("Cannot find help file"); + return; + } + + fclose(file); + window_fit( xv_create(pframe,TEXTSW, + WIN_INHERIT_COLORS,TRUE, + WIN_BELOW,popup, + TEXTSW_READ_ONLY,TRUE, + XV_HEIGHT,180, + XV_WIDTH,80*8, + TEXTSW_FILE,help_file, + 0) + ); + window_fit(pframe); + xv_set(item,PANEL_INACTIVE,TRUE,0); + + return; +} + diff --git a/CORE/EventHandler.o b/CORE/EventHandler.o new file mode 100644 index 
0000000..b6a4ed0 Binary files /dev/null and b/CORE/EventHandler.o differ diff --git a/CORE/FileIO.c b/CORE/FileIO.c new file mode 100755 index 0000000..437e617 --- /dev/null +++ b/CORE/FileIO.c @@ -0,0 +1,1056 @@ +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +LoadData(): + Load a data set from the command line argument. + +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. + +*/ + +LoadData(filename) +char *filename; +{ + extern NA_Alignment *DataSet; + extern int DataType,FileFormat,Default_DNA_Trans[],Default_RNA_Trans[]; + extern int Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[]; + + extern Frame frame; + extern Canvas EditCan,EditNameCan; + extern char FileName[]; + FILE *file; + NA_Alignment *DataNaAln; + char temp[1024]; +/* +* Get file name, determine the file type, and away we go.. +*/ + if(Find2(filename,"gde")!=0) + strcpy(FileName,filename); + if( (file=fopen(filename,"r"))!=0 ) + { + FindType(filename,&DataType,&FileFormat); + switch(DataType) + { + case NASEQ_ALIGN: + if(DataSet == NULL) + { + DataSet = (NA_Alignment*)Calloc(1, + sizeof(NA_Alignment)); + DataNaAln =(NA_Alignment*)DataSet; + DataSet->rel_offset = 0; + } + else + DataNaAln = (NA_Alignment*)DataSet; + + LoadFile(filename,DataNaAln, + DataType,FileFormat); + + break; + default: + break; + } + } + fclose(file); + sprintf(temp,"Genetic Data Environment 2.2 for HIV research(%s)",FileName); + xv_set(frame, + FRAME_LABEL, temp, + 0); + return; +} + + +/* +LoadFile(): + Load the given filename into the given dataset. Handle any +type conversion needed to get the data into the specified data type. 
+This routine is used in situations where the format and datatype is known. + +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + +LoadFile(filename,dataset,type,format) +char *filename; +char *dataset; +int type,format; +{ + extern int DataType; + + if (DataType != type) + fprintf(stderr,"Warning, datatypes do not match.\n"); +/* +Handle the overwrite/create/merge dialog here. +*/ + switch(format) + { + case NA_FLAT: + ReadNA_Flat(filename,dataset,type); + ((NA_Alignment*)dataset)->format = GDE; + break; + + case GENBANK: + ReadGen(filename,dataset,type); + ((NA_Alignment*)dataset)->format = GENBANK; + break; + + case GDE: + ReadGDE(filename,dataset,type); + ((NA_Alignment*)dataset)->format = GDE; + break; + case COLORMASK: + ReadCMask(filename); + + default: + break; + } + return; +} + + + +/* +* Print error message, and die +*/ +ErrorOut(code,string) +int code; +char *string; +{ + if (code == 0) + { + fprintf(stderr,"Error:%s\n",string); + exit(1); + } + return; +} + + +/* +* More robust memory management routines +*/ +char *Calloc(count,size) +int count,size; +{ + char *temp; +#ifdef SeeAlloc + extern int TotalCalloc; + TotalCalloc += count*size; + fprintf(stderr,"Calloc %d %d\n",count*size,TotalCalloc); +#endif + temp = calloc(count,size); + ErrorOut(temp,"Cannot allocate memory"); + return(temp); +} + +char *Realloc(block,size) +char *block; +int size; +{ + char *temp; +#ifdef SeeAlloc + extern int TotalRealloc; + TotalRealloc += size; + fprintf(stderr,"Realloc %d\n",TotalRealloc); +#endif + temp=realloc(block,size); + ErrorOut(temp,"Cannot change memory size"); + return(temp); +} + +Cfree(block) +char* block; +{ + if (block) + { + /* rtm 18.III.98 + FileIO.c: In function 
`Cfree': + FileIO.c:181: void value not ignored as it ought to be + + if(cfree(block) == 0) + Warning("Error in Cfree..."); + */ + cfree(block); + } + else + Warning("Error in Cfree, NULL block"); + return; +} + + + +/* +* same as strdup +*/ +char *String(string) +char *string; +{ + char *temp; + + temp = Calloc(strlen(string)+1,sizeof(char)); + strcpy(temp,string); + return(temp); +} + + +FindType(name,dtype,ftype) +char *name; +int *dtype,*ftype; +{ + FILE *file; + char Inline[GBUFSIZ]; + + file = fopen(name,"r"); + *dtype=0; + *ftype=0; + + if (file == NULL) + return(1); + + /* +* Is this a flat file? +* Get the first non blank line, see if a type marker shows up. +*/ + fgets(Inline,GBUFSIZ,file); + for(;strlen(Inline)<2 && fgets(Inline,GBUFSIZ,file) != NULL;); + if(Inline[0] == '#' || Inline[0] == '%' || + Inline[0] == '"' || Inline[0] == '@' ) + { + *dtype=NASEQ_ALIGN; + *ftype=NA_FLAT; + } + + /* +* Else, try genbank +*/ + else + { + fclose(file); + file = fopen(name,"r"); + *dtype=0; + *ftype=0; + + if (file == NULL) + return(1); + + for(;fgets(Inline,GBUFSIZ,file) != NULL;) + if(Find(Inline,"LOCUS")) + { + *dtype=NASEQ_ALIGN; + *ftype=GENBANK; + fclose(file); + return(0); + } + /* +* and last, try GDE +*/ + else if(Find(Inline,"sequence")) + { + *dtype = NASEQ_ALIGN; + *ftype = GDE; + fclose(file); + return(0); + } + else if(Find(Inline,"start:")) + { + *dtype = NASEQ_ALIGN; + *ftype = COLORMASK; + fclose(file); + return(0); + } + } + + fclose(file); + return(0); +} + +AppendNA(buffer,len,seq) +NA_Base *buffer; +int len; +NA_Sequence *seq; +{ + int curlen=0,j; + NA_Base *temp; + + if(seq->seqlen+len >= seq->seqmaxlen) + { + if(seq->seqlen>0) + seq->sequence = (NA_Base*)Realloc(seq->sequence, + (seq->seqlen + len+GBUFSIZ) * sizeof(NA_Base)); + else + seq->sequence = (NA_Base*)Calloc(1,(seq->seqlen + + len+GBUFSIZ) * sizeof(NA_Base)); + seq->seqmaxlen = seq->seqlen + len+GBUFSIZ; + } + /* +* seqlen is the length, and the index of the next free +* base +*/ + 
curlen = seq->seqlen + seq->offset; + for(j=0;jseqlen += len; + return; +} + +Ascii2NA(buffer,len,matrix) +char *buffer; +int len; +int matrix[16]; +{ + /* +* if the translation matrix exists, use it to +* encode the buffer. +*/ + register i; + if(matrix != NULL) + for(i=0;inumelements == (int) NULL) + return; + seqs = aln->element; + + file = fopen(filename,"w"); + if(file == NULL) + { + Warning("Cannot open file for output"); + return(1); + } + if(maskable && (method != SELECT_REGION)) + { + for(j=0;jnumelements;j++) + if(seqs[j].elementtype == MASK && + seqs[j].selected) + mask = j; + } + for(j=0;jnumelements;j++) + { + SeqNorm(&(seqs[j])); + } + + for(j=0;jnumelements;j++) + { + if(method != SELECT_REGION) + offset = seqs[j].offset; + else + for(offset=seqs[j].offset; + aln->selection_mask[offset] == '0'; + offset++); + + if(offset+aln->rel_offset != 0) + sprintf(offset_str,"(%d)",offset+aln->rel_offset); + else + offset_str[0] = '\0'; + + if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) + || (method == SELECT_REGION && seqs[j].subselected) + || method == ALL) + { + fprintf(file,"%c%s%s\n", + seqs[j].elementtype == DNA?'#': + seqs[j].elementtype == RNA?'#': + seqs[j].elementtype == PROTEIN?'%': + seqs[j].elementtype == TEXT?'"': + seqs[j].elementtype == MASK?'@':'"', + seqs[j].short_name, + (offset+aln->rel_offset == 0)? 
"":offset_str); + if(seqs[j].tmatrix) + { + if(mask == -1) + for(k=0,kk=0;kk0) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + if(method == SELECT_REGION) + { + if(aln->selection_mask[kk+offset]=='1') + { + buf[k%60] =((char)seqs[j].tmatrix[ + (int)getelem( &(seqs[j]),kk+offset) ]); + k++; + } + } + else + { + buf[k%60] =((char)seqs[j].tmatrix[ + (int)getelem( &(seqs[j]),kk+offset) ]); + k++; + } + } + else + for(k=0,kk=0;kk1) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + buf[k%60] = ((char)seqs[j].tmatrix + [getelem(&(seqs[j]),kk+offset)]); + } + } + } + else + { + if(mask == -1) + for(k=0,kk=0;kk0) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + if(method == SELECT_REGION) + { + if(aln->selection_mask[kk+offset]=='1') + { + buf[k%60] =(getelem( &(seqs[j]),kk+offset)); + k++; + } + } + else + { + buf[k%60] =( getelem( &(seqs[j]),kk+offset) ); + k++; + } + } + else + for(k=0,kk=0;kk1) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + buf[k%60] =((char)getelem(&(seqs[j]), + kk+offset)); + } + } + } + buf[(k%60)>0 ? 
(k%60):60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + } + fclose(file); + return(0); +} + + +Warning(s) +char *s; +{ + extern Frame frame; + extern Panel_item left_foot,right_foot; + Beep(); + xv_set(frame,FRAME_RIGHT_FOOTER,s,0); + xv_set(right_foot,PANEL_LABEL_STRING,s,0); +} + + +InitNASeq(seq,type) +NA_Sequence *seq; +int type; +{ + extern int Default_RNA_Trans[]; /* rtm 18.III.98 */ + extern int Default_DNA_Trans[],Default_NA_RTrans[]; + extern int + Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[]; + + SetTime(&(seq->t_stamp.origin)); + SetTime(&(seq->t_stamp.modify)); + strncpy(seq->id,uniqueID(),79); + seq->seq_name[0] = '\0'; + seq->barcode[0] = '\0'; + seq->contig[0] = '\0'; + seq->membrane[0] = '\0'; + seq->authority[0] = '\0'; + seq->short_name[0] = '\0'; + seq->sequence = NULL; + seq->offset = 0; + seq->baggage = NULL; + seq->baggage_len = 0; + seq->baggage_maxlen = 0; + seq->comments = NULL; + seq->comments_len = 0; + seq->comments_maxlen = 0; + seq->description[0] = '\0'; + seq->mask = NULL; + seq->seqlen = 0; + seq->seqmaxlen = 0; + seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION; +#ifdef HGL + seq->attr = 0; +#else + seq->attr = IS_5_TO_3 + IS_PRIMARY; +#endif + seq->elementtype = type; + seq->groupid = 0; + seq->groupb = NULL; + seq->groupf = NULL; + seq->cmask = NULL; + seq->selected = 0; + seq->subselected = 0; + + switch (type) + { + case DNA: + seq->tmatrix = Default_DNA_Trans; + seq->rmatrix = Default_NA_RTrans; + seq->col_lut = Default_NAColor_LKUP; + break; + case RNA: + seq->tmatrix = Default_RNA_Trans; + seq->rmatrix = Default_NA_RTrans; + seq->col_lut = Default_NAColor_LKUP; + break; + case PROTEIN: + seq->tmatrix = NULL; + seq->rmatrix = NULL; + seq->col_lut = Default_PROColor_LKUP; + break; + case MASK: + case TEXT: + default: + seq->tmatrix = NULL; + seq->rmatrix = NULL; + seq->col_lut = NULL; + break; + } + return; +} + + +ReadCMask(filename) +char *filename; +{ + extern Frame frame; + extern 
NA_Alignment *DataSet; + + char Inline[GBUFSIZ],head[GBUFSIZ],curname[GBUFSIZ], + temp[GBUFSIZ]; + int IGNORE_DASH = FALSE,offset; + NA_DisplayData *NAdd; + NA_Alignment *aln; + + int i,j,k,curlen = 0,*colors,orig_ctype,jj,indx = 0; + FILE *file; + + if(DataSet == NULL) return; + + NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; + + if(NAdd == NULL) + return; + + aln = (NA_Alignment*)DataSet; + + curname[0] = '\0'; + orig_ctype = NAdd->color_type; + file = fopen(filename,"r"); + if(file == NULL) + { + Warning("File not found"); + Warning(filename); + return; + } + + NAdd->color_type = COLOR_ALN_MASK; + for(;fgets(Inline,GBUFSIZ,file) !=0;) + { + if(Find(Inline,"offset:")) + { + crop(Inline,head,temp); + sscanf(temp,"%d",&(aln->cmask_offset)); + } + else if(Find(Inline,"nodash:")) + IGNORE_DASH = TRUE; + else if(Find(Inline,"dash:")) + IGNORE_DASH = TRUE; + else if(Find(Inline,"name:")) + { + crop(Inline,head,curname); + curname[strlen(curname)-1] = '\0'; + for(j=0;jcolor_type = orig_ctype; + return; + } + if(strlen(curname) != 0) + { + indx = -1; + for(j=0;jnumelements;j++) + if(Find(aln->element[j].short_name,curname) + || Find(aln->element[j].id,curname)) + { + if(aln->element[j].cmask != NULL) + Cfree(aln -> element[j].cmask); + colors=(int*)Calloc(aln->element[j] + .seqmaxlen+1+aln->element[j].offset + ,sizeof(int)); + aln->element[j].cmask = colors; + NAdd->color_type = COLOR_SEQ_MASK; + indx = j; + j = aln->numelements; + } + if(indx == -1) + colors=NULL; + } + else + { + if(aln->cmask != NULL) Cfree(aln->cmask); + colors=(int*)Calloc(curlen,sizeof(int)); + aln->cmask = colors; + aln->cmask_len = curlen; + NAdd->color_type = COLOR_ALN_MASK; + for(j=0;jelement[indx].seqlen);j++,jj++) + { + offset = aln->element[indx].offset; + if(fgets(Inline,GBUFSIZ,file)==NULL) + { + Warning + ("illegal format in colormask"); + NAdd->color_type = orig_ctype; + return; + } +/* +* Fixed so that the keyword nodash causes the colormask to be mapped +* to the 
sequence, not the alignment. +* +* The allocated space is equal the seqlen of the matched sequence. +* +*/ + if(aln->element[indx].tmatrix) + for(;(getelem(&(aln->element[indx]),jj + +offset) + ==(aln->element[indx].tmatrix['-']) + || (getelem(&(aln->element[indx]),jj + +offset) + ==aln->element[indx].tmatrix['~'])) + && jj < aln->element[indx].seqlen;) + colors[jj++] = 12; + else + for(;getelem(&(aln->element[indx]),jj + +offset) + =='-' && jj < aln->element[indx].seqlen;) + colors[jj++] = 12; + + sscanf(Inline,"%d",&(colors[jj])); + } + } + else if((indx == -1) && (strlen(curname) != 0)) + for(j=0;jcolor_type = orig_ctype; + return; + } + sscanf(Inline,"%d",&(colors[j])); + } + IGNORE_DASH = FALSE; + curname[0] = '\0'; + } + + } + RepaintAll(TRUE); + return; +} + + +ReadNA_Flat(filename,dataset,type) +char *filename; +char *dataset; +int type; +{ + int i, j, jj, c, curelem,offset; + char name[GBUFSIZ]; + char buffer[GBUFSIZ]; + char origin[GBUFSIZ],ref[GBUFSIZ]; + char Inline[GBUFSIZ],head[GBUFSIZ],tail[GBUFSIZ],temp[GBUFSIZ]; + char curname[GBUFSIZ]; + + NA_Sequence *this_elem; + NA_Alignment *data; + extern int Default_DNA_Trans[],Default_RNA_Trans[],Default_NA_RTrans[]; + + FILE *file; + + curname[0] = '\0'; + data = (NA_Alignment*)dataset; + + file = fopen(filename,"r"); + if(file == NULL) + { + fprintf(stderr,"Cannot open %s.\n",filename); + return; + } + for(;fgets(Inline,GBUFSIZ,file) !=0;) + { + if( + Inline[0] == '#' || + Inline[0] == '%' || + Inline[0] == '"' || + Inline[0] == '@' + ) + { + offset = 0; + for(j=0;jnumelements++; + if( curelem == 0 ) + { + data->element=(NA_Sequence*) + Calloc(5,sizeof(NA_Sequence)); + data->maxnumelements = 5; + } + else if (curelem==data->maxnumelements) + { + (data->maxnumelements) *= 2; + data->element= + (NA_Sequence*)Realloc(data->element + ,data->maxnumelements*sizeof(NA_Sequence)); + } + + InitNASeq(&(data->element[curelem]), + Inline[0] == '#'?DNA: + Inline[0] == '%'?PROTEIN: + Inline[0] == '"'?TEXT: + Inline[0] 
== '@'?MASK:TEXT); + this_elem= &(data->element[curelem]); + if(Inline[strlen(Inline)-1] == '\n') + Inline[strlen(Inline)-1] = '\0'; + strncpy(this_elem->short_name,(char*)&(Inline[1]),31); + this_elem->offset = offset; + } + else if(Inline[0] != '\n') + { + for(j=0,jj=0;jelement[curelem].rmatrix) + Ascii2NA(buffer,jj,data->element[curelem] + .rmatrix); + AppendNA(buffer,jj,&(data->element[curelem])); + } + } + + for(j=0;jnumelements;j++) + data->maxlen = MAX(data->maxlen,data->element[j].seqlen + + data->element[j].offset); + + for(j=0;jnumelements;j++) + if(data->element[j].seqlen==0) + data->element[j].protect = + PROT_BASE_CHANGES+ PROT_GREY_SPACE+ + PROT_WHITE_SPACE+ PROT_TRANSLATION; + + NormalizeOffset(data); + Regroup(data); + return; +} + + +WriteStatus(aln,filename,method) +NA_Alignment *aln; +char *filename; +int method; +{ + extern int EditMode,FileFormat; + extern NA_Alignment *DataSet; + NA_DisplayData *NAdd; + NA_Sequence *this_seq; + int j; + FILE *file; + + if(DataSet == NULL) + return; + + NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; + if(NAdd == NULL) + return; + + file = fopen(filename,"w"); + if (file == NULL) + { + Warning("Cannot open status file."); + return(1); + } + fprintf(file,"File_format: %s\n",FileFormat==GENBANK?"genbank":"flat"); + /* + fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert": + "check"); +*/ + + this_seq = &(aln->element[NAdd->cursor_y]); + if(this_seq->id != NULL) + fprintf(file,"sequence-ID %s\n",this_seq->id); + fprintf(file,"Column: %d\nPos:%d\n",NAdd->cursor_x,NAdd->position); + switch(this_seq->elementtype) + { + case DNA: + case RNA: + fprintf(file,"#%s\n", + this_seq->short_name); + break; + case PROTEIN: + fprintf(file,"%%%s\n", + this_seq->short_name); + break; + case MASK: + fprintf(file,"@%s\n", + this_seq->short_name); + break; + case TEXT: + fprintf(file,"%c%s\n",'"', + this_seq->short_name); + break; + default: + break; + } + if(this_seq->tmatrix) + for(j=0;jseqlen;j++) + 
putc(this_seq->tmatrix[getelem(this_seq,j)],file); + else + for(j=0;jseqlen;j++) + putc(getelem(this_seq,j),file); + + fclose(file); + return; +} + +ReadStatus(filename) +char *filename; +{ + /* + int i,j; + FILE *file; + char Inline[GBUFSIZ],head[GBUFSIZ]; + file = fopen(filename,"r"); + for(;!DONE;) + { + fgets(Inline,GBUFSIZ,file); + if(strlen(Inline) == 0) + DONE = TRUE; + else + { + sscanf(Inline,"%s",head); + if(strncmp(head,"Col",3) != 0) + { + sscanf(Inline,"%*s %d",head,&(DataSet->nadd-> + cursor_x),&(DataSet->nadd->cursory); + } + else if(strncmp(head,"Pos",3) != 0) + { + } + } + } + +*/ +} + + +NormalizeOffset(aln) +NA_Alignment *aln; +{ + int i,j,offset = 99999999; + + for(j=0;jnumelements;j++) + offset = MIN(offset,aln->element[j].offset); + + for(j=0;jnumelements;j++) + aln->element[j].offset -= offset; + + aln->maxlen = -999999999; + for(j=0;jnumelements;j++) + aln->maxlen = MAX(aln->element[j].seqlen+aln->element[j].offset, + aln->maxlen); + + aln->rel_offset += offset; + + if(aln->numelements == 0) + aln->rel_offset = 0; + + return; +} + +WriteCMask(aln,filename,method,maskable) +NA_Alignment *aln; +char *filename; +int method,maskable; +{ + int j,kk,mask = -1,k,offset,min_offset= -999999; + char offset_str[100]; + int *buf; + NA_Sequence *seqs; + FILE *file; + if(aln == NULL) + return; + if(aln->numelements == (int) NULL) + return; + seqs = aln->element; + + file = fopen(filename,"w"); + if(file == NULL) + { + Warning("Cannot open file for output"); + return(1); + } + if(maskable && (method != SELECT_REGION)) + { + for(j=0;jnumelements;j++) + if(seqs[j].elementtype == MASK && + seqs[j].selected) + mask = j; + } + for(j=0;jnumelements;j++) + { + SeqNorm(&(seqs[j])); + } + + for(j=0;jnumelements;j++) + { + if(method != SELECT_REGION) + offset = seqs[j].offset; + else + for(offset=seqs[j].offset; + aln->selection_mask[offset] == '0'; + offset++); + + if(offset+aln->rel_offset != 0) + sprintf(offset_str,"(%d)",offset+aln->rel_offset); + else + 
offset_str[0] = '\0'; + + if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) + || (method == SELECT_REGION && seqs[j].subselected) + || method == ALL) + { + fprintf(file,"%c%s%s\n", + seqs[j].elementtype == DNA?'#': + seqs[j].elementtype == RNA?'#': + seqs[j].elementtype == PROTEIN?'%': + seqs[j].elementtype == TEXT?'"': + seqs[j].elementtype == MASK?'@':'"', + seqs[j].short_name, + (offset+aln->rel_offset == 0)? "":offset_str); + + if(seqs[j].cmask != NULL) + { + + buf =(int*) Calloc(seqs[j].seqlen,sizeof(int) ); + + if(mask == -1) + { + for(k=0,kk=0;kkselection_mask[kk+offset]=='1') + buf[k++] = (getcmask( &(seqs[j]),kk+offset)); + } + + else + buf[k++] =( getcmask( &(seqs[j]),kk+offset) ); + } + } + else + { + for(k=0,kk=0;kk +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" +
/*
LoadData():
	Load a data set from the command line argument.

	filename	path of the file to load

FindType() determines the data type and file format.  Only NASEQ_ALIGN data
is loaded; it goes into the global DataSet, which is allocated here on first
use.  The main frame label is refreshed on every call, whether or not the
file could be opened.

Copyright (c) 1989, University of Illinois board of trustees.  All rights
reserved.  Written by Steven Smith at the Center for Prokaryote Genome
Analysis.  Design and implementation guidance by Dr. Gary Olsen and Dr.
Carl Woese.

Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
All rights reserved.

*/

LoadData(filename)
char *filename;
{
	extern NA_Alignment *DataSet;
	extern int DataType,FileFormat;
	extern Frame frame;
	extern char FileName[];
	FILE *file;
	NA_Alignment *DataNaAln;
	char temp[1024];

	/*
	*	Get file name, determine the file type, and away we go..
	*
	*	The name is remembered unless Find2() reports "gde" at offset 0.
	*	NOTE(review): Find2() is defined elsewhere; presumably this skips
	*	GDE's own temporary files - confirm.
	*/
	if(Find2(filename,"gde")!=0)
		strcpy(FileName,filename);

	if( (file=fopen(filename,"r"))!=0 )
	{
		FindType(filename,&DataType,&FileFormat);
		switch(DataType)
		{
		case NASEQ_ALIGN:
			if(DataSet == NULL)
			{
				DataSet = (NA_Alignment*)Calloc(1,
				    sizeof(NA_Alignment));
				DataNaAln =(NA_Alignment*)DataSet;
				DataSet->rel_offset = 0;
			}
			else
				DataNaAln = (NA_Alignment*)DataSet;

			LoadFile(filename,DataNaAln,
			    DataType,FileFormat);

			break;
		default:
			break;
		}

		/*
		*	Close the stream only when fopen() succeeded.  The
		*	unconditional fclose() used to be handed a NULL
		*	stream whenever the file could not be opened.
		*/
		fclose(file);
	}

	/* "%.900s" keeps the label inside temp[] however long FileName is */
	sprintf(temp,"Genetic Data Environment 2.2 (%.900s)",FileName);
	xv_set(frame,
	    FRAME_LABEL, temp,
	    0);
	return;
}


/*
LoadFile():
	Load the given filename into the given dataset.  Handle any
type conversion needed to get the data into the specified data type.
This routine is used in situations where the format and datatype are known.

	filename	file to read
	dataset		destination; really an NA_Alignment*, declared
			char* for historical reasons
	type		data type the caller expects (compared with the
			global DataType; a mismatch is only reported)
	format		NA_FLAT, GENBANK, GDE or COLORMASK

Copyright (c) 1989-1990, University of Illinois board of trustees.  All
rights reserved.  Written by Steven Smith at the Center for Prokaryote Genome
Analysis.  Design and implementation guidance by Dr. Gary Olsen and Dr.
Carl Woese.

Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
All rights reserved.
*/

LoadFile(filename,dataset,type,format)
char *filename;
char *dataset;
int type,format;
{
	extern int DataType;

	if (DataType != type)
		fprintf(stderr,"Warning, datatypes do not match.\n");
	/*
	*	Handle the overwrite/create/merge dialog here.
	*/
	switch(format)
	{
	case NA_FLAT:
		ReadNA_Flat(filename,dataset,type);
		/* flat files are tagged as GDE once they are in memory */
		((NA_Alignment*)dataset)->format = GDE;
		break;

	case GENBANK:
		ReadGen(filename,dataset,type);
		((NA_Alignment*)dataset)->format = GENBANK;
		break;

	case GDE:
		ReadGDE(filename,dataset,type);
		((NA_Alignment*)dataset)->format = GDE;
		break;

	case COLORMASK:
		/* a colour mask decorates the global DataSet; dataset is unused */
		ReadCMask(filename);
		break;

	default:
		break;
	}
	return;
}



/*
*	Print error message, and die.
*
*	code	0 means failure: the message is printed and the process
*		exits with status 1.  Any other value is a no-op.
*	string	text printed after "Error:"
*/
ErrorOut(code,string)
int code;
char *string;
{
	if (code == 0)
	{
		fprintf(stderr,"Error:%s\n",string);
		exit(1);
	}
	return;
}


/*
*	More robust memory management routines
*
*	Calloc()/Realloc() behave like calloc()/realloc() but terminate the
*	program through ErrorOut() instead of returning NULL.
*/
char *Calloc(count,size)
int count,size;
{
	char *temp;
#ifdef SeeAlloc
	extern int TotalCalloc;
	TotalCalloc += count*size;
	fprintf(stderr,"Calloc %d %d\n",count*size,TotalCalloc);
#endif
	temp = calloc(count,size);
	/*
	*	Hand ErrorOut() a real truth value.  Passing the pointer itself
	*	through its int parameter truncates it where pointers are wider
	*	than int, and a valid block whose low bits are zero would be
	*	reported as an allocation failure.
	*/
	ErrorOut(temp != NULL,"Cannot allocate memory");
	return(temp);
}

char *Realloc(block,size)
char *block;
int size;
{
	char *temp;
#ifdef SeeAlloc
	extern int TotalRealloc;
	TotalRealloc += size;
	fprintf(stderr,"Realloc %d\n",TotalRealloc);
#endif
	temp=realloc(block,size);
	/* see Calloc(): test the pointer here, not inside ErrorOut() */
	ErrorOut(temp != NULL,"Cannot change memory size");
	return(temp);
}

/*
*	Release a block obtained from Calloc()/Realloc().  A NULL block is
*	reported in the footer rather than passed on.
*/
Cfree(block)
char* block;
{
	if (block)
	{
		/*
		*	free() releases calloc()/realloc() memory and, unlike
		*	the obsolete cfree() used here before, is available
		*	in every C library.
		*/
		free(block);
	}
	else
		Warning("Error in Cfree, NULL block");
	return;
}



/*
*	same as strdup, except that allocation failure is fatal (Calloc).
*/
char *String(string)
char *string;
{
	char *temp;

	temp = Calloc(strlen(string)+1,sizeof(char));
	strcpy(temp,string);
	return(temp);
}


/*
FindType():
	Guess the data type and file format of a file from its contents.

	name	file to inspect
	dtype	out: NASEQ_ALIGN when a format was recognised, else 0
	ftype	out: NA_FLAT, GENBANK, GDE or COLORMASK, else 0

Returns 1 when the file cannot be opened, 0 otherwise (including the case
where no format was recognised).
*/
FindType(name,dtype,ftype)
char *name;
int *dtype,*ftype;
{
	FILE *file;
	char Inline[GBUFSIZ];

	file = fopen(name,"r");
	*dtype=0;
	*ftype=0;

	if (file == NULL)
		return(1);

	/*
	*	Is this a flat file?
	*	Get the first non blank line, see if a type marker shows up.
	*
	*	Inline starts out empty so that an empty file is examined as ""
	*	instead of as uninitialised stack contents.
	*/
	Inline[0] = '\0';
	while(fgets(Inline,GBUFSIZ,file) != NULL && strlen(Inline)<2)
		;

	if(Inline[0] == '#' || Inline[0] == '%' ||
	    Inline[0] == '"' || Inline[0] == '@' )
	{
		*dtype=NASEQ_ALIGN;
		*ftype=NA_FLAT;
	}

	/*
	*	Else, try genbank
	*/
	else
	{
		/* scan again from the top; no need to close and re-open */
		rewind(file);

		for(;fgets(Inline,GBUFSIZ,file) != NULL;)
			if(Find(Inline,"LOCUS"))
			{
				*dtype=NASEQ_ALIGN;
				*ftype=GENBANK;
				fclose(file);
				return(0);
			}
			/*
			*	and last, try GDE
			*/
			else if(Find(Inline,"sequence"))
			{
				*dtype = NASEQ_ALIGN;
				*ftype = GDE;
				fclose(file);
				return(0);
			}
			else if(Find(Inline,"start:"))
			{
				*dtype = NASEQ_ALIGN;
				*ftype = COLORMASK;
				fclose(file);
				return(0);
			}
	}

	fclose(file);
	return(0);
}

AppendNA(buffer,len,seq) +NA_Base *buffer; +int len; +NA_Sequence *seq; +{ + int curlen=0,j; + NA_Base *temp; + + if(seq->seqlen+len >= seq->seqmaxlen) + { + if(seq->seqlen>0) + seq->sequence = (NA_Base*)Realloc(seq->sequence, + (seq->seqlen + len+GBUFSIZ) * sizeof(NA_Base)); + else + seq->sequence = (NA_Base*)Calloc(1,(seq->seqlen + + len+GBUFSIZ) * sizeof(NA_Base)); + seq->seqmaxlen = seq->seqlen + len+GBUFSIZ; + } + /* +* seqlen is the length, and the index of the next free +* base +*/ + curlen = seq->seqlen + seq->offset; + for(j=0;jseqlen += len; + return; +} + +Ascii2NA(buffer,len,matrix) +char *buffer; +int len; +int matrix[16]; +{ + /* +* if the translation matrix exists, use it to +* encode the buffer. 
+*/ + register i; + if(matrix != NULL) + for(i=0;inumelements == (int) NULL) + return; + seqs = aln->element; + + file = fopen(filename,"w"); + if(file == NULL) + { + Warning("Cannot open file for output"); + return(1); + } + if(maskable && (method != SELECT_REGION)) + { + for(j=0;jnumelements;j++) + if(seqs[j].elementtype == MASK && + seqs[j].selected) + mask = j; + } + for(j=0;jnumelements;j++) + { + SeqNorm(&(seqs[j])); + } + + for(j=0;jnumelements;j++) + { + if(method != SELECT_REGION) + offset = seqs[j].offset; + else + for(offset=seqs[j].offset; + aln->selection_mask[offset] == '0'; + offset++); + + if(offset+aln->rel_offset != 0) + sprintf(offset_str,"(%d)",offset+aln->rel_offset); + else + offset_str[0] = '\0'; + + if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) + || (method == SELECT_REGION && seqs[j].subselected) + || method == ALL) + { + fprintf(file,"%c%s%s\n", + seqs[j].elementtype == DNA?'#': + seqs[j].elementtype == RNA?'#': + seqs[j].elementtype == PROTEIN?'%': + seqs[j].elementtype == TEXT?'"': + seqs[j].elementtype == MASK?'@':'"', + seqs[j].short_name, + (offset+aln->rel_offset == 0)? 
"":offset_str); + if(seqs[j].tmatrix) + { + if(mask == -1) + for(k=0,kk=0;kk0) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + if(method == SELECT_REGION) + { + if(aln->selection_mask[kk+offset]=='1') + { + buf[k%60] =((char)seqs[j].tmatrix[ + (int)getelem( &(seqs[j]),kk+offset) ]); + k++; + } + } + else + { + buf[k%60] =((char)seqs[j].tmatrix[ + (int)getelem( &(seqs[j]),kk+offset) ]); + k++; + } + } + else + for(k=0,kk=0;kk1) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + buf[k%60] = ((char)seqs[j].tmatrix + [getelem(&(seqs[j]),kk+offset)]); + } + } + } + else + { + if(mask == -1) + for(k=0,kk=0;kk0) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + if(method == SELECT_REGION) + { + if(aln->selection_mask[kk+offset]=='1') + { + buf[k%60] =(getelem( &(seqs[j]),kk+offset)); + k++; + } + } + else + { + buf[k%60] =( getelem( &(seqs[j]),kk+offset) ); + k++; + } + } + else + for(k=0,kk=0;kk1) + { + buf[60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + buf[k%60] =((char)getelem(&(seqs[j]), + kk+offset)); + } + } + } + buf[(k%60)>0 ? 
(k%60):60] = '\0'; + fputs(buf,file); + putc('\n',file); + } + } + fclose(file); + return(0); +} + + +Warning(s) +char *s; +{ + extern Frame frame; + extern Panel_item left_foot,right_foot; + Beep(); + xv_set(frame,FRAME_RIGHT_FOOTER,s,0); + xv_set(right_foot,PANEL_LABEL_STRING,s,0); +} + + +InitNASeq(seq,type) +NA_Sequence *seq; +int type; +{ + extern int Default_RNA_Trans[]; /* rtm 18.III.98 */ + extern int Default_DNA_Trans[],Default_NA_RTrans[]; + extern int + Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[]; + + SetTime(&(seq->t_stamp.origin)); + SetTime(&(seq->t_stamp.modify)); + strncpy(seq->id,uniqueID(),79); + seq->seq_name[0] = '\0'; + seq->barcode[0] = '\0'; + seq->contig[0] = '\0'; + seq->membrane[0] = '\0'; + seq->authority[0] = '\0'; + seq->short_name[0] = '\0'; + seq->sequence = NULL; + seq->offset = 0; + seq->baggage = NULL; + seq->baggage_len = 0; + seq->baggage_maxlen = 0; + seq->comments = NULL; + seq->comments_len = 0; + seq->comments_maxlen = 0; + seq->description[0] = '\0'; + seq->mask = NULL; + seq->seqlen = 0; + seq->seqmaxlen = 0; + seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION; +#ifdef HGL + seq->attr = 0; +#else + seq->attr = IS_5_TO_3 + IS_PRIMARY; +#endif + seq->elementtype = type; + seq->groupid = 0; + seq->groupb = NULL; + seq->groupf = NULL; + seq->cmask = NULL; + seq->selected = 0; + seq->subselected = 0; + + switch (type) + { + case DNA: + seq->tmatrix = Default_DNA_Trans; + seq->rmatrix = Default_NA_RTrans; + seq->col_lut = Default_NAColor_LKUP; + break; + case RNA: + seq->tmatrix = Default_RNA_Trans; + seq->rmatrix = Default_NA_RTrans; + seq->col_lut = Default_NAColor_LKUP; + break; + case PROTEIN: + seq->tmatrix = NULL; + seq->rmatrix = NULL; + seq->col_lut = Default_PROColor_LKUP; + break; + case MASK: + case TEXT: + default: + seq->tmatrix = NULL; + seq->rmatrix = NULL; + seq->col_lut = NULL; + break; + } + return; +} + + +ReadCMask(filename) +char *filename; +{ + extern Frame frame; + extern 
NA_Alignment *DataSet; + + char Inline[GBUFSIZ],head[GBUFSIZ],curname[GBUFSIZ], + temp[GBUFSIZ]; + int IGNORE_DASH = FALSE,offset; + NA_DisplayData *NAdd; + NA_Alignment *aln; + + int i,j,k,curlen = 0,*colors,orig_ctype,jj,indx = 0; + FILE *file; + + if(DataSet == NULL) return; + + NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; + + if(NAdd == NULL) + return; + + aln = (NA_Alignment*)DataSet; + + curname[0] = '\0'; + orig_ctype = NAdd->color_type; + file = fopen(filename,"r"); + if(file == NULL) + { + Warning("File not found"); + Warning(filename); + return; + } + + NAdd->color_type = COLOR_ALN_MASK; + for(;fgets(Inline,GBUFSIZ,file) !=0;) + { + if(Find(Inline,"offset:")) + { + crop(Inline,head,temp); + sscanf(temp,"%d",&(aln->cmask_offset)); + } + else if(Find(Inline,"nodash:")) + IGNORE_DASH = TRUE; + else if(Find(Inline,"dash:")) + IGNORE_DASH = TRUE; + else if(Find(Inline,"name:")) + { + crop(Inline,head,curname); + curname[strlen(curname)-1] = '\0'; + for(j=0;jcolor_type = orig_ctype; + return; + } + if(strlen(curname) != 0) + { + indx = -1; + for(j=0;jnumelements;j++) + if(Find(aln->element[j].short_name,curname) + || Find(aln->element[j].id,curname)) + { + if(aln->element[j].cmask != NULL) + Cfree(aln -> element[j].cmask); + colors=(int*)Calloc(aln->element[j] + .seqmaxlen+1+aln->element[j].offset + ,sizeof(int)); + aln->element[j].cmask = colors; + NAdd->color_type = COLOR_SEQ_MASK; + indx = j; + j = aln->numelements; + } + if(indx == -1) + colors=NULL; + } + else + { + if(aln->cmask != NULL) Cfree(aln->cmask); + colors=(int*)Calloc(curlen,sizeof(int)); + aln->cmask = colors; + aln->cmask_len = curlen; + NAdd->color_type = COLOR_ALN_MASK; + for(j=0;jelement[indx].seqlen);j++,jj++) + { + offset = aln->element[indx].offset; + if(fgets(Inline,GBUFSIZ,file)==NULL) + { + Warning + ("illegal format in colormask"); + NAdd->color_type = orig_ctype; + return; + } +/* +* Fixed so that the keyword nodash causes the colormask to be mapped +* to the 
sequence, not the alignment. +* +* The allocated space is equal the seqlen of the matched sequence. +* +*/ + if(aln->element[indx].tmatrix) + for(;(getelem(&(aln->element[indx]),jj + +offset) + ==(aln->element[indx].tmatrix['-']) + || (getelem(&(aln->element[indx]),jj + +offset) + ==aln->element[indx].tmatrix['~'])) + && jj < aln->element[indx].seqlen;) + colors[jj++] = 12; + else + for(;getelem(&(aln->element[indx]),jj + +offset) + =='-' && jj < aln->element[indx].seqlen;) + colors[jj++] = 12; + + sscanf(Inline,"%d",&(colors[jj])); + } + } + else if((indx == -1) && (strlen(curname) != 0)) + for(j=0;jcolor_type = orig_ctype; + return; + } + sscanf(Inline,"%d",&(colors[j])); + } + IGNORE_DASH = FALSE; + curname[0] = '\0'; + } + + } + RepaintAll(TRUE); + return; +} + + +ReadNA_Flat(filename,dataset,type) +char *filename; +char *dataset; +int type; +{ + int i, j, jj, c, curelem,offset; + char name[GBUFSIZ]; + char buffer[GBUFSIZ]; + char origin[GBUFSIZ],ref[GBUFSIZ]; + char Inline[GBUFSIZ],head[GBUFSIZ],tail[GBUFSIZ],temp[GBUFSIZ]; + char curname[GBUFSIZ]; + + NA_Sequence *this_elem; + NA_Alignment *data; + extern int Default_DNA_Trans[],Default_RNA_Trans[],Default_NA_RTrans[]; + + FILE *file; + + curname[0] = '\0'; + data = (NA_Alignment*)dataset; + + file = fopen(filename,"r"); + if(file == NULL) + { + fprintf(stderr,"Cannot open %s.\n",filename); + return; + } + for(;fgets(Inline,GBUFSIZ,file) !=0;) + { + if( + Inline[0] == '#' || + Inline[0] == '%' || + Inline[0] == '"' || + Inline[0] == '@' + ) + { + offset = 0; + for(j=0;jnumelements++; + if( curelem == 0 ) + { + data->element=(NA_Sequence*) + Calloc(5,sizeof(NA_Sequence)); + data->maxnumelements = 5; + } + else if (curelem==data->maxnumelements) + { + (data->maxnumelements) *= 2; + data->element= + (NA_Sequence*)Realloc(data->element + ,data->maxnumelements*sizeof(NA_Sequence)); + } + + InitNASeq(&(data->element[curelem]), + Inline[0] == '#'?DNA: + Inline[0] == '%'?PROTEIN: + Inline[0] == '"'?TEXT: + Inline[0] 
== '@'?MASK:TEXT); + this_elem= &(data->element[curelem]); + if(Inline[strlen(Inline)-1] == '\n') + Inline[strlen(Inline)-1] = '\0'; + strncpy(this_elem->short_name,(char*)&(Inline[1]),31); + this_elem->offset = offset; + } + else if(Inline[0] != '\n') + { + for(j=0,jj=0;jelement[curelem].rmatrix) + Ascii2NA(buffer,jj,data->element[curelem] + .rmatrix); + AppendNA(buffer,jj,&(data->element[curelem])); + } + } + + for(j=0;jnumelements;j++) + data->maxlen = MAX(data->maxlen,data->element[j].seqlen + + data->element[j].offset); + + for(j=0;jnumelements;j++) + if(data->element[j].seqlen==0) + data->element[j].protect = + PROT_BASE_CHANGES+ PROT_GREY_SPACE+ + PROT_WHITE_SPACE+ PROT_TRANSLATION; + + NormalizeOffset(data); + Regroup(data); + return; +} + + +WriteStatus(aln,filename,method) +NA_Alignment *aln; +char *filename; +int method; +{ + extern int EditMode,FileFormat; + extern NA_Alignment *DataSet; + NA_DisplayData *NAdd; + NA_Sequence *this_seq; + int j; + FILE *file; + + if(DataSet == NULL) + return; + + NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; + if(NAdd == NULL) + return; + + file = fopen(filename,"w"); + if (file == NULL) + { + Warning("Cannot open status file."); + return(1); + } + fprintf(file,"File_format: %s\n",FileFormat==GENBANK?"genbank":"flat"); + /* + fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert": + "check"); +*/ + + this_seq = &(aln->element[NAdd->cursor_y]); + if(this_seq->id != NULL) + fprintf(file,"sequence-ID %s\n",this_seq->id); + fprintf(file,"Column: %d\nPos:%d\n",NAdd->cursor_x,NAdd->position); + switch(this_seq->elementtype) + { + case DNA: + case RNA: + fprintf(file,"#%s\n", + this_seq->short_name); + break; + case PROTEIN: + fprintf(file,"%%%s\n", + this_seq->short_name); + break; + case MASK: + fprintf(file,"@%s\n", + this_seq->short_name); + break; + case TEXT: + fprintf(file,"%c%s\n",'"', + this_seq->short_name); + break; + default: + break; + } + if(this_seq->tmatrix) + for(j=0;jseqlen;j++) + 
putc(this_seq->tmatrix[getelem(this_seq,j)],file); + else + for(j=0;jseqlen;j++) + putc(getelem(this_seq,j),file); + + fclose(file); + return; +} + +ReadStatus(filename) +char *filename; +{ + /* + int i,j; + FILE *file; + char Inline[GBUFSIZ],head[GBUFSIZ]; + file = fopen(filename,"r"); + for(;!DONE;) + { + fgets(Inline,GBUFSIZ,file); + if(strlen(Inline) == 0) + DONE = TRUE; + else + { + sscanf(Inline,"%s",head); + if(strncmp(head,"Col",3) != 0) + { + sscanf(Inline,"%*s %d",head,&(DataSet->nadd-> + cursor_x),&(DataSet->nadd->cursory); + } + else if(strncmp(head,"Pos",3) != 0) + { + } + } + } + +*/ +} + + +NormalizeOffset(aln) +NA_Alignment *aln; +{ + int i,j,offset = 99999999; + + for(j=0;jnumelements;j++) + offset = MIN(offset,aln->element[j].offset); + + for(j=0;jnumelements;j++) + aln->element[j].offset -= offset; + + aln->maxlen = -999999999; + for(j=0;jnumelements;j++) + aln->maxlen = MAX(aln->element[j].seqlen+aln->element[j].offset, + aln->maxlen); + + aln->rel_offset += offset; + + if(aln->numelements == 0) + aln->rel_offset = 0; + + return; +} + +WriteCMask(aln,filename,method,maskable) +NA_Alignment *aln; +char *filename; +int method,maskable; +{ + int j,kk,mask = -1,k,offset,min_offset= -999999; + char offset_str[100]; + int *buf; + NA_Sequence *seqs; + FILE *file; + if(aln == NULL) + return; + if(aln->numelements == (int) NULL) + return; + seqs = aln->element; + + file = fopen(filename,"w"); + if(file == NULL) + { + Warning("Cannot open file for output"); + return(1); + } + if(maskable && (method != SELECT_REGION)) + { + for(j=0;jnumelements;j++) + if(seqs[j].elementtype == MASK && + seqs[j].selected) + mask = j; + } + for(j=0;jnumelements;j++) + { + SeqNorm(&(seqs[j])); + } + + for(j=0;jnumelements;j++) + { + if(method != SELECT_REGION) + offset = seqs[j].offset; + else + for(offset=seqs[j].offset; + aln->selection_mask[offset] == '0'; + offset++); + + if(offset+aln->rel_offset != 0) + sprintf(offset_str,"(%d)",offset+aln->rel_offset); + else + 
offset_str[0] = '\0'; + + if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) + || (method == SELECT_REGION && seqs[j].subselected) + || method == ALL) + { + fprintf(file,"%c%s%s\n", + seqs[j].elementtype == DNA?'#': + seqs[j].elementtype == RNA?'#': + seqs[j].elementtype == PROTEIN?'%': + seqs[j].elementtype == TEXT?'"': + seqs[j].elementtype == MASK?'@':'"', + seqs[j].short_name, + (offset+aln->rel_offset == 0)? "":offset_str); + + if(seqs[j].cmask != NULL) + { + + buf =(int*) Calloc(seqs[j].seqlen,sizeof(int) ); + + if(mask == -1) + { + for(k=0,kk=0;kkselection_mask[kk+offset]=='1') + buf[k++] = (getcmask( &(seqs[j]),kk+offset)); + } + + else + buf[k++] =( getcmask( &(seqs[j]),kk+offset) ); + } + } + else + { + for(k=0,kk=0;kk +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +FreeNASeq(): +Destroy a nucleic acid sequence structure, and free its memory usage. + +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. 
+ +*/ + + +FreeNASeq(seq) +NA_Sequence *seq; +{ + if(seq->sequence) + Cfree(seq->sequence); + if(seq->mask) + Cfree(seq->mask); + if(seq->cmask) + Cfree(seq->cmask); + if(seq->baggage) + Cfree(seq->baggage); + if(seq->comments) + Cfree(seq->comments); + + if(seq->groupf != NULL && seq->groupb != NULL) + { + ((NA_Sequence*)(seq->groupf))->groupb = seq->groupb; + ((NA_Sequence*)(seq->groupb))->groupf = seq->groupf; + } + return; +} + + +FreeNAAln(aln) +NA_Alignment*aln; +{ + Cfree(aln->id); + Cfree(aln->description); + Cfree(aln->authority); + Cfree(aln->cmask); + Cfree(aln->mask); + if(aln->na_ddata != NULL) + { + ((NA_DisplayData *)(aln->na_ddata))->aln = NULL; + FreeNADD(aln->na_ddata); + } + Cfree(aln); + + return; +} + + +FreeNADD(nadd) +NA_DisplayData *nadd; +{ + Cfree(nadd->jumptbl); + Cfree(nadd); + + return; +} diff --git a/CORE/Free.o b/CORE/Free.o new file mode 100644 index 0000000..c917852 Binary files /dev/null and b/CORE/Free.o differ diff --git a/CORE/Genbank.c b/CORE/Genbank.c new file mode 100755 index 0000000..46cd785 --- /dev/null +++ b/CORE/Genbank.c @@ -0,0 +1,465 @@ +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +all rights reserved. + +Copyright (c) 1993, Steven Smith, all rights reserved. 
+ +*/ + +ReadGen(filename,dataset,type) +char *filename; +NA_Alignment *dataset; +int type; +{ + register int done = FALSE,len = 0, j=0; + int count,IS_REALLY_AA = FALSE; + char Inline[GBUFSIZ],c; + char *buffer,*gencomments = NULL,fields[8][GBUFSIZ]; + int buflen = 0,genclen = 0,curelem = 0,n = 0,flag = 0; + int start_col = -1; + + NA_Sequence *this_elem; + FILE *file; + extern int Default_DNA_Trans[], Default_RNA_Trans[]; + extern int Default_NA_RTrans[]; + extern int Default_PROColor_LKUP[],Default_NAColor_LKUP[]; + + ErrorOut("No such file",file = fopen(filename,"r")); + + for(;fgets(Inline,GBUFSIZ,file) != 0;) + { + if(Inline[strlen(Inline)-1] == '\n') + Inline[strlen(Inline)-1] = '\0'; + if(Find(Inline,"LOCUS")) + { + curelem = dataset->numelements++; + if(curelem == 0) + { + dataset->element=(NA_Sequence*) + Calloc(5,sizeof(NA_Sequence)); + dataset->maxnumelements = 5; + } + else if (curelem==dataset->maxnumelements) + { + (dataset->maxnumelements) *= 2; + dataset->element =(NA_Sequence*) + Realloc(dataset->element, + dataset->maxnumelements * sizeof(NA_Sequence)); + } + this_elem = &(dataset->element[curelem]); + n = sscanf(Inline,"%s %s %s %s %s %s %s %s", + fields[0],fields[1],fields[2],fields[3],fields[4], + fields[5],fields[6],fields[7]); + if(IS_REALLY_AA) + { + InitNASeq(this_elem,PROTEIN); + } + else if(Find(Inline,"DNA")) + { + InitNASeq(this_elem,DNA); + } + else if(Find(Inline,"RNA")) + { + InitNASeq(this_elem,RNA); + } + else if(Find(Inline,"MASK")) + { + InitNASeq(this_elem,MASK); + } + else if(Find(Inline,"TEXT")) + { + InitNASeq(this_elem,TEXT); + } + else if(Find(Inline,"PROT")) + { + InitNASeq(this_elem,PROTEIN); + } + else + InitNASeq(this_elem,DNA); + + strncpy(this_elem->short_name,fields[1],31); + AsciiTime(&(this_elem->t_stamp.origin),fields[n-1]); + this_elem->attr = DEFAULT_X_ATTR; + + if( Find(Inline, "Circular") ) + this_elem->attr |= IS_CIRCULAR; + + gencomments = NULL; + genclen = 0; + } + else if(Find(Inline,"DEFINITION")) + 
strncpy(this_elem->description,&(Inline[12]),79); + + else if(Find(Inline,"AUTHOR")) + strncpy(this_elem->authority,&(Inline[12]),79); + + else if(Find(Inline," ORGANISM")) + strncpy(this_elem->seq_name,&(Inline[12]),79); + + else if(Find(Inline,"ACCESSION")) + strncpy(this_elem->id,&(Inline[12]),79); + + else if(Find(Inline,"ORIGIN")) + { + done = FALSE; + len = 0; + for(;done == FALSE && fgets(Inline,GBUFSIZ,file) != 0;) + { + if(Inline[0] != '/') + { + if(buflen == 0) + { + buflen = GBUFSIZ; + buffer = Calloc(sizeof(char) , + buflen); + } + + else if (len+strlen(Inline) >= buflen) + { + buflen += GBUFSIZ; + buffer = Realloc(buffer, + sizeof(char)*buflen); + for(j=buflen-GBUFSIZ + ;j + element[curelem])); + for(j=0;jelement[curelem].comments + = gencomments; + dataset->element[curelem].comments_len= + genclen - 1; + dataset->element[curelem]. + comments_maxlen = genclen; + + gencomments = NULL; + genclen = 0; + } + } +/* +* Test if sequence should be converted by the translation table +* If it looks like a protein... +*/ + if(dataset->element[curelem].rmatrix && + IS_REALLY_AA == FALSE) + { + IS_REALLY_AA = CheckType(dataset->element[curelem]. 
+ sequence,dataset->element[curelem].seqlen); + + if(IS_REALLY_AA == FALSE) + Ascii2NA(dataset->element[curelem].sequence, + dataset->element[curelem].seqlen, + dataset->element[curelem].rmatrix); + else +/* +* Force the sequence to be AA +*/ + { + dataset->element[curelem].elementtype = PROTEIN; + dataset->element[curelem].rmatrix = NULL; + dataset->element[curelem].tmatrix = NULL; + dataset->element[curelem].col_lut = + Default_PROColor_LKUP; + } + } + } + else if (Find(Inline,"ZZZZZ")) + { + Cfree(gencomments); + genclen = 0; + } + else + { + if (gencomments == NULL) + { + gencomments = String(Inline); + genclen = strlen(gencomments)+1; + } + else + { + genclen += strlen(Inline)+1; + gencomments = Realloc(gencomments,genclen * + sizeof(char)); + strncat(gencomments,Inline,GBUFSIZ); + strncat(gencomments,"\n",GBUFSIZ); + } + } + } + Cfree(buffer); + fclose(file); + for(j=0;jnumelements;j++) + dataset->maxlen = MAX(dataset->maxlen, + dataset->element[j].seqlen+dataset->element[j].offset); + return; +} + + + +typedef struct mya { + int yy; + int mm; + int dd; + int hr; + int mn; + int sc; +} sA; + +AsciiTime(sA *a,char *asciitime) +{ + int j; + char temp[GBUFSIZ]; + extern char month[12][6]; + + a->dd = 0; + a->yy = 0; + a->mm = 0; + sscanf(asciitime,"%d%5c%d",&(a->dd),temp,&(a->yy)); + temp[5] = '\0'; + for(j=0;j<12;j++) + if(strcmp(temp,month[j]) == 0) + a->mm = j+1; + if(a->dd <0 || a->dd > 31 || a->yy < 0 || a->mm > 11) + SetTime(a); + return; +} + + +WriteGen(aln,filename,method,maskable) +NA_Alignment *aln; +char *filename; +int method,maskable; +{ + int i,j,k,mask = -1; + FILE *file; + NA_Sequence *this_elem; + extern char month[12][6]; + char c; + if(aln == NULL) + return; + if(aln->na_ddata == NULL) + return; + + file = fopen(filename,"w"); + if(file == NULL) + { + Warning("Cannot open file for output"); + return(1); + } + + if(maskable && method != SELECT_REGION) + for(j=0;jnumelements;j++) + if(aln->element[j].elementtype == MASK && + 
aln->element[j].selected) + mask = j; + + for(j=0;jnumelements;j++) + { + if((aln->element[j].selected && j!=mask && method != SELECT_REGION) + ||(aln->element[j].subselected && method == SELECT_REGION) + || (method == ALL)) + { + this_elem = &(aln->element[j]); + fprintf(file, + "LOCUS %10s%8d bp %4s %10s %2d%5s%4d\n", + this_elem->short_name,this_elem->seqlen+this_elem->offset, + (this_elem->elementtype == DNA) ? "DNA": + (this_elem->elementtype ==RNA)?"RNA": + (this_elem->elementtype == MASK)?"MASK": + (this_elem->elementtype == PROTEIN)?"PROT":"TEXT", + this_elem->attr & IS_CIRCULAR?"Circular":"", + this_elem->t_stamp.origin.dd, + month[this_elem->t_stamp.origin.mm-1], + this_elem->t_stamp.origin.yy>1900?this_elem->t_stamp.origin.yy: + this_elem->t_stamp.origin.yy+1900); + if(this_elem->description[0]) + fprintf(file,"DEFINITION %s\n",this_elem->description); + if(this_elem->seq_name[0]) + fprintf(file," ORGANISM %s\n",this_elem->seq_name); + if(this_elem->id[0]) + fprintf(file," ACCESSION %s\n",this_elem->id); + if(this_elem->authority[0]) + fprintf(file," AUTHORS %s\n",this_elem->authority); + if(this_elem->comments) + fprintf(file,"%s\n",this_elem->comments); + fprintf(file,"ORIGIN"); + if(this_elem->tmatrix) + { + if(mask == -1) + { + for(i=0,k=0;kseqlen+this_elem->offset;k++) + { + if(method == SELECT_REGION) + { + if(aln->selection_mask[k] == '1') + { + if(i%60 == 0) + fprintf(file,"\n%9d",i+1); + if(i%10 == 0) + fprintf(file," "); + fprintf(file,"%c",this_elem->tmatrix + [getelem(this_elem,k)]); + i++; + } + } + else + { + if(i%60 == 0) + fprintf(file,"\n%9d",i+1); + if(i%10 == 0) + fprintf(file," "); + fprintf(file,"%c",this_elem->tmatrix + [getelem(this_elem,k)]); + i++; + } + } + } + else + { + for(k=0;kseqlen+this_elem->offset;k++) + { + c =(char)getelem(&(aln->element[mask]),k); + if(c != '0' && c!= '-') + { + if(k%60 == 0) + fprintf(file,"\n%9d",k+1); + if(k%10 == 0) + fprintf(file," "); + fprintf(file,"%c",this_elem->tmatrix + 
[getelem(this_elem,k)]); + } + } + } + } + else + { + if(mask == -1) + { + for(i=0,k=0;kseqlen+this_elem->offset;k++) + { + if(method == SELECT_REGION) + { + if(aln->selection_mask[k] == '1') + { + if(i%60 == 0) + fprintf(file,"\n%9d",i+1); + if(i%10 == 0) + fprintf(file," "); + fprintf(file,"%c", getelem(this_elem,k)); + i++; + } + } + else + { + if(i%60 == 0) + fprintf(file,"\n%9d",i+1); + if(i%10 == 0) + fprintf(file," "); + fprintf(file,"%c",getelem(this_elem,k)); + i++; + } + } + } + else + { + for(k=0;kseqlen+this_elem->offset;k++) + { + c =(char)getelem(&(aln->element[mask]),k); + if(c != '0' && c!= '-') + { + if(k%60 == 0) + fprintf(file,"\n%9d",k+1); + if(k%10 == 0) + fprintf(file," "); + fprintf(file,"%c",getelem(this_elem,k)); + } + } + } + } + fprintf(file,"\n//\n"); + } + } + fclose(file); + return; +} + + +SetTime(sA *a) +{ + struct tm *tim,*localtime(); + long clock; + + clock = time(0); + tim = localtime(&clock); + + a->yy = tim->tm_year; + a->mm = tim->tm_mon+1; + a->dd = tim->tm_mday; + a->hr = tim->tm_hour; + a->mn = tim->tm_min; + a->sc = tim->tm_sec; + return; +} + +/* +* CheckType: Check base composition to see if the sequence +* appears to be an amino acid sequence. If it is, pass back +* TRUE, else FALSE. +*/ +CheckType(seq,len) +char *seq; +int len; +{ + + int j,count1 = 0,count2 = 0; + + for(j=0;j 'a')) + { + count1++; + if(index("ACGTUNacgtun",seq[j]) == NULL) + count2++; + } + + return( (count2 > count1/4)?TRUE:FALSE); +} diff --git a/CORE/Genbank.o b/CORE/Genbank.o new file mode 100644 index 0000000..6ba3059 Binary files /dev/null and b/CORE/Genbank.o differ diff --git a/CORE/HGLfile.c b/CORE/HGLfile.c new file mode 100755 index 0000000..cbfac0a --- /dev/null +++ b/CORE/HGLfile.c @@ -0,0 +1,799 @@ +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. 
+*/ +ReadGDE(filename,dataset,type) +char *filename; +NA_Alignment *dataset; +int type; +{ + register int done = FALSE,len = 0, j=0; + int success,count,temp = 0; + char Inline[GBUFSIZ],c; + char *buffer,*line,*gencomments = NULL,fields[8][GBUFSIZ]; + int buflen = GBUFSIZ,genclen = 0,curelem = 0,n = 0,flag = 0; + NA_Sequence *this_elem = NULL,temp_elem; + FILE *file; + extern int Default_DNA_Trans[], Default_RNA_Trans[]; + extern int OVERWRITE,Default_NA_RTrans[], Default_PROColor_LKUP[]; + + ErrorOut("No such file",file = fopen(filename,"r")); + + for(;fgets(Inline,GBUFSIZ,file) != 0;) + { + for(line = Inline;line[0]==' ' || line[0] == '\t';line++); + + if(Find2(line,"{")==0) + { + this_elem = &temp_elem; + InitNASeq(this_elem,DNA); + this_elem->offset = -(dataset->rel_offset); + } + else if(Find2(line,"type")==0) + { + if(Find(line,"DNA")) + { + this_elem->elementtype = DNA; + this_elem->tmatrix = Default_DNA_Trans; + this_elem->rmatrix = Default_NA_RTrans; + } + else if(Find(line,"RNA")) + { + this_elem->elementtype = RNA; + this_elem->tmatrix = Default_RNA_Trans; + this_elem->rmatrix = Default_NA_RTrans; + } + else if(Find(line,"MASK")) + { + this_elem->elementtype = MASK; + this_elem->rmatrix = NULL; + this_elem->tmatrix = NULL; + this_elem->col_lut = NULL; + } + else if(Find(line,"TEXT")) + { + this_elem->elementtype = TEXT; + this_elem->rmatrix = NULL; + this_elem->tmatrix = NULL; + this_elem->col_lut = NULL; + } + else if(Find(line,"PROT")) + { + this_elem->elementtype = PROTEIN; + this_elem->rmatrix = NULL; + this_elem->tmatrix = NULL; + this_elem->col_lut = Default_PROColor_LKUP; + } +/* + this_elem->attr = DEFAULT_X_ATTR; +*/ + } + else if(Find2(line,"circular")==0) + { + sscanf(line,"%*s %d",&temp); + if(temp == 1) + { + this_elem->attr |= IS_CIRCULAR; + } + else + { + this_elem->attr &= ~IS_CIRCULAR; + } + } + else if(Find2(line,"orig_direction")==0) + { + sscanf(line,"%*s %d",&temp); + if(temp == 1) + { + this_elem->attr |= IS_ORIG_5_TO_3; + 
this_elem->attr &= ~IS_ORIG_3_TO_5; + } + else + { + this_elem->attr |= IS_ORIG_3_TO_5; + this_elem->attr &= ~IS_ORIG_5_TO_3; + } + } + else if(Find2(line,"direction")==0) + { + sscanf(line,"%*s %d",&temp); + if(temp == 1) + { + this_elem->attr |= IS_5_TO_3; + this_elem->attr &= ~IS_3_TO_5; + } + else + { + this_elem->attr |= IS_3_TO_5; + this_elem->attr &= ~IS_5_TO_3; + } + } + else if(Find2(line,"orig_strand")==0) + { + sscanf(line,"%*s %d",&temp); + if(temp == 1) + { + this_elem->attr |= IS_ORIG_PRIMARY; + this_elem->attr &= ~IS_ORIG_SECONDARY; + } + else + { + this_elem->attr |= IS_ORIG_SECONDARY; + this_elem->attr &= ~IS_ORIG_PRIMARY; + } + } + else if(Find2(line,"strandedness")==0) + { + sscanf(line,"%*s %d",&temp); + if(temp == 1) + { + this_elem->attr |= IS_PRIMARY; + this_elem->attr &= ~IS_SECONDARY; + } + else + { + this_elem->attr |= IS_SECONDARY; + this_elem->attr &= ~IS_PRIMARY; + } + } + else if(Find2(line,"creator")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->authority); + RemoveQuotes(this_elem->authority); + } + else if(Find2(line,"longname")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->seq_name); + RemoveQuotes(this_elem->seq_name); + } + else if(Find2(line,"descrip")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->description); + RemoveQuotes(this_elem->description); + } + else if(Find2(line,"name")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->short_name); + RemoveQuotes(this_elem->short_name); + } + else if(Find2(line,"group-ID")==0) + { + sscanf(line,"%*s %d",&(this_elem->groupid)); + dataset->numgroups = + MAX(this_elem->groupid, dataset->numgroups); + } + else if(Find2(line,"sequence-ID")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->id); + RemoveQuotes(this_elem->id); + } + else if(Find2(line,"barcode")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->barcode); + RemoveQuotes(this_elem->barcode); + } + else if(Find2(line,"membrane")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->membrane); + RemoveQuotes(this_elem->membrane); + } + else 
if(Find2(line,"contig")==0) + { + sscanf(line,"%*s %[^\n]",this_elem->contig); + RemoveQuotes(this_elem->contig); + } + else if(Find2(line,"creation-date")==0) + { + sscanf(line,"%*s %2d%*c%2d%*c%2d%*c%2d%*c%2d%*c%2d\n", + &(this_elem->t_stamp.origin.mm), + &(this_elem->t_stamp.origin.dd), + &(this_elem->t_stamp.origin.yy), + &(this_elem->t_stamp.origin.hr), + &(this_elem->t_stamp.origin.mn), + &(this_elem->t_stamp.origin.sc)); + } + else if(Find2(line,"offset")==0) + { + sscanf(line,"%*s %d",&(this_elem->offset)); + this_elem->offset -= dataset->rel_offset; + } + else if(Find2(line,"comments")==0) + { + if(this_elem->comments_maxlen ==0) + buflen = 2048; + else + buflen = this_elem->comments_maxlen; + + done = FALSE; + len = this_elem->comments_len; + + for(;line[0] != '"';line++) + if(line[0] == '\0') + ErrorOut(0,"Error in input file"); + line++; + buffer = Calloc(buflen,sizeof(char)); + for(;!done;) + { + for(j=0;j= buflen) + { + buflen *=2; + buffer = Realloc(buffer, + buflen*sizeof(char)); + } + if(line[j] == '"') done = TRUE; + + else + buffer[len++] = line[j]; + } + /* +* Check pad with null +*/ + buffer[len] = '\0'; + if(!done) + { + if(fgets(Inline,GBUFSIZ,file) == 0) + done = TRUE; + line = Inline; + } + } + this_elem->comments = buffer; + this_elem->comments_len = strlen(buffer); + this_elem->comments_maxlen = buflen; + RemoveQuotes(this_elem->comments); + } + else if(Find2(line,"sequence")==0) + { + buflen = GBUFSIZ; + done = FALSE; + len = 0; + + buffer = Calloc(buflen,sizeof(char)); + for(;line[0] != '"';line++) + if(line[0] == '\0') + ErrorOut(0,"Error in input file"); + + line++; + for(;!done;) + { + for(j=0;j= buflen) + { + buflen *=2; + buffer = Realloc(buffer, + buflen*sizeof(char)); + } + if(line[j] == '"') done = TRUE; + + else + { + /* +* If not text, ignore spaces... 
+*/ + if(this_elem->elementtype !=TEXT) + { + if(line[j]!=' ' && line[j] != + '\t' && line[j] != '\n') + buffer[len++] = line[j]; + } + else + if(line[j] != '\t' && line[j] != '\n') + buffer[len++] = line[j]; + } + } + if(!done) + { + if(fgets(Inline,GBUFSIZ,file) == 0) + done = TRUE; + line = Inline; + } + } + if(this_elem->rmatrix) + for(j=0;jrmatrix[buffer[j]]; + this_elem->sequence =(NA_Base*)buffer; + this_elem->seqlen = len; + this_elem->seqmaxlen = buflen; + } + + else if (Find2(line,"}")==0) + { + if(this_elem->id[0] == '\0') + strncpy(this_elem->id,uniqueID(),79); + if(this_elem->short_name[0] == '\0') + strncpy(this_elem->short_name,this_elem->id,79); + if(this_elem->seqlen == 0) + this_elem->protect= + PROT_BASE_CHANGES+ + PROT_GREY_SPACE+ + PROT_WHITE_SPACE+ + PROT_TRANSLATION; + genclen = 0; +/* +* Make a new sequence entry... +*/ + + success = -1; + if(OVERWRITE) + success = OverWrite(this_elem,dataset); + + if(success == -1) + { + curelem = dataset->numelements++; + if(curelem == 0) + { + dataset->element=(NA_Sequence*) + Calloc(5,sizeof(NA_Sequence)); + dataset->maxnumelements = 5; + } + else if (curelem==dataset->maxnumelements) + { + (dataset->maxnumelements) *= 2; + dataset->element =(NA_Sequence*) + Realloc(dataset->element, + dataset->maxnumelements * sizeof(NA_Sequence)); + } + dataset->element[curelem] = *this_elem; + } + } + else if(this_elem != NULL) + { + if (this_elem->baggage == NULL) + { + this_elem->baggage = String(line); + this_elem->baggage_maxlen = + this_elem->baggage_len = + strlen(this_elem->baggage)+1; + } + else + { + this_elem->baggage_len += strlen(line)+1; + this_elem->baggage = Realloc( + this_elem->baggage,this_elem->baggage_len * + sizeof(char)); + this_elem->baggage_maxlen = + this_elem->baggage_len; + + strncat(this_elem->baggage,line,GBUFSIZ); + } + } + } + + fclose(file); + NormalizeOffset(dataset); + Regroup(dataset); + AdjustGroups(dataset); + return; +} + +WriteGDE(aln,filename,method,maskable) +NA_Alignment *aln; 
+char *filename; +int method,maskable; +{ + int i,j,k,mask = -1; + FILE *file; + NA_Sequence *this_elem; + extern char month[12][6]; + + if(aln == NULL) + return; + if(aln->na_ddata == NULL) + return; + + file = fopen(filename,"w"); + if(file == NULL) + { + Warning("Cannot open file for output"); + return(1); + } + + if(maskable && method != SELECT_REGION) + for(j=0;jnumelements;j++) + if(aln->element[j].elementtype == MASK && + aln->element[j].selected) + mask = j; + + for(j=0;jnumelements;j++) + { + if((aln->element[j].selected && j!=mask && method!=SELECT_REGION) + || (method == ALL) + || (aln->element[j].subselected && method == SELECT_REGION)) + { + this_elem = &(aln->element[j]); + SeqNorm(this_elem); + fprintf(file,"{\n"); + if(this_elem->short_name[0]) + fprintf(file,"name \"%s\"\n",this_elem->short_name); + switch(this_elem->elementtype) + { + case DNA: + fprintf(file,"type \"DNA\"\n"); + break; + case RNA: + fprintf(file,"type \"RNA\"\n"); + break; + case PROTEIN: + fprintf(file,"type \"PROTEIN\"\n"); + break; + case MASK: + fprintf(file,"type \"MASK\"\n"); + break; + case TEXT: + fprintf(file,"type \"TEXT\"\n"); + break; + } + if(this_elem->seq_name[0]) + fprintf(file,"longname %s\n",this_elem->seq_name); + + if(this_elem->id[0]) + fprintf(file,"sequence-ID \"%s\"\n",this_elem->id); + RemoveQuotes(this_elem->barcode); + RemoveQuotes(this_elem->contig); + + if(this_elem->barcode[0]) + fprintf(file,"barcode \"%s\"\n",this_elem->barcode); + if(this_elem->membrane[0]) + fprintf(file,"membrane \"%s\"\n",this_elem->membrane); + if(this_elem->contig[0]) + fprintf(file,"contig \"%s\"\n",this_elem->contig); + if(this_elem->description[0]) + fprintf(file,"descrip \"%s\"\n",this_elem->description); + if(this_elem->authority[0]) + fprintf(file,"creator \"%s\"\n",this_elem->authority); + if(this_elem->groupid) + fprintf(file,"group-ID %d\n", + this_elem->groupid); + if(this_elem->offset+aln->rel_offset && method!=SELECT_REGION) + fprintf(file,"offset 
%d\n",this_elem->offset+aln->rel_offset); + if(method == SELECT_REGION) + { +/* +* If selecting a region, the offset should be moved to the first +* non-'0' space in the mask. +*/ + for(k=this_elem->offset;kselection_mask_len && + aln->selection_mask[k] == '0';k++); + fprintf(file,"offset %d\n", aln->rel_offset+k); + } + if(this_elem->t_stamp.origin.mm != 0) + fprintf(file, + "creation-date %2d/%2d/%2d %2d:%2d:%2d\n", + this_elem->t_stamp.origin.mm, + this_elem->t_stamp.origin.dd, + (this_elem->t_stamp.origin.yy)>1900? + (this_elem->t_stamp.origin.yy-1900): + (this_elem->t_stamp.origin.yy), + this_elem->t_stamp.origin.hr, + this_elem->t_stamp.origin.mn, + this_elem->t_stamp.origin.sc); + if((this_elem->attr & IS_ORIG_5_TO_3) && + ((this_elem->attr & IS_ORIG_3_TO_5) == 0)) + fprintf(file,"orig_direction 1\n"); + + if((this_elem->attr & IS_CIRCULAR) ) + fprintf(file,"circular 1\n"); + + if((this_elem->attr & IS_5_TO_3) && + ((this_elem->attr & IS_3_TO_5) == 0)) + fprintf(file,"direction 1\n"); + + if((this_elem->attr & IS_ORIG_3_TO_5) && + ((this_elem->attr & IS_ORIG_5_TO_3) == 0)) + fprintf(file,"orig_direction -1\n"); + + if((this_elem->attr & IS_3_TO_5) && + ((this_elem->attr & IS_5_TO_3) == 0)) + fprintf(file,"direction -1\n"); + + if((this_elem->attr & IS_ORIG_PRIMARY) && + ((this_elem->attr & IS_ORIG_SECONDARY) == 0)) + fprintf(file,"orig_strand 1\n"); + + if((this_elem->attr & IS_PRIMARY) && + ((this_elem->attr & IS_SECONDARY) == 0)) + fprintf(file,"strandedness 1\n"); + + if(((this_elem->attr & IS_ORIG_PRIMARY) == 0) && + (this_elem->attr & IS_ORIG_SECONDARY)) + fprintf(file,"orig_strand 2\n"); + + if(((this_elem->attr & IS_PRIMARY) == 0) && + (this_elem->attr & IS_SECONDARY)) + fprintf(file,"strandedness 2\n"); + + if(this_elem->comments != NULL) + { + StripSpecial(this_elem->comments); + fprintf(file,"comments \"%s\"\n",this_elem->comments); + } + if(this_elem->baggage != NULL) + { + if(this_elem-> + baggage[strlen(this_elem->baggage)-1] == '\n') + 
fprintf(file,"%s",this_elem->baggage); + else + fprintf(file,"%s\n",this_elem->baggage); + } + fprintf(file,"sequence \""); + if(this_elem->tmatrix) + { + if(mask == -1) + { + for(k=this_elem->offset;kseqlen+this_elem->offset;k++) + { + if(k%60 == 0) + putc('\n',file); + if(method == SELECT_REGION) + { + if(aln->selection_mask[k] == '1') + putc(this_elem->tmatrix[getelem(this_elem,k)], + file); + } + else + putc(this_elem->tmatrix[getelem(this_elem,k)], + file); + } + } + else + { + for(i=0,k=this_elem->offset;kseqlen+this_elem->offset;k++) + if(aln->element[mask].seqlen+this_elem->offset>k) + if((char)getelem(&(aln->element[mask]),k) != '0' + && ((char)getelem(&(aln->element[mask]),k) != '-')) + { + if(i%60 == 0) + putc('\n',file); + putc(this_elem->tmatrix[getelem(this_elem,k)], + file); + i++; + } + } + fprintf(file,"\"\n"); + } + else + { + if(mask == -1) + { + for(k=this_elem->offset;kseqlen+this_elem->offset;k++) + { + if(k%60 == 0) + putc('\n',file); + if(method == SELECT_REGION) + { + if(aln->selection_mask[k] == '1') + putc(getelem(this_elem,k),file); + } + else + putc(getelem(this_elem,k),file); + } + } + else + { + for(i=0,k=this_elem->offset;kseqlen+this_elem->offset;k++) + if(((aln->element[mask].seqlen)+(aln->element[mask]. 
+ offset)) > k) + if((char)getelem(&(aln->element[mask]),k) == '1') + { + if(i%60 == 0) + putc('\n',file); + putc(getelem(this_elem,k),file); + i++; + } + } + fprintf(file,"\"\n"); + } + fprintf(file,"}\n"); + } + } + fclose(file); + return; +} + + +StripSpecial(string) +char *string; +{ + register int i,j,len; + + len = strlen(string); + for(j=0;j=0 && (string[j]=='\n'||string[j]==' '); j--) + string[j] = '\0'; + + return; +} + + + +/* +* Normalize seq (remove leading indels in the sequence; +*/ +void SeqNorm(seq) +NA_Sequence *seq; +{ + int len,j,shift_width,trailer; + char *sequence; + len = seq->seqlen; + + sequence =(char*)seq->sequence; + + if(len == 0) return; + + if(seq->tmatrix) + for(shift_width=0; (shift_widthseqlen -= shift_width; + seq->offset += shift_width; + for(trailer=seq->seqlen-1;(sequence[trailer] =='-' || + sequence[trailer] == '\0') && trailer>=0; + trailer--) + sequence[trailer] = '\0'; + seq->seqlen = trailer+1; + return; +} + +/* ALWAYS COPY the result from uniqueID() to a char[32], + * (strlen(hostname)+1+10). Memory is lost when the function + * is finished. + */ +char vname[32]; +char *uniqueID() +{ + char hname[32]; /* ,vname[32]; rtm 18.III.98 */ + int hnamelen = 32; + time_t *tp; + static cnt = 0; + + tp = (time_t *)Calloc(1, sizeof(time_t)); + + if(gethostname(hname, 10) == -1) + { + fprintf(stderr, "UniqueID(): Failed to get host name.\n"); + exit(1); + } + + time(tp); + sprintf(vname, "%s:%d:%ld", hname, cnt, *tp); + cnt++; + Cfree(tp); + return(vname); +} + +/* +* OverWrite(), overwrite all non-default data from a sequence entry +* onto any entry with the same ID or short name. 
+*/ +OverWrite(this,aln) +NA_Sequence *this; +NA_Alignment *aln; +{ + int j,indx = -1; + NA_Sequence *that; + for(j=0;jnumelements;j++) + { + if(Find2(this->id,aln->element[j].id) != -1) + if(Find2(aln->element[j].id,this->id) != -1) + indx = j; + } + if(indx == -1) + for(j=0;jnumelements;j++) + { + if(Find2(this->short_name,aln->element[j].short_name)!= -1) + if(Find2(aln->element[j].short_name,this->short_name)!= -1) + indx = j; + } + if(indx != -1) + { + that = &(aln->element[indx]); + if(this->seq_name[0]) + strcpy(that->seq_name,this->seq_name); + if(this->barcode[0]) + strcpy(that->barcode,this->barcode); + if(this->contig[0]) + strcpy(that->contig,this->contig); + if(this->membrane[0]) + strcpy(that->membrane,this->membrane); + if(this->authority[0]) + strcpy(that->authority,this->authority); + if(this->short_name[0]) + strcpy(that->short_name,this->short_name); + if(this->description[0]) + strcpy(that->description,this->description); + if(this->sequence) + { + cfree(that->sequence); + that->sequence = this->sequence; + that->seqlen = this->seqlen; + that->seqmaxlen = this->seqmaxlen; + } + if(this->baggage) + { + that->baggage_len += this->baggage_len; + that->baggage_maxlen += this->baggage_maxlen; + if(that->baggage) + that->baggage = + Realloc(that->baggage,that->baggage_maxlen*sizeof(char)); + else + that->baggage = Calloc(that->baggage_maxlen,sizeof(char)); + strncat(that->baggage,this->baggage,that->baggage_maxlen); + } + if(this->comments) + { + that->comments_len += this->comments_len; + that->comments_maxlen += this->comments_maxlen; + if(that->comments) + that->comments = + Realloc(that->comments,that->comments_maxlen*sizeof(char)); + else + that->comments = Calloc(that->comments_maxlen,sizeof(char)); + strncat(that->comments,this->comments,that->comments_maxlen); + } + if(this->cmask) + { + cfree(that->cmask); + that->cmask = this->cmask; + } + if(this->offset != that->offset) + that->offset = this->offset; + if(this->attr != 0) + that->attr = 
this->attr; + if(this->groupid != 0) + { + that->groupid = this->groupid; + } + that->groupb = NULL; + that->groupf = NULL; + } +/* + NormalizeOffset(aln); + Regroup(aln); + AdjustGroups(aln); +*/ + return(indx); +} diff --git a/CORE/HGLfile.o b/CORE/HGLfile.o new file mode 100644 index 0000000..46485f3 Binary files /dev/null and b/CORE/HGLfile.o differ diff --git a/CORE/Makefile b/CORE/Makefile new file mode 100755 index 0000000..d0c4aa1 --- /dev/null +++ b/CORE/Makefile @@ -0,0 +1,60 @@ +OBJS= ParseMenu.o main.o BasicDisplay.o EventHandler.o FileIO.o \ +DrawNA.o Free.o BuiltIn.o Edit.o Genbank.o Scroll.o ChooseFile.o \ +CutCopyPaste.o HGLfile.o +SRCS= ParseMenu.c main.c BasicDisplay.c EventHandler.c FileIO.c \ +DrawNA.c Free.c BuiltIn.c Edit.c Genbank.c Scroll.c ChooseFile.c \ +CutCopyPaste.c HGLfile.c + +LIBS= -lm -lxview -lolgx -lX11 +CFLAGS= -g -L/usr/openwin/lib -I/usr/openwin/include +CC = cc +# Possible defines, SUN4 SGI DEC HGL +DEFINES = -DLINUX + +GLOBAL_DEPENDS= defines.h menudefs.h + +gde: $(OBJS) + $(CC) -o $@ $(OBJS) $(LIBS) $(CFLAGS) $(DEFINES) + +ParseMenu.o: ParseMenu.c $(GLOBAL_DEPENDS) + $(CC) -c ParseMenu.c $(CFLAGS) $(DEFINES) + +main.o: main.c $(GLOBAL_DEPENDS) globals.h + $(CC) -c main.c $(CFLAGS) $(DEFINES) + +BasicDisplay.o: BasicDisplay.c $(GLOBAL_DEPENDS) + $(CC) -c BasicDisplay.c $(CFLAGS) $(DEFINES) + +EventHandler.o: EventHandler.c $(GLOBAL_DEPENDS) + $(CC) -c EventHandler.c $(CFLAGS) $(DEFINES) + +FileIO.o: FileIO.c $(GLOBAL_DEPENDS) + $(CC) -c FileIO.c $(CFLAGS) $(DEFINES) + +DrawNA.o: DrawNA.c $(GLOBAL_DEPENDS) + $(CC) -c DrawNA.c $(CFLAGS) $(DEFINES) + +Edit.o: Edit.c $(GLOBAL_DEPENDS) + $(CC) -c Edit.c $(CFLAGS) $(DEFINES) + +Free.o: Free.c $(GLOBAL_DEPENDS) + $(CC) -c Free.c $(CFLAGS) $(DEFINES) + +BuiltIn.o: BuiltIn.c $(GLOBAL_DEPENDS) + $(CC) -c BuiltIn.c $(CFLAGS) $(DEFINES) + +Genbank.o: Genbank.c $(GLOBAL_DEPENDS) + $(CC) -c Genbank.c $(CFLAGS) $(DEFINES) + +Scroll.o: Scroll.c $(GLOBAL_DEPENDS) + $(CC) -c Scroll.c $(CFLAGS) 
$(DEFINES) + +ChooseFile.o: ChooseFile.c $(GLOBAL_DEPENDS) + $(CC) -c ChooseFile.c $(CFLAGS) $(DEFINES) + +CutCopyPaste.o: CutCopyPaste.c $(GLOBAL_DEPENDS) + $(CC) -c CutCopyPaste.c $(CFLAGS) $(DEFINES) + +HGLfile.o: HGLfile.c $(GLOBAL_DEPENDS) + $(CC) -c HGLfile.c $(CFLAGS) $(DEFINES) + diff --git a/CORE/ParseMenu.c b/CORE/ParseMenu.c new file mode 100755 index 0000000..5933f4b --- /dev/null +++ b/CORE/ParseMenu.c @@ -0,0 +1,566 @@ +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +ParseMenus(): Read in the menu config file, and generate the internal +menu structures used by the window system. + +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. + +*/ + +extern Gmenu menu[]; +int num_menus; + +ParseMenu() +{ + int j,curmenu = -1,curitem = 0; + int curchoice = 0 ,curarg = 0,curinput = 0, curoutput = 0; + char Inline[GBUFSIZ],temp[GBUFSIZ],head[GBUFSIZ]; + char tail[GBUFSIZ],*home; + Gmenu *thismenu; + GmenuItem *thisitem; + GmenuItemArg *thisarg; + GfileFormat *thisinput,*thisoutput; + FILE *file; + char *resize; + +/* +* Open the menu configuration file ".GDEmenus" +* First search the local directory, then the home directory. 
+*/ + file=fopen(".GDEmenus","r"); + if(file == NULL) + { + home = (char*)getenv("HOME"); + strcpy(temp,home); + strcat(temp,"/.GDEmenus"); + + file=fopen(temp,"r"); + if(file == NULL) + { + home = (char*)getenv("GDE_HELP_DIR"); + if(home != NULL) + { + strcpy(temp,home); + strcat(temp,"/.GDEmenus"); + file=fopen(temp,"r"); + } + if(file == NULL) + Error( + ".GDEmenus file not in the home, local, or $GDE_HELP_DIR directory"); + } + + + } + +/* +* Read the .GDEmenus file, and assemble an internal representation +* of the menu/menu-item hierarchy. +*/ + + for(;getline(file,Inline) != EOF;) + { +/* +* menu: chooses menu to use +*/ + if(Inline[0] == '#'); + else if(Find(Inline,"menu:")) + { + crop(Inline,head,temp); + curmenu = -1; + for(j=0;jlabel = + (char*)calloc(strlen(temp)+1,sizeof(char)); + + if(thismenu->label == NULL) + Error("Calloc"); + (void)strcpy(thismenu->label,temp); + thismenu->numitems = 0; + } + } +/* +* item: chooses menu item to use +*/ + else if(Find(Inline,"item:")) + { + curarg = -1; + curinput = -1; + curoutput = -1; + crop(Inline,head,temp); + curitem = thismenu->numitems++; +/* +* Resize the item list for this menu (add one item); +*/ + if(curitem == 0) + resize = (char*)calloc(1,sizeof(GmenuItem)); + else + resize = realloc(thismenu->item, + thismenu -> numitems*sizeof(GmenuItem) ); + + if(resize == NULL) + Error ("Calloc"); + thismenu->item =(GmenuItem*)resize; + + thisitem = &(thismenu->item[curitem]); + thisitem->label = (char*)calloc(strlen(temp)+1, + sizeof(char)); + thisitem->meta = '\0'; + thisitem->numinputs = 0; + thisitem->numoutputs = 0; + thisitem->numargs = 0; + thisitem->X = 0; + thisitem->help = NULL; + +/* +* Create new item +*/ + + if(thisitem->label == NULL) + Error("Calloc"); + (void)strcpy(thisitem->label,temp); + } + +/* +* itemmethod: generic command line generated by this item +*/ + else if(Find(Inline,"itemmethod:")) + { + crop(Inline,head,temp); + thisitem->method = + (char*)calloc(strlen(temp)+1,sizeof(char)); + 
if(thisitem->method == NULL) + Error("Calloc"); + (void)strcpy(thisitem->method,temp); + } +/* +* Help file +*/ + else if(Find(Inline,"itemhelp:")) + { + crop(Inline,head,temp); + thisitem->help = + (char*)calloc(strlen(temp)+1,sizeof(char)); + if(thisitem->method == NULL) + Error("Calloc"); + (void)strcpy(thisitem->help,temp); + } +/* +* Meta key equiv +*/ + else if(Find(Inline,"itemmeta:")) + { + crop(Inline,head,temp); + thisitem->meta = temp[0]; + } +/* +* arg: defines the symbol for a command line arguement. +* this is used for substitution into the itemmethod +* definition. +*/ + + else if(Find(Inline,"arg:")) + { + crop(Inline,head,temp); + curarg=thisitem->numargs++; + if(curarg == 0) + resize = (char*)calloc(1,sizeof(GmenuItemArg)); + else + resize = realloc(thisitem->arg, + thisitem->numargs*sizeof(GmenuItemArg) ); + + + if(resize == NULL) + Error("arg: Realloc"); + + (thisitem->arg) = (GmenuItemArg*)resize; + thisarg = &(thisitem->arg[curarg]); + thisarg->symbol = (char*)calloc(strlen(temp)+1, + sizeof(char)); + if(thisarg->symbol == NULL) + Error("Calloc"); + (void)strcpy(thisarg->symbol,temp); + thisarg->optional = FALSE; + thisarg->type = 0; + thisarg->min = 0; + thisarg->max = 0; + thisarg->numchoices = 0; + thisarg->choice = NULL; + thisarg->textvalue = NULL; + thisarg->value = 0; + } +/* +* argtype: Defines the type of argument (menu,chooser, text, slider) +*/ + else if(Find(Inline,"argtype:")) + { + crop(Inline,head,temp); + if(strcmp(temp,"text")==0) + { + thisarg->type=TEXTFIELD; + thisarg->textvalue = + (char*)calloc(GBUFSIZ,sizeof(char)); + if(thisarg->textvalue == NULL) + Error("Calloc"); + } + else if(strcmp(temp,"choice_list")==0) + thisarg->type=CHOICE_LIST; + else if(strcmp(temp,"choice_menu")==0) + thisarg->type=CHOICE_MENU; + else if(strcmp(temp,"chooser")==0) + thisarg->type=CHOOSER; + else if(strcmp(temp,"slider")==0) + thisarg->type=SLIDER; + else + Error(sprintf(head,"Unknown argtype %s",temp)); + } +/* +* argtext: The default text 
value of the symbol. +* $argument is replaced by this value if it is not +* changed in the dialog box by the user. +*/ + else if(Find(Inline,"argtext:")) + { + crop(Inline,head,temp); + (void)strcpy(thisarg->textvalue,temp); + } +/* +* arglabel: Text label displayed in the dialog box for +* this argument. It should be a discriptive label. +*/ + else if(Find(Inline,"arglabel:")) + { + crop(Inline,head,temp); + thisarg->label=(char*)calloc(strlen(temp)+1, + sizeof(char)); + if(thisarg->label == NULL) + Error("Calloc"); + (void)strcpy(thisarg->label,temp); + } +/* +* Argument choice values use the following notation: +* +* argchoice:Displayed value:Method +* +* Where "Displayed value" is the label displayed in the dialog box +* and "Method" is the value passed back on the command line. +*/ + else if(Find(Inline,"argchoice:")) + { + crop(Inline,head,temp); + crop(temp,head,tail); + curchoice = thisarg->numchoices++; + if(curchoice == 0) + resize = (char*)calloc(1,sizeof(GargChoice)); + else + resize = realloc(thisarg->choice, + thisarg->numchoices*sizeof(GargChoice)); + + if(resize == NULL) + Error("argchoice: Realloc"); + thisarg->choice = (GargChoice*)resize; + + (thisarg->choice[curchoice].label) = NULL; + (thisarg->choice[curchoice].method) = NULL; + + (thisarg->choice[curchoice].label) = + (char*)calloc(strlen(head)+1,sizeof(char)); + + (thisarg->choice[curchoice].method) = + (char*)calloc(strlen(tail)+1,sizeof(char)); + + if(thisarg->choice[curchoice].method == NULL || + thisarg->choice[curchoice].label == NULL) + Error("Calloc"); + + (void)strcpy(thisarg->choice[curchoice].label,head); + (void)strcpy(thisarg->choice[curchoice].method,tail); + } +/* +* argmin: Minimum value for a slider +*/ + else if(Find(Inline,"argmin:")) + { + crop(Inline,head,temp); + (void)sscanf(temp,"%d",&(thisarg->min)); + } +/* +* argmax: Maximum value for a slider +*/ + else if(Find(Inline,"argmax:")) + { + crop(Inline,head,temp); + (void)sscanf(temp,"%d",&(thisarg->max)); + } +/* +* 
argmethod: Command line flag associated with this argument. +* Replaces argument in itemmethod description. +*/ + else if(Find(Inline,"argmethod:")) + { + crop(Inline,head,temp); + thisarg->method = (char*)calloc(GBUFSIZ,strlen(temp)); + if(thisarg->method == NULL) + Error("Calloc"); + (void)strcpy(thisarg->method,tail); + } +/* +* argvalue: default value for a slider +*/ + else if(Find(Inline,"argvalue:")) + { + crop(Inline,head,temp); + if(thisarg->type == TEXT) + strcpy(thisarg->textvalue,temp); + else + (void)sscanf(temp,"%d",&(thisarg->value)); + } +/* +* argoptional: Flag specifying that an arguement is optional +*/ + else if(Find(Inline,"argoptional:")) + thisarg->optional = TRUE; +/* +* in: Input file description +*/ + else if(Find(Inline,"in:")) + { + crop(Inline,head,temp); + curinput = (thisitem->numinputs)++; + if(curinput == 0) + resize = (char*)calloc(1,sizeof(GfileFormat)); + else + resize = realloc(thisitem->input, + (thisitem->numinputs)*sizeof(GfileFormat)); + + if(resize == NULL) + Error("in: Realloc"); + thisitem->input = (GfileFormat*)resize; + thisinput = &(thisitem->input)[curinput]; + thisinput->save = FALSE; + thisinput->overwrite = FALSE; + thisinput->maskable = FALSE; + thisinput->format = 0; + thisinput->symbol = String(temp); + thisinput->name = NULL; + thisinput->select = SELECTED; + } + +/* +* out: Output file description +*/ + + else if(Find(Inline,"out:")) + { + crop(Inline,head,temp); + curoutput = (thisitem->numoutputs)++; + if(curoutput == 0) + resize = (char*)calloc(1,sizeof(GfileFormat)); + else + resize = realloc(thisitem->output, + (thisitem->numoutputs)*sizeof(GfileFormat)); + + if(resize == NULL) + Error("out: Realloc"); + thisitem->output = (GfileFormat*)resize; + thisoutput = &(thisitem->output)[curoutput]; + thisitem->output = (GfileFormat*)resize; + thisoutput = &(thisitem->output)[curoutput]; + thisoutput->save = FALSE; + thisoutput->overwrite = FALSE; + thisoutput->format = 0; + thisoutput->symbol= String(temp); + 
thisoutput->name = NULL; + } + else if(Find(Inline,"informat:")) + { + if(thisinput == NULL) + Error("Problem with .GDEmenus"); + crop(Inline,head,tail); + if(Find(tail,"genbank")) + thisinput->format = GENBANK; + else if(Find(tail,"gde")) + thisinput->format = GDE; + else if(Find(tail,"na_flat")) + thisinput->format = NA_FLAT; + else if(Find(tail,"colormask")) + thisinput->format = COLORMASK; + else if(Find(tail,"flat")) + thisinput->format = NA_FLAT; + else if(Find(tail,"status")) + thisinput->format = STATUS_FILE; + else fprintf(stderr,"Warning, unknown file format %s\n" + ,tail); + } + else if(Find(Inline,"insave:")) + { + if(thisinput == NULL) + Error("Problem with .GDEmenus"); + thisinput->save = TRUE; + } + else if(Find(Inline,"inselect:")) + { + if(thisinput == NULL) + Error("Problem with .GDEmenus"); + crop(Inline,head,tail); + if(Find(tail,"one")) + thisinput->select = SELECT_ONE; + else if(Find(tail,"region")) + thisinput->select = SELECT_REGION; + else if(Find(tail,"all")) + thisinput->select = ALL; + } + else if(Find(Inline,"inmask:")) + { + if(thisinput == NULL) + Error("Problem with .GDEmenus"); + thisinput->maskable = TRUE; + } + else if(Find(Inline,"outformat:")) + { + if(thisoutput == NULL) + Error("Problem with .GDEmenus"); + crop(Inline,head,tail); + if(Find(tail,"genbank")) + thisoutput->format = GENBANK; + else if(Find(tail,"gde")) + thisoutput->format = GDE; + else if(Find(tail,"na_flat")) + thisoutput->format = NA_FLAT; + else if(Find(tail,"flat")) + thisoutput->format = NA_FLAT; + else if(Find(tail,"status")) + thisoutput->format = STATUS_FILE; + else if(Find(tail,"colormask")) + thisoutput->format = COLORMASK; + else fprintf(stderr,"Warning, unknown file format %s\n" + ,tail); + } + else if(Find(Inline,"outsave:")) + { + if(thisoutput == NULL) + Error("Problem with .GDEmenus"); + thisoutput->save = TRUE; + } + else if(Find(Inline,"outoverwrite:")) + { + if(thisoutput == NULL) + Error("Problem with .GDEmenus"); + thisoutput->overwrite = 
TRUE; + } + } + return; +} + + + +/* +Find(): Search the target string for the given key +*/ +Find(target,key) +char *key,*target; +{ + int i,j,len1,dif,flag = FALSE; + dif = (strlen(target)) - (len1 = strlen(key)) +1; + + if(len1>0) + for(j=0;j0) + for(j=0;joffset;end--); + + for(j=0,i=offset;i<=end;i++,j++) + tail[j]=input[i]; + tail[j] = '\0'; + return; +} diff --git a/CORE/ParseMenu.o b/CORE/ParseMenu.o new file mode 100644 index 0000000..ceaa17f Binary files /dev/null and b/CORE/ParseMenu.o differ diff --git a/CORE/Scroll.c b/CORE/Scroll.c new file mode 100755 index 0000000..e0f4353 --- /dev/null +++ b/CORE/Scroll.c @@ -0,0 +1,292 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "menudefs.h" +#include "defines.h" + +/* +Copyright (c) 1989, University of Illinois board of trustees. All rights +reserved. Written by Steven Smith at the Center for Prokaryote Genome +Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. +Carl Woese. + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. 
+ +*/ + +InitEditSplit(oldview, newview, pos) +Xv_Window oldview, newview; +int pos; +{ + Xv_Window view, win; + extern Frame frame; + extern NA_Alignment *DataSet; + extern Canvas EditCan; + extern int SCALE; + Scrollbar hsc,vsc; + int j; + + if(DataSet == (NA_Alignment *) NULL || EditCan == (Canvas) NULL) + return ; + + for(j=0;j + maxlen,0); + + xv_set(vsc, + SCROLLBAR_VIEW_START,0, + SCROLLBAR_OBJECT_LENGTH,((NA_Alignment*)DataSet)-> + numelements,0); + + if (view == newview) + { +/* +* Get the paint window associated, and set it up the same as in +* BasicDisplay: +*/ + + (void)xv_set(xv_get(view,CANVAS_VIEW_PAINT_WINDOW), + WIN_EVENT_PROC,NAEvents, + WIN_CONSUME_EVENTS, WIN_MOUSE_BUTTONS, + LOC_DRAG, LOC_WINENTER, WIN_ASCII_EVENTS, + WIN_META_EVENTS, 0, + 0); + + notify_interpose_event_func( + xv_get(hsc,SCROLLBAR_NOTIFY_CLIENT), + EditCanScroll,NOTIFY_SAFE); + + xv_set(hsc, + SCROLLBAR_OBJECT_LENGTH,((NA_Alignment*)DataSet)-> + maxlen,SCROLLBAR_VIEW_START,0, + 0); + + xv_set(vsc, + SCROLLBAR_OBJECT_LENGTH,((NA_Alignment*)DataSet)-> + numelements,0); + } + } + RepaintAll(FALSE); + return; +} + + +Notify_value EditCanScroll(client,event,arg,type) +Notify_client client; +Event *event; +Notify_arg arg; +Notify_event_type type; +{ + extern NA_Alignment *DataSet; + extern Canvas EditCan; + extern Panel_item left_foot,right_foot; + extern int DisplayAttr,SCALE; + + Notify_client parent; + Drawable draw; + GC gc; + Display *dpy; + Xv_xrectlist area; + + Xv_window win,view; + Scrollbar hsc,vsc; + extern Frame frame; + int lastx,currentx,deltax,j; + int lasty,currenty,deltay; + int dx,dy; + char buffer[80]; + + hsc=(Scrollbar)xv_get(EditCan,OPENWIN_HORIZONTAL_SCROLLBAR, client); + vsc=(Scrollbar)xv_get(EditCan,OPENWIN_VERTICAL_SCROLLBAR, client); +/* + test for hsc && vsc attempts to fix warnings at split +*/ + + if(event_id(event) == SCROLLBAR_REQUEST && hsc && vsc) + { + win=(Xv_window)xv_get(client, + CANVAS_VIEW_PAINT_WINDOW); + + 
dx=((NA_DisplayData*)(((NA_Alignment*)DataSet)-> + na_ddata))-> font_dx; + + dy=((NA_DisplayData*)(((NA_Alignment*)DataSet)-> + na_ddata))-> font_dy; + + lastx=(int)xv_get(hsc, + SCROLLBAR_LAST_VIEW_START); + + currentx=(int)xv_get(hsc,SCROLLBAR_VIEW_START)/SCALE; + deltax=(int)xv_get(hsc,SCROLLBAR_VIEW_LENGTH); + + lasty=(int)xv_get(vsc, + SCROLLBAR_LAST_VIEW_START); + + currenty=(int)xv_get(vsc,SCROLLBAR_VIEW_START); + deltay=(int)xv_get(vsc,SCROLLBAR_VIEW_LENGTH); + + area.count=1; + area.rect_array[0].x=0; + area.rect_array[0].y=0; + area.rect_array[0].width=(short)(deltax*dx); + area.rect_array[0].height=(short)(deltay*dy); + + RepaintNACan(EditCan,win,xv_get(client, + XV_DISPLAY), + (Window)xv_get(win,XV_XID),&area); + + sprintf(buffer,"Columns %d - %d shown",currentx, + currentx+deltax*SCALE); + if(DisplayAttr & VSCROLL_LOCK) + { + DisplayAttr &= (unsigned int)(255 - VSCROLL_LOCK); + for(j=0;jmaxlen,0); + (void)xv_set(vsc,SCROLLBAR_OBJECT_LENGTH,DataSet->numelements,0); + + return; +} + + +RepaintAll(Names) +int Names; +{ + extern NA_Alignment *DataSet; + extern Canvas EditCan,EditNameCan; + Xv_xrectlist area; + Xv_window win,view; + Scrollbar hsc,vsc; + extern int SCALE; + extern Frame frame; + int lastx,currentx,deltax; + int lasty,currenty,deltay; + int dx,dy,j; + char buffer[80]; + + if(DataSet == NULL) + return; + + if((NA_DisplayData*)(((NA_Alignment*)DataSet)->na_ddata == NULL)) + return; + + for(j=0;jna_ddata))-> + font_dx ; + dy = ((NA_DisplayData*)(((NA_Alignment*)DataSet)->na_ddata))-> + font_dy; + hsc=(Scrollbar)xv_get(EditCan,OPENWIN_HORIZONTAL_SCROLLBAR, + view); + vsc=(Scrollbar)xv_get(EditCan,OPENWIN_VERTICAL_SCROLLBAR, view); + + lastx = (int)xv_get(hsc,SCROLLBAR_LAST_VIEW_START); + currentx = (int)xv_get(hsc,SCROLLBAR_VIEW_START); + deltax = (int)xv_get(hsc,SCROLLBAR_VIEW_LENGTH); + + lasty = (int)xv_get(vsc,SCROLLBAR_LAST_VIEW_START); + currenty = (int)xv_get(vsc,SCROLLBAR_VIEW_START); + deltay = (int)xv_get(vsc,SCROLLBAR_VIEW_LENGTH); + + 
area.count = 1; + area.rect_array[0].x = 0; + area.rect_array[0].y = 0; + area.rect_array[0].width = (short)(deltax*dx); + area.rect_array[0].height = (short)(deltay*dy); + + RepaintNACan(EditCan,win,xv_get(view, XV_DISPLAY), + (Window)xv_get(win,XV_XID),&area); + + sprintf(buffer,"%d - %d",currentx/SCALE, + currentx/SCALE+deltax*SCALE); + xv_set(frame,FRAME_RIGHT_FOOTER,buffer,0); + } + if(Names) + DrawNANames(xv_get(view, XV_DISPLAY), + (Window)xv_get(xv_get(EditNameCan, + CANVAS_NTH_PAINT_WINDOW,0), XV_XID)); + return; +} + +DestroySplit(view) +Xv_window view; +{} diff --git a/CORE/Scroll.o b/CORE/Scroll.o new file mode 100644 index 0000000..9d762cf Binary files /dev/null and b/CORE/Scroll.o differ diff --git a/CORE/defines.h b/CORE/defines.h new file mode 100755 index 0000000..8dde96d --- /dev/null +++ b/CORE/defines.h @@ -0,0 +1,274 @@ +/* + +Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. +All rights reserved. +*/ + +#include +#include +#include + +#define TRUTH 1 +#define JUSTICE 2 +#define BEAUTY 3 + +/* +* Edit modes +*/ + +#define INSERT 0 +#define CHECK 1 + +/* +* Cursor directions +*/ +#define RIGHT 1 +#define LEFT 0 +#define UP 0 +#define DOWN 1 + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +#define GBUFSIZ 512 +#define MAX_NA_DISPLAY_WIDTH 1024 +#define MAX_NA_DISPLAY_HEIGHT 1024 +#define MAX_STARTUP_CANVAS_HEIGHT 512 +#define grey_height 8 +#define grey_width 8 + +/* +* Definable dialog types +*/ +#define TEXTFIELD 0x1 +#define SLIDER 0x2 +#define CHOOSER 0x3 +#define CHOICE_MENU 0x4 +#define CHOICE_LIST 0x5 + +/* +* File Formats +*/ +#define GDE 0x100 +#define GENBANK 0x101 +#define NA_FLAT 0x102 +#define COLORMASK 0x103 +#define STATUS_FILE 0x104 + +/* +* Protection bits +*/ + +#define PROT_BASE_CHANGES 0x1 /* Allow base changes */ +#define PROT_GREY_SPACE 0x2 /* Allow greyspace modification */ +#define PROT_WHITE_SPACE 0x4 /* Allow whitespace modification */ +#define PROT_TRANSLATION 0x8 /* Allow 
translation */ +#define PROT_REORIENTATION 0x10 /* Allow reorientation */ + + +/* +* File loading methods (must be 'OR/AND' able) +*/ + +#define NONE 0x0 +#define DESTROY 0x1 +#define LOAD 0x2 +#define SAVE 0x4 +#define SELECTED 0x8 +#define ALL 0x10 +#define SELECT_REGION 0x20 +#define SELECT_ONE 0x30 + +/* +* Sequence DISPLAY Types +*/ +#define NASEQ_ALIGN 0x201 +#define NASEQ 0x202 + +/* +* Sequence Data Types +*/ +#define DNA 0x300 +#define RNA 0x301 +#define TEXT 0x302 +#define MASK 0x303 +#define PROTEIN 0x304 +/* +* extended sequence attributes (true/false) +*/ + +#define IS_5_TO_3 0x01 /* 5prime to 3 prime */ +#define IS_3_TO_5 0x02 /* 3 prime to 5 prime */ +#define IS_CIRCULAR 0x04 /* circular dna */ +#define IS_PRIMARY 0x10 /* on the primary strand */ +#define IS_SECONDARY 0x20 /* on the secondary strand */ +#define IS_MODIFIED 0x40 /* modification flag */ +#define IS_ORIG_PRIMARY 0x80 /* Original sequence was primary */ +#define IS_ORIG_SECONDARY 0x100 /* Original sequence was secondary */ +#define IS_ORIG_5_TO_3 0x200 /* Original sequence was 5_to_3 */ +#define IS_ORIG_3_TO_5 0x400 /* Original sequence was 3_to_5 */ + +#ifdef HGL +#define DEFAULT_X_ATTR 0 +#else +#define DEFAULT_X_ATTR IS_5_TO_3+IS_PRIMARY; +#endif + +/* +* Other display attributed +*/ +#define INVERTED 1 +#define VSCROLL_LOCK 2 +#define KEYCLICKS 4 +#define GDE_MESSAGE_PANEL 8 + +/* +* Coloring Methods +*/ +#define COLOR_MONO 0x40 /* no color, simple black and white */ +#define COLOR_LOOKUP 0x41 /* Use a simple value->color lookup */ +#define COLOR_ALN_MASK 0x42 /* The alignment has a column by column color + mask associated with it */ +#define COLOR_SEQ_MASK 0x43 /* Each sequence has a color mask*/ +#define COLOR_STRAND 0x44 /* Color based on original strandedness*/ + + +/* +* Data types +*/ + +typedef struct +{ + int *valu; +} NumList; + + +typedef struct +{ + struct + { + int yy; + int mm; + int dd; + int hr; + int mn; + int sc; + } origin,modify; +} TimeStamp; + +typedef unsigned 
char NA_Base; + +typedef struct +{ + char *name; + int type; + NumList *list; + int listlen; + int maxlen; +} GMask; + + +typedef struct NA_SeqStruct +{ + char id[80]; /* sequence id (ACCESSION)*/ + char seq_name[80]; /* Sequence name (ORGANISM) */ + char short_name[32]; /* Name (LOCUS) */ + char barcode[80]; + char contig[80]; + char membrane[80]; + char description[80]; /* Description (DEFINITION)*/ + char authority[80]; /* Author (or creator) */ + char *comments; /* Stuff we can't parse */ + int comments_len, comments_maxlen; + + NA_Base *sequence; /* List of bases */ + TimeStamp t_stamp; /* Time stamp of origin/modification */ + Mask *mask; /* List of masks(analysis/display) */ + int offset; /* offset into alignment (left white) + space */ + int seqlen; /* Number of elements in sequence[] */ + int seqmaxlen; /* Size sequence[] (for mem alloc) */ + unsigned int protect; /* Protection mask */ + int attr; /* Extended attributes */ + int groupid; /* group id */ + int *col_lut; /* character to color LUT */ + struct NA_SeqStruct *groupb; /* Group link backward */ + struct NA_SeqStruct *groupf; /* Group link forward */ + int *cmask; /* color mask */ + int selected; /* Selection flag */ + int subselected; /* Sub selection flag */ + int format; /* default file format */ + int elementtype; /* what type of data are being aligned*/ + char *baggage; /* unformatted comments*/ + int baggage_len, + baggage_maxlen; + int *tmatrix; /* translation matrix (code->char) */ + int *rmatrix; /* reverse translation matrix + (char->code)*/ +} NA_Sequence; + +typedef struct +{ + char *id; /* Alignment ID */ + char *description; /* Description of the alignment*/ + char *authority; /* Who generated the alignment*/ + int *cmask; /* color mask */ + int cmask_offset; /* color mask offset */ + int cmask_len; /* color mask length */ + int ref; /* reference sequence */ + int numelements; /* number of data elements */ + int maxnumelements; /* maximum number of data elements */ + int nummasks; /* 
number of masks */ + int maxlen; /* Maximum length of alignment */ + int rel_offset; /* add this to every sequence offset */ + /* to orient it back to 0 */ + Mask *mask; /* masks */ + NA_Sequence *element; /* alignment elements */ + int numgroups; /* number of groups */ + NA_Sequence **group; /* link to array of pointers into + each group */ + char *na_ddata; /* display data */ + int format; /* default file format */ + char *selection_mask; /* Sub sequence selection mask */ + int selection_mask_len; /* Sub selection mask length */ + int min_subselect; /* Leftmost coord of selection mask */ +} NA_Alignment; + + +typedef struct +{ + int font_dx; /* width of a character in this font*/ + int font_dy; /* height of a character in this font*/ + int wid,ht; /* width and height of edit win (in + characters */ + int top_seq; /* Top sequence index shown */ + int lft_pos; /* Leftmost column (in alignment + position coords) */ + int color_type; /* Method of manipulating colors + (See above) */ + int depth; /* number of color bits available */ + int num_colors; /* Actual number of colors used */ + int *palette; /* palette for display */ + int *col_lut; /* character to color LUT */ + int black,white; /* color indicies for blk,wht */ + int cursor_x,cursor_y; /* Current cursor positions */ + int position; /* Current position minus whitespace */ + int *jumptbl; /* the jump table for fast access + into the sequence data */ + int jtsize; /* its length */ + NA_Alignment *aln; /* Pointer to the actual data set + (the alignment */ + Xv_font font; /* The default font */ + Canvas seq_can,nam_can; /* ties to the canvas for screen + updates. 
*/
+    Window seq_x,nam_x; /* X versions of the above */
+    int use_repeat; /* Number keys set repeat count*/
+
+} NA_DisplayData;
+
+/* getcmask(a,b): a->cmask entry for position b, or 0 when b is left of a->offset. Arguments may be evaluated more than once; there is no upper-bound or NULL check on a->cmask. */
+#define getcmask(a,b) (((b) < ((a)->offset)) ? 0 : ((a)->cmask[(b) - (a)->offset]))
+
+#include "functions.h"
diff --git a/CORE/formatdb.log b/CORE/formatdb.log
new file mode 100644
index 0000000..db93c0e
--- /dev/null
+++ b/CORE/formatdb.log
@@ -0,0 +1,8 @@
+
+========================[ Feb 1, 2002 1:57 PM ]========================
+NOTE: CoreLib [002.003] FileOpen("HIV1POLDNA.fasta","r") failed
+Cannot open input database file. Formating failed...
+
+========================[ Feb 1, 2002 7:27 PM ]========================
+NOTE: CoreLib [002.003] FileOpen("SIVPOLPRO.fasta","r") failed
+Cannot open input database file. Formating failed...
diff --git a/CORE/functions.h b/CORE/functions.h
new file mode 100755
index 0000000..9c85821
--- /dev/null
+++ b/CORE/functions.h
@@ -0,0 +1,90 @@
+int AdjustGroups();
+int CaseChange();
+int Cfree();
+int ChAttr();
+int ChAttrType();
+int ChColor();
+int ChDisAttr();
+int ChDisplayDone();
+int ChEditDir();
+int ChEditMode();
+int ChFontSize();
+int ChangeDisplay();
+int CheckType();
+int CompSeqs();
+int CompressAlign();
+int DONT();
+int DO();
+int DestroySplit();
+int DoMeta();
+int DrawNAColor();
+int DummyRepaint();
+int EditCopy();
+int EditCut();
+int EditPaste();
+int EditSubCut();
+int EditSubPaste();
+int Find();
+int Find2();
+int FrameDone();
+int Group();
+int HELP();
+int HandleMenuItem();
+int InitEditSplit();
+int InitNASeq();
+int LoadData();
+int ModAttr();
+int ModAttrDone();
+int NAEvents();
+int NANameEvents();
+int New();
+int NormalizeOffset();
+Notify_value EditCanScroll();
+Notify_value SaveComments();
+int Open();
+int OpenFileName();
+int OrigDir();
+int OverWrite();
+Panel BasicDisplay();
+int Prot();
+int QuitGDE();
+int ReadCMask();
+int ReadStatus();
+int Regroup();
+int RemoveQuotes();
+int RepaintAll();
+int RepaintNACan();
+int ResizeNACan();
+int RevSeqs();
+int SaveAs();
+int
SaveAsFileName(); +int SaveFormat(); +int SelectAll(); +int SelectBy(); +int SelectByName(); +int SetFilename(); +NA_DisplayData *SetNADData(); +int SetProtection(); +int SetScale(); +int SetTime(); +int StripSpecial(); +int TestSelection(); +int Ungroup(); +int Warning(); +int WriteGDE(); +int WriteGen(); +int WriteNA_Flat(); +int WriteCMask(); +int WriteStatus(); +char *Calloc(); +char *ReplaceArgs(); +char *ReplaceFile(); +char *Realloc(); +char *String(); +char *uniqueID(); +int TestSelection(); +int hnorm(); +int vnorm(); +void HandleMenus(); +void JumpTo(); +void SeqNorm(); diff --git a/CORE/globals.h b/CORE/globals.h new file mode 100755 index 0000000..8d1babb --- /dev/null +++ b/CORE/globals.h @@ -0,0 +1,201 @@ +#include +int DataType; +int FileFormat,first_select = FALSE; +int Dirty,OldEditMode,EditMode = INSERT, EditDir = RIGHT; +int DisplayAttr = 0,OVERWRITE = FALSE; +int SCALE = 1; +int BlockInput = FALSE; +#ifdef SeeAlloc +int TotalCalloc = 0; +int TotalRealloc = 0; +#endif +char FileName[80]; +char current_dir[1024]; + +/* +* Months of the year +*/ +char month[12][6] = +{ + "-JAN-","-FEB-","-MAR-","-APR-","-MAY-","-JUN-", + "-JUL-","-AUG-","-SEP-","-OCT-","-NOV-","-DEC-" +}; + +/* +* Tables for DNA/RNA <--> ASCII translation +*/ + +int Default_RNA_Trans[128] = +{ +'-','A','C','M','G','R','S','V','U','W','Y','H','K','D','B','N',/*Upper*/ +'~','a','c','m','g','r','s','v','u','w','y','h','k','d','b','n',/*lower*/ +'-','A','C','M','G','R','S','V','U','W','Y','H','K','D','B','N',/*Upper select*/ +'~','a','c','m','g','r','s','v','u','w','y','h','k','d','b','n',/*lwr select*/ +'-','A','C','M','G','R','S','V','U','W','Y','H','K','D','B','N',/*extended*/ +'~','a','c','m','g','r','s','v','u','w','y','h','k','d','b','n',/*extended*/ +'-','A','C','M','G','R','S','V','U','W','Y','H','K','D','B','N',/*extended*/ +'~','a','c','m','g','r','s','v','u','w','y','h','k','d','b','n',/*extended*/ +}; + +int Default_DNA_Trans[128]={ 
+'-','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N',/*Upper*/ +'~','a','c','m','g','r','s','v','t','w','y','h','k','d','b','n',/*lower*/ +'-','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N',/*Upper select*/ +'~','a','c','m','g','r','s','v','t','w','y','h','k','d','b','n',/*lwr select*/ +'-','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N',/*extended*/ +'~','a','c','m','g','r','s','v','t','w','y','h','k','d','b','n',/*extended*/ +'-','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N',/*extended*/ +'~','a','c','m','g','r','s','v','t','w','y','h','k','d','b','n',/*extended*/ +}; + +int Default_NA_RTrans[128]={ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + /* Upper case alpha */ +0x01,0xe,0x02,0x0d,0,0,0x04,0x0b,0,0,0x0c,0,0x03,0x0f,0,0,0,0x05,0x06,0x08, +0x08,0x07,0x09,0x0f,0xa,0,0,0,0,0,0,0, + /* Lower case alpha */ +0x11,0x1e,0x12,0x1d,0,0,0x14,0x1b,0,0,0x1c,0,0x13,0x1f,0,0,0,0x15,0x16,0x18, +0x18,0x17,0x19,0x1f,0x1a,0,0,0,0,0x10,0 +}; + + +/* +* RGB values for the simple palette +*/ + +Xv_singlecolor Default_Colors[16]= { + {0,128,0}, + {255,192,0}, + {255,0,255}, + {225,0,0}, + {0,192,192}, + {0,192,0}, + {0,0,255}, + {128,0,255}, + {0,0,0}, + {36,36,36}, + {72,72,72}, + {109,109,109}, + {145,145,145}, + {182,182,182}, + {218,218,218}, + {255,255,255} +}; + + +/* +* Character->color lookup table +*/ + +int Default_NAColor_LKUP[128] = +{ +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13, +13,3,6,13,8,13,13,13,5,13,13,13,13,13,13,13 +}; + +int Default_PROColor_LKUP[128] = +{ +12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, 
+12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, +12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, +12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, +12,2,8,3,8,8,6,2,4,5,12,4,5,5,8,12,2, +8,4,2,2,12,5,6,12,6,8,12,12,12,12,12,12, +2,8,3,8,8,6,2,4,5,12,4,5,5,8,12,2, +8,4,2,2,12,5,6,12,6,8,12,12,12,12,12 +}; + +char vert_mito[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Ter", "AGC","Ser", +"AGG","Ter", "AGT","Ser", "ATA","Met", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Leu", "CTC","Leu", +"CTG","Leu", "CTT","Leu", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" + }, +mycoplasma[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Arg", "AGC","Ser", +"AGG","Arg", "AGT","Ser", "ATA","Ile", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Leu", "CTC","Leu", +"CTG","Leu", "CTT","Leu", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Trp", "TGC","Cys", "TGG","Trp", 
"TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" }, +universal[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Arg", "AGC","Ser", +"AGG","Arg", "AGT","Ser", "ATA","Ile", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Leu", "CTC","Leu", +"CTG","Leu", "CTT","Leu", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Ter", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" }, +yeast[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Arg", "AGC","Ser", +"AGG","Arg", "AGT","Ser", "ATA","Met", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Thr", "CTC","Thr", +"CTG","Thr", "CTT","Thr", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" +}; + + +char three_to_one[23][5] = { +"AlaA", "ArgR", "AsnN", "AspD", +"AsxB", "CysC", "GlnQ", "GluE", +"GlxZ", "GlyG", "HisH", "IleI", +"LeuL", "LysK", "MetM", "PheF", +"ProP", "SerS", "ThrT", 
"TrpW", +"TyrY", "ValV", "Ter*" +}; + + + +static unsigned char grey0[] = {0,0,0,0,0,0,0,0}; +static unsigned char grey1[] = {138,0,0,0,138,0,0,0}; +static unsigned char grey2[] = {138,0,34,0,138,0,34,0}; +static unsigned char grey3[] = {138,85,34,85,138,85,34,85}; +static unsigned char grey4[] = {117,170,221,170,117,170,221,170}; +static unsigned char grey5[] = {117,255,221,255,117,255,221,255}; +static unsigned char grey6[] = {117,255,255,255,117,255,255,255}; +static unsigned char grey7[] = {255,255,255,255,255,255,255,255}; + +unsigned char *greys[] = {grey1,grey3,grey3,grey1,grey2,grey3,grey0,grey3, + grey0,grey1,grey2,grey3,grey4,grey5,grey6,grey7}; +Pixmap *grey_pm[16]; diff --git a/CORE/icon_gde b/CORE/icon_gde new file mode 100755 index 0000000..42511d4 --- /dev/null +++ b/CORE/icon_gde @@ -0,0 +1,66 @@ +/* Format_version=1, Width=64, Height=64, Depth=1, Valid_bits_per_item=16 + */ + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x000F,0xE000,0x0000,0x0000, + 0x0030,0x1800,0x0000,0x0000, + 0x007F,0xFC00,0x0000,0x0000, + 0x0180,0x0300,0x0000,0x0000, + 0x0100,0x0100,0x0000,0x0000, + 0x0201,0x80FF,0xFFFF,0xFE00, + 0x0402,0x0040,0x0000,0x0100, + 0x043A,0x1840,0x0000,0x0080, + 0x0812,0x2020,0x0000,0x0040, + 0x0811,0xA820,0xE0E0,0xE020, + 0x0810,0x2831,0x1111,0x1020, + 0x0890,0x1831,0x1111,0x1020, + 0x0940,0x0031,0x1111,0x0020, + 0x09C0,0x072E,0x0E0E,0x0020, + 0x0940,0x0220,0x0000,0x0040, + 0x0540,0x0240,0x0000,0x0080, + 0x0400,0x0240,0x0000,0x0100, + 0x0200,0x00FF,0xFFFF,0xFE00, + 0x0100,0x0100,0x0000,0x0000, + 0x0180,0x0300,0x0000,0x0000, + 0x007F,0xFE00,0x0000,0x0000, + 0x0030,0x1E00,0x0000,0x0000, + 0x000F,0xFD00,0x1E1F,0x07E0, + 0x0000,0x5B00,0x3319,0x8600, + 
0x0000,0x3680,0x6018,0xC600, + 0x0000,0x2D80,0x6018,0xC600, + 0x0000,0x1B40,0x6018,0xC7C0, + 0x0000,0x16C0,0x6318,0xC600, + 0x0000,0x0DA0,0x6318,0xC600, + 0x0000,0x0B60,0x3319,0x8600, + 0x0000,0x06F0,0x1F1F,0x07E0, + 0x0000,0x05F0,0x0000,0x0000, + 0x0000,0x03E0,0x0000,0x0000, + 0x0000,0x03C0,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000 diff --git a/CORE/infile b/CORE/infile new file mode 100755 index 0000000..ddde93d --- /dev/null +++ b/CORE/infile @@ -0,0 +1,191 @@ + 10 916 +contig GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG +W22140 AAAAANGCCC NNTTCNAAGN GGGGGGGGGG GGGGGGGATA TTTTGCNNAG +R.C.W27436 GGGNNNNGNN NNNNNNNNNN NNNNNNAANN NNNNNNNNNN NNNNNNNNNN +R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN +W28762 TCTTGACATT TGTCTCCATT TCAGCAAAAC GANACCTGTG GTGAAGGGAT +#10005_2 2 GGnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn +R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN +W28762 ---------- ---------- ---------- ---------- ---------- +W28762(165 GGGNNGGNGN GGNNNGNNGN NNNGGNNNNN NNNTNTGTNT GNNGGNAGGG +#10005_2 2 GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG + nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA + GGGGGCATGA TGNNGAGANC NAAAGAAAGN NCNGGGNGGG AAAAAAGAAG + NNNANNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + TTGTGTGCTG GCACTG---- ---------- ---------- ---------- + nnnnnnnnnn 
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn + NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + ---------- ---------- ---------- ---------- ---------- + NNTNTNANNN NNTTNTANAG TNAAAGNTTG GTNNNNGTNN NTTTGANGAA + nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA + GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC + GAGGNCCCTG GNGGGAGGGG GGNNCGNNTT TNNTGCNCCG GATGGAGGGN + NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT + ---------- ---------- ---------- ---------- ---------- + nnnnnnnnGn AAnnnnnnnn nnnnnnnnnn nnnnnnnnnT TGAAAACTGT + NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT + ---------- ---------- ---------- ---------- ---------- + GNTCAANNTG GGGNNNANAN NNGNNNTTGA NTGAAAATGG GGNAANCCCC + GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC + CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC TGAA-n--Tc TACT---CCG + GGGGNTTTTN AAGNNTGTTT NTTTANAAGN AAGAGGGGGA NAAAATTTTT + NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAACCGAAA + ---------- ---------- ---------- ---------- ---------- + TAnCCAAnTG GAATCCTAAG ACAATTTTCT -cCAwTTCA- sCAAC-CGAA + TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAAC-CGAA + ---------- ---------- ---------- ---------- ---------- + CNTTTTNCCA GTCANCTGGT AAGTCCAAGC TGAA-N--TC TACTC--C-G + CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC Tgaa----Tc TACTC--C-G + CATGTAA-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG + TTNNTTCTNT NNCTNGNNNG GGGGGGGGGG GGGGCCCCCA ATAAGNNNTT + NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + CCCTGTGGTG GAGGGAATTN CGTTCTTGGC NCTTCAGACT NCAGGGCAGG + ---------- ---------- ---------- ----CAGACT GCAGGGNAGG + ACCCTGTGGT GrAGGGATTT GTGTGCT-GG CACTGCAGAC TGCAGGGCAG + ACCCTGTGGT GGAGGGAATT NCGTTCTTGG CNCTTCAGAC TNCAGGGCAG + ---------- ---------- ---------- ---------- ---------- + CATGTAACCC C-NAAAGAGT TGTCCAGAGC CAAGGCTTCT ACCTTCATTG + CATGTAa-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG + TCCCTCTCTG 
TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC + GNGCNCAGAA NNAGGGGGGG GNGGGGGGGC CCCTTTNCTC CNAAAAATTT + NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN + AA-------- ---------- ---------- ---------- ---------- + AA-------- ---------- ---------- ---------- ---------- + GAAAGGGCTA GGGCCCAGGG GCTGGGAmAT GCATGAGGT- gCTCGGAGGA + GAAAGGGCTA GGGCCCAGGG GCTGGGAAAT GCATGAGGTT GCTCGGAGGA + ---------- ---------- ---------- ---------- ---------- + TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC + TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC + CTaAGCAGAT AGCAAAGaAG ATaATGGAGG AgCAATTGGT CATGGCCtTG + CCCCCCNTTT TGGGNAAGGG TGGGGGAAAN NNTTTGGGCA AANAGGGGAA + NNNNNNNNNN NNNAANNAGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA + ---------- -------AGG GCTAGGGCCC AGGGGCTGGG AAATGCATGA + ---------- -------AGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA + GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC + GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC + ---------- ---------- ---------- ---------- ---------- + CTAAGCAGAT AGCAAAGNAG ATNATGGAGG ANCAATTGGT CATGGCCNTG + CTAAGCAGAT AGCAAAGAAG ATAATGGAGG AGCAATTGGT CATGGCCTTG + GTTTCCCTCk AAACaACgCT GCAGATTTAT CTGCACAAAC ATCTCCACTT + AAAAAAAGNG GGGGGGGGCG GNTTCCANAA AANAANAAAG GGTNCACCCN + GG-TTCTNGG NGGAGCCTGG CTAAANCCAA GCACCAGCAC CTGTGAGTCT + GGTTGCTCGG AGGAGCCTGG CTAAATCCAA GCACCAGCAC CTGTGAGTCT + GG-TGCTCGG AGGAGCCTGG NTAAATCCAA GCACCAGCAC CTGTGAGTCT + TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC + TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC + ---------- ---------- ---------- ---------- ---------- + GTTTCCCTCC AAACNACNCT GCAGATTTAT CTGCACAAAC ATCTCCACTT + GTTTCCCTCk AAACAACGCT GCAGATTTAT CTGCACAAAC ATCTCCACTT + tmGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC + TNGGGGGNCN CCCCCCCCNC NNGNAAATCN TCCCTTTTTT TGANGGGCNA + GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT NCCTCTTCTC + GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC + GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC + TCCTGAAGGT 
AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCAAAAG + TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCCNAAG + ---------- ---------- ---------- ---------- ---------- + TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC + TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC + ACGAGAGCTG GGAGAAGAGG cAAAGCTACA GGTTTACTTG GGAGCCAGCT + ANNNCATTTN CTTGNCCTTG AAGATTGACC NTGACTGCTC TGGCAAGAAG + CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC + CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC + CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC + TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTkGA GGGAAACCAA + TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTTGA GGGAAACCAA + ---------- ---------- ---------- ---------- ---------- + ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT + ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT + GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGaTTTA gCCAGGCTCC + AAGAGGTGTC CTTACAGAGA CCTCTTTACT GACCAACTGA AGNATAGACT + CTTTCCCCCN AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT + CTTTCCCCCC NAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT + CTTTCCCCCA AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGNGTNGTT + GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT + GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT + ---------- ---------- ---------- ---------- ---------- + GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA NCCAGGCTCC + GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA GCCAGGCTCC + tCCgAGkA-- CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT----- + TACTGCTGGA CAATCTGCAT GGGCATCACC CCTCCCCGCA TGTAACCC-A + TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC + TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC + TGGAGGGAAA CCANGGCCAT GACCAATTGN TCCTCCATNA TCTNCTTTGC + ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA + ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA + ---------- ---------- ---------- ---------- ---------- + TCCGAGC--A CCTCATGCAT GTCCCAGCCC CTGGGCCCTA GCCCT----- + TCCGAGc--A CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT----- + ---------- 
---------- ---------- ---------- ---------- + AAAGAGGTGT CCAGAGCCAA GGCTTCTACC TTCATTGTCC CTCTCTGTGC + TATCTGCTNA GAGNANNCAA NNNAANNNA- ---------- ---------- + TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC + TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC + TGAAGGTAGA AGCCTTGGCT CTGGACAmCT CTTTTGGG-t TACATGCG-- + TGAAGGTAGA AGCCTTGGCT CTGGACACCT CTTTTGGG-T TACATGCGGT + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- TTCCTgCCCT GcAGTCTGAA GnGCCAAG-A -ACGnAATTC + TCAAGGAGTT CCATTCCAGG AGGAAGAGAT CTATACCCT- ---------- + ---------- ---------- ---------- ---------- ---------- + ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC ACCTCTTTT- + ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC AACTCTTTNG + GAGTAgA-tt cAGCTTGGAC TTACCAGnTG ACTGGnAAAA nGGGGGnTTn + GAGTANA-NN NA-------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- TTCCTNCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C + ---------- TTCCTGCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C + CCTCCACCAC AGGGTTTCG- GTTGGGTGGn TTGGAAGA-A AATTGTCTTA + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + GGGTTACATG CGGTGAGTAN ANNNA----- ---------- ---------- + GGGTTACATG CGG--AGTAG ANTTCAGCTT GGACTTACCA GNTGACTGGN + CCCCATTTTC AnTCAAnnnC nnnTnTnnnC CCCAnnTTGA nCTTCnTCAA + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + CCTTCACCAC A-GGTNTCGT TTTGC-TGAA ATGG-AGACA AAT-GTCa-a + CCTrCACCAC AGGGTTTCG- GTTGs-TGAA wTGg-AGA-A AATTGTCTTA + GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + AAAANGGGGG NTTNCCCCAT TTTCANTCAA NNNCNNNTNT NNNCCCCANN + AnnnACnnnn 
ACCAAnCTTT nACTnTAnAA nnnnnTnAnA nnCCCTnCCn + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + g-a------- ---------- ---------- ---------- ---------- + GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn + nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + TTGANCTTCN TCAAANNNAC NNNNACCAAN CTTTNACTNT ANAANNNNNT + nCAnACAnAn nnnnnnnCCn nnnCnnCnnn CCnCnCCnnC CC-------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn + nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + NANANNCCCT NCCNNCANAC ANANNNNNNN NCCNNNNCNN CNNNCCNCNC + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn + nnnnnnnnnn nnnnCC + ---------- ------ + ---------- ------ + ---------- ------ + CNNCCC---- ------ + ---------- ------ + ---------- ------ + ---------- ------ + ---------- ------ + nnnnnnnnnn nnnnCC diff --git a/CORE/install.csh b/CORE/install.csh new file mode 100755 index 0000000..1426899 --- /dev/null +++ b/CORE/install.csh @@ -0,0 +1,2 @@ +make +cp gde ../bin diff --git a/CORE/installBLASTDB.pl b/CORE/installBLASTDB.pl new file mode 100755 index 0000000..3ee1f31 --- /dev/null +++ b/CORE/installBLASTDB.pl @@ -0,0 +1,51 @@ +#!/usr/bin/perl -w +use strict; + +my $newFileName; 
+my $line;
+
+my $sourceFile = shift;
+my $menuName = shift;
+
+print("mv -f ./$sourceFile.* /usr/local/bio/db/\n");
+print("cp -f ./$sourceFile /usr/local/bio/db/\n");
+
+
+# Best effort: the raw system() status is printed (0 means success); a failed copy does not abort.
+print system("mv -f ./$sourceFile.* /usr/local/bio/db/");
+# or die ("cannot copy files\n");
+print system("cp -f ./$sourceFile /usr/local/bio/db/") ;
+#or die ("cannot copy file\n");
+
+# Copy .GDEmenus to a new file, adding the new argchoice after the first argchoice that follows arg:BLASTDBDNA.
+open(MENUFILE, "/usr/local/bio/GDE/CORE/.GDEmenus")
+    or die "cannot open menu file, sorry\n";
+$newFileName = "/usr/local/bio/GDE/CORE/.GDEmenusNew";
+open(NEWFILE, ">$newFileName") or die "cannot create $newFileName: $!\n";
+READLOOP:
+while (defined($line = readline(*MENUFILE))){
+    print NEWFILE $line;
+    if ($line =~ /^arg:BLASTDBDNA/){
+        print "FOUND\n";
+        while (defined($line = readline(*MENUFILE))){
+            print NEWFILE $line;
+            if ($line =~ /^argchoice:/){
+                print NEWFILE "argchoice:$menuName:/usr/local/bio/db/$sourceFile\n";
+                last READLOOP;
+            }
+        }
+    }
+}
+while (defined($line = readline(*MENUFILE))){
+    print NEWFILE $line;
+}
+close(NEWFILE) or die "cannot finish writing $newFileName: $!\n";
+close(MENUFILE);
+# system() returns 0 on success, so compare against 0 instead of relying on its truth value.
+system("mv", $newFileName, "/usr/local/bio/GDE/CORE/.GDEmenus") == 0
+    or die "cannot replace old menu file\n";
+
+
+
+
+
diff --git a/CORE/installBLASTDBPROT.pl b/CORE/installBLASTDBPROT.pl
new file mode 100755
index 0000000..19d9d74
--- /dev/null
+++ b/CORE/installBLASTDBPROT.pl
@@ -0,0 +1,51 @@
+#!/usr/bin/perl -w
+use strict;
+
+my $newFileName;
+my $line;
+
+my $sourceFile = shift;
+my $menuName = shift;
+
+print("mv -f ./$sourceFile.* /usr/local/bio/db/\n");
+print("cp -f ./$sourceFile /usr/local/bio/db/\n");
+
+
+# Best effort: the raw system() status is printed (0 means success); a failed copy does not abort.
+print system("mv -f ./$sourceFile.* /usr/local/bio/db/");
+# or die ("cannot copy files\n");
+print system("cp -f ./$sourceFile /usr/local/bio/db/") ;
+#or die ("cannot copy file\n");
+
+# Copy .GDEmenus to a new file, adding the new argchoice after the first argchoice that follows arg:BLASTDBPROT.
+open(MENUFILE, "/usr/local/bio/GDE/CORE/.GDEmenus")
+    or die "cannot open menu file, sorry\n";
+$newFileName = "/usr/local/bio/GDE/CORE/.GDEmenusNew";
+open(NEWFILE, ">$newFileName") or die "cannot create $newFileName: $!\n";
+READLOOP:
+while (defined($line = readline(*MENUFILE))){
+    print NEWFILE $line;
+    if ($line =~ /^arg:BLASTDBPROT/){
+        print "FOUND\n";
+        while (defined($line = readline(*MENUFILE))){
+            print NEWFILE $line;
+            if ($line =~ /^argchoice:/){
+                print NEWFILE "argchoice:$menuName:/usr/local/bio/db/$sourceFile\n";
+                last READLOOP;
+            }
+        }
+    }
+}
+while (defined($line = readline(*MENUFILE))){
+    print NEWFILE $line;
+}
+close(NEWFILE) or die "cannot finish writing $newFileName: $!\n";
+close(MENUFILE);
+# system() returns 0 on success, so compare against 0 instead of relying on its truth value.
+system("mv", $newFileName, "/usr/local/bio/GDE/CORE/.GDEmenus") == 0
+    or die "cannot replace old menu file\n";
+
+
+
+
+
diff --git a/CORE/libxview.a b/CORE/libxview.a
new file mode 100755
index 0000000..51431b7
Binary files /dev/null and b/CORE/libxview.a differ
diff --git a/CORE/main.c b/CORE/main.c
new file mode 100755
index 0000000..62b4e21
--- /dev/null
+++ b/CORE/main.c
@@ -0,0 +1,178 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "menudefs.h"
+#include "defines.h"
+#include "globals.h"
+
+/*
+Main()
+
+Copyright (c) 1989, University of Illinois board of trustees. All rights
+reserved. Written by Steven Smith at the Center for Prokaryote Genome
+Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr.
+Carl Woese.
+
+
+Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
+All rights reserved.
+
+*/
+
+Gmenu menu[100];
+int num_menus = 0,repeat_cnt = 0;
+Frame frame,pframe,infoframe;
+Panel popup,infopanel;
+Panel_item left_foot,right_foot;
+Canvas EditCan,EditNameCan;
+int DisplayType;
+GmenuItem *current_item;
+NA_Alignment *DataSet = NULL;
+NA_Alignment *Clipboard = NULL;
+char **TextClip;
+int TextClipSize = 0,TextClipLength = 0;
+
+/*
+* Icon structure (pixmap dependent)
+*/
+
+
+static short GDEicon[258]={
+#include "icon_gde"
+};
+
+mpr_static(iconpr,64,64,1,GDEicon);
+
+main(argc,argv)
+int argc;
+char **argv;
+{
+
+    Icon tool_icon; /* obvious */
+    extern char FileName[],current_dir[];
+
+    int type = GENBANK; /* default file type */
+    DataSet = NULL;
+    Clipboard = (NA_Alignment*)Calloc(1,sizeof(NA_Alignment));
+    DisplayType = NASEQ_ALIGN; /* default data type */
+    Clipboard->maxnumelements = 5;
+    Clipboard->element =(NA_Sequence*)Calloc(Clipboard->maxnumelements,
+        sizeof(NA_Sequence));
+
+/*
+* Connect to server, and set up initial XView data types
+* that are common to ALL display types
+*/
+
xv_init(XV_INIT_ARGC_PTR_ARGV, &argc,argv,0); + + + /* +* Main frame (primary window); +*/ + + frame = xv_create((int) NULL,FRAME, + FRAME_NO_CONFIRM,FALSE, + FRAME_LABEL, "Genetic Data Environment 2.2", + FRAME_INHERIT_COLORS,TRUE, + XV_WIDTH,700, + XV_HEIGHT,500, + FRAME_SHOW_FOOTER,TRUE, + 0); + + /* +* Popup frame (dialog box window), and default settings in +* the dialog box. These are changed to fit each individual +* command's needs in EventHandler(). +*/ + infoframe = xv_create(frame,FRAME_CMD, + FRAME_LABEL,"Messages", + WIN_DESIRED_HEIGHT,100, + WIN_DESIRED_WIDTH,300, + FRAME_SHOW_RESIZE_CORNER,TRUE, + FRAME_INHERIT_COLORS,TRUE, + FRAME_CLOSED,FALSE, + WIN_SHOW,FALSE, + 0); + + pframe = xv_create(frame,FRAME_CMD, + FRAME_CMD_PUSHPIN_IN,TRUE, + FRAME_DONE_PROC,FrameDone, + XV_HEIGHT,100, + XV_WIDTH,300, + FRAME_SHOW_RESIZE_CORNER,FALSE, + FRAME_CLOSED,FALSE, + XV_X,300, + XV_Y,150, + WIN_SHOW,FALSE, + 0); + + infopanel = xv_get(infoframe,FRAME_CMD_PANEL); + xv_set(infopanel, PANEL_LAYOUT,PANEL_VERTICAL, + XV_WIDTH,300, + XV_HEIGHT,50, + 0); + + left_foot = xv_create(infopanel,PANEL_MESSAGE,0); + right_foot = xv_create(infopanel,PANEL_MESSAGE,0); + + window_fit(infoframe); + +/* + popup = xv_create(pframe,PANEL, + PANEL_LAYOUT,PANEL_HORIZONTAL, + 0); +*/ + popup = xv_get(pframe,FRAME_CMD_PANEL); + + xv_create(popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"HELP", + PANEL_NOTIFY_PROC,HELP, + 0); + + xv_create(popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"OK", + PANEL_NOTIFY_PROC,DO, + 0); + + xv_create(popup,PANEL_BUTTON, + PANEL_LABEL_STRING,"Cancel", + PANEL_NOTIFY_PROC,DONT, + 0); + +/* +* Keep original directory where program was started +*/ + (void)getwd(current_dir); + + ParseMenu(); + GenMenu(type); + if(argc>1) + LoadData(argv[1]); + + /* +* Set up the basics of the displays, and off to the main loop. 
+*/ + BasicDisplay(DataSet); + + if(DataSet != NULL) + ((NA_Alignment*)DataSet)->na_ddata = (char*)SetNADData + ((NA_Alignment*)DataSet,EditCan,EditNameCan); + + tool_icon = xv_create((int) NULL,ICON, + ICON_IMAGE,&iconpr, + ICON_LABEL,strlen(FileName)>0?FileName:"GDE", + 0); + + xv_set(frame, + FRAME_ICON,tool_icon, + 0); + + window_main_loop(frame); + exit(0); +} + diff --git a/CORE/main.o b/CORE/main.o new file mode 100755 index 0000000..e8e34e3 Binary files /dev/null and b/CORE/main.o differ diff --git a/CORE/menudefs.h b/CORE/menudefs.h new file mode 100755 index 0000000..9acffbf --- /dev/null +++ b/CORE/menudefs.h @@ -0,0 +1,56 @@ +typedef struct GargChoicetype +{ + char *label; /* name for display in dialog box */ + char *method; /* value (if null, return choice number) */ +} GargChoice; + +typedef struct GmenuItemArgtype +{ + int optional; /* is this optional? */ + int type; /* TEXT, SLIDER, CHOOSER, etc. */ + int min; /* minimum range value */ + int max; /* maximum range value */ + int value; /* default numeric value(or choice) */ + int numchoices; /* number of choices */ + char *textvalue; /* default text value */ + char *label; /* description of arg function */ + char *symbol; /* internal symbol table mapping */ + char *method; /* commandline interpretation */ + GargChoice *choice; /* choices */ + Panel_item X; /* Xview menu item entry */ +} GmenuItemArg; + +typedef struct GfileFormattype +{ + int save; /* how should file be saved */ + int overwrite; /* how should file be loaded */ + int format; /* what format is each field */ + int maskable; /* Can a write through mask be used? 
*/ + int select; /* what type of selection */ + char *symbol; /* internal symbol table mapping */ + char *name; /* file name */ +} GfileFormat; + +typedef struct GmenuItemtype +{ + int numargs; /* number of arguments to cmnd */ + int numoutputs; /* number of outputs from cmnd */ + int numinputs; /* number of input files to cmnd */ + char *label; /* item name */ + char *method; /* commandline produced */ + GfileFormat *input; /* input definitions */ + GfileFormat *output; /* output definitions */ + GmenuItemArg *arg; /* argument definitions */ + char meta; /* Meta character for function */ + char *help; /* commandline help */ + Panel_item X; /* Xview panel */ +} GmenuItem; + +typedef struct Gmenutype +{ + int numitems; /* number of items in menu */ + char *label; /* menu heading */ + GmenuItem *item; /* menu items */ + Panel_item button; /* Button to activate menu */ + Menu X; /* XView menu structure */ +} Gmenu; diff --git a/CORE/newURL.pl b/CORE/newURL.pl new file mode 100644 index 0000000..4bb18e7 --- /dev/null +++ b/CORE/newURL.pl @@ -0,0 +1,34 @@ +#!/usr/bin/perl -w +use strict; + +my $newFileName; +my $line; + +my $urlname = shift; +my $url = shift; + +open(MENUFILE, "/usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/biotools/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName") or die "cannot create new menu file\n"; + READLOOP: + while (<MENUFILE>){ + print NEWFILE; + if (/^menu:Online/){ + print "FOUND\n"; + print NEWFILE "item:$urlname\n"; + print NEWFILE "itemmethod:netscape $url\n"; + + + } + } +close(NEWFILE); +close(MENUFILE); +system("mv $newFileName /usr/local/biotools/GDE/CORE/.GDEmenus") == 0 + or die "cannot replace old menu file\n"; + + + + + + diff --git a/CORE/new_menu b/CORE/new_menu new file mode 100755 index 0000000..da409fd --- /dev/null +++ b/CORE/new_menu @@ -0,0 +1,259 @@ +Blastn (Nucleic Acid) +itemmethod:(echo PROGRAM blastn > in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo HISTOGRAM $PLOT >> in1.tmp; echo 
ALIGNMENTS $SCORE >> in1.tmp; echo SPLIT 100000 >> in1.tmp; echo BEGIN >> in1.tmp; readseq -i1 -p -f8 in1 >> in1.tmp; cat in1.tmp | /bin/mail blast@ncbi.nlm.nih.gov; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_menu +arglabel:Which Database? +argchoice:Non-redundant database:nr +argchoice:GenBank DNA sequence database:genbank +argchoice:GenBank update (cumulative daily updates):gbupdate +argchoice:EMBL DNA sequence database:embl +argchoice:EMBL update (cumulative weekly updates):emblu +argchoice:Vector subset of GenBank:vector +argchoice:Database of Expressed Sequence Tags (ESTs):dbest +argchoice:Eukaryotic promoterdatabase:epd +argchoice:Kabat's database of immunological interest:kabatnuc + +arg:PLOT +argtype:chooser +arglabel:Display Histogram? +argvalue:1 +argchoice:Yes:yes +argchoice:No:no + +arg:SCORE +argtype:slider +arglabel:Number of High Scoring Pairs Displayed? +argmin:1 +argmax:250 +argvalue:5 + +in:in1 +informat:genbank +insave: + +item:Blastp (Protein) +itemmethod:(echo PROGRAM blastp >in1.tmp; echo DATALIB $DBASE >> in1.tmp; echo HISTOGRAM $PLOT >> in1.tmp; echo ALIGNMENTS $SCORE >> in1.tmp; echo MATRIX $MATRX >> in1.tmp; echo SPLIT 100000 >> in1.tmp; echo BEGIN >> in1.tmp; readseq -i1 -p -f8 in1 >> in1.tmp; cat in1.tmp | /bin/mail blast@ncbi.nlm.nih.gov; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_menu +arglabel:Which Database? +argchoice:Non-redundant protein database:nr +argchoice:Swiss-Prot protein database:swissprot +argchoice:PIR protein database:pir +argchoice:Cumulative update to Swiss-Prot major release:spupdate +argchoice:GenPept (translated GenBank):genpept +argchoice:GenPept update (cumulative daily updates):gpupdate +argchoice:Brookhaven PDB:pdb +argchoice:Kabat's database of immunological interest:kabatnuc +argchoice:Transcription Factors Database:tfd +argchoice:6-frame translations of human Alu repeats:palu + +arg:PLOT +argtype:chooser +arglabel:Display Histogram? 
+argvalue:1 +argchoice:Yes:yes +argchoice:No:no + +arg:SCORE +argtype:slider +arglabel:Number of High Scoring Pairs Displayed +argmin:1 +argmax:250 +argvalue:5 + +arg:MATRX +argtype:chooser +arglabel:Weighting matrix +argchoice:PAM 250:PAM250 +argchoice:PAM 120:PAM120 +argchoice:PAM 40:PAM40 +argchoice:BLOSUM62:BLOSUM62 + + +in:in1 +informat:genbank +insave: + +item:Fasta (Nucleic Acid) +itemmethod:(echo LIB $DBASE > in1.tmp; echo WORD $KPL >> in1.tmp; echo LIST $TOP >> in1.tmp; echo ALIGN $ALNG >> in1.tmp; echo ONE >> in1.tmp; echo SEQ >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp;Mail FASTA@EMBL-Heidelberg.de < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_menu +arglabel:Which Database? +argchoice:GenBank Qrtly & Updates:GBALL +argchoice:GenBank Updates:GBNEW +argchoice:Entries only in GenBank, not in EMBL:GBONLY +argchoice:GenBank and EMBL entries (latest releases):GENEMBL +argchoice:New EMBL entries (Since latest release):EMNEW +argchoice:All EMBL entries (latest release + new ones):EMALL +argchoice:EMBL fungi division only:EFUN +argchoice:EMBL invertebrates division only:EINV +argchoice:EMBL mammals division only:EMAM +argchoice:EMBL organelles division only:EORG +argchoice:EMBL phages division only:EPHG +argchoice:EMBL plants division only:EPLN +argchoice:EMBL primates division only:EPRI +argchoice:EMBL prokaryotes division only:EPRO +argchoice:EMBL rodents division only:EROD +argchoice:EMBL synthetic sequences division only:ESYN +argchoice:EMBL unannotated division only:EUNA +argchoice:EMBL viruses division only:EVRL +argchoice:EMBL vertebrates division only:EVRT + + + +arg:KPL +argtype:slider +arglabel:K-tuple window +argmin:3 +argmax:6 +argvalue:4 + +arg:TOP +argtype:slider +arglabel:Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNG +argtype:slider +arglabel:# Alignments Displayed? 
+argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:Fasta (Protein) +itemmethod:(echo LIB $DBASE > in1.tmp; echo WORD $TPL >> in1.tmp; echo LIST $SCRS >> in1.tmp; echo ALIGN $ALNMNTS >> in1.tmp; echo PROT >> in1.tmp; echo SEQ >> in1.tmp; sed "s/-//g" < in1 | tr '@%#$' '>' >> in1.tmp; Mail FASTA@EMBL-Heidelberg.de < in1.tmp; rm in1 in1.tmp) & + +arg:DBASE +argtype:choice_menu +arglabel:Which Protein Database? +argchoice:Swiss-Protein (latest release + new ones):SWALL +argchoice:Swiss-Protein (since latest release):SWNEW +argchoice:Swiss-Protein (latest release):SW +argchoice:NBRF/PIR (latest release):NBRF +argchoice:NBRF/PIR (not Swiss-Prot):PIRONLY +argchoice:All Swiss-Prot and NBRF/PIR entries:SWISSPIRALL +argchoice:PDB structure entries:BROOKHAVEN +argchoice:PDB structure entries (NBRF version):NRL + +arg:TPL +argtype:slider +arglabel:K-TUP window +argmin:1 +argmax:2 +argvalue:1 + +arg:SCRS +argtype:slider +arglabel:# Scores Displayed? +argmin:1 +argmax:200 +argvalue:100 + +arg:ALNMNTS +argtype:slider +arglabel:# Alignments Displayed? +argmin:1 +argmax:200 +argvalue:20 + +in:in1 +informat:flat +insave: + +item:GeneID +itemmethod:($REPRINT > in1.tmp; echo Genomic Sequence >> in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail geneid@darwin.bu.edu < in1.tmp; rm in1 in1.tmp) & + +arg:REPRINT +argtype:chooser +arglabel:Do you want a GENEID reprint? +argchoice:YES:echo "Preprint Request" >> in1.tmp +argchoice:NO + +in:in1 +informat:flat +insave: + + + + + +item:Sequence Retrieval +itemmethod:(echo DATALIB $DBASE>> in1.tmp; echo MAXDOCS $NDOC >> in1.tmp;echo MAXLINES 100000 >> in1.tmp; echo BEGIN >> in1.tmp; echo $REGEXP >> in1.tmp; Mail retrieve@ncbi.nlm.nih.gov < in1.tmp; rm in1.tmp) & + +arg:DBASE +argtype:choice_menu +arglabel:Which Database? 
+argchoice:GenBank DNA sequence database:genbank +argchoice:GenBank update (cumulative daily updates):gbupdate +argchoice:EMBL DNA sequence database:embl +argchoice:EMBL update (cumulative weekly updates):emblu +argchoice:Vector subset of GenBank:vector +argchoice:Database of Expressed Sequence Tags (ESTs):dbest +argchoice:Swiss-Prot protein database:swissprot +argchoice:PIR protein database:pir +argchoice:GenPept (translated GenBank):genpept +argchoice:GenPept update (cumulative daily updates):gpupdate +argchoice:Transcription Factors Database:tfd + +arg:NDOC +argtype:slider +arglabel:Number of Sequences Retrieved? +argmin:1 +argmax:100 +argvalue:20 + + +arg:REGEXP +argtype:text +arglabel:key words, sequence IDs, boolean connectors + +item:Grail +itemmethod:(echo Sequences $TOTALSEQS $ID > in1.tmp; sed "s/-//g" < in1 | tr '@%#' '>' >> in1.tmp; Mail grail@ornl.gov in1.tmp; echo $NAME >> in1.tmp; echo $ADDRESS >> in1.tmp; echo $PHONE >> in1.tmp; echo $EMAIL >> in1.tmp; Mail grail@ornl.gov < in1.tmp; rm in1.tmp) + +arg:NAME +argtype:text +arglabel:Your Name + +arg:ADDRESS +argtype:text +arglabel:Your Address + +arg:PHONE +argtype:text +arglabel:Your Phone Number + +arg:EMAIL +argtype:text +arglabel:Your E-Mail Address + + diff --git a/CORE/outfile b/CORE/outfile new file mode 100755 index 0000000..8e3dd04 --- /dev/null +++ b/CORE/outfile @@ -0,0 +1,34 @@ + +DNA parsimony algorithm, version 3.51c + + +One most parsimonious tree found: + + + + + +-----------------------#10005_2 2 + ! + ! +--------------------W28762(165 + +--9 ! + ! ! ! +--R.C.W27652 + ! ! ! +-----------6 + ! ! ! ! +--#10005_2 2 + ! +--8 ! + ! ! +--5 +--W28762 + ! ! ! ! +--7 +--1 ! ! ! +--4 +--W28762 + ! ! ! ! ! ! + ! +--2 +-----3 +-----R.C.W27652 + ! ! ! + ! ! +--------R.C.W27436 + ! ! + ! +-----------------W22140 + ! + +--------------------------contig + + remember: this is an unrooted tree! 
+ + +requires a total of 2453.000 + diff --git a/CORE/treefile b/CORE/treefile new file mode 100755 index 0000000..8f33212 --- /dev/null +++ b/CORE/treefile @@ -0,0 +1,2 @@ +((#10005_2_2,(W28762(165,(((R.C.W27652,#10005_2_2),(((W28762,W28762), +R.C.W27652),R.C.W27436)),W22140))),contig); diff --git a/CORE/xview-3.2p1.4-6.i386.rpm b/CORE/xview-3.2p1.4-6.i386.rpm new file mode 100755 index 0000000..f0fbf60 Binary files /dev/null and b/CORE/xview-3.2p1.4-6.i386.rpm differ diff --git a/CORE/xview-3.2p1.4-6.src.rpm b/CORE/xview-3.2p1.4-6.src.rpm new file mode 100755 index 0000000..3504acd Binary files /dev/null and b/CORE/xview-3.2p1.4-6.src.rpm differ diff --git a/CORE/xview-clients-3.2p1.4-6.i386.rpm b/CORE/xview-clients-3.2p1.4-6.i386.rpm new file mode 100755 index 0000000..7d602bc Binary files /dev/null and b/CORE/xview-clients-3.2p1.4-6.i386.rpm differ diff --git a/CORE/xview-devel-3.2p1.4-6.i386.rpm b/CORE/xview-devel-3.2p1.4-6.i386.rpm new file mode 100755 index 0000000..ed36f89 Binary files /dev/null and b/CORE/xview-devel-3.2p1.4-6.i386.rpm differ diff --git a/CORE/xylem/acc.template b/CORE/xylem/acc.template new file mode 100644 index 0000000..190b8ce --- /dev/null +++ b/CORE/xylem/acc.template @@ -0,0 +1,25 @@ +;--------------------------------------------------------------------------- +; FEATURES/GDE Accession File Instructions +; +; 1. Type in one or more GenBank Accession #'s below, +; or +; Place cursor at end of this file and choose 'Include File' in the FILE +; menu to read in a file of numbers. +; +; (NOTE: File can not contain LOCUS names.) +; +; 2. Choose 'Save Current File' in the File menu +; 3. Quit this window +; +; FEATURES will then extract the appropriate sequences . YOU DON'T NEED TO EDIT +; OUT THESE COMMENT LINES. 
+; +; NOTE: Put each accession # on a separate line +; SAMPLE ACCESSION FILE: +; +; M18249 +; X13383 +; J03680 +; +;--------------------------------------------------------------------------- + diff --git a/CORE/xylem/clu2ig.doc b/CORE/xylem/clu2ig.doc new file mode 100644 index 0000000..8848eaa --- /dev/null +++ b/CORE/xylem/clu2ig.doc @@ -0,0 +1,45 @@ + clu2ig update 3 Feb 94 + + NAME + clu2ig + + SYNOPSIS + clu2ig clustalfile > igfile + + DESCRIPTION + Converts interleaved .aln output from Clustal V into + sequential .ig (IntelliGenetics) format for use by MASE. + + clustalfile: + CLUSTAL V multiple sequence alignment + + name1 AACTTTCG + name2 ATCTTTCG + * ****** + + name1 CCTGCT + name2 CCCGCT + ** *** + + igfile: + ; + name1 + AACTTTCG + CCTGCT + : + name2 + ATCTTTCG + CCCGCT + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/dbstat.doc b/CORE/xylem/dbstat.doc new file mode 100644 index 0000000..fa922c9 --- /dev/null +++ b/CORE/xylem/dbstat.doc @@ -0,0 +1,36 @@ + dbstat update 3 Feb 94 + + NAME + dbstat - calculates amino acid frequencies in a protein + database + + SYNOPSIS + dbstat + + DESCRIPTION + dbstat reads a file of one or more nucleic acid sequences + and calculates the amino acid frequencies, both in terms of + absolute numbers, and as a fraction of the total. + + input - The input file is the standard .wrp (Pearson) format, + such as that produced by getob: + + >name + ; one or more comment lines (optional) + sequence lines + + Comments begin either with semicolon (;) or right arrow (>) + characters. + + AUTHOR + Dr. Brian Fristensky + Dept. 
of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/expfile.template b/CORE/xylem/expfile.template new file mode 100644 index 0000000..9c82cb8 --- /dev/null +++ b/CORE/xylem/expfile.template @@ -0,0 +1,30 @@ +;--------------------------------------------------------------------------- +; FEATURES/GDE Expression File Instructions 8/7/95 +; +; 1. Type in one or more GenBank expressions below, +; or +; Place cursor at end of this file and choose 'Include File' in the FILE +; menu to read in a file of feature keys. +; or +; Copy expressions from another window and Paste into this window. +; 2. Choose 'Save Current File' in the File menu +; 3. Quit this window +; +; NOTES: +; 1) FEATURES will then extract the appropriate sequences. +; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES. +; 2) All expressions referring to GenBank entries must begin with a '@' +; Literals (ie. sequences to be embedded in the final output) +; do NOT begin with a '@'. +; 3) Put each expression on a separate line. +; +; SAMPLE EXPRESSION FILE: +; +; @J05635:83..1813 +; ; EcoRI/NotI adaptor {this is a comment line} +; AATTGCGGCCGC +; @J05635:/product="flagellin A" +; @x17548:singed_trans +; +;--------------------------------------------------------------------------- + diff --git a/CORE/xylem/feafile.template b/CORE/xylem/feafile.template new file mode 100644 index 0000000..12e8dd9 --- /dev/null +++ b/CORE/xylem/feafile.template @@ -0,0 +1,23 @@ +;--------------------------------------------------------------------------- +; FEATURES/GDE Feature Key File Instructions +; +; 1. 
Type in one or more GenBank FEATURE Table feature keys below, +; or +; Place cursor at end of this file and choose 'Include File' in the FILE +; menu to read in a file of feature keys. +; +; 2. Choose 'Save Current File' in the File menu +; 3. Quit this window +; +; FEATURES will then extract the appropriate sequences . YOU DON'T NEED TO EDIT +; OUT THESE COMMENT LINES. +; +; NOTE: Put each feature key on a separate line +; SAMPLE FEATURE KEY FILE: +; +; mRNA +; CDS +; mat_peptide +; +;--------------------------------------------------------------------------- + diff --git a/CORE/xylem/features.doc b/CORE/xylem/features.doc new file mode 100644 index 0000000..8e1321c --- /dev/null +++ b/CORE/xylem/features.doc @@ -0,0 +1,407 @@ + + FEATURES.DOC update 7 Feb 94 + + + NAME + FEATURES - extracts features from GenBank entries + + SYNOPSIS + features + features expression + features [-f featurekey | -F keyfile] + [-n name |-a accession | -e expression | + -N namefile |-A accfile | -E expfile] + [-u dbfile | -U dbfile | -g ] + features -h + + DESCRIPTION + FEATURES extracts sequence objects from GenBank entries, using + the Features Table language. Features can be retrieved either by + specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by + evaluating expressions. In practical terms, FEATURES is actually + a user interface for GETOB, which actually performs the parsing + and extraction of sequence objects. FEATURES can be run either as + an interactive program or with command line arguments. + + 'features' with no arguments runs the program interactively. + 'features' followed by an expression retrieves the data directly + from GenBank and evaluates the expression. The third form of + features requires all arguments to be accompanied by their + respective option flags. Finally, 'features -h' prints the + SYNOPSIS. + + + INTERACTIVE EXECUTION + FEATURES executed with no arguments runs interactively. 
An example of the + FEATURES menu is shown below: + + ___________________________________________________________________ + FEATURES - Version 7 FEB 94 + Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003 + ___________________________________________________________________ + Features: tRNA + Entries: EPFCPCG + Dataset: + ___________________________________________________________________ + Parameter Description Value + ------------------------------------------------------------------- + 1).................... FEATURES TO EXTRACT ....................> f + f:Type a feature at the keyboard + F:Read a list of features from a file + 2)....................ENTRIES TO BE PROCESSED (choose one).....> n + Keyboard input - n:name a:accession # e:expression + File input - N:name(s) A:accession #(s) E:expression(s) + 3)....................WHERE TO GET IT .........................> g + u:Genbank dataset g:complete GenBank database + U: same as u, but all entries + 4)....................WHERE TO SEND IT ........................> a + s:Each feature to a separate file a:All output to same file + --------------------------------------------------------------- + Type number of your choice or 0 to continue: + 0 + Messages will be written to EPFCPCG.msg + Final sequence output will be written to EPFCPCG.out + Expressions will be written to EPFCPCG.exp + Extracting features... + + In the example, FEATURES was instructed to retrieve all tRNAs from + the GenBank entry EPFCPCG, which contains the Epifagus plastid + genome. By default, the GenBank database was the source of the + sequence. Messages indicate the progress of the job. A log describing + the extraction of each feature is written to EPFCPCG.msg, while the + extracted features themselves are written to EPFCPCG.out. Feature + expressions which could be used by FEATURES to reconstruct the .out + file, are written to EPFCPCG.exp. 
+ + The first step is to retrieve the EPFCPCG entry from GenBank, which is + accomplished by calling FETCH. Next, FEATURES extracts the specified + features from the entry. + + An excerpt from EPFCPCG.msg is shown below, describing the extraction + of the fifth tRNA found in this entry. To create this tRNA, two exons + had to be joined. The qualifier line associated with this feature + indicates that it is a Histidine tRNA with a gtg anticodon. + + + EPFCPCG:anticodon gtg + complement + ( + join + ( + 70023 70028 + + 1 69 + + ) + + ) + + + /product="transfer RNA-His" + /gene="His-tRNA" + /label=anticodon gtg + /note="anticodon gtg" + //---------------------------------------------- + + + The actual sequence for this feature, as written to EPFCPCG.out, is + written with each exon beginning a new line: + + >EPFCPCG:anticodon gtg + ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat + gcgggttcaattcccgtcg + ttcgcc + + Finally, the expression that was evaluated to create this feature is + written to EPFCPCG.exp: + + >EPFCPCG:anticodon gtg + @M81884:anticodon gtg + + If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES, + EPFCPCG.out would be recreated. + + OPTIONS + 1) FEATURES - choosing f will cause FEATURES to prompt for + a feature to extract. If you wish to extract several types of + features simultaneously (ie. F), you must construct a file listing the + feature keywords. The following example would retrieve both tRNA and + rRNA sequences: + + OBJECTS + tRNA + rRNA + SITES + + The words 'OBJECTS' and 'SITES' must enclose the feature keywords, + and each keyword must be on a separate line. For a rigorous + definition of the input file format, see the GETOB manual pages + (getob.doc). + + In the menu shown above, f was chosen, and the user entered tRNA at + the prompt. Thus tRNA is now displayed on the Features: line. 
If + features had been specified from a file (suboption F) then the + filename containing the feature keywords would be displayed instead. + A complete list of legal feature keywords can be found in the GenBank + Release notes (gbrel.txt) under the subheading 'Feature Key Names'. + + 2) ENTRIES + n User is prompted for the name of an entry from which the + feature is to be extracted. The name of the entry will appear + on the 'Entries' line of the menu. + + N User is prompted for a filename containing one or more + entry names. Each name must be on a separate line. The filename + will be displayed on the 'Entries' menu line. + + a User is prompted for an accession number, which will appear + on the 'Entries' line of the menu. + + A User is prompted for a filename for accession numbers. The filename + will appear on the 'Entries:' line. + + e User is prompted for a GenBank Features expression of the + form accession:location. 'accession' refers to a GenBank + accession number, while 'location' is any legal feature location. + A brief description of location syntax can be found under the + subheading "Feature Location" in the GenBank release notes + (gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table: + Definition" Version 1.04 for a complete definition. + E User is prompted for a filename containing one or more Feature + expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning + with '@' are processed as expressions, and all other lines are + copied to the output file unchanged. + + Examples: + + The tRNA shown above could have been extracted by choosing + suboption e and entering either of the following expressions: + + M81884:complement(join(70023..70028,1..69)) + M81884:anticodon gtg + + In the first example, the feature line from the original entry + is used as the location. In the second example, the feature is + found by its qualifier line, which also appeared in the + original entry. 
It must be noted that the qualifier line must + be unique from others in the same entry in its first 15 + characters after the = . + + The flaL protein coding region of B. licheniformis is described + in GenBank entry BLIFALA, accession number M60287 in the + following feature: + + CDS 305..640 + /note="flaD (sin) homologue" + /gene="flaL" + /label=ORF2 + /codon_start=1 + + This feature could be retrieved using any of the following + expressions: + + M60287:305..640 + M60287:ORF2 + M60287:/label=ORF2 + M60287:/gene="flaL" + M60287:/note="flaD (sin) homologue" + + Note that the /label= qualifier is special, in that labels are + specifically intended as unique tags on a feature. For labels, + only the label itself need be specified. Thus, /label=ORF2 is + equivalent to ORF2. For other qualifiers, the qualifier keyword + (eg. /note=) must be included. + + 3) DATABASE (WHERE TO GET IT) - By default, all entries processed will + be automatically retrieved from GenBank using FETCH. Specifying 'u' + (User-defined database subset) makes it possible to extract features + from GenBank subsets created by the user. Usually, retrieval of + features is much faster with a User-defined subset, so if you + frequently work with sets of genes, it is best to retrieve them + en-masse using FETCH, and work with them directly. For example, if + you had retrieved a set of Beta-globin sequences into a file called + 'globin.gen', you could directly extract features from these entries + by specifying 'globin' or 'globin.gen' as your User-defined database. + If the file extension is '.gen', FEATURES will automatically create + temporary files called globin.ano, globin.wrp and globin.ind, + containing annotation, sequence, and an index, respectively. These + files will be read during feature extraction, and then discarded. If + you have already created such files using SPLITDB, simply specify + any of 'globin', 'globin.ano', etc. ie. 
anything, as long as it does + not have the .gen file extension. + + 'U' rather than 'u' causes ALL entries in the user-defined + database to be subset. This means that it is unnecessary to + specify entry options (eg -n, -N etc.), as these will be + ignored, if given. + + One consequence of these conventions is that the individual GenBank + divisions can be processed directly. For example, suppose you were only + interested in rodent globins. You could directly access the rodent + division of GenBank by specifying the base name of that file division + (eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In + this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already + exist. Again, this approach is faster, since FEATURES would not have + to find and retrieve the sequences, but can read directly from the + database files. Finally, if you wanted to process all of the entries + in the database division, simply use -U. The user is warned that a + GenBank division is a huge amount of data, and processing every entry + could take a long time. + + 4) WHERE TO SEND IT - By default (a), the output for all entries goes + to a single set of files, whose names are chosen by FEATURES, + depending on the setting of option 2, Entries. If a single name (n) or + accession number (a) has been chosen, that will be used as + the raw filename. For example, if you were processing the entry + WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names + (N), accession numbers (A) or expressions (E) were read from a file, + the raw name of that file would be used eg. cellulase.nam would result + in cellulase.msg and cellulase.out. Finally, if a single expression + is processed (e), then the primary accession number in that + expression will be used for the filenames. In all cases, FEATURES + will tell you the names of the files being written. + + Choosing suboption s, you can specify that the features created for + each entry be sent to separate files. 
In this case, each file will + have the name of that entry, with the extension .obj. However, all + messages and expressions will still go to a single set of files. While this + can be a convenient way of creating separate files when you need them, + this option still has the limitation of writing all features for a + given entry (if there are more than one) to the same file. Also, + successive resolution of features (anything requiring 'getob -r') + will not work with this option. This may be corrected in future + versions. + + + COMMAND LINE EXECUTION + + There are two ways of running FEATURES from the command line. If only one + argument is supplied, that argument is interpreted as an expression, and + the result of that expression (ie. a sequence ) is written to the + standard output. .msg, .out and .exp files are NOT created. For example, + GenBank entry BACFLALA (M60287) contains the following feature: + + CDS 95..271 + /label=LORF- + /codon_start=1 + /translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK + PSASIERSHSINPF" + Any of + + features M60287:LORF- + features M60287:95..271 + features M60287:/label=LORF- + + would write the open reading frame to the standard output: + + atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga + actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta + tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa + agaagtcattcaataaatcctttctga + + This form of FEATURES is provided to make it easy to pipe output to + other programs for further processing. For example + + features M60287:LORF- |ribosome >LORF.protein + + would write the translation of the open reading frame to a file called + LORF.protein. + + The full functionality of FEATURES can be accessed using arguments on + the command line. In particular, when there are multiple entries to be + processed, or multiple features within entries, it is much faster to + supply FEATURES with lists of entries, feature keys or expressions. 
+ Command line options are similar to suboptions in menu items 1-3 above: + + Feature keys: + -f key {feature key} + -F filename {file of feature keys} + + Entries: + -n name {GenBank LOCUS name} + -N filename {file of GenBank LOCUS names} + -a accession {GenBank ACCESSION number} + -A filename {file of GenBank ACCESSION numbers} + -e expression {Feature Table expression} + -E filename {file of Feature Table expressions, each begin- + ning with '@'} + + Databases: + -u filename {GenBank dataset} + -U filename { " " " " " " , + process all entries ie. -nNaAeE options + will be ignored} + -g {GenBank} + + Examples: + + features -f tRNA -n EPFCPCG + + retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out, + and .exp files. + + features -e M60287:LORF- + + would retrieve the same open reading frame as in the earlier example. + + + Since the most time-consuming operation in FEATURES is sequence retrieval, + it is often best to retrieve frequently-used sequences as database + subsets. For example, a set of GenBank entries for chlorophyll a/b binding + protein genes might be stored in a file called CAB.gen. + + features -f CDS -N CAB.nam -u CAB.gen + + would generate the files CAB.msg, CAB.out and CAB.exp containing output + for all CDS features in the entries listed in the file CAB.nam. + + features -E CAB.exp -u CAB.gen + + would re-create the output file CAB.out. + + + + BUGS + FEATURES does no preliminary error checking for syntax of + GenBank expressions prior to their evaluation. Expressions that can + not be evaluated will be flagged by GETOB in the .msg file. + + At present, little checking is done to test for the presence or + correctness of input files. Some errors may cause the program to + crash. + + For User-defined datasets, filename expansion is not performed. 
+ + FILES + Temporary files: + X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) } + UNRESOLVED.fea UNRESOLVED.out + FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out + + SEE ALSO + grep(1V) fetch getob splitdb + + TRANSPORTATION NOTES + It should be fairly easy to get FEATURES to work even on systems + in which GenBank has not been formatted for the XYLEM package. + This is because FEATURES does not work directly on the database, but + rather retrieves all necessary sequences by calling FETCH. Thus, + statements like 'fetch FEA.nam FEA.gen' could be replaced with any + command that, given a file containing names or accession numbers, + returns a file containing GenBank entries. In principle, you + could even implement this sort of command to retrieve entries from + the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although + such a setup would undoubtedly be quite slow. + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/fetch.doc b/CORE/xylem/fetch.doc new file mode 100644 index 0000000..9b4b1a6 --- /dev/null +++ b/CORE/xylem/fetch.doc @@ -0,0 +1,320 @@ + + FETCH.DOC update 24 Feb 96 + + + NAME + fetch - retrieves database entries by name or accession number + + SYNOPSIS + fetch {interactive mode} + fetch [options] namefile [output file] {batch mode} + + DESCRIPTION + fetch retrieves one or more entries from a database. + + Interactive mode: fetch prompts the user to set search parameters, + using an interactive menu: + ___________________________________________________________________ + FETCH - Version 7 Feb 94 + Please cite: Fristensky (1993) Nucl. Acids Res. 
21:5997-6003 + ___________________________________________________________________ + Namefile: + Outfile: + Database: + ------------------------------------------------------------------- + Parameter Description Value + + 1) Name/Acc Name or Accession sequence to get + 2) Namefile Get list of sequences from Namefile + 3) WhatToGet a:annotation s:sequence b:both b + 4) Database g:GenBank p:PIR v:VecBase l:LiMB g + G:GenBank dataset P:PIR dataset + 5) Outfile Send all output to a single file (Outfile) + 6) Files f:Send each entry to a separate file f + ------------------------------------------------------------- + Type number of your choice or 0 to continue: + + After all parameters have been set, type 0 to commence the search. + Messages regarding the progress of the search will be printed. + + (1,2) Which entries to get? + If you want to get a single entry, option 1 lets you type in the + name of that entry, without having to create a namefile. To get + more than one entry, choose option 2, and specify the name of a + file containing sequence names or accession numbers. + + namefile is a file containing one or more sequence names or + accession numbers, each on a separate line. Names and accession + numbers can even be interspersed, in upper or lowercase, and in + any order. For example, the namefile prp.nam might contain + + ; plant pathogenesis related proteins + ; (these are sample comment lines) + ; note that any line containing a semicolon is ignored + x06362 + x05454 + TOBPR1A1 + ; comments can be interspersed with names. + PUMPR13 + tobpr1ar + + Options 1 & 2 are mutually exclusive. Setting one will negate the + other. If option 2 is chosen, the name of the namefile will appear + at the top of the menu. + + (3) WhatToGet + Use this option to specify whether to get annotation, sequence, + or both (default=both). + + (4) Database + Use this option to select the database. (default=GenBank). 
+ G and P select user-created database subsets containing GenBank + or PIR entries, respectively. It is assumed that the database + has been split into .ano, .wrp and .ind files using splitdb. + For example, if you had created a database subset called PR1.pir, + splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are + the files actually read by FETCH. When prompted for the name + of the database, simply type "PR1", without a file extension. + (If you do type a file extension, it will be ignored). + + (5, 6) Where to send output + By default, option 6 is set to f, and each entry will be written to + a separate file, where the name of the file is the name of the + entry, followed by a file extension. If a complete entry is + retrieved, the file extension will indicate the type of database + (GenBank: .gen; PIR: .pir, Vecbase: .vec; LiMB: .LiMB). If only + annotation or sequence are retrieved, the file extensions will be + .ano or .wrp, respectively. Using the default, the namefile above + would create the following files: + + PUMPR13.gen + TOBPR1A1.gen + TOBPR1AR.gen + TOBPR1CR.gen + TOBPR1PS.gen + + By choosing option 5, you can specify the name of an output file + for all entries to go to. This filename will appear at the top + of the menu. Obviously, options 5 & 6 are mutually exclusive. + Note that entries retrieved are written in alphabetical order (sorting by + ASCII values), not the order in which they appeared in namefile. + + (Note for remote users only: -f will only work for a single + name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES + specified in 2).) + + Batch mode: + Although it is transparent to the user, all fetch really does + is call getloc, saving the user the trouble of knowing which + database files to retrieve sequences from, or of having to + execute getloc multiple times to retrieve sequences from + different database files.
Thus, the options are identical to those + for getloc: + + -a Write annotation portions of entries only, terminated by '//'. + -s Write sequence data only, in Pearson (.wrp) format. + -f Write each entry to a separate file. + -g GenBank (default) + -e EMBL {not implemented} + -p PIR (NBRF) + -v Vecbase + -l LiMB + -G GenBank_dataset + -P PIR_dataset + + If -f is not specified, outfile must be specified. + + -L force execution of fetch on local host even if + $XYLEM_RHOST is set. See "REMOTE EXECUTION" below + + + PIR_dataset + GenBank_dataset + This can be either a file of PIR entries, a file of GenBank entries, + or a XYLEM dataset created by splitdb. A file of PIR entries must + have the file extension ".pir". A file of GenBank entries must have + the file extension ".gen". A XYLEM dataset contains PIR entries split + among three files by splitdb: annotation (.ano), sequence (.wrp) + and index (.ind). These file extensions must be used! + + When specifying a split dataset, only the base name needs to be + used. For example given a XYLEM dataset consisting of the files + myset.ano, myset.wrp and myset.ind, the following two commands + are equivalent: + + fetch -P myset something.nam something.pir + fetch -P myset.ano something.nam something.pir + + If the original .pir file had been used, the command would have + been + + fetch -P myset.pir something.nam something.pir + + The ability to work directly with .gen or .pir files is quite + convenient. However, since FETCH needs to work with a split dataset, + FETCH automatically splits .pir or .gen files into .ano, .wrp + and .ind files, which are removed when finished. This requires + extra disk space and execution time, which could be significant + for large datasets. + + EXAMPLES + Batch example: + fetch -f chitinase.nam + will retrieve annotation and sequence for sequences listed in + chitinase.nam from GenBank, writing each entry to a separate file + with the extension .gen.
+ + fetch -s -v pbr.nam pbr.wrp + will retrieve sequence data only for the entries listed in pbr.nam, + from VecBase, and write all sequences to a Pearson format file + (ie. readable by fasta) with the name pbr.wrp. + + fetch -G sample sample.nam new.gen + fetch -G sample.ano sample.nam new.gen + Assumes that a set of GenBank entries has been split by splitdb + into sample.ano sample.wrp and sample.ind. The entries listed in + sample.nam are written to new.gen. + + + FILES + Database files: + The directories for database files are specified by the environment + variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB + (LiMB). + + Index files are $GB/gbacc.idx for GenBank (this file is supplied + with each GenBank release), while the other databases + use .ind files generated by splitdb. Split database files MUST + have the following file extensions: .ano {annotation}, .wrp + {sequence} and .ind {index}. Thus, when creating database files + for pir1.dat with splitdb, the output files should be pir1.ano, + pir1.wrp and pir1.ind. + + Temporary files: + NAMEFILE.fetch + PRELIMINARY.fetch + TMP.fetch + FOUND.fetch + FETCHDIR {temporary directory} + + REMOTE EXECUTION + Where the databases can not be stored locally, FETCH can call + FETCH on another system and retrieve the results. To run + FETCH remotely, your .cshrc file should contain the following + lines: + + setenv XYLEM_RHOST remotehostname + setenv XYLEM_USERID remoteuserid + + where remotehostname is the name of the host on which the + databases reside (in XYLEM split format) and remoteuserid + is your userid on the remote system. When run remotely, + your local copy of FETCH will generate the following + commands: + + rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename + rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ...
+ rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename + rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files + + Because FETCH uses rsh and rcp, your home directory on both + the local and remote systems must have a world-readable + file called .rhosts, containing the names of trusted remote + hosts and your userid on each host. Before trying to get + FETCH to work remotely, make sure that you can rcp and + rsh to the remote host. + + Obviously, remote execution of FETCH implies that FETCH + must already be installed on the remote host. When FETCH + runs another copy of FETCH remotely, it uses the -L option + (fetch -L) to ensure that the remote FETCH job executes, + rather than calling yet another FETCH on another host. + + + ---------- Remote execution on more than 1 host ----------- + If more than 1 remote host is available for running FETCH + (say, in a clustered environment where many servers mount + a common filesystem) the choice of a host can be determined + by the csh script choosehost, such that execution of + choosehost returns the name of a remote server. To use this + approach, the following script, called 'choosehost' should + be in your bin directory: + + #!/bin/csh + # choosehost - choose a host to use for a remote job. + # This script rotates among servers listed in .rexhosts, + # by choosing the host at the top of the list and moving + # it to the bottom. + + #Rotate the list, putting the current host to the bottom.
+ set HOST = `head -1 $home/.rexhosts` + set JOBID = $$ + tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID + echo $HOST >> /tmp/.rexhosts.$JOBID + /usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts + + # Write out the current host name + echo $HOST + + You must also have a file in your home directory called + .rexhosts, listing remote hosts, such as + + graucho.cc.umanitoba.ca + harpo.cc.umanitoba.ca + chico.cc.umanitoba.ca + zeppo.cc.umanitoba.ca + + Each time choosehost is called, choosehost will rotate the + names in the file. For example, starting with the .rexhosts + as shown, it will move graucho.cc.umanitoba.ca to the bottom + of the file, and write the line 'graucho.cc.umanitoba.ca' + to the standard output. The next time choosehost is + run, it would write 'harpo.cc.umanitoba.ca', and so on. + + Depending on your local configuration, you may wish to + rewrite choosehost. All that is really necessary is that + echo `choosehost` should return the name of a valid host. + + Once you have installed choosehost and tested it, you can + get FETCH to use choosehost simply by setting + + setenv XYLEM_RHOST choosehost + + in your .cshrc file. + + --------------- Remote filesystems ----------------------- + Finally, an alternative to remote execution is to remotely mount + the file system containing the databases across the network. + This has the advantage of simplicity, and means that the + databases are available for ALL programs on your local + workstation. However, it may still be advantageous to run + FETCH remotely, since that will shift much of the computational + load to another host. + + BUGS + When retrieving entries directly from GenBank, FETCH uses the + Accession Number index file gbacc.idx. In this case, FETCH + can retrieve all entries containing a given accession number. + This capability makes it possible to retrieve an entry using a + secondary accession number.
However, if more than one entry + shares a secondary accession number, all of those entries will + be retrieved. While this behavior might be a bit of an + annoyance at times, it can also be useful because it alerts + the user to the presence of other, related entries that might + be of interest. + + SEE ALSO + getloc features + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/findkey.doc b/CORE/xylem/findkey.doc new file mode 100644 index 0000000..c3197c7 --- /dev/null +++ b/CORE/xylem/findkey.doc @@ -0,0 +1,365 @@ + + FINDKEY.DOC update 13 Mar 97 + + + NAME + findkey - finds database entries containing one or more keywords + + SYNOPSIS + findkey + findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile] + findkey [-P PIR_dataset] keywordfile [namefile findfile] + findkey [-G GenBank_dataset] keywordfile [namefile findfile] + + DESCRIPTION + findkey uses the grep family of commands to find lines in database + annotation files containing one or more keywords. Next, identify + is called to create a .nam file, containing the names of entries + containing the keywords, and a .fnd file, containing the actual + lines from each entry containing hits. A PIR or GenBank dataset is + either a file containing one or more GenBank or PIR entries, or + the name of a XYLEM dataset created by splitdb. See FILES below + for a more detailed description. + + INTERACTIVE USE + findkey prompts the user to set search parameters, using an interactive + menu: + + ___________________________________________________________________ + FINDKEY - Version 12 Aug 94 + Please cite: Fristensky (1993) Nucl. Acids Res.
21:5997-6003 + ___________________________________________________________________ + Keyfile: + Dataset: + ------------------------------------------------------------------- + Parameter Description Value + ------------------------------------------------------------------- + 1) Keyword Keyword to find thionin + 2) Keyfile Get list of keywords from Keyfile + 3) WhereToLook p:PIR v:VecBase p + GenBank - b:bacterial i:invertebrate + m:mamalian e:expressed seq. tag + g:phage l:plant + r:primate n:rna + d:rodent s:synthetic + u:unannotated a:viral + t:vertebrate x:patented + z:STS + G: GenBank dataset P: PIR dataset + ------------------------------------------------------------- + Type number of your choice or 0 to continue: + 0 + Searching /home/psgendb/PIR/pir1.ano... + Sequence names will be written to thionin~pir.nam + Lines containing keyword(s) will be written to thionin~pir.fnd + Searching /home/psgendb/PIR/pir2.ano... + Sequence names will be written to thionin~pir.nam + Lines containing keyword(s) will be written to thionin~pir.fnd + Searching /home/psgendb/PIR/pir3.ano... + Sequence names will be written to thionin~pir.nam + Lines containing keyword(s) will be written to thionin~pir.fnd + + As shown in the example above, the keyword thionin was specified + as the keyword to search for. By default, option 3 is set to p, + and the PIR protein database is searched. Messages describe the + progress of the search. Since PIR is broken up into two divisions + (new and protein) both are searched, but all output is written to + thionin.pir.nam and thionin.pir.fnd + + OPTIONS + (1,2) Which keywords to search for? + If you want to search for a single keyword, option 1 lets you type + the keyword, without having to create a file. To search for more + than one keyword, choose option 2, and specify the name of a + file containing the keywords. 
For example, entries containing + genes for antibiotic resistance might be found using the + following keyword file: + + ampicillin + chloramphenicol + kanamycin + neomycin + tetracycline + + Note: keyword searches are case insensitive. + + As you might expect, it takes longer to search for multiple + keywords than a single keyword. + + Options 1 & 2 are mutually exclusive. Setting one will negate the + other. If option 2 is chosen, the name of the keyword file will + appear at the top of the menu. + + Finally, it is probably not a good idea to search GenBank + entries using very short keywords consisting only of letters. + This is because GenBank entries now include a /translation + field containing the amino acid sequence of each protein + coding sequence. Consequently, 3 or 4 letter keywords + consisting of legal amino acid symbols (eg. CAP, recA) will + turn up fairly often in protein translations. + + (3) WhereToLook + Use this option to specify the database to be searched. In the + case of GenBank, only one division at a time may be searched. + User-created database subsets containing PIR (P) or GenBank (G) + entries may also be searched. User-created database subsets + must be in the .ano/.wrp/.ind form created by splitdb. + + OUTPUT + The output filenames take the following form: + + name_ex1.ex2 + + The 'name' part of the filename is either the keyword searched for, + if option 1 was chosen, or the name of the keyword file, if option 2 + obtains. 'ex1' indicates the database division that was searched. For + PIR and VecBase, ex1 is 'pir' and 'vec', respectively.
For GenBank, + ex1 is as follows: + + bct - bacterial + inv - invertebrate + mam - other mammalian + est - expressed sequence tag + phg - phage + pln - plant (includes fungi) + pri - primate + rna - structural RNAs + rod - rodent + syn - synthetic sequences + sts - sequence tagged sites + una - unannotated (new) sequences + vrl - viral + vrt - other vertebrate + + 'ex2' distinguishes the files containing the names of entries + containing keywords (.nam) and the files containing the lines found + in each entry (.fnd). + + The .nam file can be used directly as a namefile for fetch, getloc, + or getob. + + COMMAND LINE USE + + OPTIONS + p search PIR (default) + P PIR dataset search dbfile, containing PIR entries + v search VecBase + b search Genbank bacterial division + m search Genbank mammalian division + g search Genbank phage division + r search Genbank primate division + d search Genbank rodent division + u search Genbank unannotated division + t search Genbank vertebrate division + i search Genbank invertebrate division + l search Genbank plant division + n search Genbank rna division + s search Genbank synthetic division + a search Genbank viral division + x search Genbank patented division + e search Genbank exp.seq.tag division + z search GenBank STS division + S search GenBank Genom. Survey division + h search GenBank High Thrput. division + G GenBank dataset search dbfile, containing GenBank entries + + L force execution of findkey on local host + even if $XYLEM_RHOST is set. See "REMOTE + EXECUTION" below + + FILES + + keywordfile - contains keywords to search for + + namefile - LOCUS names of hits are written to this file + + findfile - for each hit, a report listing the LOCUS name and the + lines matching the keyword is written to this file. + + If namefile and findfile are not specified on the command line, + filenames will be created as described above for interactive + use.
+ + PIR_dataset + GenBank_dataset + This can be either a file of PIR entries, a file of GenBank entries, + or a XYLEM dataset created by splitdb. A file of PIR entries must + have the file extension ".pir". A file of GenBank entries must have + the file extension ".gen". A XYLEM dataset contains PIR entries split + among three files by splitdb: annotation (.ano), sequence (.wrp) + and index (.ind). These file extensions must be used! + + When specifying a split dataset, only the base name needs to be + used. For example given a XYLEM dataset consisting of the files + myset.ano, myset.wrp and myset.ind, the following two commands + are equivalent: + + findkey -P myset something.kw + findkey -P myset.ano something.kw + + If the original .pir file had been used, the command would have + been + + findkey -P myset.pir something.kw + + The ability to work directly with .gen or .pir files is quite + convenient. However, since FINDKEY needs to work with a split dataset, + FINDKEY automatically splits .pir or .gen files into .ano, .wrp + and .ind files, which are removed when finished. This requires + extra disk space and execution time, which could be significant + for large datasets. + + EXAMPLES + If the list of antibiotics shown above was stored in the file + antibiotic.kw, and option 3 was set to 'b', then the annotation + portion of the GenBank bacterial division would be searched, and + all lines containing any of these keywords would be written to + antibiotic~bac.fnd. The corresponding GenBank entry names would + appear in antibiotic~bac.nam. + + The same keyword file could be used to search other database files. + If VecBase was searched, the output files would be antibiotic~vec.fnd + and antibiotic~vec.nam. These filename conventions make it easy + to search different database divisions, and to keep track of where + data came from. + + Command line examples: + + findkey thionin.kw + + would be equivalent to the interactive example shown above.
In + this case, the file thionin.kw contains the word 'thionin'. + (Note that since PIR is the default, -p need not be supplied.) + + findkey -b antibiotic.kw drugs.nam drugs.fnd + + would search the GenBank bacterial division for the keywords + contained in antibiotic.kw, and write the output to drugs.nam + and drugs.fnd. + + FILES + Database files: + The directories for database files are specified by the environment + variables $GB (GenBank) $PIR (PIR/NBRF) and $VEC(Vecbase). + Annotation (.ano) and index (.ind) are those generated by splitdb. + + Temporary files: + $jobid.fnd + $jobid.nam + $jobid.grep + + where $jobid is a unique jobid generated by the shell + + REMOTE EXECUTION + Where the databases can not be stored locally, FINDKEY can call + FINDKEY on another system and retrieve the results. To run + FINDKEY remotely, your .cshrc file should contain the following + lines: + + setenv XYLEM_RHOST remotehostname + setenv XYLEM_USERID remoteuserid + + where remotehostname is the name of the host on which the + databases reside (in XYLEM split format) and remoteuserid + is your userid on the remote system. When run remotely, + your local copy of FINDKEY will generate the following + commands: + + rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename + rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ... + rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename + rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files + + Because FINDKEY uses rsh and rcp, your home directory on both + the local and remote systems must have a world-readable + file called .rhosts, containing the names of trusted remote + hosts and your userid on each host. Before trying to get + FINDKEY to work remotely, make sure that you can rcp and + rsh to the remote host. + + Obviously, remote execution of FINDKEY implies that FINDKEY + must already be installed on the remote host.
When FINDKEY + runs another copy of FINDKEY remotely, it uses the -L option + (findkey -L) to ensure that the remote FINDKEY job executes, + rather than calling yet another FINDKEY on another host. + + ---------- Remote execution on more than 1 host ----------- + If more than 1 remote host is available for running FINDKEY + (say, in a clustered environment where many servers mount + a common filesystem) the choice of a host can be determined + by the csh script choosehost, such that execution of + choosehost returns the name of a remote server. To use this + approach, the following script, called 'choosehost' should + be in your bin directory: + + #!/bin/csh + # choosehost - choose a host to use for a remote job. + # This script rotates among servers listed in .rexhosts, + # by choosing the host at the top of the list and moving + # it to the bottom. + + #Rotate the list, putting the current host to the bottom. + set HOST = `head -1 $home/.rexhosts` + set JOBID = $$ + tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID + echo $HOST >> /tmp/.rexhosts.$JOBID + /usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts + + # Write out the current host name + echo $HOST + + You must also have a file in your home directory called + .rexhosts, listing remote hosts, such as + + graucho.cc.umanitoba.ca + harpo.cc.umanitoba.ca + chico.cc.umanitoba.ca + zeppo.cc.umanitoba.ca + + Each time choosehost is called, choosehost will rotate the + names in the file. For example, starting with the .rexhosts + as shown, it will move graucho.cc.umanitoba.ca to the bottom + of the file, and write the line 'graucho.cc.umanitoba.ca' + to the standard output. The next time choosehost is + run, it would write 'harpo.cc.umanitoba.ca', and so on. + + Depending on your local configuration, you may wish to + rewrite choosehost. All that is really necessary is that + echo `choosehost` should return the name of a valid host.
+ + Once you have installed choosehost and tested it, you can + get FINDKEY to use choosehost simply by setting + + setenv XYLEM_RHOST choosehost + + in your .cshrc file. + + --------------- Remote filesystems ----------------------- + Finally, an alternative to remote execution is to remotely mount + the file system containing the databases across the network. + This has the advantage of simplicity, and means that the + databases are available for ALL programs on your local + workstation. However, it may still be advantageous to run + XYLEM remotely, since that will shift much of the computational + load to another host. + + + BUGS + At present, regular expression characters cannot be used for + keyword searches. + + SEE ALSO + grep(1V) identify splitdb + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/getloc.doc b/CORE/xylem/getloc.doc new file mode 100644 index 0000000..f1c1bc1 --- /dev/null +++ b/CORE/xylem/getloc.doc @@ -0,0 +1,65 @@ + + GETLOC.DOC update 30 May 95 + + + NAME + getloc - retrieve database entries listed in namefile to outfile. + + SYNOPSIS + getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile + + DESCRIPTION + getloc reads a list of names from namefile and recreates + entries by combining the annotation and sequence portions of each + entry from anofile and seqfile. getloc will work most quickly + when the namefile is in alphabetical order, but it will also + work on unsorted lists. The following options affect the output: + + a Write annotation portions of entries only, terminated by '//'. + seqfile is not included on command line. + + s Write sequence data only, in Pearson (.wrp) format. + anofile is not included on commandline. 
+ + f Write each entry to a separate file. The filename will + consist of the LOCUS name, followed by .ano for annotation + only, .wrp for sequence only, or .gen for complete GenBank + format. + + c namefile contains accession numbers, rather than names + + The following options identify the type of database being read: + + g GenBank (default) + e EMBL + p PIR (NBRF) + v Vecbase + l LiMB + + namefile consists of an alphabetically ordered list of LOCUS names, + each on a separate line. Indfile could be used to create a + namefile by simply editing out some subset of names. (This can also + be done using the Unix comm command.) If the entire indfile was + used, the entire database would be recreated, minus the header + information that might have been present in the original, but + deleted by splitdb. + + NOTE + Getloc automatically expands leading blanks that have been + compressed using splitdb -c. See splitdb.doc for more information. + + SEE ALSO + splitdb, comm(1). + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/getob.doc b/CORE/xylem/getob.doc new file mode 100644 index 0000000..895bd17 --- /dev/null +++ b/CORE/xylem/getob.doc @@ -0,0 +1,327 @@ + + GETOB 21 Dec 94 + + + NAME + getob - Get an object from GenBank + + SYNOPSIS + getob [-frcn] infile namefile anofile seqfile indfile message + [outfile] expfile + + DESCRIPTION + getob extracts 'objects' (subsequences) from GenBank entries, using + the features table, and writes them to outfile (.out). A log + describing the construction of each object is written to message + (.msg).
If -r is not set, a list of expressions that would recreate + the .out file if evaluated by getob -r, is written to expfile (.exp) + + The following options are available: + + f Write each entry to a separate file. The name will consist + of the entry name, and the extension '.obj'. + + r Resolve expressions from namefile into objects. + Expressions take the form: + + @[<database>::]<accession>:<location> + + In effect, r makes it possible to use getob to resolve + features that span more than one entry, such as segmented + files. In the first run of the program, features that require + data from outside the entry in which they are defined will be + written to outfile with those externally-defined parts rep- + resented using the '@' notation described above. During a + subsequent run, the outfile from the previous run is used as + namefile. When r is set, all lines not beginning with '@' (ie. + name lines and sequence lines) are simply copied to the new + outfile. When an '@' is encountered, the expression is parsed + into accession number and location. The entry with the + specified accession number is located in indfile, and read from + anofile and seqfile. It is then evaluated, and the result + written to outfile in place of the '@' expression. + + getob can also be used to get specific labeled objects from + a given entry. Examples: + + @k30576:polyprotein + @k30576:/label=polyprotein + @x10345:/product="hsp70" + @j00879:group(1..2200,mutation_37) + + The first two constructs given above are equivalent. Both + will extract the feature called polyprotein. The third + construct shows that any feature label can be specified. If + none is specified, as in the first example, then /label= is + assumed. One limitation, however, is that the label sought + must be unique within the entry in its first 15 characters + including double quotes ("). Otherwise, only the first + matching label expression will be evaluated.
Finally, the + last example shows that a mutant sequence can be constructed + by first specifying an expression that evaluates to a + sequence (ie. 1..2200) and then a labeled expression that + upon evaluation, uses replace() to modify that sequence. The + usage shown in examples 3 & 4 above represent extensions to + the DDBJ/EMBL/GenBank Features Table Format. + + As touched on briefly above, the r option makes it possible + to construct objects that include recursive references to + other entries (eg. segmented files) by iterative calls to + getob. The 'features' command automates this process. The basic + algorithm is as follows: + + getob infile namefile anofile seqfile indfile ... + + #Pull out all lines containing indirect references + grep '@' outfile > unresolved.grep + + while (unresolved.grep is not empty) + + #extract accession numbers to be retrieved + cut -c2-7 unresolved.grep > unresolved.nam + + #retrieve the sequences into a new file, and create + #a database subset to be used by getob + fetch unresolved.nam new.gen + splitdb new.gen new.ano new.wrp new.ind + + #run getob again to resolve indirect references + getob -r infile outfile new.ano new.wrp new.ind ... + + #Pull out all lines containing indirect references + grep '@' outfile > unresolved.grep + end + + c NAMEFILE contains accession numbers, rather than locus names + + n By default, the qualifier 'codon_start' is used to determine + how many n's, if necessary, must be added to the 5' end of + CDS, mat_peptide, or sig_peptide, to preserve the reading + frame. To turn OFF this feature, -n must be set. -n must be set + for GenBank Releases 67.0 and earlier. + + infile contains commands indicating what data is to be pulled from + each entry. Two types of output may be presented, GenBank or + OBJECTS. These are described below: + + 1) GenBank output - If the word 'GENBANK' is the first line in + infile, a pseudo-GenBank entry will be recreated. 
This option + is only intended for debugging purposes and will probably be + removed in later releases. + + 2) Object format - This option instructs getob to write part or + all of each sequence, along with site annotation, by specifying + feature key names. The syntax for infile is shown below: + + Backus-Naur format: Example: + ---------------------------------------------------------- + OBJECTS OBJECTS + tRNA + { rRNA + . . . SITES + } stem_loop + SITES + { + . . . + } + + In the example above, getob is instructed to extract all tRNA or + rRNA sequences from each entry, and annotate the position of each + stem/loop structure. Note that the SITES coordinates written to the + file tell the positions of those SITES relative to the start of the + object, rather than the original location in the sequence. As above, + each word begins a separate line. + + While the -r option does not use infile, at least a dummy infile + must be included in the command line. This dummy file need only + contain two lines: + + OBJECTS + SITES + + NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in + the input file will have no effect, the word SITES must still be + present after the last feature key. + + + namefile + namefile consists of a list of LOCUS names or accession numbers, + each on a separate line. Names or accession numbers should appear + in the order in which they appear in the database file. Unordered + namefiles will slow the progress of the search. Since only the + first non-blank field of each line in namefile is read, indfile + could be used to create a namefile. If the entire indfile was + used, the entire database file would be processed. A sample + namefile requesting four sequences by LOCUS name is shown below: + + POTPR1A + POTPSTH2 + POTPSTH21 + POTSTHA + + anofile, seqfile, and indfile + The database subset containing GenBank entries must be divided + among annotation, sequence and an index by splitdb. 
+ + message + message contains a log describing the parsing of each object. + For annotative purposes, qualifier lines from the object are + included along with the location expression being parsed. + The beginning of a typical message file is shown below: + + GETOB Version 0.962 14 May 1992 + + POTPR1A:CDS1 + join + ( + 295 603 + + 1011 1355 + + ) + + + /note="pathogenesis-related protein (prp1)" + /codon_start=1 + /translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL + QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED + KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY + LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK" + //---------------------------------------------- + + In the example above, getob was instructed to retrieve all CDS + features from the database subset. The message for the entry + POTPR1A is shown, along with a reconstruction of the location + expression that was evaluated to create the object. In this + case, protein coding sequences from two exons had to be joined + to create the object. + + outfile + outfile contains the actual objects constructed, consisting of + sites found and sequences. 
The beginning of a typical output file + is shown below: + + >POTPR1A:CDS1 + atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag + agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg + aagatttacaaaataagagccctttacttcttcaatctaatccaattcac + aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat + ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc + ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc + gaagataag + ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa + agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca + aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt + gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat + tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg + aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt + atccgttaccgagcctacattcagcctgttgatgcttcaaaatga + + In the example, the CDS from entry POTPR1A has been written in + two chunks, corresponding to the two exon portions of the coding + sequence. Each location retrieved in constructing the object is + written as a separate block of sequence. By comparing message file + to outfile, it is possible to verify the correctness of the + operation. + + Numbers are appended to the sequence names to indicate + which CDS in the entry has been retrieved. Thus, if two CDS + features were present, the second one would be named >POTPR1A:2. + For compatibility with the FASTA programs of Pearson, the name line + begins with a '>'. + + expfile + The expression evaluated to create this feature is written + to expfile: + + >POTPR1A:CDS1 + @J03679:join(295..603,1011..1355) + + expfile is only created if -r is not set. It is intended as a way + of automating the creation of a feature expression file for use + in generating customized datasets. Expressions in expfile can be + deleted or modified, or new expressions added, to tailor the + dataset to individual needs. 
To generate a dataset from expfile: + + getob -r infile expfile anofile seqfile indfile message outfile + + EXTENSIONS TO THE FEATURE TABLE LANGUAGE + + 1) poly(||,x) + + This operator evaluates an absolute location, literal, or + feature name (ie. any location not containing functional + operators) and writes it x times. The most obvious + application of poly is to create spacers to represent regions + of unknown sequence between sequences that are known. For + example, the restriction map of a 4kb EcoR1 fragment with a + Hind3 site 1000 bp from one end could be represented as follows: + + join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc") + + 2) The following feature keys are recognized by GETOB, although + not included in the language definition. While they will not + appear in GenBank entries, they could be used in user-created + GenBank-format files: + + contig + This feature key is meant to be used to assemble large + sequence segments from smaller segments, possibly using the + poly() operator. + + chromosome + Intended to annotate the complete sequence of a chromosome. This + feature may be constructed by a join of two or more contigs. 
+ + Use of these keywords is illustrated in the features table + shown below, which could be used to construct a model of part + of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb: + + contig join(J01619:1..13063,poly("n",7140), + J03939:1..1363,poly("n",14380), + X02306:complement(1..1622),poly("n",14710), + J04423:1..5793,poly("n",22500), + X03722:1..2400,poly("n",123750), + one-of(X05017:complement(1..1854),X05017:1..1854)) + /label=Eco_contig8 + /map=763.4-950.6kb + contig join(V00352:1..2412,poly("n",28800),M15273:1..3409) + /label=Eco_contig9 + /map=972.9-1001.7kb + contig join(X02826:1..1357,poly("n",13540), + J01654:complement(1..2270)) + /label=Eco_contig10 + /map=1016.5-1031.4kb + chromosome join(Eco_contig8,poly("n",22300), + Eco_contig9,poly("n",14800), + Eco_contig10) + /label=Ecoli_chromosome + + NOTES + 1) If the const DEBUG is set to true in the Pascal source code, getob + writes messages to the standard output, indicating the progress of + processing for each entry read in. By default, DEBUG=false. + This feature is solely for debugging purposes and will be removed in + later releases. + + 2) GETOB automatically expands leading blanks that have been + compressed using splitdb -c. See splitdb.doc for more information. + + SEE ALSO + features, splitdb, getloc + The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04 + September 1, 1992 + GenBank Release Notes for Release 79.0. + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. 
diff --git a/CORE/xylem/identify.doc b/CORE/xylem/identify.doc new file mode 100644 index 0000000..56ced71 --- /dev/null +++ b/CORE/xylem/identify.doc @@ -0,0 +1,83 @@ + + IDENTIFY update 3 Feb 94 + + + NAME + identify - creates a file of locus names corresponding to lines + found by grep in a GenBank annotation file. + + SYNOPSIS + identify grepfile indfile namefile findfile + + DESCRIPTION + grepfile is created using the Unix grep command to search a .ano + file created by splitgb. For example, to find all lines containing + the word 'chlorophyll' in plant.ano, use + + grep -n -i 'chlorophyll' plant.ano > plant.grep + + In the example shown, the -n option causes each line written to + plant.grep to be preceded by the number of that line in plant.ano. + (The -i option causes grep to ignore case.) Identify can use the + indfile to determine which entry a given numbered line was found + in, and writes the corresponding LOCUS name to namefile. In + addition, all lines found in a given entry are re-written to + findfile without the line numbers, and preceded by the LOCUS name + for that entry. + + EXAMPLES + Suppose you wanted to obtain a list of names for all plant + sequences which code for proteins. The task is complicated by the + fact that many fungal sequences are included in the GenBank plant + file. You could begin by searching plant.ano (containing all + GenBank plant entries) for the word 'Planta': + + grep -n 'Planta' plant.ano > Planta.grep + + However, we want to eliminate all fungal sequences, as well as all + sequences for RNAs other than mRNAs. If we create the file + bad.str containing the keywords + + Mycophyta + tRNA + rRNA + uRNA + + we can then type + + grep -n -f bad.str plant.ano > bad.grep + + bad.grep now contains all lines containing the offending keywords. + We next use identify to find the names of the entries found by + grep. 
+ + identify Planta.grep plant.ind Planta.nam Planta.fnd + identify bad.grep plant.ind bad.nam bad.fnd + + Next, we can use the Unix comm command to compare the two .nam + files and produce an output file containing only names which are + present in Planta.nam but not bad.nam: + + comm -23 Planta.nam bad.nam > plants.nam + + The file plants.nam now contains names of either plant cDNA or + genomic sequences which do not code for structural RNAs. + At this point, getloc could be used to create a sub-database containing + only those entries listed in plants.nam. See documentation for + getloc for a more detailed discussion. + + SEE ALSO + grep, fgrep, egrep, ngrep, comm, splitgb, getloc + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/keyfile.template b/CORE/xylem/keyfile.template new file mode 100644 index 0000000..66ac651 --- /dev/null +++ b/CORE/xylem/keyfile.template @@ -0,0 +1,23 @@ +;--------------------------------------------------------------------------- +; FINDKEY/GDE Keyword File Instructions +; +; 1. Type in one or more keywords below, +; or +; Place cursor at end of this file and choose 'Include File' in the FILE +; menu to read in a file of keywords. +; +; 2. Choose 'Save Current File' in the File menu +; 3. Quit this window +; +; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT +; OUT THESE COMMENT LINES. 
+; +; NOTE: Put each keyword on a separate line +; SAMPLE KEYWORD FILE: +; +; maize +; corn +; Z.mays +; Zea +;--------------------------------------------------------------------------- + diff --git a/CORE/xylem/namefile.template b/CORE/xylem/namefile.template new file mode 100644 index 0000000..cd63482 --- /dev/null +++ b/CORE/xylem/namefile.template @@ -0,0 +1,25 @@ +;--------------------------------------------------------------------------- +; FETCH/GDE Name/Accession File Instructions +; +; 1. Type in one or more LOCUS names or Accession #'s below, +; or +; Place cursor at end of this file and choose 'Include File' in the FILE +; menu to read in a file of names or accession #'s. +; or +; Copy names or accession #'s from another window and Paste into this window. +; +; 2. Choose 'Save Current File' in the File menu +; 3. Quit this window +; +; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT +; OUT THESE COMMENT LINES. +; +; NOTE: Put each name on a separate line +; SAMPLE NAME/ACCESSION FILE: +; +; X30412 +; PSDRR1 +; PEADRRG +; +;--------------------------------------------------------------------------- + diff --git a/CORE/xylem/names.template b/CORE/xylem/names.template new file mode 100644 index 0000000..e2e4f23 --- /dev/null +++ b/CORE/xylem/names.template @@ -0,0 +1,25 @@ +;--------------------------------------------------------------------------- +; FEATURES/GDE Name File Instructions +; +; 1. Type in one or more GenBank LOCUS names below, +; or +; Place cursor at end of this file and choose 'Include File' in the FILE +; menu to read in a file of names. +; +; (NOTE: File can not contain accession numbers.) +; +; 2. Choose 'Save Current File' in the File menu +; 3. Quit this window +; +; FEATURES will then extract the appropriate sequences . YOU DON'T NEED TO EDIT +; OUT THESE COMMENT LINES. 
+; +; NOTE: Put each name on a separate line +; SAMPLE NAME FILE: +; +; PEADRRA +; PSDRR1 +; PEADRRG +; +;--------------------------------------------------------------------------- + diff --git a/CORE/xylem/printdoc.doc b/CORE/xylem/printdoc.doc new file mode 100644 index 0000000..8ca092d --- /dev/null +++ b/CORE/xylem/printdoc.doc @@ -0,0 +1,56 @@ + printdoc update 3 Feb 94 + + NAME + printdoc - prints documentation files + + SYNOPSIS + printdoc filename + + DESCRIPTION + printdoc uses the file extension to decide how to print a + documentation file. If necessary, a filter such as pr or nroff + is used to format the file before sending to the appropriate + printer. A list of file extensions recognized by printdoc is + given below. If no file extension is given, or the extension is + not in the list, printdoc assumes .doc. + + .doc - (default) Uses pr to print the text, using the default + settings provided by pr (56 text lines per page plus a 5 line + header and footer). Printing is at 12 cpi, front only. This works + reasonably well for most unformatted documentation files, + provided that the line length doesn't exceed 80 char. This + option assumes that a half-inch left margin is automatically + provided by the printer. + + .tex - Assumes that document is already pre-formatted. Thus, + no headers or footers are provided, and it is assumed that + the top and bottom of pages are padded with blanks or header/ + footer lines as needed. Form-feed characters (^L) may be + included in the text to force page breaks. + + .ps - Assumes file is in PostScript format. Sends it to the + PostScript printer. + + .nroff - Assumes file is formatted for use by nroff, using the + standard macro set (nroff -ms). + + .nroff.me - Assumes file is formatted for use by nroff, using the + e macro set (nroff -me). 
+ + TRANSPORTATION NOTES + For reasons which should be obvious, this script needs major + rewriting at each site, since the available printers will + be of different types and have different names. + + SEE ALSO + pr, pr(V), xlp, nroff + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + diff --git a/CORE/xylem/prot2nuc.doc b/CORE/xylem/prot2nuc.doc new file mode 100644 index 0000000..0212a58 --- /dev/null +++ b/CORE/xylem/prot2nuc.doc @@ -0,0 +1,123 @@ + prot2nuc update 10 Aug 94 + + NAME + prot2nuc - reverse translates protein into nucleic acid + + SYNOPSIS + prot2nuc [-ln -gn] < input > output + + DESCRIPTION + prot2nuc reads a file containing an amino acid sequence + and writes the corresponding reverse translated nucleic acid + sequence, using the standard IUPAC-IUB ambiguity codes to output. + The amino acid sequence may contain internal stop '*' characters. + That is, all legal amino acid characters will be processed. + + -ln print n amino acids/codons per line. (default = 25) + + -gn number the amino acid sequence every n amino acids/codons. + (default = 5) + + If l is not evenly divisible by g, the defaults are used. + + input - If the first line of the file begins with '>' or ';', + input will be read as the standard .wrp (Pearson) format, + such as that produced by getob: + + >name + sequence lines + + + Otherwise, it will be assumed that the file ONLY contains + sequence, and all legal IUPAC/IUB DNA characters will be + read as sequence. + + output - The output begins with a header, listing both the + 1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559 + (1968)], as well as the nucleic acid ambiguity codes [Cornish- + Bowden (1985) Nucl. Acids Res. 13:3021-3030.]. The amino acid + sequence, along with its reverse translation, are then printed on + lines of l amino acids/codons, numbering every g amino acids/codons. 
+ Non-ambiguous nucleotides appear capitalized, while ambiguous + nucleotides are in lowercase. A sample output file appears below: + + PROT2NUC Version 8/10/94 + + IUPAC-IUP AMINO ACID SYMBOLS + [J. Biol. Chem. 243, 3557-3559 (1968)] + + Phe F Leu L Ile I + Met M Val V Ser S + Pro P Thr T Ala A + Tyr Y His H Gln Q + Asn N Lys K Asp D + Glu E Cys C Trp W + Arg R Gly G STOP * + Asx B Glx Z UNKNOWN X + + + IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE + [Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.] + + Symbol Meaning | Symbol Meaning + ------------------------------------+--------------------------------- + G Guanine | k G or T + A Adenine | s G or C + C Cytosine | w A or T + T Thymine | h A or C or T + U Uracil | b G or T or C + r Purine (A or G) | v G or C or A + y Pyrimidine (C or T) | d G or T or A + m A or C | n G or A or T or C + + pI39 + 5 10 15 20 + M E K K S L A A L S F L L L L V L F V A + ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn + AGyTTr TTrAGy TTrTTrTTrTTr TTr + + 25 30 35 40 + Q E I V V T E A N T C E H L A D T Y R G + CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn + TTr AGr + + 45 50 55 60 + V C F T N A S C D D H C K N K A H L I S + GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn + AGy TTr AGy + + 65 70 + G T C H D W K C F C T Q N C + GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy + + + With the Universal Genetic code, ambiguity symbols make it possible + to represent all possible codons for an amino acid using two output + lines. It is important to realize that the ambiguities on each line + can not be combined. For example, CTn and TTr represent all codons for + Leucine. However, attempting to combine them into a single triplet, + yTn, would be incorrect. For example, TTT and TTC are codons for + Phenylalanine, not Leucine. + + FUTURE PLANS + 1. It wouldn't be hard to have the output printed as nucleic acid + sequences in Perason format, so that the output could be read back + into GDE. 
I don't know why you would want to do this, but it could + be done. + 2. Right now, only the Universal Genetic Code is used, but it should + be possible to read in alternative genetic codes, have prot2nuc + figure out the ambiguity rules (as is already done in ribosome) and + print out the appropriate ambiguous codons. + 3. It might be useful to have each possible codon printed out, rather + than ambiguous codons. This would take up a lot more space and + wouldn't be as pretty. If there's a lot of demand I could do this. + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + diff --git a/CORE/xylem/reform.doc b/CORE/xylem/reform.doc new file mode 100644 index 0000000..add7a38 --- /dev/null +++ b/CORE/xylem/reform.doc @@ -0,0 +1,107 @@ + reform update 3 Feb 94 + + NAME + reform - reformats multiply-aligned sequences for printing. + + SYNOPSIS + reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}] + or + ralign file parameters | reform [-gpcn] [-sn] [-ln] file + + DESCRIPTION + + g Gaps are to be represented by dashes (-). + p Bases which agree with the consensus are + represented by periods (.). + c Positions at which all sequences agree are + capitalized in the consensus. + n Sequence data is nucleic acid. Protein default + fx Specify input file format, where x is + r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics) + m Input file contains multiline format sequences already aligned, + as opposed to ralign output. This option is obsolete, and is + equivalent to -fp. + ln The output linelength is set to n. + Default is 70. + sn numbering starts with n (default=0) + + file Sequence file as described in ralign docu- + mentation. reform needs to re-read the + sequence file read by ralign to get the + names of the sequences, which ralign ignores. + This filename is only included for ralign output. 
+ If -m is set, file is ignored, and sequence names + must be read from the input. + + Note that positions in the consensus at which no nucleotide is in the + majority are represented by n's (for nucleic acids) or x's (for proteins), + rather than periods, as in ralign. + + Gaps in the input sequences may be represented by either blanks or dashes. + + INPUT FILE FORMATS + + (a) ralign (default, -fr) + As described in ralign documentation, the input file (which is assumed to + be ralign output) must have each sequence on a single long line. All + characters on a given line will be included in the alignment. All lines + must be exactly the same length. For example, if ralign had read + sequences from a file called 'allcab.seq' and written output to 'allcab.ral', + the following command might be used: + + reform allcab.seq < allcab.ral > allcab.ref + + (b) Pearson (-fp, -m) + Compatible with sequence files used by Pearson's fasta programs as shown: + >name1 + sequence1 + >name2 + sequence2 + ... + >namen + sequencen + + Sequences may run over many lines and line length does not have to be + uniform. However, both dashes ('-') and blanks (' ') will be read in + as gaps in the alignment. A right arrow (>) at the beginning of a line + indicates the name line at the beginning of a new sequence. + + Any line beginning with a semicolon (';') will be considered a comment, + and will be ignored. + + (c) MBCRR-MASE (Intelligenetics) (-fi) + Compatible with .mase files produced by MBCRR's mase and pima programs, + which use the Intelligenetics format as shown: + + ;one or more comment lines + name1 + sequence1 + ;one or more comment lines + name2 + sequence2 + ... + ;one or more comment lines + namen + sequencen + + Sequences may run over many lines and line length does not have to be + uniform. However, both dashes ('-') and blanks (' ') will be read in + as gaps in the alignment. Each sequence MUST begin with at least one + comment line. 
When a comment line is encountered, that signals the + beginning of a new sequence. The first line after the comment is read + as the name, and the sequence begins on the next line after that. + + SEE ALSO ralign, mase + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/ribosome.doc b/CORE/xylem/ribosome.doc new file mode 100644 index 0000000..df13855 --- /dev/null +++ b/CORE/xylem/ribosome.doc @@ -0,0 +1,84 @@ + ribosome update 3 Feb 94 + + NAME + ribosome - translates nucleic acid into protein + + SYNOPSIS + ribosome [-g gcfile] < input > output + + DESCRIPTION + ribosome reads a file of one or more nucleic acid sequences + and writes the corresponding amino acid sequence, in the standard + one letter code, to output. Ribosome begins translating at the + first nucleotide in each input sequence and continues to the end. + If the length of the translated sequence is not divisible by 3, + ribosome pads the final codon with N's and attempts to use ambi- + guity rules to translate the final codon. Based on the genetic + code used, ribosome derives a set of rules to resolve all ambi- + guities that can possibly be resolved. + + -g read in an alternative genetic code from gcfile. If this + option is not specified, ribosome uses the universal + genetic code. + + gcfile - This file specifies an alternative genetic code. An + example is shown below. ribosome reads the first 64 legal + capital letters as amino acids. Consequently, lowercase letters + can be used for annotation purposes, as shown in the example. + All non-amino acid characters are ignored. 
+ + sgc2 - yeast mitochondrial genetic code + + second position + first position ------------------------------- third position + (5' end) u c a g (3' end) + ----------------------------------------------------------------- + u F S Y C u + F S Y C c + L S * W a + L S * W g + ----------------------------------------------------------------- + c T P H R u + T P H R c + T P Q R a + T P Q R g + ----------------------------------------------------------------- + a I T N S u + I T N S c + M T K R a + M T K R g + ----------------------------------------------------------------- + g V A D G u + V A D G c + V A E G a + V A E G g + + + input - If the first line of the file begins with '>' or ';', + input will be read as the standard .wrp (Pearson) format, + such as that produced by getob: + + >name + ; one or more comment lines (optional) + sequence lines + + + Otherwise, it will be assumed that the file ONLY contains + sequence, and all legal IUPAC/IUB DNA characters will be + read as sequence. + + SEE ALSO + getob + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/shuffle.doc b/CORE/xylem/shuffle.doc new file mode 100644 index 0000000..77c69e8 --- /dev/null +++ b/CORE/xylem/shuffle.doc @@ -0,0 +1,66 @@ + shuffle.doc update 3 Feb 94 + + SYNOPSIS + shuffle -sn [-wn -on] + + DESCRIPTION + Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF + and Waterman MS (1984) On the statistical significance of nucleic + acid similarities. Nucl. Acids Res. 12:215-226. + -sn n is a random integer between 0 and 32767. This number + must be provided for each run. + + -wn n is an integer, indicating the width of the window for + random localization. 
If w exceeds the length of a sequence, + or is negative, the entire sequence is scrambled as a single + window. This is also the case if w is not specified. + + -on n is an integer, indicating the number of nucleotides + overlap between adjacent windows. It should never exceed + the window size. o defaults to 0 if not specified. + + If w and o are specified, overlapping windows of w nucleotides + are shuffled, thus preserving the local characteristic base + composition. Windows overlap by o nucleotides. + If w and o are not specified, each sequence is shuffled globally, + thus preserving the overall base composition, but not the local + variations in comp. + + Any number of sequences may be processed from a single input + file. In Pearson-format files, each new sequence begins with a + '>' comment line, indicating the name and a short description of + the sequence. + + No distinction is made between protein or nucleic acid sequences. + That is, shuffle will read any of the following characters as + sequence: + + T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,- + + where '*' is the result of translating a stop codon, and '-' + is a gap generated during sequence alignment. Lowercase is + also accepted. + + EXAMPLE + A sample output file is shown below. Note that the first two + lines of output are comment lines, listing the version of the + program and the parameters used in the run. + + >SHUFFLE VERSION 11/ 8/93 + >RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3 + >BAZFAZ - Borborigmus azerbi F-actin-zeta gene + ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt + cacccttgggcagtg.....(etc.) + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. 
diff --git a/CORE/xylem/splitdb.doc b/CORE/xylem/splitdb.doc new file mode 100644 index 0000000..49e97c2 --- /dev/null +++ b/CORE/xylem/splitdb.doc @@ -0,0 +1,141 @@ + + SPLITDB update 28 Mar 98 + + + NAME + splitdb - split GenBank files into annotation, sequence, and index + + SYNOPSIS + splitdb [-gepvlct] dbfile anofile seqfile indfile + + DESCRIPTION + Splitdb splits a database (dbfile) among three files: anofile, seqfile + and indfile. Splitdb ignores any header information that might be in the + file and begins processing at the first entry. + + anofile contains the annotation portion of each entry. Entries are + terminated with '//' or '///' (PIR only). Trailing blanks present in + dbfile are omitted in anofile. + + seqfile contains the sequence data for each entry. Each sequence + entry begins with a header line, followed by sequence data on + succeeding lines of 75 characters per line. The header line + includes the header flag character '>' in column 1, followed by the + name, followed by the first 50 characters of the 1st + DEFINITION line. An example is shown below: + + >UNHOR1 - Unicorn horn protein 1, complete cDNA sequence + attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc + ... + + Removal of blanks and numbers from sequence lines makes split + datasets about 8-9% smaller than the original GenBank files. + + indfile is an index which tells the line numbers for each entry in + anofile and seqfile. It is assumed to be in alphabetical order by + name. Each line contains a name and accession number, followed by the + line numbers on which the annotation and sequence data begin in anofile + and seqfile, respectively. Thus the file plants.ind might contain: + + + A15660 TA156608 1 1 + A15671 A15671 33 11 + A15673 A15673 65 25 + A15675 AK156751 97 36 + A15677 BA156770 128 46 + A16780 BA167807 160 57 + A16782 A16782 192 70 + ATHRPRP1C GM905105 225 83 + etc... 
+ + Note that indfile is a perfectly legitimate .nam file, for use with + programs such as getloc, getob, or comm. + + + The following options identify the type of database being read: + + -g GenBank (default) + -e EMBL + -p PIR (NBRF) + -v Vecbase + -l LiMB + + Other options: + -c Compress 3 or more leading blanks in annotation lines + to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG + is the ASCII character specified by the Pascal const + CRUNCHOFFSET, which is set to 33 ("!") in the current + implementation. For each annotation line read, if the + number of leading blanks is >=3, splitdb sets CRUNCHCHAR + to CRUNCHOFFSET+the number of blanks. Thus, for lines + with 3, 4, or 5 leading blanks, CRUNCHCHAR would be + '$', '%' and '&', respectively. GETLOC and GETOB + automatically expand crunched blanks when CRUNCHFLAG + is encountered on an input line. Empirical observations + indicate that the -c option decreases the size of + GenBank files by about 10%. + + This compression method may fail when the number of + leading blanks exceeds 127-CRUNCHOFFSET. However, + none of the above mentioned databases currently + supports any datafield with anywhere near that number + of leading blanks. + + -t (GenBank only) Append all information in the first + ORGANISM to the end of each line in indfile. For example, + the entry which begins: + + LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996 + DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon + Zoological Gardens) mitochondrial D-loop DNA. + ACCESSION L76759 + NID g1222584 + KEYWORDS D-loop. + SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom, + ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male + DNA. + ORGANISM Mitochondrion Gorilla gorilla gorilla + Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata; + Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla. 
+ + might be indexed as + + GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla + + This is useful for taxonomic studies, or as a way of making + it easy to create subsets from a single index. Thus, + 'grep gorilla primates.ind' would print all lines in the + file that contained the word gorilla. The output from + this command could be used as a .nam file for extracting + just gorilla sequences from a larger dataset using + fetch. + + + NOTES + 1. Header lines that aren't part of entries are automatically + stripped out during processing. For example, in a file containing + GenBank entries, all lines up to the first occurrence of 'LOCUS' + starting in column 1, are ignored. Similarly for PIR, processing + begins on the first line containing 'ENTRY' beginning in column 1. + 2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996, + have accession numbers of 8 characters, rather than 6. Previously + assigned accession numbers will remain at 6 characters. Splitdb has + been updated to write all accession numbers to the .ind file, left + justified in a field of 8 characters, in columns 14-21 of the .ind + file. + + SEE ALSO + getloc, getob, comm(1) (Unix command). + + AUTHOR + Dr. Brian Fristensky + Dept. of Plant Science + University of Manitoba + Winnipeg, MB Canada R3T 2N2 + Phone: 204-474-6085 + FAX: 204-261-5732 + frist@cc.umanitoba.ca + + REFERENCE + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/xylem.doc b/CORE/xylem/xylem.doc new file mode 100644 index 0000000..e8bf2cd --- /dev/null +++ b/CORE/xylem/xylem.doc @@ -0,0 +1,125 @@ + + + XYLEM.DOC update 10 Aug 1994 + + XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES + Brian Fristensky, University of Manitoba + + Fristensky, B. (1993) Feature expressions: creating and manipulating + sequence datasets. Nucleic Acids Research 21:5997-6003. 
+ + SPLITDB - Splits files containing one or more GenBank entries into + annotation, sequence, and index files. Indexfiles can also serve as + namefiles for GETLOC. Sequence files are in the format required for + use with the Pearson programs (FASTA, LFASTA, etc.). + + GETLOC - Reads a file containing LOCUS names (namefile) and + retrieves either annotation, sequence, or both from a split + database or database subset created by SPLITDB. + + FETCH - A c-shell script that provides a convenient menu-driven + front end for retrieval of database entries using GETLOC. + + FINDKEY - A c-shell script that provides a convenient menu-driven + front end for keyword searches of database annotation files, + using IDENTIFY. + + IDENTIFY - Given line-numbered output from grep, IDENTIFY uses the + index file to determine which entries contained the keywords + searched for by grep. It then produces a namefile for use by + GETLOC. Namefiles can serve as logical databases, and utilities + such as the Unix comm command can perform logical operations on + these namefiles to produce database subsets. + + FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS + etc.) from each of the named entries, using the new + DDBJ/EMBL/GenBank International Features Table Format. A future + version will also allow the annotation of sites within objects that + are extracted. + + DBSTAT - Calculates amino acid frequencies in a protein database. + + RIBOSOME - Given a file of one or more nucleic acids (e.g. output + from GETOB), RIBOSOME translates them into protein, using either + the universal genetic code or an alternative genetic code supplied + by the user. All ambiguities that can be resolved are translated. + + PROT2NUC - Reverse translates a sequence from protein to nucleic + acid, using IUPAC-IUB ambiguity codes. + + SHUFFLE - Given a random seed, shuffles each sequence in a Pearson- + format (.wrp) file.
Shuffling is done locally in overlapping windows + across the length of a given sequence. The window size and overlap + length can be specified by the user. + + REFORM - Reformats multiply aligned nucleic acid or protein + sequences for publication. Output from M. Waterman's RALIGN + program, or the MBCRR MASE editor, can be directly used as input. + A variety of options are available for representing gaps, consensus + sequences and other features. + + Fristensky (Cornell) Sequence Analysis Package - General purpose + sequence analysis package written in Standard Pascal. Features + include: sequence numbering, formatting, & translation, restriction + site searches & mapping, matrix similarity searches, TESTCODE + analysis, base composition analysis. All programs are interactive + and read free-format, BIONET, and GenBank files. + + + + + + + + XYLEM DATABASE TOOLS + + + + ---------- + | .gen | getloc + |----------|<-------------------------- + | GenBank | | + ---------- | + | | + | splitgb | + /|\ | + / | \ | + / | \ | + / | \ | + / | \ | + / | \ | + v v v | + ---------- ---------- ---------- | + | .ano | | .wrp | | .ind | | + |----------| |----------| |----------| | + |annotation| | sequence | | index | | + ---------- ---------- ---------- | + | \ | / | + | \ | / | + | \ | / | + | \ | / | + grep -n | \ | / | + | \ | / | + | | | + | | -------------------------------+ + | ^ | + v | getob | + ---------- ---------- v + | .grep | identify | .nam | ---------- + |----------| --------->|----------| | .wrp | + | numbered | | LOCUS | ---------- + |file lines| ---------- | eg. mRNA | + ---------- | ^ | tRNA | + | | | rRNA | + | | | CDS | + --comm-- ---------- + (logical operations on + sets of names) + + Dr. Brian Fristensky + Dept.
of Plant Science + University of Manitoba + Winnipeg, MB R3T 2N2 CANADA + 204-474-6085 + frist@cc.umanitoba.ca + diff --git a/GDE2.0_manual.ps b/GDE2.0_manual.ps new file mode 100755 index 0000000..a9b90c8 --- /dev/null +++ b/GDE2.0_manual.ps @@ -0,0 +1,6258 @@ +%! +%%Title: "Laser Prep -- The Apple PostScript Dictionary (md)" +%%Creator: Apple Software Engineering +%%CreationDate: Thursday, March 19, 1987 +%{appledict version #70 0 +% ) CopyRight Apple Computer, Inc. 1984-89 All Rights Reserved. +%%EndComments +%%BeginProcSet: "(AppleDict md)" 70 0 +statusdict begin product(LaserWriter II NT)eq revision 1 eq and +{userdict begin/oldcds/cleardictstack load def/cleardictstack{31 sendpcmd 4 eq tonerlight/oldcds load exec}bind def +end +currentfile eexec +}{save currentfile 359 string readhexstring pop pop restore}ifelse +35de8eabfc7fa5eac0431edc501ad43f5fcbdf9fdd321cce93b525f4439dd94696bf56ac13a0a2aad1e6bcf444711e941d7217138d20ae0500145f815439cc14e697ad201df728ea4ccad4ac +331aa03a7aacde10760bf4ee12bbf73c77cdcbf1796f26f0dd255d2407e1ac41 +d27489a69d6b69c6a841468b46720b75ad65650700e0c528e7af61e7e3e821b59445c44b69831ebc9deaf0e3aecc14b7a1c2e18bc1fa42a59219f1e36f236e3d6c89114b1f231999c3dbce6b43f3e2918fcb85575941a9d1e65c86aa08e6eea86cc66ce90e5e4add57f2585e +7b1c0b5203cfc46868d6e3c0d66db79174e7091e32e307679732da062e440e41dabd36a161b611a7e4523a49694026206803dbfd5be5c5fe433f0f18a40057db6f1302848c8da4a10a7f14c6 +3d512806362b1be092ad5dbd36d75fe63e4cae2ba9b72390f580cac344a08bdf6eb9e36ec45bad2a0b82829a72e0efa2d87332c482196e690361168271c55053341ab3 +end +/sc {60 45 {abs exch abs 2 copy add 1 gt{1.0 sub dup mul exch 1.0 sub dup mul add 1.0 sub}{dup mul exch dup mul add 1.0 exch sub} +ifelse}setscreen} bind def statusdict begin product(LaserWriter II)anchorsearch end +{pop pop/letter [/letter load /exec load /sc load /exec load]cvx def/legal [/legal load /exec load /sc load /exec load]cvx def/a4 [/a4 load /exec load /sc load /exec load]cvx def/b5 [/b5 load /exec load /sc load /exec 
load]cvx def +/lettersmall [/lettersmall load /exec load /sc load /exec load]cvx def/a4small [/a4small load /exec load /sc load /exec load]cvx def/note [/note load /exec load /sc load /exec load]cvx def}{pop}ifelse +systemdict/currentpacking known{currentpacking true setpacking}if +/LW{save statusdict/product get(LaserWriter)anchorsearch +exch pop{length 0 eq{1}{2}ifelse}{0}ifelse exch restore}bind def +/LW+{LW 2 eq}bind def +/ok{systemdict/statusdict known dup{LW 0 gt and}if}bind def +ok{statusdict begin 9 sccinteractive 3 ne exch 0 ne or{9 0 3 setsccinteractive}if end}if +/md 270 dict def md begin +/av 0 def +/T true def/F false def/mtx matrix def/s75 75 string def/sa8 8 string def/sb8 8 string def +/sc8 8 string def/sd8 8 string def/s1 ( ) def/pxs 1 def/pys 1 def +/ns false def +1 0 mtx defaultmatrix dtransform exch atan/pa exch def/nlw .24 def/ppr [-32 -29.52 762 582.48] def +/pgr [0 0 0 0] def +/pgs 1 def/por true def/xb 500 array def/so true def/tso true def/fillflag false def/pnm 1 def/fmv true def +/sfl false def/ma 0 def/invertflag false def/dbinvertflag false def/xflip false def/yflip false def/noflips true def/scaleby96 false def/fNote true def/fBitStretch true def +/4colors false def/3colors false def/2colors false def +/wtkey false def +statusdict begin/waittimeout where{pop waittimeout 300 lt{md /wtkey true put}if}if end +wtkey{statusdict begin/setdefaulttimeouts where{pop 0 60 300 setdefaulttimeouts}if end}if +/fg (Rvd\001\001\000\000\177) def +/bdf{bind def}bind def +/xdf{exch def}bdf +/xl{neg exch neg translate}bdf +/fp{pnsh 0 ne pnsv 0 ne and}bdf +/nop{}bdf/lnop[/nop load]cvx bdf +/vrb[ +{fp{fg 6 get 0 ne{gsave stroke grestore}{gsave 1 setlinewidth pnsh pnsv scale stroke grestore}ifelse}if newpath}bind +/eofill load +dup +/newpath load +2 index +dup +{clip newpath}bind +{}bind +dup +2 copy +]def +systemdict/currentcolorscreen known{currentcolorscreen/dkspf xdf/dkrot xdf/dkfreq xdf/dyspf xdf/dyrot xdf/dyfreq xdf/dmspf xdf/dmrot xdf/dmfreq xdf 
+/dcspf xdf/dcrot xdf/dcfreq xdf}{currentscreen/spf xdf/rot xdf/freq xdf}ifelse +/doop{vrb exch get exec}bdf +/psu{/udf xdf/tso xdf /fNote xdf/fBitStretch xdf/scaleby96 xdf/yflip xdf/xflip xdf +/invertflag xdf/dbinvertflag invertflag statusdict begin version cvr 47.0 ge product (LaserWriter) eq not and end invertflag and {not}if def +xflip yflip or{/noflips false def}if +/pgs xdf 2 index .72 mul exch div/pys xdf div .72 mul/pxs xdf ppr astore pop pgr astore pop/por xdf sn and/so xdf}bdf +/tab{statusdict /11x17 known{statusdict begin /11x17 load end}{statusdict /setpage known{statusdict begin 792 1224 1 setpage end}{statusdict /setpageparams known{statusdict begin 792 1224 0 1 setpageparams end}if}ifelse}ifelse}bdf +/a3Size{statusdict /a3 known{statusdict begin /a3 load end}{statusdict /setpageparams known{statusdict begin 842 1191 0 1 setpageparams end}if}ifelse}bdf +/txpose{fNote{smalls}{bigs}ifelse pgs get exec pxs pys scale ppr aload pop por{noflips{pop exch neg exch translate pop 1 -1 scale}if +xflip yflip and{pop exch neg exch translate 180 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg translate}if +xflip yflip not and{pop exch neg exch translate pop 180 rotate ppr 3 get ppr 1 get neg sub neg 0 translate}if yflip xflip not and{ppr 1 get neg ppr 0 get neg translate}if} +{noflips{translate pop pop 270 rotate 1 -1 scale}if xflip yflip and{translate pop pop 90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg translate}if +xflip yflip not and{translate pop pop 90 rotate ppr 3 get ppr 1 get neg sub neg 0 translate}if yflip xflip not and{translate pop pop 270 rotate ppr 2 get ppr 0 get neg sub neg 0 exch translate}if}ifelse +wtkey{statusdict/waittimeout 300 put}if +scaleby96{ppr aload pop 4 -1 roll add 2 div 3 1 roll add 2 div 2 copy translate .96 dup scale neg exch neg exch translate}if}bdf +/fr{4 copy pgr aload pop 3 -1 roll add 3 1 roll exch add 6 2 roll 3 -1 roll +sub 3 1 roll exch sub 3 -1 roll 
exch div 3 1 roll div exch scale pop pop xl}bdf +/obl{{0.212557 mul}{pop 0}ifelse}bdf +/sfd{ps fg 5 -1 roll get mul 100 div 0 ps 5 -1 roll obl ps neg 0 0 6a astore makefont setfont}bdf +/fnt{findfont sfd}bdf +/bt{sa 3 1 roll 3 index and put}bdf +/sa(\000\000\000\000\000\000\000\000\000\000)def +/fs{0 1 bt 1 2 bt 2 4 bt 3 8 bt 4 16 bt 5 32 bt 6 64 bt 7 128 bt sa exch 8 exch put}bdf +/mx1 matrix def +/mx2 matrix def +/mx3 matrix def +/bu{currentpoint 4colors{currentcmykcolor}{currentrgbcolor}ifelse currentlinewidth currentlinecap currentlinejoin +currentdash exch aload length fg 5 sfl{1}{0}ifelse put pnsv pnsh +2t aload pop 3a aload pop mx2 aload pop mx1 aload pop mtx currentmatrix aload pop +mx3 aload pop ps pm restore/ps xdf mx3 astore pop}bdf +/bn{/pm save def mx3 setmatrix newpath 0 0 moveto ct dup 39 get 0 exch getinterval cvx exec mtx astore setmatrix mx1 astore pop mx2 astore pop 3a +astore pop 2t astore pop/pnsh xdf/pnsv xdf gw +/sfl fg 5 get 0 ne def array astore exch setdash setlinejoin setlinecap +setlinewidth 4colors{setcmykcolor}{setrgbcolor}ifelse moveto}bdf +/fc{save vmstatus exch sub 50000 lt +{(%%[|0|]%%)=print flush}if pop restore}bdf +/tc{32768 div add 3 1 roll 32768 div add 2t astore pop}bdf +/3a [0 0 0] def +/2t 2 array def +/tp{3a astore pop}bdf +/tt{mx2 currentmatrix pop currentpoint 2 copy 2t aload pop qa 2 copy translate 3a aload pop exch dup 0 eq +{pop}{1 eq{-1 1}{1 -1}ifelse scale}ifelse rotate pop neg exch neg exch translate moveto}bdf +/te{mx2 setmatrix}bdf +/th{3 -1 roll div 3 1 roll exch div 2 copy mx1 scale pop scale/sfl true def}bdf +/tu{1 1 mx1 itransform scale/sfl false def}bdf +/ts{1 1 mx1 transform scale/sfl true def}bdf +/fz{/ps xdf}bdf +/dv{dup 0 ne{div}{pop}ifelse}bdf +/pop4{pop pop pop pop}bdf +/it{sfl{mx1 itransform}if}bdf +/gm{exch it moveto}bdf/rm{it rmoveto}bdf +/lm{currentpoint sfl{mx1 transform}if exch pop sub 0 exch it rmoveto}bdf +/fm{statusdict/manualfeed known}bdf +/se{statusdict exch/manualfeed exch put}bdf 
+/mf{dup/ma exch def 0 gt{fm se/t1 5 st ok ma 1 gt and{/t2 0 st/t3 0 st +statusdict/manualfeedtimeout 3600 put +}if}if}bdf +/jn{/statusdict where exch pop{statusdict exch /jobname exch put}if}bdf +/pen{pnm mul/pnsh xdf pnm mul/pnsv xdf pnsh setlinewidth}bdf +/min{2 copy gt{exch}if pop}bdf +/max{2 copy lt{exch}if pop}bdf +/dh{fg 6 1 put array astore dup {1 pxs div mul exch}forall astore exch pop exch pop exch setdash}bdf +/ih[currentdash]def +/rh{fg 6 0 put ih aload pop setdash}bdf +/dl{gsave nlw pys div setlinewidth 0 setgray}bdf +/dlin{exch currentpoint currentlinewidth 2 div dup +translate newpath moveto lineto currentpoint stroke grestore moveto}bdf +/lin{fg 6 get 0 ne{exch lineto currentpoint 0 doop moveto} +{exch currentpoint/pnlv xdf/pnlh xdf gsave newpath/@1 xdf/@2 xdf fp{pnlh @2 lt{pnlv @1 ge +{pnlh pnlv moveto @2 @1 lineto pnsh 0 rlineto +0 pnsv rlineto pnlh pnsh add pnlv pnsv add lineto pnsh neg 0 rlineto} +{pnlh pnlv moveto pnsh 0 rlineto @2 pnsh add @1 lineto 0 pnsv rlineto +pnsh neg 0 rlineto pnlh pnlv pnsv add lineto}ifelse}{pnlv @1 gt +{@2 @1 moveto pnsh 0 rlineto pnlh pnsh add pnlv lineto 0 pnsv rlineto +pnsh neg 0 rlineto @2 @1 pnsv add lineto}{pnlh pnlv moveto pnsh 0 rlineto +0 pnsv rlineto @2 pnsh add @1 pnsv add lineto pnsh neg 0 rlineto +0 pnsv neg rlineto}ifelse}ifelse +closepath fill}if @2 @1 grestore moveto}ifelse}bdf +/gw{/pnm fg 3 get fg 4 get div def}bdf +/lw{fg exch 4 exch put fg exch 3 exch put gw pnsv pnsh pen}bdf +/barc{/@1 xdf/@2 xdf/@3 xdf/@4 xdf/@5 xdf +/@6 xdf/@7 xdf/@8 xdf gsave +@5 @7 add 2 div @6 @8 add 2 div translate newpath 0 0 moveto +@5 @7 sub @6 @8 sub mtx currentmatrix pop scale @1{newpath}if +0 0 0.5 @4 @3 arc @4 @3 sub abs 360 ge{closepath}if +mtx setmatrix @2 doop grestore}bdf +/ar{dup 0 eq barc}bdf +/ov{0 exch 360 exch true barc}bdf +/rc{/@t xdf currentpoint 6 2 roll newpath 4 copy 4 2 roll exch moveto +6 -1 roll lineto lineto lineto closepath @t doop moveto}bdf +/mup{dup pnsh 2 div le exch pnsv 2 div le or}bdf 
+/rr{/@1 xdf 2. div/@2 xdf 2. div/@3 xdf +/@4 xdf/@5 xdf/@6 xdf/@7 xdf +@7 @5 eq @6 @4 eq @2 mup or or{@7 @6 @5 @4 @1 rc} +{@4 @6 sub 2. div dup @2 lt{/@2 xdf}{pop}ifelse +@5 @7 sub 2. div dup @2 lt{/@2 xdf}{pop}ifelse +@1 0 eq{/@2 @2 pnsh 2 div 2 copy gt{sub def}{0 pop4}ifelse}if +currentpoint newpath +@4 @6 add 2. div @7 moveto +@4 @7 @4 @5 @2 arcto pop4 +@4 @5 @6 @5 @2 arcto pop4 +@6 @5 @6 @7 @2 arcto pop4 +@6 @7 @4 @7 @2 arcto pop4 +closepath @1 doop moveto}ifelse}bdf +/pr{gsave newpath/pl{exch moveto/pl{exch lineto}def}def}bdf +/pl{exch lineto}bdf +/ep{dup 0 eq{{moveto}{exch lin}{}{(%%[|1|]%%)= flush}pathforall +pop grestore}{doop grestore}ifelse currentpoint newpath moveto}bdf +/gr{64. div setgray}bdf +/savescreen{ns not{/ns true def systemdict/currentcolorscreen known{currentcolorscreen/pkspf xdf/pkrot xdf/pkfreq xdf/pyspf xdf/pyrot xdf/pyfreq xdf/pmspf xdf/pmrot xdf/pmfreq xdf +/pcspf xdf/pcrot xdf/pcfreq xdf}{currentscreen/sspf xdf/srot xdf/sfreq xdf}ifelse}if}bdf +/restorescreen{/ns false def systemdict/setcolorscreen known{pcfreq pcrot/pcspf load pmfreq pmrot/pmspf load pyfreq pyrot/pyspf load +pkfreq pkrot/pkspf load setcolorscreen}{sfreq srot/sspf load setscreen}ifelse}bdf +/pat{savescreen sa8 +copy pop 9.375 pa por not{90 add}if{1 add 4 mul cvi sa8 exch get exch 1 add 4 mul cvi 7 sub bitshift 1 and}setscreen exch not{gr}{pop}ifelse}bdf +/sg{restorescreen gr}bdf +/cpat{savescreen 10 2 roll 7 -1 roll sa8 copy pop 9.375 pa por not{90 add}if{1 add 4 mul cvi sa8 exch get exch 1 add 4 mul cvi 7 sub bitshift 1 and}8 -1 roll sb8 copy pop 9.375 pa por not{90 add}if{1 add 4 mul cvi sb8 +exch get exch 1 add 4 mul cvi 7 sub bitshift 1 and}9 -1 roll sc8 copy pop 9.375 pa por not{90 add}if{1 add 4 mul cvi sc8 exch get exch 1 add 4 mul cvi 7 sub bitshift 1 and}10 -1 roll sd8 copy pop 9.375 pa por not{90 add}if{1 add 4 mul cvi sd8 +exch get exch 1 add 4 mul cvi 7 sub bitshift 1 and}psuedo1 dsc 4{4 -1 roll 1 exch 64 div sub}repeat setcmykcolor pop pop}bdf 
+systemdict/setcolorscreen known{/psuedo1 lnop bdf/dsc/setcolorscreen load def}{/psuedo1{16{pop}repeat sa8 copy pop 9.375 pa por not{90 add}if{1 add 4 mul cvi sa8 exch get exch 1 add 4 mul cvi 7 sub bitshift 1 and}}bdf +/bwsc{setscreen dup gr 0 exch 0 exch 64 exch 64 exch 64 exch}bdf/dsc/bwsc load def +}ifelse +systemdict/setcmykcolor known not{/setcmykcolor{1 sub 4 1 roll 3{3 index add neg dup 0 lt{pop 0}if 3 1 roll}repeat setrgbcolor pop}bdf}if +/dc{transform round .5 sub exch round .5 sub exch itransform}bdf +/sn{userdict/smooth4 known}bdf +/x8{3 bitshift}bdf +/x4{2 bitshift}bdf +/d4{-2 bitshift}bdf +/d8{-3 bitshift}bdf +/rb{15 add -4 bitshift 1 bitshift}bdf +/db{/@7 save def/@1 xdf/@2 xdf/@3 xdf/@4 xdf/@5 xdf/@6 @5 @3 4 add mul def +dc translate scale/xdbit 1 1 idtransform abs/ydbit exch def abs def{0 0 1 ydbit add 1 10 rc clip}if +@1 0 eq @1 4 eq or{currentrgbcolor 1 setgray ydbit 0 1 ydbit add 1 2 rc setrgbcolor}if +@1 3 eq @1 7 eq or{1 setgray}{currentrgbcolor 2 index eq exch 2 index eq and exch pop{0 setgray}if}ifelse/@9 @1 0 eq @1 1 eq @1 3 eq or or dbinvertflag xor def/@13 @6 def +@2 fBitStretch or{/@10 @4 x4 def/@11 @3 x4 def/@12 @10 rb def/@13 @12 @11 mul def/@15 1 1 dtransform abs/calcY 1 index def round cvi/@14 exch def +abs/calcX 1 index def round cvi scaleby96 not{1 add}if def/@16 @15 rb def/@17 @16 @14 mul def}if +sn @13 60000 lt and @2 fBitStretch or and{mtx currentmatrix dup 1 get exch 2 get 0. eq exch 0. 
eq and @17 60000 lt and fBitStretch and{@16 3 bitshift @14 @9 [calcX 0 0 calcY 0 0]{@17 string @13 string +currentfile @6 string readhexstring pop 1 index @4 @3 @5 @12 @2 smooth4 +@10 @11 @12 dup string 5 index @15 @14 @16 dup string stretch}imagemask}{@12 x8 @11 @9 [@10 0 0 @11 0 0]{@13 string +currentfile @6 string readhexstring pop 1 index @4 @3 @5 @12 @2 smooth4}imagemask}ifelse}{@5 3 bitshift @3 4 add @9 [@4 0 0 @3 0 2]{currentfile @6 string readhexstring pop}imagemask}ifelse +@7 restore}bdf +systemdict/setcmykcolor known{/psuedo lnop bdf/di/colorimage load def}{/routines[{.3 mul add 1}bind{.59 mul add 2}bind{.11 mul add round cvi str exch i exch put/i i 1 add def 0 0}bind]def +/psuedo{/i 0 def 0 exch 0 exch{exch routines exch get exec}forall pop pop str}bdf/bwi{pop pop image}bdf/di/bwi load def}ifelse +/cdb{/@7 save def/@1 xdf/@2 xdf/@3 xdf/@4 xdf/@5 xdf +systemdict/setcmykcolor known not{dc}if translate scale /@6 xdf +/@18 @5 dup 60000 ge{pop 60000}if string def @6 not{/str @18 0 @18 length 3 idiv getinterval def}if @4 @3 8 [@4 0 0 @3 0 0]@6{{currentfile @18 readhexstring pop}image}{{currentfile @18 readhexstring pop psuedo}false 3 di}ifelse @7 restore}bdf +/wd 16 dict def +/mfont 14 dict def +/mdf{mfont wcheck not{/mfont 14 dict def}if mfont begin xdf end}bdf +/cf{{1 index/FID ne{def}{pop pop}ifelse}forall}bdf/rf{/@1 exch def/@2 exch def +FontDirectory @2 known{cleartomark pop}{findfont dup begin dup length @1 add dict begin +cf{/Encoding macvec def}{Encoding dup length array copy/Encoding exch def +counttomark 2 idiv{Encoding 3 1 roll put}repeat}ifelse +pop +exec currentdict end end @2 exch definefont pop}ifelse}bdf +/bmbc{exch begin wd begin +/cr xdf +save +CharTable cr 6 mul 6 getinterval{}forall +/bitheight xdf/bitwidth xdf +.96 div/width xdf +Gkernmax add/XOffset xdf Gdescent add/YOffset xdf/rowbytes xdf +rowbytes 255 eq{0 0 0 0 0 0 setcachedevice} +{Gnormsize dup scale +width 0 XOffset YOffset bitwidth XOffset add bitheight YOffset add +setcachedevice 
+rowbytes 0 ne{ +XOffset YOffset translate newpath 0 0 moveto +bitwidth bitheight scale +sn{ +/xSmt bitwidth x4 def +/ySmt bitheight x4 def +/rSmt xSmt rb def +rSmt x8 ySmt true +[xSmt 0 0 ySmt neg 0 ySmt] +{rSmt ySmt mul string CharData cr get +1 index bitwidth bitheight rowbytes rSmt tso smooth4} +}{rowbytes 3 bitshift bitheight 4 add true +[bitwidth 0 0 bitheight neg 0 bitheight 2 add] +{CharData cr get} +}ifelse +imagemask +}if +}ifelse +restore +end end +}bdf +/bb{.96 exch div/Gnormsize mdf 2 index +/Gkernmax mdf 1 index/Gdescent mdf +3 index div 4 1 roll +2 index div 1. 5 2 roll +exch div 4 1 roll +4 array astore/FontBBox mdf +}bdf +/cdf{mfont/CharData get 3 1 roll put}bdf +/bf{ +mfont begin +/FontType 3 def +/FontMatrix [1 0 0 1 0 0] def +/Encoding macvec def +/MFontType 0 def +/BuildChar/bmbc load def +end +mfont definefont pop +}bdf +/wi LW 1 eq{{gsave 0 0 0 0 0 0 0 0 moveto lineto lineto lineto closepath clip stringwidth grestore}bind}{/stringwidth load}ifelse def +/aps{0 get 124 eq}bdf +/xc{s75 cvs dup}bdf +/xp{put cvn}bdf +/scs{xc 3 67 put dup 0 95 xp}bdf +/sos{xc 3 79 xp}bdf +/sbs{xc 1 66 xp}bdf +/sis{xc 2 73 xp}bdf +/sob{xc 2 79 xp}bdf +/sss{xc 4 83 xp}bdf +/dd{exch 1 index add 3 1 roll add exch}bdf +/smc{moveto dup show}bdf +/ndf2{udf{dup /FontType get 0 eq{/FDepVector get{dup /FontType get 0 eq{ndf2}{dup /df2 known{begin df2 0 null put end +}{pop}ifelse}ifelse}forall}{/df2 known{dup begin df2 0 null put end}if}ifelse}{pop}ifelse}bdf +/kwn{FontDirectory 1 index known{findfont dup ndf2 exch pop}}bdf +/gl{1 currentgray sub setgray}bdf +/newmm{dup /FontType get 0 eq{dup maxlength dict begin{1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse}forall currentdict end +dup /FDepVector 2 copy get[exch 6 index exch 6 index exch{newmm 3 1 roll}forall pop pop] put dup +}{/mfont 10 dict def mfont begin/FontMatrix [1 0 0 1 0 0] def +/FontType 3 def/Encoding macvec def/df 1 index def/df2 1 array def/FontBBox [0 0 1 1] def/StyleCode 2 index def +/mbc{bcarray 
StyleCode get}def/BuildChar{exch begin wd begin/cr exch def/cs s1 dup 0 cr put def df /MFontType known not{ +df2 0 get null eq{df dup length 2 add dict begin{1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse}forall +/StrokeWidth nlw 1000 mul pys div ps div dup 12 lt{pop 12}if def/PaintType 2 def currentdict end +/q exch definefont df2 exch 0 exch put}if}if mbc exec end end}def end mfont}ifelse +3 index exch definefont exch pop}bdf +/mb{dup sbs kwn{0 2 index findfont newmm exch pop exch pop exch pop}ifelse sfd}bdf +/mo{dup sos kwn{2 2 index findfont newmm exch pop exch pop exch pop}ifelse sfd}bdf +/ms{dup sss kwn{4 2 index findfont newmm exch pop exch pop exch pop}ifelse sfd}bdf +/ou{dup sos kwn{mfont/df2 known{mfont begin df2 0 null put end}if 3 2 index findfont newmm exch pop exch pop exch pop}ifelse sfd}bdf +/su{dup sss kwn{mfont/df2 known{mfont begin df2 0 null put end}if 5 2 index findfont newmm exch pop exch pop exch pop}ifelse sfd}bdf +/ao{/fmv true def ou}bdf/as{/fmv true def su}bdf +/vo{/fmv false def ou}bdf/vs{/fmv false def su}bdf +/c{currentrgbcolor dup 4 1 roll eq 3 1 roll eq and/gray xdf}bdf +/bcarray[{/da .03 def df setfont gsave cs wi 1 index 0 ne{exch da add exch}if grestore setcharwidth +cs 0 0 smc da 0 smc da da smc 0 da moveto show}bind dup{/da 1 ps div def df setfont gsave cs wi 1 index 0 ne{exch da add exch}if grestore setcharwidth +cs 0 0 smc da 0 smc da da smc 0 da smc c gray{gl}{1 setgray}ifelse da 2. div dup moveto show}bind +{df setfont gsave cs wi grestore setcharwidth c gray{gl}{currentrgbcolor 1 setgray}ifelse cs 0 0 smc df2 0 get setfont +gray{gl}{4 1 roll setrgbcolor}ifelse 0 0 moveto show}bind +{/da 1 ps div def/ds .05 def/da2 da 2. div def df setfont gsave cs wi 1 index 0 ne{exch ds add da2 add exch}if grestore setcharwidth +cs ds da2 add .01 add 0 smc 0 ds da2 sub translate 0 0 smc da 0 smc da da smc 0 da smc c gray{gl}{1 setgray}ifelse da 2. 
div dup moveto show}bind +{/da .05 def df setfont gsave cs wi 1 index 0 ne{exch da add exch}if grestore setcharwidth c cs da .01 add 0 smc 0 da translate +gray{gl}{currentrgbcolor 1 setgray 4 -1 roll}ifelse 0 0 smc gray{gl}{4 1 roll setrgbcolor}ifelse df2 0 get setfont 0 0 moveto show}bind]def +/st{1000 mul usertime add dup 2147483647 gt{2147483647 sub}if def}bdf +/the{usertime sub dup 0 lt exch -2147483648 gt and}bdf +/6a 6 array def +/2a 2 array def +/3q 3 array def +/qs{3 -1 roll sub exch 3 -1 roll sub exch}bdf +/qa{3 -1 roll add exch 3 -1 roll add exch}bdf +/qm{3 -1 roll 1 index mul 3 1 roll mul}bdf +/qn{6a exch get mul}bdf +/qA .166667 def/qB .833333 def/qC .5 def +/qx{6a astore pop +qA 0 qn qB 2 qn add qA 1 qn qB 3 qn add +qB 2 qn qA 4 qn add qB 3 qn qA 5 qn add +qC 2 qn qC 4 qn add qC 3 qn qC 5 qn add}bdf +/qp{6 copy 12 -2 roll pop pop}bdf +/qc{exch qp qx curveto}bdf +/qi{{exch 4 copy 2a astore aload pop qa .5 qm newpath moveto}{exch 2 copy 6 -2 roll 2 qm qs 4 2 roll}ifelse}bdf +/qq{{qc 2a aload pop qx curveto}{exch 4 copy qs qa qx curveto}ifelse}bdf +/pt{currentpoint newpath moveto}bdf +/qf{/fillflag true def}bdf +/ec{dup 4 and 0 ne{closepath}if 1 and 0 ne{0 doop}if grestore currentpoint newpath moveto/fillflag false def}bdf +/eu{currentpoint fp{0 ep}{grestore newpath}ifelse moveto/fillflag false def}bdf +/bp{currentpoint newpath 2 copy moveto}bdf +/ef{gsave fillflag{gsave eofill grestore}if}bdf +/sm{0 exch{@1 eq{1 add}if}forall}bdf +/lshow{4 1 roll exch/@1 exch def{1 index wi pop sub 1 index sm dv 0 @1 4 -1 roll widthshow}{1 index wi pop sub +1 index dup sm 10 mul exch length 1 sub add dv dup 10. 
mul 0 @1 4 -1 roll 0 6 -1 roll awidthshow}ifelse}bdf +/setTxMode{sa 9 2 index put exch not{3 eq{1}{0}ifelse setgray}{pop}ifelse}bdf +/SwToSym{{}mark false/Symbol/|______Symbol 0 rf 0 sa 6 get 0 ne{pop 1}{sa 7 get 0 eq{pop 2}if}ifelse +sa 1 get 0 ne/|______Symbol +sa 4 get 0 ne{vs}{sa 3 get 0 ne{vo}{fnt}ifelse}ifelse}bdf +/mc{0 3 1 roll transform neg exch pop}bdf +/ul{dup 0 ne sa 2 get 0 ne and{gsave 0 0 +/UnderlinePosition kif{mc}{ps -10 div}ifelse/UnderlineThickness kif{mc}{ps 15 div}ifelse +abs setlinewidth neg rmoveto +sa 4 get 0 ne{gsave currentlinewidth 2. div dup rmoveto currentpoint newpath moveto +2 copy rlineto stroke grestore}if +sa 3 get sa 4 get or 0 ne{gsave currentrgbcolor dup 4 1 roll eq 3 1 roll eq and{gl}{1 setgray}ifelse 2 copy rlineto stroke grestore rlineto strokepath nlw pys div setlinewidth}{rlineto}ifelse +stroke grestore}{pop}ifelse}bdf +/sgt{2 copy known{get true}{pop pop false}ifelse}bdf +/kif{currentfont dup/FontMatrix get exch/FontInfo sgt{true}{currentfont/df sgt +{dup/FontInfo sgt{3 1 roll/FontMatrix get mtx concatmatrix exch true}{pop pop pop false} +ifelse}{pop pop false}ifelse}ifelse{3 -1 roll sgt{exch true}{pop false}ifelse}{false}ifelse}bdf +/blank/Times-Roman findfont/CharStrings get/space get def +/macvec 256 array def +/NUL/SOH/STX/ETX/EOT/ENQ/ACK/BEL/BS/HT/LF/VT/FF/CR/SO/SI +/DLE/DC1/DC2/DC3/DC4/NAK/SYN/ETB/CAN/EM/SUB/ESC/FS/GS/RS/US +macvec 0 32 getinterval astore pop +macvec 32/Times-Roman findfont/Encoding get +32 96 getinterval putinterval macvec dup 39/quotesingle put 96/grave put +/Adieresis/Aring/Ccedilla/Eacute/Ntilde/Odieresis/Udieresis/aacute +/agrave/acircumflex/adieresis/atilde/aring/ccedilla/eacute/egrave +/ecircumflex/edieresis/iacute/igrave/icircumflex/idieresis/ntilde/oacute +/ograve/ocircumflex/odieresis/otilde/uacute/ugrave/ucircumflex/udieresis +/dagger/degree/cent/sterling/section/bullet/paragraph/germandbls +/registered/copyright/trademark/acute/dieresis/notequal/AE/Oslash 
+/infinity/plusminus/lessequal/greaterequal/yen/mu/partialdiff/summation +/product/pi/integral/ordfeminine/ordmasculine/Omega/ae/oslash +/questiondown/exclamdown/logicalnot/radical/florin/approxequal/Delta/guillemotleft +/guillemotright/ellipsis/blank/Agrave/Atilde/Otilde/OE/oe +/endash/emdash/quotedblleft/quotedblright/quoteleft/quoteright/divide/lozenge +/ydieresis/Ydieresis/fraction/currency/guilsinglleft/guilsinglright/fi/fl +/daggerdbl/periodcentered/quotesinglbase/quotedblbase/perthousand/Acircumflex/Ecircumflex/Aacute +/Edieresis/Egrave/Iacute/Icircumflex/Idieresis/Igrave/Oacute/Ocircumflex +/apple/Ograve/Uacute/Ucircumflex/Ugrave/dotlessi/circumflex/tilde +/macron/breve/dotaccent/ring/cedilla/hungarumlaut/ogonek/caron +macvec 128 128 getinterval astore pop +{}mark true/Courier/|______Courier 0 rf +{/Metrics 21 dict begin/zero 600 def/one 600 def/two 600 def/three 600 def/four 600 def/five 600 def/six 600 def/seven 600 def/eight 600 def +/nine 600 def/comma 600 def/period 600 def/dollar 600 def/numbersign 600 def/percent 600 def/plus 600 def/hyphen 600 def/E 600 def/parenleft 600 def/parenright 600 def/space 600 def +currentdict end def currentdict/UniqueID known{/UniqueID 16#800000 def}if/FontBBox FontBBox 4 array astore def}mark true/Helvetica/|______Seattle 1 rf +/oldsettransfer/settransfer load def +/concatprocs{/proc2 exch cvlit def/proc1 exch cvlit def/newproc proc1 length proc2 length add array def +newproc 0 proc1 putinterval newproc proc1 length proc2 putinterval newproc cvx}def +/settransfer{currenttransfer concatprocs oldsettransfer}def +/PaintBlack{{1 exch sub}settransfer gsave newpath clippath 1 setgray fill grestore}def +/od{(Rvd\001\001\000\000\177) fg copy pop txpose +1 0 mtx defaultmatrix dtransform exch atan/pa exch def +newpath clippath mark +{transform{itransform moveto}}{transform{itransform lineto}} +{6 -2 roll transform 6 -2 roll transform 6 -2 roll transform +{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll curveto}} 
+{{closepath}}pathforall newpath counttomark array astore/gc xdf pop ct 39 0 put +10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack}if +statusdict/processcolors known{statusdict begin processcolors end dup 4 eq{/4colors true def pop}{3 eq{/3colors true def}{/2color true def}ifelse}ifelse}{/2colors true def}ifelse}bdf +/cd{}bdf +/op{/sfl false def systemdict/currentcolorscreen known{dcfreq dcrot/dcspf load dmfreq dmrot/dmspf load dyfreq dyrot/dyspf load +dkfreq dkrot/dkspf load setcolorscreen}{freq rot/spf load setscreen}ifelse savescreen +/ns false def/pm save def}bdf +/cp{not{userdict/#copies 0 put}if ma 0 gt{{t1 the{exit}if}loop}if{copypage}{showpage}ifelse pm restore}bdf +/px{0 3 1 roll tp tt}bdf +/psb{/us save def}bdf +/pse{us restore}bdf +/ct 40 string def +/nc{currentpoint initclip newpath gc{dup type dup/arraytype eq exch/packedarraytype eq or{exec}if} +forall clip newpath moveto}def +/kp{ct 0 2 index length 2 index 39 2 index put getinterval copy cvx exec mx3 currentmatrix pop}bdf +/av 70 def +end +LW 1 eq userdict/a4small known not and{/a4small +[[300 72 div 0 0 -300 72 div -120 3381] +280 3255 +{statusdict/jobstate (printing) put 0 setblink +margins +exch 196 add exch 304 add 8 div round cvi frametoroket +statusdict/jobstate (busy) put +1 setblink} +/framedevice load +60 45{dup mul exch dup mul add 1.0 exch sub}/setscreen load +{}/settransfer load/initgraphics load/erasepage load]cvx +statusdict begin bind end readonly def}if +md begin/bigs[lnop userdict/letter known{/letter load}{lnop}ifelse userdict/legal known{/legal load}{lnop}ifelse userdict/a4 known{/a4 load}{lnop}ifelse userdict/b5 known{/b5 load}{lnop}ifelse +lnop lnop lnop /tab load/a3Size load]def +/smalls[lnop userdict/lettersmall known{/lettersmall load}{userdict/note known{/note load}{lnop}ifelse}ifelse +userdict/legal known{/legal load}{lnop}ifelse userdict/a4small known{/a4small load}{lnop}ifelse +userdict/b5 known{/b5 load}{userdict/note known{/note load}{lnop}ifelse}ifelse lnop lnop 
lnop /tab load/a3Size load]def end +systemdict/currentpacking known{setpacking}if +/checkload{{currentfile eexec} {/junk save def/mystring 65000 string def +/endexec (%endeexec) def{currentfile mystring readline not{stop}if endexec eq{exit}if}loop junk restore}ifelse}bind def +ok userdict/stretch known not and checkloadcleartomark +%endeexec + +ok userdict/smooth4 known not and checkloadcleartomark +%endeexec +%%EndProcSet + +%%EOF +%!PS-Adobe-2.0 +%%Title: GDE2.0_small +%%Creator: Microsoft Word +%%CreationDate: Sunday, April 26, 1992 +%%Pages: (atend) +%%BoundingBox: ? ? ? ? +%%PageBoundingBox: 30 31 582 761 +%%For: Genome1 +%%IncludeProcSet: "(AppleDict md)" 70 0 +%%EndComments +%%EndProlog +%%BeginDocumentSetup +md begin + +T T 0 0 730 552 -31 -30 761 582 100 72 72 1 F F F F T T T T psu +(Genome1; document: GDE2.0_small)jn +0 mf +od +%%EndDocumentSetup +%%Page: ? 1 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(1)show +86 90 gm +18 fz +2 F /|______Times-Roman fnt +0.25192 0. 32 0.02519 0.(Genetic Data Environment)awidthshow +86 306 gm +1.00982 0. 32 0.10098 0.(version 2.0)awidthshow +113 90 gm +14 fz +2 F /|______Times-Roman fnt +0.19348 0. 
32 0.01934 0.(Table of Contents)awidthshow +125 90 gm +12 fz +2 F /|______Times-Roman fnt +0.87683 0.(Introduction.........................................................................................)ashow +125 148 gm +( )show +125 504 gm +(2)show +137 90 gm +-0.00367 0.(What's New for this Release)ashow +137 228 gm +1.01469 0.(.....................................................................)ashow +137 226 gm +( )show +137 504 gm +(2)show +149 90 gm +-0.10981 0.(System Requirements)ashow +149 196 gm +1.01313 0.(.............................................................................)ashow +149 193 gm +( )show +149 504 gm +(2)show +161 90 gm +-0.09104 0.(Note to Motif users)ashow +161 184 gm +1.01264 0.(................................................................................)ashow +161 182 gm +( )show +161 504 gm +(2)show +173 90 gm +-0.15522 0.(Installing the GDE)ashow +173 180 gm +1.01248 0.(.................................................................................)ashow +173 178 gm +( )show +173 504 gm +(3)show +185 90 gm +-0.08174 0.(Using the GDE)ashow +185 164 gm +1.01188 0.(.....................................................................................)ashow +185 163 gm +( )show +185 504 gm +(3)show +197 90 gm +-0.21920 0.(Data Types)ashow +197 144 gm +1.01121 0.(..........................................................................................)ashow +197 143 gm +( )show +197 504 gm +(7)show +209 90 gm +-0.10195 0.(Menu Functions)ashow +221 126 gm +-0.16563 0.(File menu)ashow +221 176 gm +1.01232 0.(..................................................................................)ashow +221 173 gm +( )show +221 504 gm +(7)show +233 126 gm +-0.20724 0.(Edit menu)ashow +233 176 gm +1.01232 0.(..................................................................................)ashow +233 174 gm +( )show +233 504 gm +(9)show +245 126 gm +7.54577 0. 
32 0.75457 0.(DNA/RNA menu..........................................................................)awidthshow +245 208 gm +( )show +245 504 gm +(9)show +257 90 gm +-0.11619 0.(External Functions)ashow +257 180 gm +1.01248 0.(.................................................................................)ashow +257 179 gm +( )show +257 504 gm +(10)show +269 90 gm +-0.06219 0.(Bug reports/extensions)ashow +269 200 gm +1.01332 0.(............................................................................)ashow +269 199 gm +( )show +269 504 gm +(12)show +281 90 gm +-0.14102 0.(Acknowledgments)ashow +281 180 gm +1.01248 0.(.................................................................................)ashow +281 178 gm +( )show +281 504 gm +(12)show +293 90 gm +-0.08592 0.(Appendix A, File Formats)ashow +293 216 gm +1.01406 0.(........................................................................)ashow +293 214 gm +( )show +293 504 gm +(13)show +305 90 gm +-0.06129 0.(Appendix B, Adding Functions)ashow +305 240 gm +1.01536 0.(..................................................................)ashow +305 239 gm +( )show +305 504 gm +(16)show +317 90 gm +5.07019 0. 32 0.50701 0.(Appendix C, External functions..................................................................)awidthshow +317 240 gm +( )show +317 504 gm +(19)show +F T cp +%%Page: ? 2 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(2)show +99 90 gm +14 fz +2 F /|______Times-Roman fnt +0.16366 0.(Introduction)ashow +126 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.00480 0.(The Genetic Data Environment is part of a growing set of programs for manipulating and analyzing)ashow +137 90 gm +-0.02925 0.("genetic" data. 
It differs in design from other analysis programs in that it is intended to be an expandable and)ashow +148 90 gm +0.30548 0. 32 0.03054 0.(customizable system, while still being easy to use.)awidthshow +170 90 gm +-0.03929 0.(There are a tremendous number of publicly available programs for sequence analysis. Many of these)ashow +181 90 gm +-0.02575 0.(programs have found their way into commercial packages which incorporate them into integrated, easy to use)ashow +192 90 gm +0.00381 0. 32 0.00038 0.(systems. The goal of the GDE is to minimize the amount of effort required to integrate sequence analysis)awidthshow +203 90 gm +0.02471 0. 32 0.00247 0.(functions into a common environment. The GDE takes care of the user interface issues, and allows the)awidthshow +214 90 gm +0.08422 0. 32 0.00842 0.(programmer to concentrate on the analysis itself. Existing programs can be tied into the GDE in a matter of)awidthshow +225 90 gm +0.05676 0. 32 0.00567 0.(hours \(or minutes\) as apposed to days or weeks. Programs may be written in any language, and still)awidthshow +236 90 gm +-0.01551 0.(seamlessly be incorporated into the GDE.)ashow +258 90 gm +0.15960 0. 32 0.01596 0.(These programs are, and will continue to be, available at no charge. It is the hope that this system will)awidthshow +269 90 gm +-0.00163 0.(grow in functionality as more and more people see the benefits of a modular analysis environment. Users)ashow +280 90 gm +-0.04074 0.(are encouraged to make modifications to the system, and forward all changes and additions to Steven Smith)ashow +291 90 gm +-0.07734 0.(at smith@nucleus.harvard.edu.)ashow +316 90 gm +14 fz +2 F /|______Times-Roman fnt +0.33660 0. 32 0.03366 0.(What's New for this Release)awidthshow +339 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.02201 0.(There have been several changes made for this 2.0 release of GDE. The most notable changes involve new)ashow +350 90 gm +0.06668 0. 32 0.00666 0.(editing capabilities. 
For most functions, the user can either select the sequences by their name, or by)awidthshow +361 90 gm +-0.06840 0.(dragging over sections of the sequence text. See editing for more details. Other added features include a new)ashow +372 90 gm +-0.07867 0.(checking mode, reduced scale viewing for sequence assembly, improved cut/copy/paste, and many new)ashow +383 90 gm +0.08651 0. 32 0.00865 0.(external functions. There is also a new file format, GDE format, which retains more of the all of the)awidthshow +394 90 gm +0.05279 0. 32 0.00527 0.(available information about a sequence. This format \(unlike Genbank format in GDE1.0\) will not loose)awidthshow +405 90 gm +0.02593 0. 32 0.00259 0.(fields such as group numbers, etc.)awidthshow +427 90 gm +-0.01397 0.(There is one other change that should be noted. The old "flat" file format has changed. The format now)ashow +438 90 gm +(compresses leading gaps by allowing an offset number in parentheses after the name. GDE2.0 will still read)show +449 90 gm +0.15045 0. 32 0.01504 0.(old "flat" files, but will write them out in the new format. This might cause difficulties for some external)awidthshow +460 90 gm +0.17807 0. 32 0.01780 0.(functions written using this format. Because of the change, it is VERY important to keep the GDE1.0)awidthshow +471 90 gm +0.02624 0. 32 0.00262 0.(support programs separate from GDE2.0, as some are now incompatible. The original flatfile format can be)awidthshow +482 90 gm +-0.10980 0.(duplicated using ReadSeq/FastA format and sed.)ashow +518 90 gm +14 fz +2 F /|______Times-Roman fnt +-0.02626 0.(System Requirements)ashow +541 90 gm +10 fz +2 F /|______Times-Roman fnt +0.16036 0. 32 0.01603 0.(GDE 2.0 currently runs on the Sun family of workstations. This includes the Sun3 and Sun4 \(Sparcstation\))awidthshow +552 90 gm +0.19104 0. 32 0.01910 0.(systems. It was written in XView, and runs on Suns using OpenWindows 2.0 or MIT's X Windows. It)awidthshow +563 90 gm +0.00488 0. 
32 0.00048 0.(runs in both monochrome, and color, and can be run remotely on any system capable of running X Windows)awidthshow +574 90 gm +-0.00602 0.(Release 4. You should have at least 15 meg of free disk space available.)ashow +596 90 gm +0.14724 0. 32 0.01472 0.(We are also supporting a DECStation version of GDE. This is running under XView 3.0/X11R5. We)awidthshow +607 90 gm +0.02227 0. 32 0.00222 0.(encourage interested people to port the programs to their favorite Unix platform. We hope to support the)awidthshow +618 90 gm +-0.00794 0.(SGI and Cray line of Supercomputers.)ashow +643 90 gm +14 fz +2 F /|______Times-Roman fnt +0.36575 0. 32 0.03657 0.(Note to Motif users)awidthshow +666 90 gm +10 fz +2 F /|______Times-Roman fnt +0.08575 0. 32 0.00857 0.(GDE2.0 can be run using different window managers. The most common alternative to olwm is the Motif)awidthshow +677 90 gm +0.12878 0. 32 0.01287 0.(window manager \(mwm\). The only problem in using another window manager is that the status line is not)awidthshow +688 90 gm +-0.04948 0.(displayed. We have added a "Message panel" as an option under "File->Properties" which displays all of the)ashow +699 90 gm +0.10406 0. 32 0.01040 0.(information contained on the status line.)awidthshow +F T cp +%%Page: ? 3 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(3)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03054 0.(People using other window managers may also prefer using xterm, and xedit as default terminals and file)ashow +92 90 gm +-0.02236 0.(editors. This can be accomplished by replacing all occurrences of 'shelltool' and 'textedit' with 'xterm -e' and)ashow +103 90 gm +0.10833 0. 
32 0.01083 0.('xedit' in the $GDE_HELP_DIR/.GDEmenus file.)awidthshow +128 90 gm +14 fz +2 F /|______Times-Roman fnt +0.33935 0. 32 0.03393 0.(Installing the GDE)awidthshow +151 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03640 0.(Instructions for the source code release are included in the README.install file.)ashow +173 90 gm +-0.00282 0.(The binary installations consist of creating a GDE directory, such as /usr/local/GDE, and un-taring the)ashow +184 90 gm +0.15594 0. 32 0.01559 0.(installation tarfile into the directory. If you are installing the GDE for your own use, then you can simply)awidthshow +195 90 gm +0.01647 0. 32 0.00164 0.(make a GDE subdirectory. There is no need to be superuser \(root\) to do the installation in your own)awidthshow +206 90 gm +-0.02723 0.(directory. For example:)ashow +225 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +0.69610 0. 32 0.06961 0.(demo% )awidthshow +1 fs +{}mark T /Courier-Bold /|______Courier-Bold 0 rf +2 F /|______Courier-Bold fnt +1.28005 0. 32 0.12800 0.(mkdir /usr/local/GDE)awidthshow +233 90 gm +0 fs +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +0.79788 0. 32 0.07978 0.(demo% )awidthshow +1 fs +{}mark T /Courier-Bold /|______Courier-Bold 0 rf +2 F /|______Courier-Bold fnt +1.26770 0. 32 0.12677 0.(cp GDE2.0.tar /usr/local/GDE)awidthshow +241 90 gm +0 fs +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +0.55816 0. 32 0.05581 0.(demo% )awidthshow +1 fs +{}mark T /Courier-Bold /|______Courier-Bold 0 rf +2 F /|______Courier-Bold fnt +0.97320 0. 32 0.09732 0.(cd /usr/local/GDE)awidthshow +249 90 gm +0 fs +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +0.65994 0. 32 0.06599 0.(demo% )awidthshow +1 fs +{}mark T /Courier-Bold /|______Courier-Bold 0 rf +2 F /|______Courier-Bold fnt +0.85617 0. 
32 0.08561 0.(tar -xf GDE2.0.tar)awidthshow +271 90 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.00643 0.(After this, each new user will need to add two lines to their .cshrc file so that they can find the gde programs)ashow +282 90 gm +-0.07226 0.(and files.)ashow +301 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +0.50384 0. 32 0.05038 0.(demo% )awidthshow +1 fs +{}mark T /Courier-Bold /|______Courier-Bold 0 rf +2 F /|______Courier-Bold fnt +0.59280 0. 32 0.05928 0.(cat >> ~/.cshrc)awidthshow +309 90 gm +1.39968 0. 32 0.13996 0.(set path = \($path /usr/local/GDE/bin\))awidthshow +317 90 gm +2.04071 0. 32 0.20407 0.(setenv GDE_HELP_DIR /usr/local/GDE/help/)awidthshow +325 90 gm +0.60202 0.(^D)ashow +344 90 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.06118 0. 32 0.00611 0.(You may wish to make a copy of the .GDEmenus file from the help directory into your home directory.)awidthshow +355 90 gm +0.13107 0. 32 0.01310 0.(This is only necessary if you wish to modify your menus. Copy the demo files from /usr/local/GDE/demo)awidthshow +366 90 gm +-0.02130 0.(into your local directory, and you are now ready to use the GDE.)ashow +388 90 gm +-0.05734 0.(FastA and Blast need to have the properly formatted databases installed in the $GDE_HELP_DIR under the)ashow +399 90 gm +0.15838 0. 32 0.01583 0.(directories FASTA/PIR, FASTA/GENBANK, BLAST/pir BLAST/genbank. For FASTA, simply copy a)awidthshow +410 90 gm +-0.01397 0.(version of PIR and Genbank into the proper directory. Alternately, the PIR and GENBANK files can be)ashow +421 90 gm +0.08224 0. 32 0.00822 0.(symbolic links to copies of Genbank held elsewhere on your system.)awidthshow +443 90 gm +0.01754 0. 
32 0.00175 0.(Blast installation involves converting PIR and GENBANK to a temporary FASTA format \(using pir2fasta)awidthshow +454 90 gm +-0.07652 0.(and gb2fasta\) and then using pressdb for nucleic acid, and setdb for amino acid to reformat the databases again)ashow +465 90 gm +0.23468 0. 32 0.02346 0.(into blast format. The .GDEmenus file is currently set up to search with blast using the following)awidthshow +476 90 gm +0.01464 0. 32 0.00146 0.(databases: pir, genpept, genupdate, and genbank. If you wish to divide these into subdivisions, then the)awidthshow +487 90 gm +-0.00154 0.(.GDEmenus file will have to be edited.)ashow +509 90 gm +0.18249 0. 32 0.01824 0.(The most up to date release of blast can be obtained via anonymous ftp to ncbi.nlm.nih.gov. The most)awidthshow +520 90 gm +-0.02844 0.(recent release of FASTA can be obtained via anonymous ftp to uvaarpa.virginia.edu.)ashow +545 90 gm +14 fz +2 F /|______Times-Roman fnt +0.21560 0. 32 0.02156 0.(Using the GDE)awidthshow +568 90 gm +10 fz +2 F /|______Times-Roman fnt +0.08346 0. 32 0.00834 0.(It is assumed that the user is familiar with the Unix, and OpenWindows/Xwindows environments. It is also)awidthshow +579 90 gm +-0.04437 0.(assumed that people running standard MIT X-Windows will be using the OpenLook window manager)ashow +590 90 gm +0.03479 0. 32 0.00347 0.(\(olwm\). Other window managers work with varied success. If you are not certain as to how your system is)awidthshow +601 90 gm +0.06744 0. 32 0.00674 0.(set up, please contact your systems administrator.)awidthshow +623 90 gm +-0.00114 0.(Once the window system has started, and a terminal window \(xterm, shelltool etc.\) you can start up the GDE)ashow +634 90 gm +0.61325 0. 32 0.06132 0.(by typing:)awidthshow +656 90 gm +1.45217 0. 32 0.14521 0.(demo% )awidthshow +1 fs +{}mark T /Times-Bold /|______Times-Bold 0 rf +2 F /|______Times-Bold fnt +1.81137 0. 
32 0.18113 0.(gde tRNAs)awidthshow +678 90 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +2 F /|______Times-Roman fnt +-0.00747 0.(This should load the sample data set tRNAs into GDE, and the following window should appear:)ashow +F T cp +%%Page: ? 4 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(4)show +0 0 gm +(nc 72 90 269 544 6 rc)kp +64 gr +112 136 243 419 1 rc +0 gr +T 283 8.72726 136 112 98 778 24 T 1 dbdbdbdbdbdbdbdbdb +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000401000C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000402800C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000401000C0000000000000000 +00C41FDE3E10000000000000000000000000000031DFC70E1C3BFFF1C3BF8EFE3070C1C3870E1C3070E183860E1C3870E1DFFFFF1C3070E1C3870EFE3060E183870E1DFFF8E1C3067F1C3870EFE3870E1C3860E1C3870C02800C0000000000000000 +00C40211203000000000000000000000000000003242091224484082448412103090C24489122430912184861224489122420408243091224489121030612184891224204122430608244891210489122448612244890C01000C0000000000000000 +00C40211205000000000000000000000000000003402102040804084080420104901240810204049020248092040810204020408404902040810201048920248102040204204048908408102010810204080920408101402800C0000000000000000 
+00C40211201000000000000000000000000000003402102040804084080420104901240810204049020248092040810204020408404902040810201048920248102040204204048908408102010810204080920408101401000C0000000000000000 +00C4021E3C1000000000000000000000000000003402102040804084080420104901240810204049020248092040810204020408404902040810201048920248102040204204048908408102010810204080920408101402800C0000000000000000 +00C4021220100000000000000000000000000000344210204080408408842010FD13F448902040FD1227E81FA04089020402040840FD022448112210FDFA07E89122442042044FDF88408102010891224489FA0448103C01000C0000000000000000 +00C4021120100000000000000000000000000000344210204080408408842010851214489020408512242810A0408902040204084085022448112210850A04289122442042044850884081020108912244890A0448102402800C0000000000000000 +00C40211201000000000000000000000000000003242081020404082048410108492124488102084912424109020488102020408208481224409121085090424891224204102485088204081010489122449090244082401000C0000000000000000 +00C402113E10000000000000000000000000000031C2070E1C384081C3840E10847211C3870E1C8470E423908E1C3870E1C204081C8470E1C3870E108508E423870E1C2040E1C850881C3870E103870E1C3908E1C3872402800C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000401000C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000402800C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000401000C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000402800C0000000000000000 
+00C41FDE3E38000000000000000000000000000031DFC70E1C3BFFF1C3BF8EFE3070C1C3870E1C3070E183860E1C3870E1DFFFFF1C3070E1C3870EFE3060E183870E1DFFF8E1C3067F1C3870EFE3070E1C3860E1C3870C01000C0000000000000000 +00C40211204400000000000000000000000000003242091224484082448412103090C24489122430912184861224489122420408243091224489121030612184891224204122430608244891210309122448612244890C02800C0000000000000000 +00C40211200400000000000000000000000000003402102040804084080420104901240810204049020248092040810204020408404902040810201048920248102040204204048908408102010490204080920408101401000C0000000000000000 +00C40211200400000000000000000000000000003402102040804084080420104901240810204049020248092040810204020408404902040810201048920248102040204204048908408102010490204080920408101402800C0000000000000000 +00C4021E3C0800000000000000000000000000003402102040804084080420104901240810204049020248092040810204020408404902040810201048920248102040204204048908408102010490204080920408101401000C0000000000000000 +00C4021220100000000000000000000000000000344210204080408408842010FD13F448902040FD1227E81FA04089020402040840FD022448112210FDFA07E89122442042044FDF88408102010FD1224489FA0448103C02800C0000000000000000 +00C4021120200000000000000000000000000000344210204080408408842010851214489020408512242810A0408902040204084085022448112210850A04289122442042044850884081020108512244890A0448102401000C0000000000000000 +00C40211204000000000000000000000000000003242081020404082048410108492124488102084912424109020488102020408208481224409121085090424891224204102485088204081010849122449090244082402800C0000000000000000 +00C402113E7C000000000000000000000000000031C2070E1C384081C3840E10847211C3870E1C8470E423908E1C3870E1C204081C8470E1C3870E108508E423870E1C2040E1C850881C3870E108470E1C3908E1C3872401000C0000000000000000 +00C40000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000402800C0000000000000000 +T 283 8 136 
185.69564 98 778 23 T 1 dbdbdbdbdbdb +00C00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C001E0000000000F00000000000000030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C001860000000043000000001E0000030180000C1F01E019F80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C0018600000000C300000000330000030780000C19833039F80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C00186361F1E37F301B0F0F9B301E3C33180001618C61858180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C001863B303336C301D99981B30336633180001618C61898300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00C0018633383338C3019999C0330306630180002618C61918300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C0000000000000000 +00000186331E3F30C3019998F0330306630180003F18C619FC60000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+0008018633073030C301999838330306630180006318C619FC60000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +0008018633033130C301999819B30316633180006319833018C0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +00080186333E1E307301F0F1F19E01E3C3318000631F01E018C0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +000801E0000000000F0180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +0008000000000000000180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +0008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +0000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +0000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000 +00003FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE0040000000000000000 +007FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFC0000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +T 283 9 136 234.78259 98 778 23 T 1 dbgm +(nc 73 244 85 330 6 rc)kp +0 gr +T 1 setTxMode +2 F /|______Times-Roman fnt +-0.11044 0.(Command menus)ashow +256 425 gm +(nc 247 424 259 474 6 rc)kp +-0.07157 0.(Scrollbars)ashow +247 144 gm +(nc 238 143 250 195 6 rc)kp +-0.13261 0.(Status line)ashow +184 92 gm +(nc 175 91 199 123 6 rc)kp +(Short)show +196 92 gm +-0.16336 0.(names)ashow +85 416 gm +(nc 76 415 100 464 6 rc)kp +-0.13940 0.(Sequence)ashow +97 416 gm +-0.24850 0.(alignment)ashow +(nc 72 90 269 544 6 rc)kp +pr +140 195 pl +129 186 pl +127 189 pl +126 192 pl +140 195 pl +0 gr +1 ep +94 172 gm +127 189 
0 gr +lin +pr +157 361 pl +145 368 pl +147 371 pl +150 373 pl +157 361 pl +1 ep +103 415 gm +147 371 0 gr +lin +pr +184 397 pl +196 406 pl +197 403 pl +198 399 pl +184 397 pl +1 ep +247 424 gm +197 403 0 gr +lin +pr +229 325 pl +228 340 pl +232 339 pl +235 338 pl +229 325 pl +1 ep +247 424 gm +232 339 0 gr +lin +pr +166 136 pl +178 129 pl +176 126 pl +173 124 pl +166 136 pl +1 ep +184 118 gm +176 126 0 gr +lin +94 137 gm +(nc 85 136 97 170 6 rc)kp +0 gr +T 1 setTxMode +(Cursor)show +265 218 gm +(nc 256 217 268 327 6 rc)kp +-0.10447 0.(Split screen drag point)ashow +(nc 72 90 269 544 6 rc)kp +pr +225 384 pl +235 373 pl +232 372 pl +228 370 pl +225 384 pl +0 gr +1 ep +256 325 gm +232 372 0 gr +lin +167 472 gm +(nc 158 471 170 543 6 rc)kp +0 gr +T 1 setTxMode +-0.16412 0.(Scroll elevator)ashow +(nc 72 90 269 544 6 rc)kp +pr +148 397 pl +147 412 pl +151 411 pl +154 410 pl +148 397 pl +0 gr +1 ep +161 465 gm +151 411 0 gr +lin +pr +129 228 pl +115 229 pl +116 232 pl +117 236 pl +129 228 pl +1 ep +84 243 gm +116 232 0 gr +lin +131 460 gm +(nc 122 459 134 515 6 rc)kp +0 gr +T 1 setTxMode +-0.13102 0.(Resize tabs)ashow +(nc 72 90 269 544 6 rc)kp +pr +119 401 pl +118 416 pl +122 415 pl +125 414 pl +119 401 pl +0 gr +1 ep +129 454 gm +122 415 0 gr +lin +pr +229 399 pl +215 404 pl +217 407 pl +220 410 pl +229 399 pl +1 ep +164 445 gm +217 407 0 gr +lin +153 454 gm +137 465 lin +300 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +10 fz +2 F /|______Times-Roman fnt +0.08865 0. 32 0.00886 0.(This is the sequence alignment editor. It consists of a color alignment display, a set of command menus,)awidthshow +311 90 gm +0.00823 0. 32 0.00082 0.(horizontal and vertical scroll bars to navigate the alignment, a list of short sequence names \(usually the)awidthshow +322 90 gm +0.02410 0. 32 0.00241 0.(LOCUS of a Genbank entry\), and a status line. 
The cursor is located in the upper left corner.)awidthshow +355 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.07032 0.(Using the Mouse)ashow +378 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.02319 0.(The mouse follow OpenLook standards for operation. The functions for each button are:)ashow +0 0 gm +(nc 391 90 498 329 6 rc)kp +64 gr +411 178 497 239 17.5 17.5 1 rr +0 gr +411.5 178.5 496.5 238.5 17.5 17.5 0 rr +64 gr +426 189 460 197 17.5 17.5 1 rr +0 gr +426.5 189.5 459.5 196.5 17.5 17.5 0 rr +64 gr +426 205 460 212 17.5 17.5 1 rr +0 gr +426.5 205.5 459.5 211.5 17.5 17.5 0 rr +64 gr +426 220 460 227 17.5 17.5 1 rr +0 gr +426.5 220.5 459.5 226.5 17.5 17.5 0 rr +142 315 107 239 th +441 92 gm +tu +(nc 434 91 443 153 6 rc)kp +ts +0 gr +T 1 setTxMode +12 fz +2 F /|______Times-Roman fnt +-0.24205 0.(Object selection)ashow +tu +ts +450 92 gm +tu +(nc 443 91 452 165 6 rc)kp +ts +-0.14672 0.(clicking & dragging)ashow +tu +ts +441 270 gm +tu +(nc 434 269 443 328 6 rc)kp +ts +-0.20312 0.(Menu selection)ashow +tu +ts +450 270 gm +tu +(nc 443 269 452 302 6 rc)kp +ts +-0.09335 0.(dragging)ashow +tu +441 151 gm +(nc 391 90 498 329 6 rc)kp +441 189 0 gr +lin +433 182 449 198 165 195 1 ar +441 265 gm +441 231 lin +433 224 449 239 345 375 1 ar +ts +399 186 gm +tu +(nc 392 186 401 237 6 rc)kp +ts +0 gr +T 1 setTxMode +-0.22036 0.(Object adjust)ashow +tu +403 208 gm +(nc 391 90 498 329 6 rc)kp +426 208 0 gr +lin +418 201 434 217 255 285 1 ar +518 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +10 fz +2 F /|______Times-Roman fnt +0.01296 0. 32 0.00129 0.(The left mouse button is used for placing the cursor, selecting sequences by their short)awidthshow +529 90 gm +0.16891 0. 32 0.01689 0.(names, scrolling/paging, performing split screens, and resizing. The right button is used for pop up menus,)awidthshow +540 90 gm +-0.01223 0.(and scrollbar menus. 
The middle button is used for extending a text selection.)ashow +562 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.11749 0.(Cursor Movement)ashow +585 90 gm +10 fz +2 F /|______Times-Roman fnt +(The cursor can be moved using the arrow keys, or by clicking the mouse within a sequence. The cursors)show +596 90 gm +0.14465 0. 32 0.01446 0.(position is displayed on the status line in both sequence position and alignment column number. The right)awidthshow +607 90 gm +0.06561 0. 32 0.00656 0.(hand side of the status line shows the left and right column positions of the currently active display.)awidthshow +629 90 gm +0.07385 0. 32 0.00738 0.(Scrolling is controlled by the scrollbar elevator. By clicking \(left mouse button\) on one of the elevator)awidthshow +640 90 gm +-0.03851 0.(arrows, the screen will scroll one character in that direction. By dragging the elevator center, the screen can)ashow +651 90 gm +-0.00059 0.(be moved directly to any location. By clicking directly to one side of the elevator, the screen will scroll one)ashow +662 90 gm +0.07583 0. 32 0.00758 0.(full screen in that direction. And by clicking on the scrollbar anchor, the elevator will move to that anchor.)awidthshow +673 90 gm +0.20538 0. 32 0.02053 0.(Scrollbars also have menus associated with them giving other scroll options. Use the right mouse button to)awidthshow +684 90 gm +(activate the menu.)show +F T cp +%%Page: ? 5 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(5)show +92 90 gm +-0.12741 0.(Selecting Sequences)ashow +115 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.05250 0.(Sequence selection is necessary before most functions can be performed. 
Selecting sequences is)ashow +126 90 gm +-0.03315 0.(accomplished by clicking or dragging \(left button\) over the short name associated with the sequence\(s\). The)ashow +137 90 gm +-0.01461 0.(name of the sequence should become highlighted on the release of the mouse button. By holding down the)ashow +148 90 gm +0.13977 0. 32 0.01397 0.(shift key, you can toggle the selection on or off for any set of sequences. By clicking just to the right of)awidthshow +159 90 gm +-0.00563 0.(any sequence short name, you will deselect all of them.)ashow +181 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.20321 0.(Selecting Text)ashow +204 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.00436 0.(Selecting text is accomplished in much the same way as selecting entire sequences. In the editing window,)ashow +215 90 gm +0.04013 0. 32 0.00401 0.(you can drag the mouse pointer over a rectangular region the select a block of text. By using the shift key)awidthshow +226 90 gm +-0.01466 0.(\(or the middle mouse button\) you can adjust the selection to include other sequences, or other columns of)ashow +237 90 gm +-0.01264 0.(text. If groups are enabled, GDE will automatically select all sequences in a group if any one sequence in a)ashow +248 90 gm +-0.07606 0.(group is selected \(See Sequence Editing\).)ashow +270 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.12741 0.(Sequence Protection)ashow +293 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.05520 0.(All sequences can be individually protected against accidental modification. This is accomplished by)ashow +304 90 gm +-0.01127 0.(selecting the set of sequences that you are interested in editing, and choosing the "Set protections" menu)ashow +315 90 gm +0.03738 0. 32 0.00373 0.(item under the File menu. Your choices are:)awidthshow +337 90 gm +-0.03042 0.(Unambiguous modification)ashow +337 306 gm +-0.13563 0.(Changing/adding/deleting regular characters)ashow +348 90 gm +-0.02313 0.(Ambiguous changes)ashow +348 306 gm +0.28198 0. 
32 0.02819 0.(Changing ambiguous codes \('N', 'X'...\))awidthshow +359 90 gm +-0.01168 0.(Alignment modifications)ashow +359 306 gm +0.23391 0. 32 0.02339 0.(Changing alignment gaps \('-', '~'\))awidthshow +381 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.15385 0.(Sequence Editing)ashow +404 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.04483 0.(Sequences can be edited by simply typing to insert, and using the delete or backspace key to delete characters.)ashow +415 90 gm +0.05371 0. 32 0.00537 0.(Sequences must have the proper protections set to allow the type of modifications that you are attempting.)awidthshow +426 90 gm +0.06790 0. 32 0.00679 0.(The default protection level only allows modification to the alignment, but not to the sequences themselves.)awidthshow +437 90 gm +-0.00852 0.(The Sun function keys, cut, copy and paste are used to edit selected text. Text selections work in rectangular)ashow +448 90 gm +(\(possibly disjointed\) regions. You can cut or copy a block of sequence text, and paste it to a new cursor)show +459 90 gm +0.10787 0. 32 0.01078 0.(location using these three keys.)awidthshow +F T cp +%%Page: ? 6 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(6)show +92 90 gm +-0.11007 0.(Repeat Counts)ashow +115 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.02497 0.(By typing a numeric value before an editing function you can insert, delete or move a number of characters at)ashow +126 90 gm +-0.00587 0.(a time. The current repeat count is displayed on the status line, and can be cleared by clicking the left mouse)ashow +137 90 gm +0.08789 0. 32 0.00878 0.(button in the alignment window. In order to insert twenty gaps into a sequence, one would type "20-". 
In)awidthshow +148 90 gm +-0.02253 0.(order to move down five sequences, one would type "5)ashow +{}mark F /Symbol /|______Symbol 0 rf +7 fz +2 F /|______Symbol fnt +(\257)show +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.02194 0.(". This works with all sequence types, however the)ashow +159 90 gm +-0.01254 0.(meta \(diamond\) key must be held down when the cursor is in a text or mask sequence. This is because)ashow +170 90 gm +-0.07215 0.(numbers are valid characters in these sequences, and would otherwise be confused with repeat counts.)ashow +192 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.11955 0.(Split Screen)ashow +215 90 gm +10 fz +2 F /|______Times-Roman fnt +0.11734 0. 32 0.01173 0.(Split screen editing allows the viewing one region while editing another. This is very useful for aligning)awidthshow +226 90 gm +-0.03680 0.("downstream" regions by editing "upstream".)ashow +248 90 gm +0.14480 0. 32 0.01448 0.(The alignment window can be split horizontally into two or more windows into the alignment. These)awidthshow +259 90 gm +-0.03460 0.(windows scroll independently of each other both horizontally and vertically. The short names displayed to)ashow +270 90 gm +-0.01350 0.(the left of the alignment correspond to the window that was last scrolled or edited. Care should be taken in)ashow +281 90 gm +-0.04472 0.(any modifications done in this mode so that edits are performed on the correct sequence. To avoid confusion)ashow +292 90 gm +0.00595 0. 
32 0.00059 0.(during split screen operations, the vertical scroll bars may be locked so that all windows scroll together.)awidthshow +0 0 gm +(nc 294 162 447 412 6 rc)kp +64 gr +295 163 415 411 1 rc +0 gr +T 248 15.90908 163 295 70 554 35 T 1 dbdb +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000088000000800000001100000040000000000002000000000010000000000000100000008000000000000000000200000000000000000000000000000000000060000 +00600007888000000400001F011100000200000F8210C0278421800000800001E00010010000000400001F002000000000100000000000000000000000000000000000060000 +00600004008000000400001001010000020000084310C04446218000008000011000100000000004000010002000000000100000000000000000000000000000000000060000 +00600004088E00FE020000100F13C07F01000008231120444622401FC040000112C63CE11600FE02000010227ACE1C03F8080000000000000000000000000000000000060000 +00600004089100FC020000101111007E01000008229120844522401F80400001130911111900FC020000102223112003F0080000000000000000000000000000000000060000 +00600007889100780200001E1111003C0100000822D1208785A2400F00400001121091111100780200001E1422012001E0080000000000000000000000000000000000060000 +00600004089F0078020000101111003C010000082253F08484A7E00F00400001E21091F11100780200001008220F1801E0080000000000000000000000000000000000060000 
+006000040890003002000010111100180100000822321104446420060040000102109101110030020000101422110400C0080000000000000000000000000000000000060000 +006000040891003002000010131100180100000842321104446420060040000102091111110030020000102222110400C0080000000000000000000000000000000000060000 +00600004088E00000200001F0D10C0000100000F82121204442420000040000102060CE11100000200001F221A0EB80000080000000000000000000000000000000000060000 +00600000000000000400000000000000020000000000020000000000008000000000000000000004000000000000000000100000000000000000000000000000000000060000 +00600000000000000400000000000000020000000000000000000000008000000000000000000004000000000000000000100000000000000000000000000000000000060000 +00600000000000000800000000000000040000000000000000000000010000000000000000000008000000000000000000200000000000000000000000000000000000060000 +00600400000000001000080000000000080004000000000000000000020001000000000000000010000800000000000000400000000000000000000000000000000000060000 +006003000000000060000600000000003000030000000000000000000C0000C00000000000000060000600000000000001800000000000000000000000000000000000060000 +006000FFFFFFFFFF800001FFFFFFFFFFC00000FFFFFFFFFFFFFFFFFFF000003FFFFFFFFFFFFFFF800001FFFFFFFFFFFFFE000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 
+00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +00600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000060000 +0063FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE00023FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE0002060000 +00620000000000000000000000000000000000001800000000000000000000000000000000020002200000000000000000000000000000000000000000000000020002060000 +00620000000000000000000000000000000000001800000000000000000000000000000000020002200000000000000000000000000000000000000000000000020002060000 +00620000000000000000000000000000000000001FF00000000000000000000000000000000200023FC000000000000000000000000000000000000000000000020002060000 +T 248 15 163 325.90908 70 554 33 T 1 dbdbdbdbdbdbgr +391.5 255.5 446.5 314.5 90 180 0 ar +391.5 255.5 446.5 314.5 0 90 0 ar +0 0 2 9 9 2 dh +419 314 gm +425 321 lin +rh +psb +pse +0 0 2 9 9 2 dh +419 314 gm +425 307 lin +rh +psb +pse +0 0 pen +400 238 gm +400 238 lin +nc ct 39 0 put +pr +409 
256 pl +400 238 pl +418 247 pl +409 256 pl +1 ep +1 1 pen +409 256 gm +bp +400 238 T qi +400 238 qc +418 247 qc +418 247 qc +409 256 qc +409 256 48 gr +T qq +qf +qf +ef +15 ec +(nc 294 162 447 412 6 rc)kp +2 2 pen +408 246 gm +417 255 lin +0 0 pen +401 306 gm +401 306 lin +nc ct 39 0 put +pr +410 324 pl +401 306 pl +419 315 pl +410 324 pl +0 gr +1 ep +1 1 pen +410 324 gm +bp +401 306 T qi +401 306 qc +419 315 qc +419 315 qc +410 324 qc +410 324 0 gr +T qq +qf +qf +ef +15 ec +(nc 294 162 447 412 6 rc)kp +2 2 pen +409 314 gm +418 323 lin +1 1 pen +467 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +2 F /|______Times-Roman fnt +0.05432 0. 32 0.00543 0.(In order to split a window into two views, grab \(left button\) the left or right anchor \(small rectangle\) at)awidthshow +478 90 gm +-0.02000 0.(either end of the horizontal scrollbar and drag to the middle of the window. This should split the window)ashow +489 90 gm +0.21789 0. 32 0.02178 0.(into two views. To join two views, place the mouse pointer on the horizontal scroll bar use the menu \(right)awidthshow +500 90 gm +0.61981 0. 32 0.06198 0.(button\) .)awidthshow +522 90 gm +(The views are NOT two copies of the alignment. Changes in one window are reflected in the other. Users)show +533 90 gm +0.00976 0. 32 0.00097 0.(should not be confused by this fact.)awidthshow +555 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.06040 0.(Sequence Grouping)ashow +578 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.00846 0.(Sequences can be grouped for editing functions. This is very helpful when trying to adjust several sub)ashow +589 90 gm +-0.00823 0.(alignments. When grouped, all sequences within a group will be affected by editing in any member of the)ashow +600 90 gm +0.07675 0. 32 0.00767 0.(group. 
All sequences within a group must have protections set to allow modification before any one will be)awidthshow +611 90 gm +-0.20011 0.(modified.)ashow +633 90 gm +-0.03013 0.(In order to group sequences, select the names of the sequences that should fall within a group, and select)ashow +644 90 gm +-0.02406 0.(Group under the Edit menu. A number will be placed at the left of the sequence representing its assigned)ashow +655 90 gm +-0.04641 0.(group number. To any sequence or sequences, the user selects those sequences and uses the Ungroup)ashow +666 90 gm +-0.06661 0.(command under the Edit menu.)ashow +688 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.11912 0.(Special keys)ashow +F T cp +%%Page: ? 7 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(7)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01528 0.(There are also a few special function keys used in the GDE. Some functions have meta key equivalences so)ashow +92 90 gm +-0.01637 0.(that they can be called from the keyboard, instead of by the menu system. The "meta" key is a standard)ashow +103 90 gm +-0.05270 0.(property of X windows, and may be remapped to a different key symbol for different keyboards. For)ashow +114 90 gm +0.11459 0. 32 0.01145 0.(example, meta on Sun workstations is represented with a )awidthshow +{}mark F /Symbol /|______Symbol 0 rf +9 fz +2 F /|______Symbol fnt +(\340)show +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.11154 0. 32 0.01115 0.(, where on a Macintosh running MacX it might)awidthshow +125 90 gm +0.13610 0. 32 0.01361 0.(be the "apple" key. 
The operation of the key is the same as the control or shift key, it is held down while)awidthshow +136 90 gm +-0.04826 0.(pressing the second key in the sequence.)ashow +158 90 gm +0.07690 0. 32 0.00769 0.(Cut text, copy text and paste text are mapped to the Openlook equivalent keys \(L10, L6, and L8 on Sun)awidthshow +169 90 gm +-0.03259 0.(keyboards\). Other meta keys are defined in the .GDEmenus file, and may be changed to suit your)ashow +180 90 gm +-0.23315 0.(preferences.)ashow +205 90 gm +14 fz +2 F /|______Times-Roman fnt +-0.01499 0.(Data Types)ashow +228 90 gm +10 fz +2 F /|______Times-Roman fnt +0.01342 0. 32 0.00134 0.(The GDE supports several data types. The data types supported in 2.0 are DNA, RNA, protein \(single letter)awidthshow +239 90 gm +-0.08953 0.(codes\), mask sequence, and text.)ashow +261 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.16458 0.(DNA and RNA)ashow +284 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.06896 0.(Nucleic acid sequences are tightly type cast, and can contain any IUPAC code \(ACGTUM RSVWYHKDBN\))ashow +295 90 gm +0.00411 0. 32 0.00041 0.(as well as two alignment gap characters \('~' and '-'\). Some keys are remapped to fit IUPAC codes. For)awidthshow +306 90 gm +0.00274 0. 32 0.00027 0.(example, 'X' is mapped to 'N'. All nonstandard characters get mapped to the alignment gap '-'. Upper and)awidthshow +317 90 gm +-0.06726 0.(lower case are both supported, and the T/U characters are mapped based on whether you are working with)ashow +328 90 gm +-0.01962 0.(DNA or RNA. The color coding for DNA and RNA is identical. The color for ambiguous characters, and)ashow +339 90 gm +0.16555 0. 32 0.01655 0.(for alignment gaps is grey.)awidthshow +361 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.20167 0.(Amino Acid Sequence)ashow +384 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.04162 0.(Amino acid sequences are loosely type cast, and can contain any valid ASCII character. 
The results of)ashow +395 90 gm +-0.09436 0.(analysis on nonstandard characters is not guaranteed. The color for nonstandard amino acid characters, and for)ashow +406 90 gm +0.24002 0. 32 0.02400 0.(alignment gaps is grey.)awidthshow +428 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.16378 0.(Text Sequence)ashow +451 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03298 0.(Any valid ASCII printable character can be entered into a text sequence. Care should be taken with using)ashow +462 90 gm +-0.00344 0.(space characters, as these will only be saved properly in Genbank format, and not in flat file format. The)ashow +473 90 gm +-0.04928 0.(characters @#% and " should be avoided as well, as these can confuse the reading of flat files if saved in that)ashow +484 90 gm +-0.02505 0.(format.)ashow +506 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.13627 0.(Mask Sequence)ashow +529 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03303 0.(Mask sequence is identical to text sequence with the following exceptions. Mask sequence can have the)ashow +540 90 gm +0.06149 0. 32 0.00614 0.(ability \(function dependent\) of masking out positions in an alignment for analysis. If a mask sequence is)awidthshow +551 90 gm +0.01480 0. 32 0.00148 0.(selected along with some other sequence\(s\) for an analysis function that permits masking, then all columns)awidthshow +562 90 gm +0.06927 0. 32 0.00692 0.(that contain a '0' in the mask sequence will be ignored by the function. The mask itself would not be passed)awidthshow +573 90 gm +0.22354 0. 32 0.02235 0.(to the analysis function either. Some functions allow masking, some do not. Refer to the instruction page)awidthshow +584 90 gm +-0.00167 0.(for each function to see whether or not it supports sequence masking.)ashow +606 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.06585 0.(Color Masks)ashow +629 90 gm +10 fz +2 F /|______Times-Roman fnt +0.01022 0. 32 0.00102 0.(Color masks give color to a sequence on a position by position basis. 
Individual sequences can have color)awidthshow +640 90 gm +-0.01266 0.(masks attached to them, or one color mask can be used for an entire alignment. Color masks are generated)ashow +651 90 gm +-0.00192 0.(externally by some analysis functions, and are then passed back to the GDE. The file format for a colormask)ashow +662 90 gm +-0.07350 0.(is described in Appendix A.)ashow +698 90 gm +14 fz +2 F /|______Times-Roman fnt +0.15777 0. 32 0.01577 0.(Menu Functions: File menu)awidthshow +F T cp +%%Page: ? 8 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(8)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.00126 0.(The GDE has several built-in menu functions under the File and Edit menus. These functions are unique in)ashow +92 90 gm +-0.02758 0.(that they are part of the primary display editor, and are not described in the .GDEmenus file.)ashow +126 90 gm +12 fz +2 F /|______Times-Roman fnt +0.33511 0.(Open...)ashow +138 90 gm +10 fz +2 F /|______Times-Roman fnt +0.19515 0. 32 0.01951 0.(Selecting this will bring up the open file dialog box. Users can scroll through a list of files in the current)awidthshow +149 90 gm +-0.07145 0.(directory, move up and down the directory tree, and open any individual data file. The sequence data in that)ashow +160 90 gm +-0.02137 0.(file is loaded into the current editing window below any existing sequences. The open command will open)ashow +171 90 gm +-0.01487 0.(any Genbank formatted file, or a GDE flat file.)ashow +193 90 gm +12 fz +2 F /|______Times-Roman fnt +1.41235 0. 32 0.14123 0.(Save as...)awidthshow +205 90 gm +10 fz +2 F /|______Times-Roman fnt +0.07537 0. 32 0.00753 0.(This function will save the entire alignment to a specified file in either Genbank or flat file format. 
The file)awidthshow +216 90 gm +-0.00964 0.(will be saved in the local directory unless a relative or absolute path is specified.)ashow +249 90 gm +12 fz +2 F /|______Times-Roman fnt +0.19628 0.(Properties...)ashow +261 90 gm +10 fz +2 F /|______Times-Roman fnt +0.02334 0. 32 0.00233 0.(Properties controls the display settings. Those settings include character size, color type, and insert)awidthshow +272 90 gm +-0.07415 0.(direction. The screen can also be inverted, vertical scroll lock and keyboard clicks \(tactile feedback\) can be)ashow +283 90 gm +0.11657 0. 32 0.01165 0.(turned on or off. Vertical scrollbar lock will cause all split views to scroll together in the vertical)awidthshow +294 90 gm +-0.11550 0.(direction.)ashow +0 0 gm +(nc 296 234 431 370 6 rc)kp +64 gr +297 235 430 369 1 rc +0 gr +T 134 33.87997 235 297 40 305 77 T 1 dbdbdbdbgm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +12 fz +2 F /|______Times-Roman fnt +0.15495 0.(Protections...)ashow +485 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01272 0.(This will display, and then set the default protections for all selected sequences. If two or more of the)ashow +496 90 gm +-0.02812 0.(sequences differ in their current protection settings, a warning message will appear in the protection dialog)ashow +507 90 gm +-0.00859 0.(box. 
The protections currently available are alignment gap protection, ambiguous character protection,)ashow +518 90 gm +-0.02893 0.(unambiguous character protection, and translation protection.)ashow +0 0 gm +(nc 520 270 600 352 6 rc)kp +64 gr +521 271 599 351 1 rc +0 gr +T 80 39.75726 271 521 28 210 105 T 1 db +00000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000 +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC000 +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000400000000000000000000000000000000000000000000C000 +C000000400000000000000000000000000000000000000000000C000 +C000000400000000000000000000000000030000000000000000C000 +C0003F840000000003C00201F000040000430000000000000000C000 +C0002004000000000660060198000C0000C00000000000000000C000 +C0001004000000000603CF819B679F3C3DF31E1B0F8000000000C000 +C000100400000000070666019B6CCC6666C3331D980000000000C000 +C00008040000000003C66601938CCC6660C333199C0000000000C000 +C00008040000000000E7E601E30CCC7E60C333198F0000000000C000 +C00000040000000000660601830CCC6060C33319838000000000C000 +C00000040000000006662601830CCC6262C33319818000000000C000 +C00000040000000003C3C3818307873C3C731E199F0000000000C000 +C000000400000000000000000000000000000000000000000000C000 +C001FFF800000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF8C000 
+C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000200000000000000000000000000000000000000C000 +C0001F0000000100000000000000000000000000000000000000C000 +C000108000000100000000000000000000000000000000000000C000 +C00010430B0E0080000000000000000000000000000000000000C000 +C00010448C910080000000000000000000000000000000000000C000 +C000104848910080000000000000000000000000000000000000C000 +C0001048489F0080000000000000000000000000000000000000C000 +C000104848900080000000000000000000000000000000000000C000 +C000108488910080000000000000000000000000000000000000C000 +C0001F03088E0080000000000000000000000000000000000000C000 +C000000000000100000000000000000000000000000000000000C000 +C000000000000100000000000000000000000000000000000000C000 +C000000000000200000000000000000000000000000000000000C000 +C008000000000400000000000000000000000000000000000000C000 +C006000000001800000000000000000000000000000000000000C000 +C001FFFFFFFFE000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 
+C000000000000000000000000000000000000000000000000000C000 +C0000C600000000C0000000630E6000003000000000000000000C000 +C0060C600000000C000000063186000043000000000000000000C000 +C0060C600000000C0000000601800000C0000000000000000000C000 +C00B0C63C606787C06CC3C3E33E63C79F31E1B0F980000000000C000 +C00B0C666666CCCC07766666318666CCC3331D98180000000000C000 +C0130C66636CCCCC066666663186600CC333199C000000000000C000 +C01F8C66636CFCCC066666663186607CC333198F000000000000C000 +C0318C6663FCC0CC06666666318660CCC3331983800000000000C000 +C0318C666198C4DC0666666E318662CCC3331981980000000000C000 +C0318C63C198786C06663C3631863C76731E199F180000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000040000000000008080000000000020000000000000000000C000 +C000040000000000008080000000000020000000100000000000C000 +C000040000000000008000000000000020000000100000000000C000 +C00004000445870B30B08F110C2238072C38B383BCE2CE000000C000 +C00004000446488CC8C89111122240083244C444111310000000C000 +C000040004444088888891112122401022048048111210000000C000 +C0000400044447888888911121223010223C83C811F20C000000C000 +C000040004444888888893112122081022448448110202000000C000 +C000040004C4488888888D131226080822448444111202000000C000 +C00004000344474888F0810D0C1A7007223A83A38CE21C000000C000 +C000040000000000000001000000000000000000000000000000C000 
+C03FFC000000000000000E000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +C000000000000000000000000000000000000000000000000000C000 +T 80 39 271 560.75726 28 210 103 T 1 dbgm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +12 fz +2 F /|______Times-Roman fnt +1.17477 0. 32 0.11747 0.(Get info...)awidthshow +633 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01683 0.(This option allows the viewing and setting of attributes associated with each individual sequence. These)ashow +644 90 gm +-0.00477 0.(attributes include short name, full name, description, author, comments, and the sequence type. The)ashow +655 90 gm +-0.03654 0.(attributes loosely correspond to fields in a Genbank entry. Comments can be included for each sequence in)ashow +666 90 gm +-0.00778 0.(the comments field.)ashow +F T cp +%%Page: ? 9 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 303 gm +12 fz +2 F /|______Times-Roman fnt +(9)show +0 0 gm +(nc 72 162 251 419 6 rc)kp +64 gr +73 163 250 418 1 rc +0 gr +T 255 11.87997 163 73 74 579 27 T 1 dbdbdbdbdbdbdbdbdbdbdbdbdbdbdbdbgm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +14 fz +2 F /|______Times-Roman fnt +0.53985 0. 
32 0.05398 0.( Edit menu)awidthshow +308 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.29429 0.(Select All)ashow +320 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03724 0.(Selects all sequences. This is helpful when you have several dozen sequences.)ashow +342 90 gm +12 fz +2 F /|______Times-Roman fnt +0.19332 0. 32 0.01933 0.(Select by name...)awidthshow +354 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.02021 0.(Select all sequences containing a given string in their short names field. No wild cards are allowed, and only)ashow +365 90 gm +-0.01213 0.(selecting is allowed, not de-selecting. The search is started when the Return key is pressed, and multiple)ashow +376 90 gm +-0.06175 0.(searches can be accumulated. Press Done when finished.)ashow +398 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.08575 0.(Cut/Copy/Paste sequences)ashow +410 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.05426 0.(Cut copy and paste are primarily useful for reordering sequences, and for making duplicate copies of a given)ashow +421 90 gm +0.07507 0. 32 0.00750 0.(sequence. They do not pass information to other programs. This capability will be implemented in a later)awidthshow +432 90 gm +-0.04278 0.(release. Cut and copy will place the selected sequences on an internal clipboard. They can then be pasted)ashow +443 90 gm +-0.06423 0.(back into the top of editing window \(default\) or under the last selected sequence.)ashow +465 90 gm +12 fz +2 F /|______Times-Roman fnt +0.02943 0.(Group/Ungroup)ashow +477 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.00544 0.(Assign a group number to the selected sequences. Edit operations in any one sequence within the group will)ashow +488 90 gm +0.00320 0. 32 0.00032 0.(be propagated to all within the group. Sequence protections from one group are also imposed upon all other)awidthshow +499 90 gm +0.07339 0. 32 0.00733 0.(sequence in the given group. If a given operation is illegal in one sequence in a group \(i.e. 
alignment)awidthshow +510 90 gm +0.06820 0. 32 0.00682 0.(modification\) then it will not work in any of the sequences in that group. Ungroup will remove the selected)awidthshow +521 90 gm +-0.05171 0.(sequences from a given group.)ashow +543 90 gm +12 fz +2 F /|______Times-Roman fnt +(Compress)show +555 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01747 0.(Compress will remove gap characters from the selected sequences. The user has the option of removing all)ashow +566 90 gm +0.34576 0. 32 0.03457 0.(gaps, or simply all columns containing nothing but gaps. This is useful for minimizing the length of a)awidthshow +577 90 gm +0.05169 0.(subalignment.)ashow +599 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.10784 0.(Reverse Sequence)ashow +611 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.06991 0.(Reverses the selected sequences. Alignment gaps are reversed as well. The selected sequences will remain)ashow +622 90 gm +-0.13101 0.(aligned after reversal.)ashow +647 90 gm +14 fz +2 F /|______Times-Roman fnt +0.04180 0. 32 0.00418 0.( DNA/RNA menu)awidthshow +671 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.16490 0.(Complement Sequence)ashow +683 90 gm +10 fz +2 F /|______Times-Roman fnt +0.20431 0. 32 0.02043 0.(Converts DNA/RNA into its complement strand \(keeping full IUPAC ambiguity\). This function has no)awidthshow +694 90 gm +-0.01968 0.(effect on text, protein, or mask sequence. Note that this function does not produce the reverse strand of DNA)ashow +705 90 gm +-0.02952 0.(but merely converts A<->T and G<->C. If the reverse strand is needed, remember to Complement and)ashow +716 90 gm +-0.09683 0.(Reverse the sequence \(Edit menu\).)ashow +F T cp +%%Page: ? 10 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 
32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(10)show +107 90 gm +14 fz +2 F /|______Times-Roman fnt +0.68984 0. 32 0.06898 0.(External Functions)awidthshow +130 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.02468 0.(See appendix C for a full description of functions supported in GDE 2.0 All external functions are described)ashow +141 90 gm +0.00610 0. 32 0.00061 0.(in the configuration file .GDEmenus. Here is a brief description of some of the basic functions included.)awidthshow +163 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.16563 0.(File menu)ashow +186 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.07015 0.(New Sequence )ashow +186 198 gm +-0.05102 0.(Create a new sequence. Prompts for sequence type, and short name.)ashow +208 90 gm +-0.03674 0.(Import foreign format)ashow +219 90 gm +-0.03681 0.(Export foreign format)ashow +219 198 gm +-0.08541 0.(Load and save sequences using Readseq by Don Gilbert \(see Appendix C\).)ashow +241 90 gm +-0.01116 0.(Save Selection)ashow +241 198 gm +-0.08074 0.(Save the currently selected sequences in a specified file.)ashow +263 90 gm +0.35308 0. 32 0.03530 0.(Print Selection)awidthshow +263 198 gm +0.01174 0. 32 0.00117 0.(Print the selected sequences to the chosen printer. This function supports)awidthshow +274 198 gm +0.04150 0. 32 0.00415 0.(the Unix command enscript as well as lpr. The .GDEmenus file may need to)awidthshow +285 198 gm +-0.00283 0.(be modified to add the names of local printers to the printer list.)ashow +307 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.20724 0.(Edit menu)ashow +330 90 gm +10 fz +2 F /|______Times-Roman fnt +0.25080 0.(sort...)ashow +330 198 gm +-0.06867 0.(Sort the selected sequences by a primary and secondary key. 
Pass the new order)ashow +341 198 gm +-0.00726 0.(to a new GDE window.)ashow +363 90 gm +-0.14492 0.(Extract)ashow +363 198 gm +-0.07679 0.(Extract the selected sequences into a new window.)ashow +385 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.24058 0.(DNA/RNA Menu)ashow +397 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.14962 0.(Translate)ashow +397 198 gm +-0.05361 0.(Translate the selected sequences from DNA/RNA to Amino acid. The user can)ashow +408 198 gm +-0.07470 0.(specify the desired reading frame, and the minimum open reading frame \(stop)ashow +419 198 gm +-0.00601 0.(codon to stop codon\) to translate. The user can also choose between single)ashow +430 198 gm +-0.09318 0.(letter code and triple letter codes.)ashow +452 90 gm +0.55633 0. 32 0.05563 0.(Dot plot)awidthshow +452 198 gm +-0.02882 0.(Display a dotplot identity matrix for the selected sequence\(s\). If only one)ashow +463 198 gm +(sequence is selected, then the dotplot is a self comparison. If two or more)show +474 198 gm +-0.10966 0.(sequences are selected, then the first two sequences are compared.)ashow +496 90 gm +0.63247 0. 32 0.06324 0.(Clustal Align)awidthshow +496 198 gm +0.04074 0. 32 0.00407 0.(Align the selected sequences using the clustalv algorithm by Des Higgins.)awidthshow +507 198 gm +-0.07983 0.(\(See Appendix C\))ashow +529 90 gm +0.29251 0. 32 0.02925 0.(Find All )awidthshow +529 198 gm +-0.04464 0.(Search and highlight the selected sequences for a given substring. A specified)ashow +540 198 gm +-0.02334 0.(percent of mismatching can also be allowed.)ashow +562 90 gm +0.15136 0. 32 0.01513 0.(Variable Positions)awidthshow +562 198 gm +-0.06744 0.(The selected sequences are scored column by column for conservation. The)ashow +573 198 gm +0.02929 0. 32 0.00292 0.(result is returned as a grey scale alignment color mask. This can be useful)awidthshow +584 198 gm +0.41549 0. 
32 0.04154 0.(in selecting PCR primers.)awidthshow +606 90 gm +-0.09349 0.(Sequence Consensus)ashow +606 198 gm +-0.02246 0.(Return the consensus for the selected sequences. This can either be a majority)ashow +617 198 gm +0.17730 0. 32 0.01773 0.(consensus, or an ambiguity consensus using IUPAC coding.)awidthshow +639 90 gm +-0.01284 0.(Distance Matrix )ashow +639 198 gm +-0.07553 0.(Calculate a distance matrix for the selected sequences. \(See Appendix C\))ashow +661 90 gm +(MFOLD)show +661 198 gm +-0.01577 0.(Fold the selected sequences using MFOLD by Michael Zuker. The resulting)ashow +672 198 gm +-0.08619 0.(structure is returned as a nested bracket \('[]'\) representation of the secondary)ashow +683 198 gm +-0.05360 0.(structure.\(See appendix C.\))ashow +705 90 gm +-0.13284 0.(Draw Secondary Structure)ashow +705 198 gm +-0.07971 0.(Draw the selected sequence using the proposed secondary structure. Both the)ashow +716 198 gm +-0.11421 0.(secondary structure prediction, and the RNA sequence should be selected before)ashow +F T cp +%%Page: ? 11 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(11)show +81 198 gm +10 fz +2 F /|______Times-Roman fnt +0.08117 0. 32 0.00811 0.(calling this function. The drawing program is LoopTool. \(See Appendix C\))awidthshow +103 90 gm +0.39718 0. 32 0.03971 0.(Highlight Helix)awidthshow +103 198 gm +-0.02478 0.(Show all violations to a proposed RNA secondary structure. The secondary)ashow +114 198 gm +-0.06376 0.(structure represented must be selected, as well as the aligned sequences to be)ashow +125 198 gm +-0.06709 0.(tested. 
The selected sequences will then be colored according to whether or not)ashow +136 198 gm +-0.02209 0.(they support the proposed 2)ashow +currentfont SwToSym +-0.02209 0.(\260)ashow +setfont +-0.02209 0.( structure. Standard Watson/Crick paring will be)ashow +147 198 gm +(colored dark blue, G-U paring will be colored light blue, mismatches will be)show +158 198 gm +-0.03656 0.(colored gold, and pairng to gaps will be red.)ashow +180 90 gm +-0.09217 0.(Blastn/BlastX)ashow +180 198 gm +-0.06690 0.(Search the selected sequence \(select only one\) against a given database with the)ashow +191 198 gm +0.21972 0. 32 0.02197 0.(BLAST searching tool written by Altschul, Gish, Miller, Myers, and Lipman.)awidthshow +202 198 gm +-0.05590 0.(Blastn searches DNA against DNA databases, blastx searches DNA against AA)ashow +213 198 gm +-0.04621 0.(databases by translating the sequence in all six reading frames. \(See Appendix C\))ashow +235 90 gm +0.02897 0.(FastA)ashow +235 198 gm +-0.06472 0.(Search the selected sequence \(select only one\) against a given database using the)ashow +246 198 gm +0.02075 0. 32 0.00207 0.(FASTA similarity search program written by Pearson and Lipman. \(See)awidthshow +257 198 gm +-0.09335 0.(Appendix C\))ashow +279 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.15008 0.(Protein Menu)ashow +313 90 gm +10 fz +2 F /|______Times-Roman fnt +0.63247 0. 32 0.06324 0.(Clustal Align)awidthshow +313 198 gm +-0.02090 0.(Align the selected amino acid sequences using the clustal algorithm. \(See)ashow +324 198 gm +-0.09335 0.(Appendix C\))ashow +346 90 gm +0.21408 0. 32 0.02140 0.(Blastp, Tblastn, Blast3)awidthshow +346 198 gm +-0.06690 0.(Search the selected sequence \(select only one\) against a given database with the)ashow +357 198 gm +0.21972 0. 
32 0.02197 0.(BLAST searching tool written by Altschul, Gish, Miller, Myers, and Lipman.)awidthshow +368 198 gm +-0.04742 0.(Blastp searches AA against AA databases, tblastn searches AA against DNA)ashow +379 198 gm +-0.03672 0.(databases by translating the database in all six reading frames. Blast3 finds)ashow +390 198 gm +0.00274 0. 32 0.00027 0.(three way alignments that are could not be found with only pairwise comparisons.)awidthshow +401 198 gm +-0.07983 0.(\(See Appendix C\))ashow +423 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.20037 0.(Sequence Management Menu)ashow +446 90 gm +10 fz +2 F /|______Times-Roman fnt +0.07385 0. 32 0.00738 0.(Assemble contigs)awidthshow +446 198 gm +0.02410 0. 32 0.00241 0.(Assemble the selected sequences into contigs using the program CAP \(Contig)awidthshow +457 198 gm +-0.03036 0.(Assemble Program\) written by Xiaoqiu Huang. The resulting sequences are)ashow +468 198 gm +-0.03370 0.(returned to the current GDE window, and they are grouped into contigs. The)ashow +479 198 gm +-0.05517 0.(user can then sort the sequences by group, and offset to produce an ordered list of)ashow +490 198 gm +0.02960 0. 32 0.00296 0.(the contigs. \(See Appendix C\))awidthshow +512 90 gm +-0.02136 0.(Strategy view)ashow +512 198 gm +0.03570 0. 32 0.00357 0.(Pass out the selected sequences to StratView. This program will display contigs)awidthshow +523 198 gm +-0.01431 0.(in a greatly reduced line drawing. This is very useful for large contigs.)ashow +545 90 gm +0.32684 0. 32 0.03268 0.(Restriction sites)awidthshow +545 198 gm +-0.06761 0.(Search the selected sequences for the restriction enzymes specified in the given)ashow +556 198 gm +-0.01243 0.(enzyme file. 
The restriction sites are then colored by enzyme.)ashow +578 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.07624 0.(Phylogeny menu)ashow +602 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.07130 0.(DeSoete Tree fit)ashow +602 198 gm +-0.00927 0.(Calculate a phylogenetic tree using a least squares fitting algorithm on a distance)ashow +613 198 gm +-0.06059 0.(matrix calculated from the selected sequences. The results can then be passed on)ashow +624 198 gm +-0.01338 0.(to treetool for display and manipulation. \(See Appendix C\))ashow +646 90 gm +0.99151 0. 32 0.09915 0.(Phylip 3.4)awidthshow +646 198 gm +0.10299 0. 32 0.01029 0.(Pass the selected data to on of the treeing programs in Phylip, written by)awidthshow +657 198 gm +0.16418 0. 32 0.01641 0.(Joe Felsenstein. The chosen phylip program is started in it's own window,)awidthshow +668 198 gm +-0.12712 0.(with the selected sequences already loaded. \(See Appendix C\))ashow +F T cp +%%Page: ? 12 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(12)show +106 90 gm +14 fz +2 F /|______Times-Roman fnt +0.70434 0. 32 0.07043 0.(Citation of work)awidthshow +129 90 gm +10 fz +2 F /|______Times-Roman fnt +0.03417 0. 32 0.00341 0.(We ask that any published work using any of the external functions in GDE cite the appropriate authors.)awidthshow +140 90 gm +-0.08705 0.(Please see Appendix C for references.)ashow +187 90 gm +14 fz +2 F /|______Times-Roman fnt +0.42251 0. 32 0.04225 0.(Bug reports/extensions)awidthshow +210 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03602 0.(Any bug reports, RFE's \(request for enhancement\), and useful extensions to the GDE should be forwarded by)ashow +221 90 gm +0.06652 0. 
32 0.00665 0.(electronic mail to:)awidthshow +243 90 gm +-0.12284 0.(smith@nucleus.harvard.edu)ashow +265 90 gm +-0.01402 0.(Please include as much detail as possible in bug reports so that the bug can be reproduced.)ashow +276 90 gm +-0.16358 0.(Correspondence should be addressed to:)ashow +298 90 gm +0.66253 0. 32 0.06625 0.(Steven Smith)awidthshow +309 90 gm +0.03417 0. 32 0.00341 0.(Director of Computation)awidthshow +320 90 gm +-0.16851 0.(Harvard Genome Laboratory)ashow +331 90 gm +0.55831 0. 32 0.05583 0.(16. Divinity Ave.)awidthshow +342 90 gm +-0.04499 0.(Cambridge, MA)ashow +342 162 gm +(02138)show +433 90 gm +14 fz +2 F /|______Times-Roman fnt +-0.06921 0.(Acknowledgments)ashow +456 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01979 0.(I would like to thank the following people for their input and assistance and code used in the development of)ashow +467 90 gm +(the GDE:)show +489 90 gm +0.10726 0. 32 0.01072 0.(Carl Woese, Gary Olsen and Mike Maciukenas at University of Illinois Dept of Microbiology, Ross)awidthshow +500 90 gm +-0.02450 0.(Overbeek at Argonne National Laboratories,Walter Gilbert, Patrick Gillevet, Chunwei Wang and Susan)ashow +511 90 gm +0.02166 0. 32 0.00216 0.(Russo at the Harvard Genome Laboratory. I would also like to personally thank the following people for)awidthshow +522 90 gm +0.02990 0. 32 0.00299 0.(their permission to include their software with this release of GDE.)awidthshow +544 90 gm +0.14419 0. 32 0.01441 0.(Des Higgins)awidthshow +555 90 gm +-0.04928 0.(David Lipman and the group at NCBI)ashow +566 90 gm +0.03280 0. 32 0.00328 0.(William Pearson)awidthshow +577 90 gm +(Don Gilbert)show +588 90 gm +-0.11433 0.(Xiaoqui Huang)ashow +599 90 gm +0.07690 0. 32 0.00769 0.(Joe Felsenstein)awidthshow +610 90 gm +-0.09466 0.(Michael Zuker)ashow +621 90 gm +-0.13116 0.(Geert DeSoete)ashow +632 90 gm +-0.01974 0.(Michael Maciukenas and the group at the Ribosomal Database Project)ashow +654 90 gm +0.10925 0. 
32 0.01092 0.(It is only by the generosity of these people that GDE has been successful.)awidthshow +F T cp +%%Page: ? 13 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(13)show +95 90 gm +14 fz +2 F /|______Times-Roman fnt +0.57128 0. 32 0.05712 0.(Appendix A, File Formats)awidthshow +122 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.03565 0.(The currently supported file formats include GDE data files, Genbank formatted files \(with type extensions\),)ashow +133 90 gm +-0.01033 0.(a generic flat file format, and a color mask file.)ashow +155 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.18190 0.(GDE format)ashow +167 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.02778 0.(GDE format is a tagged field format used for storing all available information about a sequence. The format)ashow +178 90 gm +-0.03959 0.(matches very closely the GDE internal structures for sequence data. The format consists of text records)ashow +189 90 gm +-0.08013 0.(starting and ending with braces \('{}'\). Between the open and close braces are several tagged field lines)ashow +200 90 gm +-0.04377 0.(specifying different pieces of information about a given sequence. The tag values can be wrapped with)ashow +211 90 gm +-0.05966 0.(double quote characters \('""'\) as needed. If quotes are not used, the first whitespace delimited string is taken)ashow +222 90 gm +-0.03451 0.(as the value. 
The allowable fields are:)ashow +244 90 gm +({)show +255 90 gm +-0.21841 0.(name)ashow +255 162 gm +-0.09494 0.("Short name for sequence")ashow +266 90 gm +-0.06198 0.(longname)ashow +266 162 gm +-0.11402 0.("Long \(more descriptive\) name for sequence")ashow +277 90 gm +-0.35173 0.(sequence-ID)ashow +277 162 gm +-0.09922 0.("Unique ID number")ashow +288 90 gm +-0.26521 0.(creation-date)ashow +288 162 gm +0.12008 0. 32 0.01200 0.("mm/dd/yy hh:mm:ss")awidthshow +299 90 gm +-0.19244 0.(direction)ashow +299 162 gm +-0.19648 0.([-1|1])ashow +310 90 gm +-0.28077 0.(strandedness)ashow +310 162 gm +-0.16368 0.([1|2])ashow +321 90 gm +-0.07225 0.(type)ashow +321 162 gm +-0.07638 0.([DNA|RNA||PROTEIN|TEXT|MASK])ashow +332 90 gm +-0.15226 0.(offset)ashow +332 162 gm +-0.03222 0.(\(-999999,999999\))ashow +343 90 gm +-0.17175 0.(group-ID)ashow +343 162 gm +-0.02587 0.(\(0,999\))ashow +354 90 gm +-0.29151 0.(creator)ashow +354 162 gm +-0.02342 0.("Author's name")ashow +365 90 gm +-0.31198 0.(descrip)ashow +365 162 gm +-0.12005 0.("Verbose description")ashow +376 90 gm +-0.01441 0.(comments)ashow +376 162 gm +-0.01306 0.("Lines of comments that can be fairly arbitrary)ashow +387 90 gm +-0.03907 0.(text about a sequence. Return characters are allowed, but no internal)ashow +398 90 gm +-0.05203 0.(double quotes or brace characters. Remember to close with a double)ashow +409 90 gm +-0.25929 0.(quote")ashow +420 90 gm +-0.37757 0.(sequence)ashow +420 162 gm +-0.11505 0.("gctagctagctagctagctcttagctgtagtcgtagctgatgctagct)ashow +431 90 gm +-0.13807 0.(gatgctagctagctagctagctgatcgatgctagctgatcgtagctgacg)ashow +442 90 gm +-0.09281 0.(gactgatgctagctagctagctagctgtctagtgtcgtagtgcttattgc")ashow +453 90 gm +(})show +475 90 gm +-0.03117 0.(Any fields that are not specified are assumed to be the default values. Offsets can be negative as well as)ashow +486 90 gm +-0.00729 0.(positive. 
Genbank entries written out in this format will have all \("\) converted to \('\), and all \({}\) converted)ashow +497 90 gm +-0.03678 0.(to \([]\) to avoid confusion in the parser. Leading and trailing gaps are removed prior to writing each sequence.)ashow +508 90 gm +-0.00801 0.(This format is deliberately verbose in order to be simple to duplicate.)ashow +552 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.11645 0.(Genbank format:)ashow +564 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.06373 0.(GDE can read a concatenated list of Genbank entries, and extract certain fields from such files. The default)ashow +575 90 gm +(method for storing nucleic acid, amino acid, masking sequences or text is in Genbank format. The following)show +586 90 gm +-0.19308 0.(fields are recognized:)ashow +608 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.23880 0.(LOCUS:)ashow +608 162 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.04878 0.(Short name for this sequence \(Maximum of 32 characters\))ashow +0 -3 rm +7 fz +2 F /|______Times-Roman fnt +(\240)show +619 90 gm +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +-0.21890 0.(DEFINITION:)ashow +619 162 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.06732 0.(Definition of sequence \(Maximum of 80 characters\))ashow +630 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.22387 0.(ORGANISM:)ashow +630 198 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +(Full name of organism \(Maximum of 80 characters\))show +641 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.22743 0.(AUTHORS:)ashow +641 198 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.04580 0.(Authors of this sequence \(Maximum of 80 characters\))ashow +652 90 gm +{}mark T /Courier /|______Courier 0 
rf +7 fz +2 F /|______Courier fnt +-0.22111 0.(ACCESSION:)ashow +652 162 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.06069 0.(ID Number for this sequence \(Maximum of 80 characters\))ashow +663 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.23217 0.(ORIGIN:)ashow +663 162 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.16001 0.(Beginning of sequence data)ashow +0 -3 rm +7 fz +2 F /|______Times-Roman fnt +(\240)show +-4096 -4096 gm +-4095 -4095 0 gr +lin +6 25 lw +691 90 gm +691 233 lin +25 6 lw +1 1 lw +704 90 gm +0 gr +T 1 setTxMode +9 fz +2 F /|______Times-Roman fnt +-0.03189 0.(\240 Required field)ashow +F T cp +%%Page: ? 14 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(14)show +81 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.39801 0.(//)ashow +81 198 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.22740 0.(End of sequence data)ashow +0 -3 rm +7 fz +2 F /|______Times-Roman fnt +(\240)show +103 90 gm +10 fz +2 F /|______Times-Roman fnt +0.01617 0. 32 0.00161 0.(All other lines are retained as comments. The LOCUS line also specifies what type of sequence follows.)awidthshow +114 90 gm +0.31143 0. 
32 0.03114 0.(The form of this line is:)awidthshow +133 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(LOCUS )ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.26533 0.(name)ashow +133 198 gm +-0.19900 0.(size)ashow +0 fs +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +-0.29850 0.( bp)ashow +133 234 gm +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.26533 0.(type)ashow +133 270 gm +-0.26533 0.(date)ashow +155 90 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.23757 0. 32 0.02375 0.(where )awidthshow +2 fs +{}mark T /Times-Italic /|______Times-Italic 0 rf +2 F /|______Times-Italic fnt +0.07641 0.(name)ashow +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +2 F /|______Times-Roman fnt +0.19012 0. 32 0.01901 0.( is the Genbank Locus name, )awidthshow +2 fs +{}mark T /Times-Italic /|______Times-Italic 0 rf +2 F /|______Times-Italic fnt +0.16464 0. 32 0.01646 0.(size )awidthshow +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +2 F /|______Times-Roman fnt +0.17791 0. 32 0.01779 0.(is total base count, )awidthshow +2 fs +{}mark T /Times-Italic /|______Times-Italic 0 rf +2 F /|______Times-Italic fnt +0.05877 0.(type)ashow +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +2 F /|______Times-Roman fnt +0.21957 0. 32 0.02195 0.( is one of DNA, RNA, PROTEIN,)awidthshow +166 90 gm +-0.03018 0.(MASK, or TEXT and )ashow +2 fs +{}mark T /Times-Italic /|______Times-Italic 0 rf +2 F /|______Times-Italic fnt +-0.02607 0.(date)ashow +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +2 F /|______Times-Roman fnt +-0.02537 0.( is of the form dd-MON-yyyy. In this way, the standard Genbank format is)ashow +177 90 gm +-0.06455 0.(extended to store all text, mask and protein data. 
The Genbank character set has also been extended in order)ashow +188 90 gm +-0.04171 0.(to support these other data types. Valid characters are:)ashow +210 90 gm +-0.04614 0.(DNA/RNA:)ashow +210 198 gm +-0.00355 0.(Full IUPAC coding as well as '-' and '~' characters for alignment)ashow +221 198 gm +-0.10925 0.(gaps)ashow +232 90 gm +0.04852 0.(Protein:)ashow +232 198 gm +-0.01260 0.(All valid single letter codes plus '-' and '~'. Other ASCII characters)ashow +243 198 gm +-0.04739 0.(may be inserted, however external functions may be confused by)ashow +254 198 gm +-0.12287 0.(such characters.)ashow +265 90 gm +(Mask:)show +265 198 gm +0.01281 0. 32 0.00128 0.(All legal printable ASCII characters. If used as a selection mask, all)awidthshow +276 198 gm +0.12207 0. 32 0.01220 0.(columns containing a '0' will be removed from any analysis.)awidthshow +287 90 gm +-0.02584 0.(Text:)ashow +287 198 gm +-0.05348 0.(All valid ASCII characters.)ashow +309 90 gm +0.05142 0. 32 0.00514 0.(Here is a valid Genbank entry for two E.coli tRNA's:)awidthshow +328 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.20202 0.(LOCUS ECOTRNT4 76 bp RNA 28-JAN-1991)ashow +336 90 gm +-0.20275 0.(DEFINITION E. coli \(T4 infected\) vulnerable tRNA \(A\).)ashow +344 90 gm +-0.20637 0.( ORGANISM Escherichia coli)ashow +352 90 gm +-0.20315 0.( AUTHORS Amitsur,M., Levitz,R. 
and Kaufmann,G.)ashow +360 90 gm +-0.20362 0.(FEATURES From To/Span Description)ashow +368 90 gm +-0.20298 0.( tRNA 1 76 vulnerable tRNA\(A\))ashow +376 90 gm +-0.21559 0.(BASE COUNT ?)ashow +384 90 gm +-0.23880 0.(ORIGIN)ashow +392 90 gm +-0.20169 0.( 1 GGGUCGUUAG CUCAGUUGGU AGAGCAGUUG ACUUUUAAUC AAUUGGNCGC AGGUUCGAAU)ashow +400 90 gm +-0.20666 0.( 61 CCUGCACGAC CCACCA)ashow +408 90 gm +-0.39801 0.(//)ashow +416 90 gm +-0.20202 0.(LOCUS ECOTRQ1 75 bp RNA 28-JAN-1991)ashow +424 90 gm +-0.20587 0.(DEFINITION E.coli Gln-tRNA-1.)ashow +432 90 gm +-0.20637 0.( ORGANISM Escherichia coli)ashow +440 90 gm +-0.20503 0.( AUTHORS Yaniv,M. and Folk,W.R.)ashow +448 90 gm +-0.20166 0.(SOURCE -REFERENCE [1] JOURNAL J. Biol. Chem. 250, 3243-3253 \(1975\))ashow +456 90 gm +-0.20362 0.(FEATURES From To/Span Description)ashow +464 90 gm +-0.20269 0.( tRNA 1 75 Gln-tRNA-1 \(NAR: 0510\))ashow +472 90 gm +-0.20231 0.( refnumbr 1 1 sequence not numbered in [1])ashow +480 90 gm +-0.21559 0.(BASE COUNT ?)ashow +488 90 gm +-0.23880 0.(ORIGIN)ashow +496 90 gm +-0.20169 0.( 1 UGGGGUAUCG CCAAGCGGUA AGGCACCGGU UUUUGAUACC GGCAUUCCCU GGUUCGAAUC)ashow +504 90 gm +-0.20697 0.( 61 CAGGUACCCC AGCCA)ashow +512 90 gm +-0.39801 0.(//)ashow +545 90 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +12 fz +2 F /|______Times-Roman fnt +-0.18539 0.(Flat file format:)ashow +557 90 gm +10 fz +2 F /|______Times-Roman fnt +0.11169 0. 32 0.01116 0.(This is a simplified format for importing sequence data, and passing it out to analysis functions. Very little)awidthshow +568 90 gm +0.02944 0. 32 0.00294 0.(information is actually retained in this format, and should be used carefully so as not to lose attribute)awidthshow +579 90 gm +0.03372 0. 32 0.00337 0.(information. 
It is defined as follow:)awidthshow +598 90 gm +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +7 fz +2 F /|______Courier-Oblique fnt +-0.20729 0.(type_character short_name)ashow +606 90 gm +-0.21559 0.(sequence_data)ashow +614 90 gm +-0.21559 0.(sequence_data)ashow +622 90 gm +-0.21559 0.(sequence_data)ashow +630 90 gm +-0.29850 0.(...)ashow +652 90 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.02508 0.(The type character is # for DNA/RNA, % for protein sequence, @ for mask sequence, and " for text. The)ashow +663 90 gm +0.07781 0. 32 0.00778 0.(short name is the same as the LOCUS line in Genbank. This is followed by lines of sequence, each ending)awidthshow +674 90 gm +-0.03877 0.(with a return character.These lines are read until the next type character is encountered, or until the end of the)ashow +685 90 gm +-0.01963 0.(file is reached. Care should be taken in using this format with text as space characters are stripped)ashow +-4096 -4096 gm +-4095 -4095 0 gr +lin +6 25 lw +703 90 gm +703 233 lin +25 6 lw +1 1 lw +F T cp +%%Page: ? 15 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(15)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +0.03921 0. 32 0.00392 0.(automatically. As of release 2.0, flat file format allows for an optional offset to be specified in parentheses)awidthshow +92 90 gm +-0.07716 0.(after the sequence name. An offset represents how many leading gap characters should be placed before the)ashow +103 90 gm +0.07568 0. 32 0.00756 0.(start of a sequence. If this offset does not exist, then it is defined to be 0.)awidthshow +125 90 gm +0.13809 0. 
32 0.01380 0.(Here is a sample flat file for two Ecoli tRNA's:)awidthshow +144 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.22387 0.(#ECOTRNT4)ashow +152 90 gm +-0.20237 0.(GGGUCGUUAGCUCAGUUGGUAGAGCAGUUGACUUUUAAUCAAUUGGNCGCAGGUUCGAAU)ashow +160 90 gm +-0.21226 0.(CCUGCACGACCCACCA)ashow +168 90 gm +-0.22743 0.(#ECOTRQ1)ashow +176 90 gm +-0.20237 0.(UGGGGUAUCGCCAAGCGGUAAGGCACCGGUUUUUGAUACCGGCAUUCCCUGGUUCGAAUC)ashow +184 90 gm +-0.21322 0.(CAGGUACCCCAGCCA)ashow +217 90 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +12 fz +2 F /|______Times-Roman fnt +-0.09919 0.(Color mask:)ashow +229 90 gm +10 fz +2 F /|______Times-Roman fnt +0.10299 0. 32 0.01029 0.(The format for a color mask has been kept simple to make implementation of color functions easy. The)awidthshow +240 90 gm +0.05355 0. 32 0.00535 0.(format optionally defines which sequence to color, whether or not to color alignment gaps in the existing)awidthshow +251 90 gm +0.04089 0. 32 0.00408 0.(sequence, and how long the following mask will be. It is then followed by a list of decimal color codes)awidthshow +262 90 gm +-0.01760 0.(\(range 0 to 15\) for each position in the sequence. There are four keywords used in the color mask file.)ashow +273 90 gm +-0.12745 0.(Those keywords are:)ashow +295 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(name:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.22111 0.(short name)ashow +295 234 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.09985 0.(If short name matches a currently loaded sequence,)ashow +306 234 gm +0.16754 0. 32 0.01675 0.(then impose this color mask on that sequence. If this)awidthshow +317 234 gm +0.04577 0. 32 0.00457 0.(line is omitted, then color all sequences this color, and the color)awidthshow +328 234 gm +0.10223 0. 
32 0.01022 0.(mask is expected to start at the leftmost column on the screen.)awidthshow +350 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(length:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.23880 0.(length)ashow +350 234 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.40191 0. 32 0.04019 0.(The following list in length long)awidthshow +372 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.23217 0.(nodash:)ashow +372 234 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.00579 0. 32 0.00057 0.(Skip over dash characters when imposing this color mask)awidthshow +383 234 gm +-0.03414 0.(on the named sequence. This allows an unaligned color)ashow +394 234 gm +-0.09637 0.(mask to be placed over aligned sequence.)ashow +416 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.23880 0.(start:)ashow +416 234 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.02685 0. 32 0.00268 0.(Begin reading the color mask on the next line.)awidthshow +438 90 gm +0.04302 0. 
32 0.00430 0.(Here is a sample color mask file:)awidthshow +457 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.21070 0.(name:test_sequence)ashow +465 90 gm +-0.22387 0.(length:10)ashow +473 90 gm +-0.23217 0.(nodash:)ashow +481 90 gm +-0.23880 0.(start:)ashow +489 90 gm +(3)show +497 90 gm +(3)show +505 90 gm +(3)show +513 90 gm +(6)show +521 90 gm +(5)show +529 90 gm +(3)show +537 90 gm +(3)show +545 90 gm +(3)show +553 90 gm +(2)show +561 90 gm +(7)show +580 90 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.00993 0.(The colors in the default color lookup table are:)ashow +591 90 gm +(0)show +591 126 gm +-0.10839 0.(White)ashow +591 270 gm +(8)show +591 306 gm +-0.33126 0.(Black)ashow +602 90 gm +(1)show +602 126 gm +-0.08668 0.(Yellow)ashow +602 234 gm +(9)show +602 270 gm +-0.09704 0.(Grey 1)ashow +613 90 gm +(2)show +613 126 gm +(Violet)show +613 270 gm +(10)show +613 306 gm +-0.09704 0.(Grey 2)ashow +624 90 gm +(3)show +624 126 gm +-0.55415 0.(Red)ashow +624 270 gm +(11)show +624 306 gm +-0.09704 0.(Grey 3)ashow +635 90 gm +(4)show +635 126 gm +-0.55255 0.(Aqua)ashow +635 270 gm +(12)show +635 306 gm +-0.09704 0.(Grey 4)ashow +646 90 gm +(5)show +646 126 gm +-0.11416 0.(Lime Green)ashow +646 270 gm +(13)show +646 306 gm +-0.09704 0.(Grey 5)ashow +657 90 gm +(6)show +657 126 gm +-0.29557 0.(Blue)ashow +657 270 gm +(14)show +657 306 gm +-0.09704 0.(Grey 6)ashow +668 90 gm +(7)show +668 126 gm +-0.02070 0.(Purple)ashow +668 270 gm +(15)show +668 306 gm +-0.10839 0.(White)ashow +F T cp +%%Page: ? 16 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(16)show +84 90 gm +14 fz +2 F /|______Times-Roman fnt +0.21636 0. 
32 0.02163 0.(Appendix B, Adding Functions)awidthshow +107 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.04040 0.(The GDE uses a menu description language to define what external programs it can call, and what parameters)ashow +118 90 gm +0.08483 0. 32 0.00848 0.(and data to pass to each function. This language allows users to customize their own environment to suite)awidthshow +129 90 gm +-0.14483 0.(individual needs.)ashow +151 90 gm +-0.02262 0.(The following is how the GDE handles external programs when selected from a menu:)ashow +0 0 gm +(nc 164 126 255 369 6 rc)kp +64 gr +164 211 202 277 14.5 14.5 4 rr +0 gr +164.5 211.5 201.5 276.5 14.5 14.5 0 rr +64 gr +164 126 202 192 14.5 14.5 4 rr +0 gr +164.5 126.5 201.5 191.5 14.5 14.5 0 rr +64 gr +164 300 202 366 14.5 14.5 4 rr +0 gr +164.5 300.5 201.5 365.5 14.5 14.5 0 rr +183 192 gm +(nc 164 126 255 208 6 rc)kp +183 211 lin +(nc 164 126 255 369 6 rc)kp +177 205 190 218 165 195 4 ar +183 277 gm +(nc 164 126 255 296 6 rc)kp +183 300 lin +(nc 164 126 255 369 6 rc)kp +177 293 190 307 165 195 4 ar +145 385 91 243 th +177 133 gm +0 gr +T 1 setTxMode +12 fz +2 F /|______Times-Roman fnt +-0.17835 0.(Display dialog)ashow +185 133 gm +-0.07539 0.(box presenting)ashow +192 133 gm +0.30838 0. 32 0.03083 0.(user options.)awidthshow +177 215 gm +-0.19335 0.(Write out selected)ashow +185 215 gm +-0.13066 0.(data \(if any\) to)ashow +192 215 gm +-0.06405 0.(temporary files.)ashow +177 303 gm +-0.20286 0.(Call external )ashow +185 303 gm +(function, passing)show +192 303 gm +-0.10202 0.(parameters and data.)ashow +tu +64 gr +214 300 255 366 4 rc +0 gr +214.5 300.5 254.5 365.5 0 rc +202 331 gm +(nc 164 126 211 369 6 rc)kp +214 331 lin +(nc 164 126 255 369 6 rc)kp +208 325 221 338 255 285 4 ar +ts +224 307 gm +0 gr +T 1 setTxMode +-0.15270 0.(External program)ashow +232 307 gm +0.09719 0. 
32 0.00971 0.(runs analysis, and)awidthshow +239 307 gm +-0.06079 0.(writes results to)ashow +247 307 gm +-0.06405 0.(temporary files.)ashow +tu +64 gr +217 211 255 277 14.5 14.5 4 rr +0 gr +217.5 211.5 254.5 276.5 14.5 14.5 0 rr +236 300 gm +(nc 164 281 255 369 6 rc)kp +236 277 lin +(nc 164 126 255 369 6 rc)kp +230 271 243 284 345 375 4 ar +ts +227 218 gm +0 gr +T 1 setTxMode +-0.08067 0.(GDE reads results)ashow +235 218 gm +-0.11495 0.(of temporary files)ashow +242 218 gm +0.20019 0. 32 0.02001 0.(\(if any\).)awidthshow +tu +64 gr +217 126 255 192 14.5 14.5 4 rr +0 gr +217.5 126.5 254.5 191.5 14.5 14.5 0 rr +236 211 gm +(nc 164 195 255 369 6 rc)kp +236 192 lin +(nc 164 126 255 369 6 rc)kp +230 186 243 199 345 375 4 ar +ts +227 133 gm +0 gr +T 1 setTxMode +-0.13632 0.(GDE cleans up)ashow +235 133 gm +-0.06405 0.(temporary files,)ashow +242 133 gm +-0.04287 0.(and displays new)ashow +250 133 gm +(data.)show +tu +275 90 gm +(nc 31 30 761 582 6 rc)kp +10 fz +2 F /|______Times-Roman fnt +0.03387 0. 32 0.00338 0.(Each step in this process is described in a file .GDEmenus in the user's current or home)awidthshow +286 90 gm +-0.17651 0.(directory.)ashow +308 90 gm +-0.02418 0.(The language used in this file describes three phases to an external function call. The first phase describes)ashow +319 90 gm +0.12283 0. 32 0.01228 0.(the menu item as it will appear, and the Unix command line that is actually run when it is selected. The)awidthshow +330 90 gm +-0.06280 0.(second phase describes how to prompt for the parameters needed by the function. The third phase describes)ashow +341 90 gm +-0.05538 0.(what data needs to be passed as input to the external function, and what data \(if any\) needs to be read back)ashow +352 90 gm +0.58975 0. 32 0.05897 0.(from its output.)awidthshow +374 90 gm +-0.01350 0.(The form of the language is a simple keyword/value list delimited by the colon \(:\) character. The language)ashow +385 90 gm +0.13610 0. 
32 0.01361 0.(retains old values until new ones are set. For example, setting the menu name is done once for all items in)awidthshow +396 90 gm +0.02822 0. 32 0.00282 0.(that menu, and is only reset when the next menu is reached.)awidthshow +418 90 gm +-0.09211 0.(The keywords for phase one are:)ashow +440 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(menu:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.22387 0.(menu name)ashow +440 306 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.06471 0.(Name of current menu)ashow +451 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(item:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.22387 0.(item name)ashow +451 306 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.02093 0.(Name of current menu item)ashow +462 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(itemmeta:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.22743 0.(meta_key)ashow +462 306 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.12283 0.(Meta key equivalence \(quick keys\))ashow +473 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(itemhelp:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.22387 0.(help_file)ashow +473 306 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.15548 0. 
32 0.01554 0.(Help file \(either full path, or in)awidthshow +484 306 gm +-0.09056 0.(GDE_HELP_DIR\))ashow +492 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(itemmethod:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21710 0.(Unix command)ashow +514 90 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.14251 0. 32 0.01425 0.(The item method command is a bit more involved, it is the Unix command that will actually run the)awidthshow +525 90 gm +0.02655 0. 32 0.00265 0.(external program intended. It is one line long, and can be up to 256 characters in length. It can have)awidthshow +536 90 gm +-0.02996 0.(embedded variable names \(starting with a '$'\) that will be replaced with appropriate values later on. It can)ashow +547 90 gm +-0.00277 0.(consist of multiple Unix commands separated by semi-colons \(;\), and may contain shell scripts and)ashow +558 90 gm +0.00350 0. 32 0.00035 0.(background processes as well as simple command names. Examples will be given later.)awidthshow +580 90 gm +-0.07740 0.(The keywords for phase two are:)ashow +602 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(arg:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.20848 0.(argument_variable_name)ashow +602 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.07934 0. 32 0.00793 0.(Name of this variable. It will appear)awidthshow +613 342 gm +0.09628 0. 32 0.00962 0.(in the itemmethod: line with a dollar)awidthshow +624 342 gm +0.35354 0. 
32 0.03535 0.(sign \($\) in front of it.)awidthshow +635 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(argtype:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.20503 0.(slider,chooser,choice_menu or text)ashow +635 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.02027 0.(The type of graphic object)ashow +646 342 gm +-0.00474 0.(representing this argument.)ashow +668 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(arglabel:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21144 0.(descriptive label)ashow +668 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.07995 0. 32 0.00799 0.(A short description of what this)awidthshow +679 342 gm +-0.09936 0.(argument represents)ashow +701 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(argmin:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.20805 0.(minimum_value \(integer\))ashow +701 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.11270 0.(Used for sliders.)ashow +723 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(argmax:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.20805 0.(maximum_value \(integer\))ashow +723 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.11270 0.(Used for sliders.)ashow +F T cp +%%Page: ? 17 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 
32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(17)show +92 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(argvalue:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.20805 0.(default_value \(integer\))ashow +92 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.00714 0.(It is the numeric value associated with)ashow +103 342 gm +-0.05737 0.(sliders or the default choice in)ashow +114 342 gm +-0.06477 0.(choosers and choice_menus \(the first)ashow +125 342 gm +0.10726 0. 32 0.01072 0.(choice is 0, the second is 1 etc.\))awidthshow +147 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(argtext:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21559 0.(default value)ashow +147 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.07046 0.(Used for text fields.)ashow +169 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(argchoice:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.19900 0.(displayed value)ashow +0 fs +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +-0.19900 0.(:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21710 0.(passed value)ashow +169 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.15618 0.(Used for choosers and)ashow +180 342 gm +0.10101 0. 32 0.01010 0.(choice_menus. 
The first value is)awidthshow +191 342 gm +-0.12577 0.(displayed on screen, and the second)ashow +202 342 gm +-0.00753 0.(value is passed to the itemmethod)ashow +213 342 gm +0.12620 0.(line.)ashow +235 90 gm +-0.06112 0.(The keywords for phase three are as follows:)ashow +257 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(in:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.19900 0.(input_file)ashow +0 fs +{}mark T /Courier /|______Courier 0 rf +2 F /|______Courier fnt +( )show +257 342 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.07873 0. 32 0.00787 0.(GDE will replace this name with a)awidthshow +268 342 gm +-0.12794 0.(randomly generated temporary file)ashow +279 342 gm +(name. It will then write the selected)show +290 342 gm +0.24902 0. 32 0.02490 0.(data out to this file.)awidthshow +312 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(informat:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21890 0.(file_format)ashow +312 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.14724 0. 32 0.01472 0.(Write data to this file for input to)awidthshow +323 342 gm +0.33538 0. 32 0.03353 0.(this function. Currently support)awidthshow +334 342 gm +-0.05738 0.(values are Genbank, and flat.)ashow +345 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.23217 0.(inmask:)ashow +345 342 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.05563 0.(This data can be controlled by a)ashow +356 342 gm +0.14770 0. 
32 0.01477 0.(selection mask.)awidthshow +378 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.23217 0.(insave:)ashow +378 342 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.08697 0. 32 0.00869 0.(Do not remove this file after running)awidthshow +389 342 gm +0.19424 0. 32 0.01942 0.(the external function. This is useful)awidthshow +400 342 gm +0.01831 0. 32 0.00183 0.(for functions put in the background.)awidthshow +422 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(out:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21890 0.(output_file)ashow +422 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.07873 0. 32 0.00787 0.(GDE will replace this name with a)awidthshow +433 342 gm +-0.12794 0.(randomly generated temporary file)ashow +444 342 gm +0.21469 0. 32 0.02146 0.(name. It is up to the external function)awidthshow +455 342 gm +0.36849 0. 32 0.03684 0.(to fill this file with any results that)awidthshow +466 342 gm +0.03570 0. 32 0.00357 0.(might be read back into the GDE.)awidthshow +488 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.19900 0.(outformat:)ashow +2 fs +{}mark T /Courier-Oblique /|______Courier-Oblique 0 rf +2 F /|______Courier-Oblique fnt +-0.21890 0.(file_format)ashow +488 342 gm +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.18402 0. 32 0.01840 0.(The data in the output file will be in)awidthshow +499 342 gm +0.29846 0. 32 0.02984 0.(this format. 
Currently support)awidthshow +510 342 gm +-0.05863 0.(values are colormask, Genbank, and)ashow +521 342 gm +0.04431 0.(flat.)ashow +543 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.22743 0.(outsave:)ashow +543 342 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.01464 0.(Do not remove this file after reading.)ashow +554 342 gm +0.03158 0. 32 0.00315 0.(This is useful for background tasks.)awidthshow +576 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.21559 0.(outoverwrite:)ashow +576 342 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.06021 0.(Overwrite existing sequences in the current)ashow +587 342 gm +-0.00816 0.(GDE window. Currently supported with)ashow +598 342 gm +-0.03097 0.("gde" format only.)ashow +642 90 gm +0.11749 0. 32 0.01174 0.(Here is a sample dialog box, and it's entry in the .GDEmenus file:)awidthshow +F T cp +%%Page: ? 18 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 
32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(18)show +454 408 241 216 th +147 170 gm +tu +(nc 143 169 312 327 6 rc)kp +ts +{}mark T /Courier /|______Courier 0 rf +8.33332 fz +2 F /|______Courier fnt +( )show +165 170 gm +(menu:Test function)show +177 170 gm +(item:All capitals)show +182 170 gm +(itemmethod:\(tr '[a-z]' '[A-Z]' < INPUT_FILE > )show +188 170 gm +(INPUT_FILE.tmp ; mv INPUT_FILE.tmp $SAVE_FILE_NAME ; gde )show +194 170 gm +($SAVE_FILE_NAME -Wx $SIZE ; rm INPUT_FILE\) &)show +206 170 gm +(arg:SAVE_FILE_NAME)show +212 170 gm +(argtype:text)show +217 170 gm +(arglabel:Save converted data as?)show +223 170 gm +(argtext:CAPS)show +235 170 gm +(arg:SIZE)show +241 170 gm +(arglabel:Text size?)show +247 170 gm +(argtype:chooser)show +252 170 gm +(argvalue:1)show +258 170 gm +(argchoice:Small:small)show +264 170 gm +(argchoice:Medium:medium)show +270 170 gm +(argchoice:Large:large)show +276 170 gm +(argchoice:Extra Large:extra_large)show +288 170 gm +(in:INPUT_FILE)show +293 170 gm +(informat:flat)show +299 170 gm +(insave:)show +tu +305 170 gm +(nc 72 162 313 378 6 rc)kp +64 gr +73 166 142 355 1 rc +0 gr +73.5 166.5 141.5 354.5 0 rc +64 gr +79 174 89 193 12.5 12.5 1 rr +0 gr +79.5 174.5 88.5 192.5 12.5 12.5 0 rr +64 gr +77 172 137 352 1 rc +0 gr +T 180 31.07141 172 77 44 341 58 T 1 dbdbgr +77 172 137 352 1 rc +0 gr +T 180 31.07141 172 77 44 341 58 T 1 dbdbgm +119 274 lin +119 240 gm +119 214 lin +130 241 gm +130 276 lin +119 301 gm +119 345 lin +130 215 gm +119 215 lin +78.5 173.5 88.5 192.5 12.5 12.5 0 rr +78.5 197.5 88.5 228.5 12.5 12.5 0 rr +119 275 gm +130 275 lin +130 241 lin +200.5 163.5 224.5 263.5 13.5 13.5 0 rr +pr +100 268 pl +98 275 pl +100 275 pl +102 275 pl +100 268 pl +1 ep +100 377 gm +100 275 0 gr +lin +229.5 163.5 276.5 263.5 13.5 13.5 0 rr +0 0 pen +248 325 gm +248 325 lin +nc ct 39 0 put +1 1 pen +248 263 gm +bp +248 325 F qi +248 325 qc +248 363 qc +248 363 qc +124 363 qc +124 363 F qq +ef +9 ec 
+(nc 72 162 313 378 6 rc)kp +248 363 gm +pr +124 349 pl +122 356 pl +124 356 pl +126 356 pl +124 349 pl +1 ep +124 363 gm +124 356 0 gr +lin +100 377 gm +215 377 lin +215 263 gm +215 377 lin +322 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.02491 0.(Using the default parameters given in the dialog box, the executed Unix command line would be:)ashow +341 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.20088 0.(\(tr '[a-z]' '[A-Z]' < .gde_001 >.gde_001.tmp ; mv .gde_001.tmp CAPS ; gde CAPS -Wx medium ; rm .gde_001 \) &)ashow +363 90 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.06463 0.(where )ashow +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.06048 0.(.gde_001)ashow +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.05946 0.( is the name of the temporary file generated by the GDE which contains the selected sequences)ashow +374 90 gm +0.09124 0. 32 0.00912 0.(in flat file format. Since the GDE runs this command in the background \('&' at the end\) it is necessary to)awidthshow +385 90 gm +(specify the )show +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +(insave: )show +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +-0.00428 0.(line, and to remove all temporary files manually. There is no output file specific because)ashow +396 90 gm +-0.02909 0.(the data is not loaded back into the current GDE window, but rather a new GDE window is opened on the)ashow +407 90 gm +-0.01303 0.(file. 
A simpler command that reloads the data after conversion might be:)ashow +426 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.21559 0.(item:All caps)ashow +434 90 gm +-0.20352 0.(itemmethod:tr '[a-z]' '[A-Z]' OUTPUT)ashow +450 90 gm +-0.22743 0.(in:INPUT)ashow +458 90 gm +-0.21559 0.(informat:flat)ashow +474 90 gm +-0.22111 0.(out:OUTPUT)ashow +482 90 gm +-0.21430 0.(outformat:flat)ashow +501 90 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.06103 0. 32 0.00610 0.(In this example, no arguments are specified, and so no dialog box will appear. The command is not run in)awidthshow +512 90 gm +-0.03242 0.(the background, so the GDE can clean up after itself automatically. The converted sequence is automatically)ashow +523 90 gm +-0.07383 0.(loaded back into the current GDE window.)ashow +545 90 gm +0.02014 0. 32 0.00201 0.(In general, the easiest type of program to integrate into the GDE is a program completely driven from a)awidthshow +556 90 gm +-0.00051 0.(Unix command line. Interactive programs can be tied in \(MFOLD for example\), however shell scripts must)ashow +567 90 gm +-0.01737 0.(be used to drive the parameter entry for these programs. Programs of the form:)ashow +586 90 gm +{}mark T /Courier /|______Courier 0 rf +7 fz +2 F /|______Courier fnt +-0.20149 0.(program_name -a1 argument1 -a2 arguement2 -f inputfile -er errorfile > outputfile)ashow +608 90 gm +{}mark T /Times-Roman /|______Times-Roman 0 rf +10 fz +2 F /|______Times-Roman fnt +0.06240 0. 32 0.00624 0.(can be specified in the .GDEmenus file directly. As this is the general form of most one Unix commands,)awidthshow +619 90 gm +0.06774 0. 32 0.00677 0.(these tend to be simpler to implement under the GDE.)awidthshow +641 90 gm +0.07995 0. 32 0.00799 0.(As functions grow in complexity, they may begin to need a user interface of their own. 
In these cases, the)awidthshow +652 90 gm +-0.01388 0.(command line calling arguments are still necessary in order to allow the GDE to hand them the appropriate)ashow +663 90 gm +-0.02767 0.(data, and possible retrieve results after some external manipulation.)ashow +F T cp +%%Page: ? 19 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(19)show +84 90 gm +14 fz +2 F /|______Times-Roman fnt +0.49774 0. 32 0.04977 0.(Appendix C, External functions)awidthshow +107 90 gm +10 fz +2 F /|______Times-Roman fnt +0.08163 0. 32 0.00816 0.(ClustalV - Cluster multiple sequence alignment)awidthshow +129 90 gm +0.30380 0. 32 0.03038 0.(Author: Des Higgins.)awidthshow +151 90 gm +-0.25508 0.(Reference:)ashow +151 162 gm +0.14328 0. 32 0.01432 0.(Higgins,D.G. Bleasby,A.J. and Fuchs,R. \(1991\) CLUSTAL V: improved software)awidthshow +162 162 gm +0.13442 0. 32 0.01344 0.(for multiple sequence alignment. ms. submitted to CABIOS)awidthshow +183 90 gm +-0.11924 0.(Parameters:)ashow +194 162 gm +-0.07839 0.(k-tuple pairwise search)ashow +194 270 gm +-0.07196 0.(Word size for pairwise comparisons)ashow +205 162 gm +-0.14807 0.(Window size)ashow +205 270 gm +0.07278 0. 32 0.00727 0.(Smaller values give faster alignments,)awidthshow +216 270 gm +-0.03422 0.(larger values are more sensitive.)ashow +227 162 gm +-0.05999 0.(Transitions weighted)ashow +227 270 gm +0.19729 0. 32 0.01972 0.(Can weight transitions twice as high as)awidthshow +238 270 gm +-0.01446 0.(transversions \(DNA only\).)ashow +249 162 gm +-0.04051 0.(Fixed gap penalty)ashow +249 270 gm +0.06027 0. 32 0.00602 0.(Gap insertion penalty, lower value, more gaps)awidthshow +260 162 gm +0.20385 0. 32 0.02038 0.(Floating gap penalty)awidthshow +260 270 gm +0.02777 0. 
32 0.00277 0.(Gap extension penalty, lower value, longer gaps)awidthshow +304 90 gm +0.11117 0.(Comments:)ashow +315 162 gm +-0.01083 0.(ClustalV is a directed multiple sequence alignment algorithm that)ashow +326 162 gm +0.06652 0. 32 0.00665 0.(aligns a set of sequences based on their level of similarity. It first)awidthshow +337 162 gm +0.05584 0. 32 0.00558 0.(uses a Lipman Peasron pairwise similarity scoring to find "clusters")awidthshow +348 162 gm +-0.06562 0.(of similar sequences, and pre-aligns those sequences. It then adds)ashow +359 162 gm +0.03463 0. 32 0.00346 0.(other sequences to the alignment in the order of their similarity so as)awidthshow +370 162 gm +-0.02696 0.(to produce the cleanest alignment.)ashow +392 162 gm +0.09170 0. 32 0.00917 0.(Warning: ClustalV only uses unambiguous character codes. It will also)awidthshow +403 162 gm +0.04348 0. 32 0.00434 0.(convert all sequences to upper case in the process of aligning. Clustal)awidthshow +414 162 gm +0.04180 0. 32 0.00418 0.(does not pass back comments, author etc. Be sure to keep copies of your)awidthshow +425 162 gm +0.15106 0. 32 0.01510 0.(sequences if you do not wish to lose this information.)awidthshow +F T cp +%%Page: ? 20 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(20)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.08074 0.(MFOLD - RNA secondary prediction)ashow +103 90 gm +-0.03694 0.(Author: Michael Zuker)ashow +125 90 gm +-0.17959 0.(Reference: )ashow +125 162 gm +0.13900 0. 32 0.01390 0.(M. Zuker)awidthshow +136 162 gm +0.27359 0. 32 0.02735 0.(On Finding All Suboptimal Foldings of an RNA Molecule.)awidthshow +147 162 gm +0.16525 0. 32 0.01652 0.(Science, 244, 48-52, \(1989\))awidthshow +169 162 gm +0.12847 0. 32 0.01284 0.(J. A. Jaeger, D. H. 
Turner and M. Zuker)awidthshow +180 162 gm +-0.06132 0.(Improved Predictions of Secondary Structures for RNA.)ashow +191 162 gm +0.25482 0. 32 0.02548 0.(Proc. Natl. Acad. Sci. USA, BIOCHEMISTRY, 86, 7706-7710, \(1989\))awidthshow +213 162 gm +0.12847 0. 32 0.01284 0.(J. A. Jaeger, D. H. Turner and M. Zuker)awidthshow +224 162 gm +-0.01690 0.(Predicting Optimal and Suboptimal Secondary Structure for RNA.)ashow +235 162 gm +0.13473 0. 32 0.01347 0.(in "Molecular Evolution: Computer Analysis of Protein and)awidthshow +246 162 gm +(Nucleic Acid Sequences", R. F. Doolittle ed.)show +257 162 gm +0.18035 0. 32 0.01803 0.(Methods in Enzymology, 183, 281-306 \(1989\))awidthshow +279 90 gm +-0.11924 0.(Parameters:)ashow +290 162 gm +-0.11352 0.(Linear/circular RNA fold)ashow +301 162 gm +0.25527 0. 32 0.02552 0.(ct File to save results)awidthshow +323 90 gm +0.11117 0.(Comments:)ashow +334 162 gm +0.06652 0. 32 0.00665 0.(MFOLD passes it's output to a program Zuk_to_gen that translates the secondary)awidthshow +345 162 gm +-0.01971 0.(structure prediction to a nested bracket \([]\) notation. This notation can then be used)ashow +356 162 gm +-0.00996 0.(in the Highlight Helix, and Draw Secondary structure \(LoopTool\) functions.)ashow +378 162 gm +-0.01683 0.(MFOLD currently does not support much in the way of additional parameters.)ashow +389 162 gm +-0.04089 0.(We hope to have all additional parameters available soon.)ashow +F T cp +%%Page: ? 21 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(21)show +92 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01799 0.(Blast - Basic Local Alignment Search Tool)ashow +114 90 gm +-0.25508 0.(Reference:)ashow +125 162 gm +0.16143 0. 32 0.01614 0.(Karlin, Samuel and Stephen F. Altschul \(1990\). 
Methods for)awidthshow +136 162 gm +-0.04147 0.(assessing the statistical significance of molecular sequence)ashow +147 162 gm +-0.01135 0.(features by using general scoring schemes, Proc. Natl. Acad.)ashow +158 162 gm +0.51742 0. 32 0.05174 0.(Sci. USA 87:2264-2268.)awidthshow +180 90 gm +0.46295 0. 32 0.04629 0.( )awidthshow +180 162 gm +0.27801 0. 32 0.02780 0.(Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W.)awidthshow +191 90 gm +0.46295 0. 32 0.04629 0.( )awidthshow +191 162 gm +0.10040 0. 32 0.01004 0.(Myers, and David J. Lipman \(1990\). Basic local alignment)awidthshow +202 90 gm +0.46295 0. 32 0.04629 0.( )awidthshow +202 162 gm +0.45684 0. 32 0.04568 0.(search tool, J. Mol. Biol. 215:403-410.)awidthshow +224 90 gm +0.46875 0. 32 0.04687 0.( )awidthshow +224 162 gm +0.40649 0. 32 0.04064 0.(Altschul, Stephen F. \(1991\). Amino acid substitution)awidthshow +235 90 gm +0.46875 0. 32 0.04687 0.( )awidthshow +235 162 gm +0.10742 0. 32 0.01074 0.(matrices from an information theoretic perspective. J. Mol.)awidthshow +246 90 gm +0.46295 0. 32 0.04629 0.( )awidthshow +246 162 gm +0.43884 0. 32 0.04388 0.(Biol. 219:555-565.)awidthshow +290 90 gm +-0.11924 0.(Parameters:)ashow +301 162 gm +-0.13816 0.(Which Database)ashow +301 270 gm +-0.09788 0.(Which nucleic or amino acid database)ashow +312 270 gm +-0.03448 0.(to search.)ashow +334 162 gm +-0.18505 0.(Word Size)ashow +334 270 gm +0.17608 0. 32 0.01760 0.(Length of initial hit. after locating a match of)awidthshow +345 270 gm +0.27908 0. 32 0.02790 0.(this length, alignment extension is attempted.)awidthshow +356 126 gm +-0.11082 0.(Blastn)ashow +367 162 gm +-0.11381 0.(Match score)ashow +367 270 gm +-0.03680 0.(Score for matches in secondary alignment extension)ashow +378 162 gm +-0.04492 0.(Mismatch score)ashow +378 270 gm +-0.02404 0.(Score for mismatches in secondary alignment extension)ashow +400 126 gm +0.49514 0. 32 0.04951 0.(Blastx, tblastn, blastp, blast3)awidthshow +411 162 gm +0.69580 0. 
32 0.06958 0.(Substitution Matrix)awidthshow +411 270 gm +0.38192 0. 32 0.03819 0.(PAM120 or PAM250)awidthshow +444 126 gm +0.11117 0.(Comments:)ashow +444 198 gm +-0.01263 0.(The report is loaded into a text editor. This should be saved as a new file)ashow +455 198 gm +-0.01432 0.(as the default file is removed after execution. The latest version of blast can)ashow +466 198 gm +0.30914 0. 32 0.03091 0.(be obtained via anonymous ftp to ncbi.nlm.nih.gov.)awidthshow +F T cp +%%Page: ? 22 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(22)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +0.14083 0. 32 0.01408 0.(FastA - Similarity search)awidthshow +103 126 gm +-0.25508 0.(Reference:)ashow +114 162 gm +0.31677 0. 32 0.03167 0.(W. R. Pearson and D. J. Lipman \(1988\),)awidthshow +125 162 gm +-0.00869 0.("Improved Tools for Biological Sequence Analysis", PNAS 85:2444-2448)ashow +147 162 gm +0.01358 0. 32 0.00135 0.(W. R. Pearson \(1990\) "Rapid and Sensitive Sequence)awidthshow +158 162 gm +0.26550 0. 32 0.02655 0.(Comparison with FASTP and FASTA" Methods in Enzymology 183:63-98)awidthshow +180 126 gm +-0.11924 0.(Parameters:)ashow +191 162 gm +-0.23434 0.(Database)ashow +191 306 gm +-0.12551 0.(Which database to search)ashow +202 162 gm +0.05493 0. 32 0.00549 0.(Number of alignments to report)awidthshow +213 162 gm +(SMATRIX)show +213 306 gm +0.26260 0. 32 0.02626 0.(Which similarity matrix to use)awidthshow +246 126 gm +0.11117 0.(Comments:)ashow +257 90 gm +0.47622 0. 32 0.04762 0.( )awidthshow +257 162 gm +-0.05303 0.(The FastA package includes several additional programs for pairwise alignment.)ashow +268 162 gm +-0.00224 0.(We have only included a bare bones link to FastA. 
We hope to include a more)ashow +279 162 gm +-0.00607 0.(complete setup for the actual 2.0 release.)ashow +F T cp +%%Page: ? 23 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(23)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +0.38360 0. 32 0.03836 0.(Assemble Contigs - CAP Contig Assembly Program)awidthshow +103 126 gm +-0.04878 0.(Author - Xiaoqiu Huang)ashow +114 162 gm +-0.03823 0.(Department of Computer Science)ashow +125 162 gm +-0.02279 0.(Michigan Technological University)ashow +136 162 gm +0.34759 0. 32 0.03475 0.(Houghton, MI 49931)awidthshow +147 162 gm +-0.01989 0.(E-mail: huang@cs.mtu.edu)ashow +169 162 gm +0.31494 0. 32 0.03149 0.(Minor modifications for I/O by S. Smith)awidthshow +191 126 gm +-0.23449 0.(Reference -)ashow +202 162 gm +0.05538 0. 32 0.00553 0.("A Contig Assembly Program Based on Sensitive Detection of)awidthshow +213 90 gm +( )show +213 162 gm +0.00946 0. 32 0.00094 0.(Fragment Overlaps" \(submitted to Genomics, 1991\))awidthshow +235 126 gm +-0.11924 0.(Parameters:)ashow +246 162 gm +0.21423 0. 32 0.02142 0.(Minimum overlap)awidthshow +246 306 gm +-0.11988 0.(Number of bases required for overlap)ashow +257 162 gm +-0.01672 0.(Percent match within overlap)ashow +257 306 gm +-0.10456 0.(Percentage match required in the overlap)ashow +268 306 gm +-0.07734 0.(region before merge is alowwed.)ashow +290 126 gm +0.11117 0.(Comments:)ashow +312 162 gm +-0.06814 0.(CAP returns the aligned sequences to the current editor window. The sequences are)ashow +323 162 gm +0.00427 0. 32 0.00042 0.(placed into contigs by setting the groupid. 
Cap does not change the order of the)awidthshow +334 162 gm +-0.05079 0.(sequences, and so the results should be sorted by group and offset \(see sort under the)ashow +345 162 gm +-0.02096 0.(Edit menu\).)ashow +F T cp +%%Page: ? 24 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(24)show +92 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.10661 0.(Lsadt - Least squares additive tree analysis)ashow +114 90 gm +0.18157 0. 32 0.01815 0.(Author: Geert De Soete, 'C' implementation by Mike Maciukenas University of Illinois)awidthshow +136 90 gm +-0.00590 0.(Reference:LSADT, 1983 Psychometrika, 1984 Quality and Quantity)ashow +158 90 gm +-0.11924 0.(Parameters:)ashow +169 162 gm +-0.02085 0.(Distance correction to use in distance matrix calculations \(see count below\).)ashow +180 162 gm +-0.03211 0.(What should be used for initial parameters estimates)ashow +191 162 gm +-0.12921 0.(Random number seed)ashow +202 162 gm +-0.05371 0.(Display method \(See TreeTool below\))ashow +224 90 gm +0.11117 0.(Comments:)ashow +235 162 gm +-0.02113 0.(The program has been rewritten in 'C' and will be included with the rRNA Database)ashow +246 162 gm +0.03906 0. 32 0.00390 0.(phylogenetic package being written at the University of Illinois Department of)awidthshow +257 162 gm +0.04248 0.(Microbiology.)ashow +279 162 gm +-0.01466 0.(Count is a short program to calculate a distance matrix from a sequence)ashow +290 162 gm +-0.01652 0.(alignment \(see below\).)ashow +334 90 gm +-0.00831 0.(Count - Distance matrix calculator)ashow +356 90 gm +0.45852 0. 
32 0.04585 0.(Author: Steven Smith)awidthshow +378 90 gm +-0.11924 0.(Parameters:)ashow +389 162 gm +-0.07836 0.(Correction method)ashow +389 306 gm +-0.01190 0.(Currently Jukes-Cantor or none)ashow +400 162 gm +-0.17300 0.(Include dashed columns)ashow +411 162 gm +-0.04403 0.(Match upper case to lower)ashow +444 90 gm +0.11117 0.(Comments:)ashow +455 162 gm +-0.02917 0.(Passes back a distance matrix in a format readable by LSADT.)ashow +510 90 gm +-0.06629 0.(Treetool - Tree drawing/manipulation)ashow +532 90 gm +-0.01724 0.(Author:)ashow +532 126 gm +0.06912 0. 32 0.00691 0.(Michael Maciukenas, University of Illinois)awidthshow +554 90 gm +0.11117 0.(Comments:)ashow +565 162 gm +-0.08711 0.(See included documentation for TreeTool usage.)ashow +F T cp +%%Page: ? 25 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(25)show +81 90 gm +10 fz +2 F /|______Times-Roman fnt +0.08056 0. 32 0.00805 0.(Phylip - Complete phylogenetic analysis package \(3.4\))awidthshow +103 90 gm +0.13824 0. 32 0.01382 0.(Author: Joe Felsenstein)awidthshow +125 90 gm +-0.25508 0.(Reference:)ashow +125 162 gm +0.12664 0. 32 0.01266 0.(Felsenstein, J. 1989. PHYLIP -- Phylogeny Inference Package \(Version 3.2\).)awidthshow +136 162 gm +0.18722 0. 32 0.01872 0.(Cladistics 5: 164-166)awidthshow +169 90 gm +0.11117 0.(Comments:)ashow +180 162 gm +0.13565 0. 32 0.01356 0.(Phylip is a very complete set of programs for phylogenetic analysis. GDE)awidthshow +191 162 gm +-0.00163 0.(simply formats selected data \(obeying any selection mask\) to Phylip, and runs)ashow +202 162 gm +0.06301 0. 32 0.00630 0.(the chosen program. Although all programs from Phylip are included, only)awidthshow +213 162 gm +0.11154 0. 
32 0.01115 0.(the programs pertaining to DNA/AA analysis are tied in to the menu system.)awidthshow +224 162 gm +-0.06831 0.(The complete set of documentation is included, and can be viewed directly from)ashow +235 162 gm +-0.00129 0.(GDE. It is strongly suggested that user read the documentation before attempting to)ashow +246 162 gm +(interpret results.)show +268 162 gm +-0.02252 0.(The full source code for Phylip can be obtained from genetics.washington.edu using)ashow +279 162 gm +0.34255 0. 32 0.03425 0.(anonymous ftp.)awidthshow +F T cp +%%Page: ? 26 +op +31 30 xl +1 1 pen +753 90 gm +(nc 31 30 761 582 6 rc)kp +0 gr +T 1 setTxMode +0 fs +{}mark T /Times-Roman /|______Times-Roman 0 rf +7 fz +2 F /|______Times-Roman fnt +0.34057 0. 32 0.03405 0.(GDE2.0 rev1)awidthshow +753 300 gm +12 fz +2 F /|______Times-Roman fnt +(26)show +92 90 gm +-0.10993 0.(Copyright Notice)ashow +115 90 gm +10 fz +2 F /|______Times-Roman fnt +-0.01510 0.(The Genetic Data Environment \(GDE\) software and documentation are not in the public domain. Portions)ashow +126 90 gm +-0.04327 0.(of this code are owned and copyrighted by the The Board of Trustees of the University of Illinois and by)ashow +137 90 gm +-0.03067 0.(Steven Smith at the Harvard Genome Laboratory. This release of the GDE program and documentation)ashow +148 90 gm +-0.00248 0.(may not be sold, or incorporated into a commercial product, in whole or in part without the expressed)ashow +159 90 gm +0.24673 0. 32 0.02467 0.(written consent of the University of Illinois and of its author, Steven Smith.)awidthshow +181 90 gm +-0.01826 0.(All interested parties may redistribute the GDE as long as all copies are accompanied by this)ashow +192 90 gm +0.05432 0. 32 0.00543 0.(documentation, and all copyright notices remain intact. Parties interested in redistribution must do so on a)awidthshow +203 90 gm +-0.03430 0.(non-profit basis, charging only for cost of media. 
Modifications to the GDE core editor should be forwarded)ashow +214 90 gm +0.04577 0. 32 0.00457 0.(to the author Steven Smith at the address given below for inclusion into future releases. External programs)awidthshow +225 90 gm +-0.02499 0.(used by the GDE are copyright by, and are the property of their respective authors unless otherwise stated.)ashow +258 90 gm +0.02899 0. 32 0.00289 0.(While all attempts have been made to insure the integrity of these programs:)awidthshow +280 90 gm +12 fz +2 F /|______Times-Roman fnt +-0.29307 0.(Disclaimer)ashow +303 90 gm +10 fz +2 F /|______Times-Roman fnt +(THE UNIVERSITY OF ILLINOIS, HARVARD UNIVERSITY AND THE AUTHOR, STEVEN SMITH)show +314 90 gm +0.21240 0. 32 0.02124 0.(GIVE NO WARRANTIES, EXPRESSED OR IMPLIED FOR THE SOFTWARE AND)awidthshow +325 90 gm +-0.03628 0.(DOCUMENTATION PROVIDED, INCLUDING, BUT NOT LIMITED TO WARRANTY OF)ashow +336 90 gm +0.20996 0. 32 0.02099 0.(MERCHANTABILITY AND WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE. User)awidthshow +347 90 gm +-0.06015 0.(understands the software is a research tool for which no warranties as to capabilities or accuracy are made,)ashow +358 90 gm +-0.01759 0.(and user accepts the software "as is." User assumes the entire risk as to the results and performance of the)ashow +369 90 gm +-0.05845 0.(software and documentation. The above parties cannot be held liable for any direct, indirect, consequential)ashow +380 90 gm +-0.00201 0.(or incidental damages with respect to any claim by user or any third party on account of, or arising from the)ashow +391 90 gm +-0.05157 0.(use of software and associated materials. 
This disclaimer covers both the GDE core editor and all external)ashow +402 90 gm +-0.01464 0.(programs used by the GDE.)ashow +F T cp +%%Trailer +cd +end +%%Pages: 26 0 +%%EOF diff --git a/HGL_SRC/Alloc.c b/HGL_SRC/Alloc.c new file mode 100755 index 0000000..6b227a8 --- /dev/null +++ b/HGL_SRC/Alloc.c @@ -0,0 +1,331 @@ +#include +#include "global_defs.h" +#include + +/* +* Alloc.c +* Memory functions for Harvard Genome Laboratory. +* Last revised 6/3/91 +* +* Print error message, and die +*/ +void ErrorOut(code,string) +int code; +char *string; +{ + if (code == 0) + { + fprintf(stderr,"Error:%s\n",string); + exit(1); + } + return; +} + + +/* +* Calloc count*size bytes with memory aligned to size. +* Return pointer to new block. +*/ +char *Calloc(count,size) +int count,size; +/*unsigned count,size;*/ +{ + char *temp; + temp = calloc(count,(unsigned)size); + + if(count*size == 0) + fprintf(stderr,"Allocate ZERO blocks?\n"); + ErrorOut(temp,"Cannot allocate memory"); + return(temp); +} + +/* +* Reallocate memory at block, expand to size. +* Return pointer to (possibly) new block. +*/ +char *Realloc(block,size) +char *block; +unsigned size; +{ + char *temp; + temp=realloc(block,size); + ErrorOut(temp,"Cannot change memory size"); + return(temp); +} + +/* +* Free block Allocated by Calloc. +* Return error code from free(). +*/ + +void Cfree(block) +char* block; +{ + extern void Warning(); + if(block != NULL) + { +#ifdef SUN4 + if(free(block) == 0) + Warning("Error in Cfree..."); +#endif + } +/* else + Warning("Error in Cfree, NULL block"); +*/ + return; +} + + + +/* +* Print Warning message to stderr. +*/ +void Warning(s) +char *s; +{ + fprintf(stderr,"Warning:%s\n",s); + return; +} + + +/* +* Get array element from a sequence structure. The index +* is relative to the alignment. 
+*/ +char GetElem(seq,indx) +Sequence *seq; /*Sequence to search*/ +int indx; /*Index relative to the global offset*/ +{ + if((indxoffset) || (indx >= seq->offset + seq->seqlen)) + return('-'); + else + return((char)(seq->c_elem[indx-seq->offset])); +} + +/* +* Replace the array element at seq[indx] with elem. The index +* is relative to the alignment. +*/ + +void ReplaceElem(seq,indx,elem) +Sequence *seq; /*Sequence */ +int indx; /*Position to overwrite (replace) */ +unsigned char elem; /*Character to replace with */ +{ + int j; + extern char *Calloc(); + int width; + +/* +* If no c_elem has been allocated yet... +*/ +/* if(index("abcdefghijklmnopqrstuvwxyz-0123456789",elem)==0) + fprintf(stderr,"Warning (ReplaceElem) elem = %c\n",elem); +*/ + width = seq->offset-indx; + if(seq->seqlen == 0 && elem != '-') + { + if(seq->seqmaxlen == 0 || seq->c_elem == NULL) + { + seq->c_elem = Calloc(4,sizeof(char)); + seq->offset = indx; + seq->seqmaxlen = 4; + } + seq->seqlen = 1; + seq->c_elem[0] = elem; + seq->offset = indx; + } +/* +* If inserting before the c_elem (< offset) +*/ + else if((indxoffset) && (elem!='-')) + { + seq->seqmaxlen += width; + seq->c_elem = Realloc(seq->c_elem,seq->seqmaxlen*sizeof(char)); + for(j=seq->seqmaxlen-1;j>=width;j--) + seq->c_elem[j] = seq->c_elem[j-width]; + for(j=0;jc_elem[j] = '-'; + seq->c_elem[0] = elem; + seq->seqlen += width; + seq->offset = indx; + } +/* +* if inserting after c_elem (indx > offset + seqlen) +*/ + else if((indx>=seq->offset+seq->seqlen) && (elem!='-')) + { + if(indx-seq->offset >= seq->seqmaxlen) + { + seq->seqmaxlen = indx-seq->offset+256; + seq->c_elem = Realloc(seq->c_elem,seq->seqmaxlen* + sizeof(char)); + } + for(j=seq->seqlen;jseqmaxlen;j++) + seq->c_elem[j] = '-'; + seq->c_elem[indx-seq->offset] = elem; + seq->seqlen = indx-seq->offset+1; + } + else + { + if(indx-(seq->offset)>=0 && indx-(seq->offset)seqlen) + seq->c_elem[indx-(seq->offset)] = elem; + else if(elem!='-') + fprintf(stderr,"%c better be a 
-\n",elem); + } + return; +} + + +/* +* InsertElem is a modification of InsertElems, and should be +* optimized. s.s.5/6/91 +*/ +int InsertElem(a,b,ch) +Sequence *a; /* Sequence */ +int b; /*Position to insert BEFORE*/ +char ch; /*element to insert */ +{ + char c[2]; + c[0]=ch; + c[1] = '\0'; + + return (InsertElems(a,b,c)); +} + + +/* +* Make a copy of Sequence one, place in Sequence two +*/ +void SeqCopy(one,two) +Sequence *one,*two; +{ + int j; + *two = *one; + if(two->seqmaxlen) + two->c_elem = Calloc(one->seqmaxlen,sizeof(char)); + if(two->commentsmaxlen) + two->comments = Calloc(one->commentsmaxlen,sizeof(char)); + for(j=0;jseqlen;j++) + two->c_elem[j] = one->c_elem[j]; + for(j=0;jcommentslen;j++) + two->comments[j] = one->comments[j]; + return; +} + + +/* +* Normalize seq (remove leading indels in the c_elem; +*/ +void SeqNormal(seq) +Sequence *seq; +{ + int len,j,shift_width,trailer; + char *c_elem; + len = seq->seqlen; + + c_elem = seq->c_elem; + + if(len == 0) return; + + for(shift_width=0; (shift_widthseqlen -= shift_width; + seq->offset += shift_width; + for(trailer=seq->seqlen-1;(c_elem[trailer] =='-' || + c_elem[trailer] == '\0') && trailer>=0; + trailer--) + c_elem[trailer] = '\0'; + seq->seqlen = trailer+1; + return; +} + +void SeqRev(seq,min,max) +Sequence *seq; +int min,max; +/* + SeqRev will reverse a given sequence within a window from + min to max (inclusive). The idea is to allow several sequences + to be reversed in such a manner as to allow them to remain aligned. 
+ + BEFORE AFTER + min | | max min | |max + aaaacccgggttt tttgggcccaaaa + aaa-cccg-g ---g-gccc-aaa + ----cccgggt --tgggccc +*/ +{ + int j; + char temp1,temp2; + extern char GetElem(); + extern void ReplaceElem(); + + for(j=0;j<= (max-min)/2;j++) + { + temp1 = GetElem(seq,min+j); + temp2 = GetElem(seq,max-j); + ReplaceElem(seq,min+j,(unsigned char)temp2); + ReplaceElem(seq,max-j,(unsigned char)temp1); + } + + seq->direction *= -1; + + SeqNormal(seq); + return; +} + + +/* sequence complementing. */ +void SeqComp(seq) +Sequence *seq; +{ + int j; + unsigned char in,out,case_bit; + char *c; + static int tmatr[16] = {'-','a','c','m','g','r','s','v', + 't','w','y','h','k','d','b','n'}; + + static int matr[128] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x01,0x0e,0x02,0x0d,0,0,0x04,0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06, + 0x08,0x08,0x07,0x09,0x00,0x0a,0,0,0,0,0,0,0,0x01,0x0e,0x02,0x0d,0,0,0x04, + 0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,0x08,0x08,0x07,0x09,0x00,0x0a, + 0,0,0,0,0x00,0 + }; + + c = seq->c_elem; + for(j=0;jseqlen;j++) + { +/* +* Save Case bit... 
+*/ + case_bit = c[j] & 32; + out = 0; + in = matr[c[j]]; + if(in&1) + out|=8; + if(in&2) + out|=4; + if(in&4) + out|=2; + if(in&8) + out|=1; + + if(case_bit == 0) + c[j] = toupper(tmatr[out]); + else + c[j] = tmatr[out]; + } + + seq->direction *= -1; + seq->strandedness = ( seq->strandedness == 2)?1: + ( seq->strandedness == 1)?2: + 0; + return; + +} diff --git a/HGL_SRC/Alloc.o b/HGL_SRC/Alloc.o new file mode 100755 index 0000000..0269c43 Binary files /dev/null and b/HGL_SRC/Alloc.o differ diff --git a/HGL_SRC/Consto01mask b/HGL_SRC/Consto01mask new file mode 100755 index 0000000..16b4c92 Binary files /dev/null and b/HGL_SRC/Consto01mask differ diff --git a/HGL_SRC/Consto01mask.c b/HGL_SRC/Consto01mask.c new file mode 100755 index 0000000..94bd732 --- /dev/null +++ b/HGL_SRC/Consto01mask.c @@ -0,0 +1,27 @@ +#ifndef _GLOBAL_DEFS_H +#define _GLOBAL_DEFS_H +#include "global_defs.h" +#endif + +main() +{ + Sequence tSeq; + int ii, jj, rr; + char acgt[128]; + + for(ii = 0; ii < 128; ii++) + acgt[ii] = '0'; + + acgt[0x41] = acgt[0x43] = acgt[0x47] = acgt[0x54] = acgt[0x55] = '1'; + acgt[0x61] = acgt[0x63] = acgt[0x67] = acgt[0x74] = acgt[0x75] = '1'; + + while((rr = ReadRecord(stdin, &tSeq)) != -1) + { + strcpy(tSeq.type, "MASK"); + for(ii = 0; ii < tSeq.seqlen; ii++) + { + tSeq.c_elem[ii] = acgt[tSeq.c_elem[ii]]; + } + WriteRecord(stdout, tSeq, NULL, 0); + } +} diff --git a/HGL_SRC/DotPlotTool b/HGL_SRC/DotPlotTool new file mode 100755 index 0000000..b0dfb05 Binary files /dev/null and b/HGL_SRC/DotPlotTool differ diff --git a/HGL_SRC/DotPlotTool.c b/HGL_SRC/DotPlotTool.c new file mode 100755 index 0000000..cbad5c6 --- /dev/null +++ b/HGL_SRC/DotPlotTool.c @@ -0,0 +1,1136 @@ +/******************************** + * + * To compile: + * + * cc -o DotPlotTool plot.c HGLfuncs.c Alloc.c ChooseFile.c + * -lxview -lolgx -lX11 + * + * Notes: Set canvas width and height to fit the max_width when + * loading a dataset. 
Change the viewable size by changing + * the viewable_length of the scrollbars. + * + ********************************/ + +#ifndef _GLOBAL_DEFS_H +#include "global_defs.h" +#define _GLOBAL_DEFS_H +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define POS2(array, i, j) (array[2*(i)+(j)]) + +#define WHITE 0 +#define BLACK 8 +#define RED 7 + +#define min_size 1 +#define max_size 4 +#define min_width 1 +#define min_filter 1 +#define max_filter 100 +#define min_cutoff -7 +#define max_cutoff 17 + +#define mmscore 0 /* WHITE */ +#define pmscore 9 /* GREY */ +#define mscore 8 /* BLACK */ +#define window_size 2 +#define margin 50 + +Frame frame=(Frame)NULL, prop_subframe; +Panel panel_1; /* panel_1 width should change with the size of the + * dataset. + */ +Panel width_panel=(Panel)NULL; /* There is a limitation on the max size of a + * canvas. When loading a large dataset, + * max_width may have to be reduced to fit into + * the canvas. So, width_panel has to be saved + * to allow changes. + */ +Canvas canvas; +Xv_Window paint_win; +Display *display; +GC gc; +Window xwin; +Cms cms; +Scrollbar h_scrollbar, v_scrollbar; +int Xaxis, Yaxis; + +int size = 2; +int max_width, width; +int cutoff = 1; +int filter = 1; +int direction = 0; /* both / and \. */ + /* 1: \ or direct + -1: / or reversed */ +char footnote[256], heading[128]; + +int drawarea_min_y, drawarea_max_y; +unsigned long *colors; +char clear_mark = 'F', compd = 'N'; +Sequence *tSeq; + +int tmp_direction, tmp_size,tmp_width,tmp_filter, tmp_cutoff; +char tmp_compd; +int mark_x=0; /* record the x,y of the paint window, */ +int mark_y=0; /* not the canvas or the view window. */ +int set_offset;/* The offset of the sequences being displayed. 
*/ +int AAmatr[256][256]; + +short plot_bits[] = { +#include "plot.icon" +}; + +main(argc, argv) + int argc; + char *argv[]; +{ + int LoadHGLData(); + extern exit_proc(); + extern size_proc(); + extern width_proc(); + extern footer_proc(); + extern canvas_repaint_proc(); + extern Frame load_file(); + extern Load(); + extern show_prop_frame(); + extern dir_proc(); + extern compd_proc(); + extern ok_proc(); + extern filter_proc(); + extern cutoff_proc(); + extern ToDisplay(); + int i, layer, ii, jj, kk, ll; + char GetBase(); + Icon icon; + Server_image image; + + static Xv_singlecolor cms_colors[] = { + {255, 255, 255}, /* WHITE */ + {128, 0, 255}, /* purple */ + {0, 0, 128}, /* navy blue */ + {0, 0, 255 }, /* blue */ + {0, 255, 0}, /* green */ + {255, 255, 0}, /* yellow */ + {255, 165, 0}, /* orange */ + {225, 0, 0}, /* RED */ + {0, 0, 0}, /* black */ + {192, 192, 192}, /* GREY */ + {240, 240, 240}, + {210, 210, 210}, + {180, 180, 180}, + {150, 150, 150}, + {120, 120, 120}, + {90, 90, 90}, + {60,60,60}, + {30, 30, 30}, + {0, 0, 0} + }; + + FILE *fp; + Rect *rect; + Panel panel; + int pam; + static char aa[] = {"arndcqeghilkmfpstwyvbzx*"}; + static pam120[24][24] = { + /*a r n d c q e g h i l k m f p s t w y v b z x * */ +/*a*/ 3,-3,-1, 0,-3,-1, 0, 1,-3,-1,-3,-2,-2,-4, 1, 1, 1,-7,-4, 0, 1, 0,-1,-8, +/*r*/ -3, 6,-1,-3,-4, 1,-3,-4, 1,-2,-4, 2,-1,-5,-1,-1,-2, 1,-5,-3,-1, 0,-2,-8, +/*n*/ -1,-1, 4, 2,-5, 0, 1, 0, 2,-2,-4, 1,-3,-4,-2, 1, 0,-4,-2,-3, 4, 1,-1,-8, +/*d*/ 0,-3, 2, 5,-7, 1, 3, 0, 0,-3,-5,-1,-4,-7,-3, 0,-1,-8,-5,-3, 5, 3,-2,-8, +/*c*/ -3,-4,-5,-7, 9,-7,-7,-4,-4,-3,-7,-7,-6,-6,-4, 0,-3,-8,-1,-3,-4,-6,-4,-8, +/*q*/ -1, 1, 0, 1,-7, 6, 2,-3, 3,-3,-2, 0,-1,-6, 0,-2,-2,-6,-5,-3, 1, 5,-1,-8, +/*e*/ 0,-3, 1, 3,-7, 2, 5,-1,-1,-3,-4,-1,-3,-7,-2,-1,-2,-8,-5,-3, 3, 5,-1,-8, +/*g*/ 1,-4, 0, 0,-4,-3,-1, 5,-4,-4,-5,-3,-4,-5,-2, 1,-1,-8,-6,-2, 1,-1,-2,-8, +/*h*/ -3, 1, 2, 0,-4, 3,-1,-4, 7,-4,-3,-2,-4,-3,-1,-2,-3,-3,-1,-3, 2, 2,-2,-8, +/*i*/ -1,-2,-2,-3,-3,-3,-3,-4,-4, 6, 1,-3, 
1, 0,-3,-2, 0,-6,-2, 3,-2,-2,-1,-8, +/*l*/ -3,-4,-4,-5,-7,-2,-4,-5,-3, 1, 5,-4, 3, 0,-3,-4,-3,-3,-2, 1,-3,-2,-2,-8, +/*k*/ -2, 2, 1,-1,-7, 0,-1,-3,-2,-3,-4, 5, 0,-7,-2,-1,-1,-5,-5,-4, 1, 0,-2,-8, +/*m*/ -2,-1,-3,-4,-6,-1,-3,-4,-4, 1, 3, 0, 8,-1,-3,-2,-1,-6,-4, 1,-3,-1,-2,-8, +/*f*/ -4,-5,-4,-7,-6,-6,-7,-5,-3, 0, 0,-7,-1, 8,-5,-3,-4,-1, 4,-3,-4,-5,-3,-8, +/*p*/ 1,-1,-2,-3,-4, 0,-2,-2,-1,-3,-3,-2,-3,-5, 6, 1,-1,-7,-6,-2,-1, 0,-2,-8, +/*s*/ 1,-1, 1, 0, 0,-2,-1, 1,-2,-2,-4,-1,-2,-3, 1, 3, 2,-2,-3,-2, 1, 0,-1,-8, +/*t*/ 1,-2, 0,-1,-3,-2,-2,-1,-3, 0,-3,-1,-1,-4,-1, 2, 4,-6,-3, 0, 1,-1,-1,-8, +/*w*/ -7, 1,-4,-8,-8,-6,-8,-8,-3,-6,-3,-5,-6,-1,-7,-2,-6,12,-2,-8,-5,-6,-5,-8, +/*y*/ -4,-5,-2,-5,-1,-5,-5,-6,-1,-2,-2,-5,-4, 4,-6,-3,-3,-2, 8,-3,-2,-4,-3,-8, +/*v*/ 0,-3,-3,-3,-3,-3,-3,-2,-3, 3, 1,-4, 1,-3,-2,-2, 0,-8,-3, 5,-2,-2,-1,-8, +/*b*/ 1,-1, 4, 5,-4, 1, 3, 1, 2,-2,-3, 1,-3,-4,-1, 1, 1,-5,-2,-2, 6, 4,-1,-8, +/*z*/ 0, 0, 1, 3,-6, 5, 5,-1, 2,-2,-2, 0,-1,-5, 0, 0,-1,-6,-4,-2, 4, 6, 0,-8, +/*x*/ -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1, 0,-2,-8, +/***/ -8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8, 1}; + + static pam250[24][24] = { + /*a r n d c q e g h i l k m f p s t w y v b z x * */ + 2,-2, 0, 0,-2, 0, 0, 1,-1,-1,-2,-1,-1,-3, 1, 1, 1,-6,-3, 0, 2, 1, 0,-8, + -2, 6, 0,-1,-4, 1,-1,-3, 2,-2,-3, 3, 0,-4, 0, 0,-1, 2,-4,-2, 1, 2,-1,-8, + 0, 0, 2, 2,-4, 1, 1, 0, 2,-2,-3, 1,-2,-3, 0, 1, 0,-4,-2,-2, 4, 3, 0,-8, + 0,-1, 2, 4,-5, 2, 3, 1, 1,-2,-4, 0,-3,-6,-1, 0, 0,-7,-4,-2, 5, 4,-1,-8, + -2,-4,-4,-5,12,-5,-5,-3,-3,-2,-6,-5,-5,-4,-3, 0,-2,-8, 0,-2,-3,-4,-3,-8, + 0, 1, 1, 2,-5, 4, 2,-1, 3,-2,-2, 1,-1,-5, 0,-1,-1,-5,-4,-2, 3, 5,-1,-8, + 0,-1, 1, 3,-5, 2, 4, 0, 1,-2,-3, 0,-2,-5,-1, 0, 0,-7,-4,-2, 4, 5,-1,-8, + 1,-3, 0, 1,-3,-1, 0, 5,-2,-3,-4,-2,-3,-5, 0, 1, 0,-7,-5,-1, 2, 1,-1,-8, + -1, 2, 2, 1,-3, 3, 1,-2, 6,-2,-2, 0,-2,-2, 0,-1,-1,-3, 0,-2, 3, 3,-1,-8, + -1,-2,-2,-2,-2,-2,-2,-3,-2, 5, 2,-2, 2, 1,-2,-1, 0,-5,-1, 4,-1,-1,-1,-8, + 
-2,-3,-3,-4,-6,-2,-3,-4,-2, 2, 6,-3, 4, 2,-3,-3,-2,-2,-1, 2,-2,-1,-1,-8, + -1, 3, 1, 0,-5, 1, 0,-2, 0,-2,-3, 5, 0,-5,-1, 0, 0,-3,-4,-2, 2, 2,-1,-8, + -1, 0,-2,-3,-5,-1,-2,-3,-2, 2, 4, 0, 6, 0,-2,-2,-1,-4,-2, 2,-1, 0,-1,-8, + -3,-4,-3,-6,-4,-5,-5,-5,-2, 1, 2,-5, 0, 9,-5,-3,-3, 0, 7,-1,-3,-4,-2,-8, + 1, 0, 0,-1,-3, 0,-1, 0, 0,-2,-3,-1,-2,-5, 6, 1, 0,-6,-5,-1, 1, 1,-1,-8, + 1, 0, 1, 0, 0,-1, 0, 1,-1,-1,-3, 0,-2,-3, 1, 2, 1,-2,-3,-1, 2, 1, 0,-8, + 1,-1, 0, 0,-2,-1, 0, 0,-1, 0,-2, 0,-1,-3, 0, 1, 3,-5,-3, 0, 2, 1, 0,-8, + -6, 2,-4,-7,-8,-5,-7,-7,-3,-5,-2,-3,-4, 0,-6,-2,-5,17, 0,-6,-4,-4,-4,-8, + -3,-4,-2,-4, 0,-4,-4,-5, 0,-1,-1,-4,-2, 7,-5,-3,-3, 0,10,-2,-2,-3,-2,-8, + 0,-2,-2,-2,-2,-2,-2,-1,-2, 4, 2,-2, 2,-1,-1,-1, 0,-6,-2, 4, 0, 0,-1,-8, + 2, 1, 4, 5,-3, 3, 4, 2, 3,-1,-2, 2,-1,-3, 1, 2, 2,-4,-2, 0, 6, 5,-1,-8, + 1, 2, 3, 4,-4, 5, 5, 1, 3,-1,-1, 2, 0,-4, 1, 1, 1,-4,-3, 0, 5, 6, 0,-8, + 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1, 0,-1,-8, + -8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8, 1}; + + /* malloc_debug(2); */ + + if(argc == 1) + { + fprintf(stderr, "An input file is required.\n"); + fprintf(stderr, "Usage: %s filename [pam120|pam250(If AA seqs)]\n", + argv[0]); + exit(0); + } + + set_offset = LoadHGLData(argv[1]); + + /* setup comparison AAmatrix. 
*/ + + for(ii = 0; ii < 256; ii++) + for(jj = 0; jj < 256; jj++) + AAmatr[ii][jj] = -100; + + pam = 250; + if(argc > 2) + { + if(strcmp(argv[2], "pam120") == 0) + pam = 120; + else if(strcmp(argv[2], "pam250") != 0) + { + fprintf(stderr, "Incorrect pam name: %s\n", argv[2]); + exit(1); + } + } + + for(ii = 0; ii < strlen(aa); ii++) + for(jj = 0; jj < strlen(aa); jj++) + { + AAmatr[aa[ii]][aa[jj]] = + AAmatr[aa[ii]-32][aa[jj]-32] = + AAmatr[aa[ii]][aa[jj]-32] = + AAmatr[aa[ii]-32][aa[jj]] = + (pam==250)?pam250[ii][jj]:pam120[ii][jj]; + } + + tmp_cutoff = cutoff; + tmp_width = width; + tmp_direction = direction; + tmp_size = size; + tmp_filter = filter; + tmp_compd = compd; + + xv_init(XV_INIT_ARGS, argc, argv, NULL); + + frame = xv_create(XV_NULL, FRAME, + FRAME_LABEL, heading, + FRAME_SHOW_FOOTER, TRUE, + FRAME_LEFT_FOOTER, "Mouse Location:", + FRAME_RIGHT_FOOTER, footnote, + NULL); + + panel_1 = (Panel) xv_create(frame, PANEL, + NULL); + + (void) xv_create(panel_1,PANEL_BUTTON, + PANEL_LABEL_STRING, "Properties...", + PANEL_NOTIFY_PROC, show_prop_frame, + NULL); + + (void) xv_create(panel_1,PANEL_BUTTON, + PANEL_LABEL_STRING, "Load", + PANEL_NOTIFY_PROC, Load, + NULL); + + (void) xv_create(panel_1,PANEL_BUTTON, + PANEL_LABEL_STRING, "Exit", + PANEL_NOTIFY_PROC, exit_proc, + NULL); + + window_fit_height(panel_1); + + canvas = (Canvas) xv_create(frame, CANVAS, + CANVAS_X_PAINT_WINDOW, TRUE, + CANVAS_REPAINT_PROC, canvas_repaint_proc, + CANVAS_AUTO_CLEAR, FALSE, + CANVAS_AUTO_SHRINK, FALSE, + CANVAS_AUTO_EXPAND, FALSE, + CANVAS_WIDTH, Xaxis*max_width+margin, + CANVAS_HEIGHT,Yaxis*max_width+margin, + XV_WIDTH, MIN(600, Xaxis*width)+margin, + XV_HEIGHT,MIN(400, Yaxis*width)+margin, + WIN_BELOW, panel_1, + CANVAS_RETAINED, FALSE, + NULL); +/* + printf("Canvas_WIDTH=%d _HEIGHT=%d\n", + (int)xv_get(canvas, CANVAS_WIDTH), + (int)xv_get(canvas, CANVAS_HEIGHT)); +*/ + paint_win = (Xv_Window) canvas_paint_window(canvas); + xv_set(paint_win, + WIN_BIT_GRAVITY, ForgetGravity, + 
WIN_CONSUME_EVENTS, + MS_LEFT, + NULL, + WIN_EVENT_PROC, footer_proc, + NULL); + + h_scrollbar = (Scrollbar) xv_create(canvas, SCROLLBAR, + SCROLLBAR_DIRECTION, SCROLLBAR_HORIZONTAL, + SCROLLBAR_OBJECT_LENGTH, Xaxis*width+margin, + SCROLLBAR_VIEW_START, 0, + NULL); + + v_scrollbar = (Scrollbar) xv_create(canvas, SCROLLBAR, + SCROLLBAR_DIRECTION, SCROLLBAR_VERTICAL, + SCROLLBAR_OBJECT_LENGTH, Yaxis*width+margin, + SCROLLBAR_VIEW_START, 0, + NULL); + + cms = xv_create((Cms)NULL, CMS, + CMS_SIZE, 19, + CMS_TYPE, XV_DYNAMIC_CMS, + CMS_COLORS, cms_colors, + NULL); + + xv_set(canvas, WIN_CMS, cms, + WIN_INHERIT_COLORS, FALSE, + WIN_BACKGROUND_COLOR, WHITE, + NULL); + + window_fit(canvas); + + prop_subframe = (Frame)xv_create(frame, FRAME_CMD, + FRAME_LABEL, "Properties", + NULL); + + panel = (Panel)xv_get(prop_subframe, FRAME_CMD_PANEL); + (void) xv_set(panel, PANEL_LAYOUT, PANEL_VERTICAL,NULL); + + (void) xv_create(panel, PANEL_SLIDER, + PANEL_LABEL_STRING, "Dot Size: ", + PANEL_VALUE, size, + PANEL_MIN_VALUE, min_size, + PANEL_MAX_VALUE, max_size, + PANEL_SLIDER_WIDTH, 100, + PANEL_TICKS, 4, + PANEL_NOTIFY_PROC, size_proc, + NULL); + + width_panel = xv_create(panel, PANEL_SLIDER, + PANEL_LABEL_STRING, "Dot Width: ", + PANEL_VALUE, width, + PANEL_MIN_VALUE, min_width, + PANEL_MAX_VALUE, max_width, + PANEL_SLIDER_WIDTH, 100, + PANEL_TICKS, 5, + PANEL_NOTIFY_PROC, width_proc, + NULL); + + (void) xv_create(panel, PANEL_SLIDER, + PANEL_LABEL_STRING, "Min Match Length: ", + PANEL_VALUE, filter, + PANEL_MIN_VALUE, min_filter, + PANEL_MAX_VALUE, max_filter, + PANEL_SLIDER_WIDTH, 100, + PANEL_TICKS, 5, + PANEL_NOTIFY_PROC, filter_proc, + NULL); + + (void)xv_create(panel, PANEL_CHOICE, + PANEL_LABEL_STRING, "Match Direction", + PANEL_CHOICE_STRINGS, + "Direct", + "Reversed", + "Both", + NULL, + PANEL_NOTIFY_PROC, dir_proc, + PANEL_VALUE, 0, + NULL); + + (void)xv_create(panel, PANEL_CHOICE, + PANEL_LABEL_STRING, "Complemented(for NN only):", + PANEL_CHOICE_STRINGS, "No", "Yes", 
NULL, + PANEL_NOTIFY_PROC, compd_proc, + PANEL_VALUE, 0, + NULL); + + (void) xv_create(panel, PANEL_SLIDER, + PANEL_LABEL_STRING, "Cutoff PAM250 score(for AA only):", + PANEL_VALUE, cutoff, + PANEL_MIN_VALUE, min_cutoff, + PANEL_MAX_VALUE, max_cutoff, + PANEL_SLIDER_WIDTH, 100, + PANEL_TICKS, 5, + PANEL_NOTIFY_PROC, cutoff_proc, + NULL); + + (void)xv_create(panel, PANEL_BUTTON, + PANEL_LABEL_STRING, "OK", + PANEL_NOTIFY_PROC, ok_proc, + NULL); + + window_fit(panel); + window_fit(prop_subframe); + + colors = (unsigned long *)xv_get(canvas, WIN_X_COLOR_INDICES); + + display = (Display *)xv_get(paint_win, XV_DISPLAY); + xwin = (Window)xv_get(paint_win, XV_XID); + gc = DefaultGC(display, DefaultScreen(display)); + + xv_set(panel_1,XV_WIDTH,MIN(600, Xaxis*width)+margin,NULL); + window_fit(frame); + + rect = (Rect *)xv_get(canvas,CANVAS_VIEWABLE_RECT,paint_win); + XClearArea(display, xwin, + rect->r_left, rect->r_top, + rect->r_width, rect->r_height, + 0); + + image = (Server_image)xv_create((Cms)NULL, SERVER_IMAGE, + XV_WIDTH, 64, + XV_HEIGHT, 64, + SERVER_IMAGE_BITS, plot_bits, + NULL); + + icon = (Icon)xv_create(frame, ICON, + ICON_IMAGE, image, +/* ICON_TRANSPARENT, TRUE,*/ + WIN_FOREGROUND_COLOR, WHITE, + WIN_BACKGROUND_COLOR, BLACK, + XV_LABEL, "DotPlotTool", + NULL); + + xv_set(frame, FRAME_ICON, icon, NULL); + + xv_main_loop(frame); +} + + + +int LoadHGLData(name_str) +char *name_str; +{ + int iSeq, ii, jj, kk, YY, tmp, cnt_match; + FILE *fp; + + if(strcmp(name_str, "") == 0) + return -1; + + if((fp = fopen(name_str, "r")) == NULL) + { + fprintf(stderr, "Can't open file %s.\n", name_str); + exit(1); + } + + iSeq = 0; + tSeq = (Sequence *)Calloc(2, sizeof(Sequence)); + + while(iSeq<2 && (ReadRecord(fp, &(tSeq[iSeq]))) != -1) + { + SeqNormal(&(tSeq[iSeq++])); + }; + if(iSeq == 1) + { + CopyRecord(&(tSeq[1]), &(tSeq[0])); + iSeq++; + } + + fclose(fp); + + Xaxis = strlen(tSeq[0].c_elem); + Yaxis = strlen(tSeq[1].c_elem); + max_width = MIN(6, MAX(1, MIN(28000/Xaxis, 
28000/Yaxis))); + width = MAX(1, max_width/2); + if(width_panel != (Panel)NULL) + xv_set(width_panel, PANEL_MAX_VALUE, max_width, NULL); + + sprintf(heading, "PLOT : %s", name_str); + sprintf(footnote, "X-axis: %s Y-axis: %s ", + (tSeq[0].name[0]!='\0')?tSeq[0].name:tSeq[0].sequence_ID, + (tSeq[1].name[0]!='\0')?tSeq[1].name:tSeq[1].sequence_ID); + + if(frame != (Frame)NULL) + { + XClearWindow(display, xwin); + + (void)xv_set(canvas, + XV_WIDTH, MIN(600, Xaxis*width)+margin, + XV_HEIGHT,MIN(400, Yaxis*width)+margin, + CANVAS_WIDTH, Xaxis*max_width+margin, + CANVAS_HEIGHT,Yaxis*max_width+margin, + NULL); + + xv_set(panel_1,XV_WIDTH,(int)xv_get(canvas,XV_WIDTH),NULL); + window_fit(frame); + + xv_set(frame,FRAME_RIGHT_FOOTER, footnote, NULL); + + (void)xv_set(h_scrollbar, + SCROLLBAR_OBJECT_LENGTH, Xaxis*width+margin, + SCROLLBAR_VIEW_START, 0, + NULL); + + (void)xv_set(v_scrollbar, + SCROLLBAR_OBJECT_LENGTH, Yaxis*width+margin, + SCROLLBAR_VIEW_START, 0, + NULL); + + mark_x = mark_y = 0; + canvas_repaint_proc(canvas,paint_win,display, xwin, NULL); + } +} + + +DrawDot(display, xwin, gc, x, y, size) +Display *display; +Window xwin; +GC gc; +int x, y, size; +{ + int ii, jj; + + switch(size) + { + case 1: + XDrawPoint(display, xwin, gc, x, y); + break; + case 2: + XDrawLine(display, xwin, gc, x,y+1, x+2,y+1); + XDrawLine(display, xwin, gc, x+1,y, x+1,y+2); + break; + case 3: + XDrawLine(display, xwin, gc, x,y+1, x+3,y+1); + XDrawLine(display, xwin, gc, x,y+2, x+3,y+2); + XDrawLine(display, xwin, gc, x+1,y, x+1,y+3); + XDrawLine(display, xwin, gc, x+2,y, x+2,y+3); + break; + case 4: + XDrawLine(display, xwin, gc, x,y+1, x+4,y+1); + XDrawLine(display, xwin, gc, x,y+2, x+4,y+2); + XDrawLine(display, xwin, gc, x,y+3, x+4,y+3); + XDrawLine(display, xwin, gc, x+1,y, x+1,y+4); + XDrawLine(display, xwin, gc, x+2,y, x+2,y+4); + XDrawLine(display, xwin, gc, x+3,y, x+3,y+4); + break; + default: + fprintf(stderr, + "Dot size %d is not implemented. 
2 is used.\n",size); + XDrawLine(display, xwin, gc, x,y+1, x+2,y+1); + XDrawLine(display, xwin, gc, x+1,y, x+1,y+2); + break; + } +} + + + +canvas_repaint_proc(canvas,pw,display, xwin, xrects ) + Canvas canvas; + Xv_Window pw; + Display *display; + Window xwin; + Xv_xrectlist *xrects; +{ + int y, iSeq, iSeg, i, cnt, head, tail; + XGCValues gc_val; + Rect *rect; + int drawarea_min_x = INT_MAX; + int drawarea_max_x = 0; + int ii, jj, prev_color = -99, tmp_color; + + if(xrects == NULL) + { + if(clear_mark == 'F') + { + rect=(Rect *)xv_get(canvas,CANVAS_VIEWABLE_RECT,paint_win); + drawarea_min_y = rect->r_top; + drawarea_max_y = drawarea_min_y + rect->r_height; + } + + rect=(Rect *)xv_get(canvas,CANVAS_VIEWABLE_RECT,paint_win); + drawarea_min_x = MAX(0, rect->r_left -1); + drawarea_max_x = rect->r_left + rect->r_width; + + /* + printf("min_x=%d max_x=%d min_y=%d max_y=%d\n", + drawarea_min_x,drawarea_max_x, + drawarea_min_y,drawarea_max_y); + */ + + for(ii=MAX(drawarea_min_y/width, 0); + iicount-1; cnt>=0; cnt--) + { + drawarea_min_y = xrects->rect_array[cnt].y; + drawarea_max_y = xrects->rect_array[cnt].y + + xrects->rect_array[cnt].height; + + drawarea_min_x = xrects->rect_array[cnt].x; + drawarea_max_x = xrects->rect_array[cnt].x + + xrects->rect_array[cnt].width; + + /* + printf("else: min_x=%d max_x=%d min_y=%d max_y=%d\n", + drawarea_min_x,drawarea_max_x, + drawarea_min_y,drawarea_max_y); + */ + + for(ii=MAX(drawarea_min_y/width, 0); + ii= Xaxis) + { + ii1 = Xaxis; + cc1 = ' '; + } + else + cc1 = tSeq[0].c_elem[ii1]; + + if((ii2 = mark_y/width) >= Yaxis) + { + ii2 = Yaxis; + cc2 = ' '; + } + else + cc2 = tSeq[1].c_elem[ii2]; + + sprintf(buf, + "Mouse location: X=(%c, %d) Y=(%c, %d) ", + cc1, ii1, cc2, ii2); + + xv_set(frame,FRAME_LEFT_FOOTER, buf, NULL); + } + + if(need_to_paint == 'T') + { + /** + ** clear the old mark. 
+ **/ + + XSetForeground(display, gc, colors[WHITE]); + cross(size, save_mark_x, save_mark_y); + + drawarea_min_y = save_mark_y - 10; + drawarea_max_y = save_mark_y + 15; + + clear_mark = 'T'; + + canvas_repaint_proc(canvas,paint_win,display, xwin, NULL); + + /** + ** put a new mark. + **/ + + XSetForeground(display, gc, colors[RED]); + cross(size, mark_x, mark_y); + } + return XV_OK; +} + + + +Load(item,event) + Panel_item item; + Event *event; +{ + extern Frame load_file(); + (void)load_file(frame,300,150,NULL); + return XV_OK; +} + + +size_proc(item, i_sz, event) + Panel_item item; + int i_sz; + Event *event; +{ + tmp_size = i_sz; + return XV_OK; +} + + +width_proc(item, i_wd, event) + Panel_item item; + int i_wd; + Event *event; +{ + tmp_width = i_wd; + return XV_OK; +} + + +filter_proc(item, i_fl, event) + Panel_item item; + int i_fl; + Event *event; +{ + tmp_filter = i_fl; + return XV_OK; +} + + +cutoff_proc(item, i_ct, event) + Panel_item item; + int i_ct; + Event *event; +{ + tmp_cutoff = i_ct; + return XV_OK; +} + + +exit_proc(item, event) + Panel_item item; + Event *event; +{ + if (event_action(event) == ACTION_SELECT) + { + xv_destroy_safe(frame); + return(XV_OK); + } + else + return(XV_ERROR); +} + + +show_prop_frame(item, event) +Frame item; +Event *event; +{ + xv_set(prop_subframe, XV_SHOW, TRUE, NULL); +} + + +cross(size, loc_x, loc_y) + int size, loc_x, loc_y; +{ + int ii; + + if(size == 1) + { + XDrawLine(display,xwin,gc, loc_x-2,loc_y, loc_x+2,loc_y); + XDrawLine(display,xwin,gc, loc_x, loc_y-2,loc_x, loc_y+2); + } + else if(size == 2) + { + for(ii=0; ii<2; ii++) + { + XDrawLine(display,xwin,gc,loc_x-2,loc_y+ii,loc_x+3,loc_y+ii); + XDrawLine(display,xwin,gc,loc_x+ii,loc_y-2,loc_x+ii,loc_y+3); + } + } + else + { + for(ii= -1; ii<2; ii++) + { + XDrawLine(display,xwin,gc,loc_x-4,loc_y+ii,loc_x+4,loc_y+ii); + XDrawLine(display,xwin,gc,loc_x+ii,loc_y-4,loc_x+ii,loc_y+4); + } + } +} + + +int ScoreMatch(a,b) + char a,b; +{ + int i, intc; + char c; + 
static int tmatr[16] = {'-','a','c','m','g','r','s','v', + 't','w','y','h','k','d','b','n'}; + + static int matr[128] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x01,0xe,0x02,0x0d,0,0,0x04,0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06, + 0x08,0x08,0x07,0x09,0x00,0xa,0,0,0,0,0,0,0,0x01,0x0e,0x02,0x0d,0,0,0x04, + 0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,0x08,0x08,0x07,0x09,0x00,0x0a, + 0,0,0,0,0x00,0 + }; + + if(a == '-' || b == '-') return(mscore); + + if(matr[a] == matr[b]) return(mscore); + + intc = matr[a] & matr[b]; + + if((intc==matr[a]) || (intc==matr[b])) return(pmscore); + + return(mmscore); +} + + +int ToDisplay(tSeq, x, y) +Sequence *tSeq; /* 0 and 1 sequences. */ +int x, y; /* axes. */ + /* return the color to draw. */ +{ + int ii, jj, kk, cnt; + char this_score; + char GetBase(); + + if(strcmp(tSeq[0].type, "PROTEIN") == 0 && + strcmp(tSeq[1].type, "PROTEIN") == 0) + { + if((this_score=AAmatr[tSeq[0].c_elem[x]][tSeq[1].c_elem[y]]) + < cutoff) + return WHITE; + + if(direction == 0 || direction == 1) + { + cnt = 1; + for(kk = 1; + cnt < filter && + kk < filter && + x - kk >= 0 && y - kk >= 0 && + AAmatr[tSeq[0].c_elem[x-kk]][tSeq[1].c_elem[y-kk]] >= cutoff; + kk++, cnt++); + + for(kk = 1; + cnt < filter && + kk < filter && + x + kk < Xaxis && y + kk < Yaxis && + AAmatr[tSeq[0].c_elem[x+kk]][tSeq[1].c_elem[y+kk]] >= cutoff; + kk++, cnt++); + + if(cnt >= filter) + return ((this_score+9)/4+1 +11); + + if(direction == 1) + return WHITE; + } + + if(direction == 0 || direction == -1) + { + cnt = 1; + for(kk = 1; + cnt < filter && + kk < filter && + y + kk < Yaxis && x - kk >= 0 && + AAmatr[tSeq[0].c_elem[x-kk]][tSeq[1].c_elem[y+kk]] >= cutoff; + kk++, cnt++); + + for(kk = 1; + cnt < filter && + kk < filter && + y - kk >= 0 && x + kk < Xaxis && + AAmatr[tSeq[0].c_elem[x+kk]][tSeq[1].c_elem[y-kk]] >= cutoff; + kk++, cnt++); + + if(cnt >= filter) + return 
((this_score+9)/4+1 +11); /* +11 to use grey scale.*/ + return WHITE; + } + } + else if((strcmp(tSeq[0].type, "DNA") == 0 || strcmp(tSeq[0].type, "RNA") == 0) && + (strcmp(tSeq[1].type, "DNA") == 0 || strcmp(tSeq[1].type, "RNA") == 0)) + { + if((this_score=ScoreMatch(tSeq[0].c_elem[x],GetBase(tSeq[1],y))) + == mmscore) + return WHITE; + + if(direction == 0 || direction == 1) + { + cnt = 1; + for(kk = 1; + cnt < filter && + kk < filter && + x - kk >= 0 && y - kk >= 0 && + ScoreMatch(tSeq[0].c_elem[x-kk],GetBase(tSeq[1],y-kk)) + != mmscore; + kk++, cnt++); + + for(kk = 1; + cnt < filter && + kk < filter && + x + kk < Xaxis && y + kk < Yaxis && + ScoreMatch(tSeq[0].c_elem[x+kk],GetBase(tSeq[1],y+kk)) + != mmscore; + kk++, cnt++); + + if(cnt >= filter) + return this_score; + + if(direction == 1) + return WHITE; + } + + if(direction == 0 || direction == -1) + { + cnt = 1; + for(kk = 1; + cnt < filter && + kk < filter && + y + kk < Yaxis && x - kk >= 0 && + ScoreMatch(tSeq[0].c_elem[x-kk],GetBase(tSeq[1],y+kk)) + != mmscore; + kk++, cnt++); + + for(kk = 1; + cnt < filter && + kk < filter && + y - kk >= 0 && x + kk < Xaxis && + ScoreMatch(tSeq[0].c_elem[x+kk],GetBase(tSeq[1],y-kk)) + != mmscore; + kk++, cnt++); + + if(cnt >= filter) + return this_score; + return WHITE; + } + } + else + { + fprintf(stderr, "%cCan't plot sequences with types %s vs. 
%s.\n", + 7, tSeq[0].type, tSeq[1].type); + exit(1); + } +} + + +dir_proc(item, ii, event) +Panel_item item; +int ii; +Event *event; +{ + if(ii == 0) + tmp_direction = 1; + else if(ii == 1) + tmp_direction = -1; + else + tmp_direction = 0; + + return XV_OK; +} + + + +compd_proc(item, ii, event) +Panel_item item; +int ii; +Event *event; +{ + if(ii == 0) + tmp_compd = 'N'; + else + tmp_compd = 'Y'; + + return XV_OK; +} + + +ok_proc(item, event) +Panel_item item; +Event *event; +{ + direction = tmp_direction; + compd = tmp_compd; + size = tmp_size; + filter = tmp_filter; + cutoff = tmp_cutoff; + + XClearWindow(display, xwin); + + if(tmp_width != width) + { + mark_y = mark_y/width * tmp_width; + mark_x = mark_x/width * tmp_width; + width = tmp_width; + + (void)xv_set(h_scrollbar, + SCROLLBAR_OBJECT_LENGTH, Xaxis*width+margin, + SCROLLBAR_VIEW_START, + MIN(MAX(0,mark_x - 100), + MAX(0,Xaxis*width+margin-(int)xv_get(canvas,XV_WIDTH))), + NULL); + + (void)xv_set(v_scrollbar, + SCROLLBAR_OBJECT_LENGTH, Yaxis*width+margin, + SCROLLBAR_VIEW_START, + MIN(MAX(0,mark_y - 100), + MAX(0,Yaxis*width+margin-(int)xv_get(canvas, XV_HEIGHT))), + NULL); + } + + canvas_repaint_proc(canvas,paint_win,display, xwin, NULL); + return XV_OK; +} + + +char BaseComp(c) +char c; +{ + unsigned char in,out, case_bit; + static int tmatr[16] = {'-','a','c','m','g','r','s','v', + 't','w','y','h','k','d','b','n'}; + + static int matr[128] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0x00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0x01,0x0e,0x02,0x0d,0,0,0x04,0x0b,0, + 0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,0x08,0x08,0x07,0x09, + 0x00,0x0a,0,0,0,0,0,0,0,0x01,0x0e,0x02,0x0d,0,0,0x04,0x0b, + 0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,0x08,0x08,0x07, + 0x09,0x00,0x0a,0,0,0,0,0x00,0 + }; + +/* +* Save Case bit... 
+*/ + case_bit = c & 32; + out = 0; + in = matr[c]; + if(in&1) + out|=8; + if(in&2) + out|=4; + if(in&4) + out|=2; + if(in&8) + out|=1; + + c = tmatr[out] | case_bit; + + return c; +} + + +char GetBase(tSeq, y) +Sequence tSeq; +int y; +{ + if(compd == 'N') + return tSeq.c_elem[y]; + if(compd == 'Y') + return BaseComp(tSeq.c_elem[y]); +} diff --git a/HGL_SRC/HGLfuncs.c b/HGL_SRC/HGLfuncs.c new file mode 100755 index 0000000..dfb1281 --- /dev/null +++ b/HGL_SRC/HGLfuncs.c @@ -0,0 +1,3165 @@ + +/**************************************************************** + * + * This is a set of functions defined for the genome + * project. + * + ****************************************************************/ + + +#ifndef _GLOBAL_DEFS_H +#define _GLOBAL_DEFS_H +#include "global_defs.h" +#endif + +#define MAXLINELEN 256 + +static char Default_DNA_Trans[16] = { +'-', 'a','c','m','g','r','s','v','t','w','y','h','k','d','b','n' }; + + +/*********** + * + * WriteRecord() outputs one record at a time in HGL format. + * Only the fields in the fields_array will be output. All the + * fields will be output if fields_array is NULL. + * + * fp : pointer to the output file. + * tSeq: pointer to the record. + * fields_array: contains the field ids of the selected fields. + * array_size: number of selected fields. + * + * Returns: 1 if any field is printed; + * 0 if no field is printed; + * -1 if anything is wrong. + * + **********/ + +int +WriteRecord(fp, tSeq, fields_array, array_size) +FILE *fp; +const Sequence *tSeq; +int *fields_array; +int array_size; +{ + int i, save_str_size, tt; + int all_fields = FALSE; + int first_field = TRUE; + char temp_str[256]; + char *save_str; + char *ptr; + + save_str = (char *)Calloc(256, 1); + save_str_size = 256; + + /* When all the fields are selected. 
*/ + if(fields_array == NULL) + { + all_fields = TRUE; + fields_array = (int *)Calloc(NUM_OF_FIELDS, sizeof(int)); + + for(i=0; icreation_date[0] != 0 ) + { + sprintf(save_str,"\n%s\t%d/%d/%d ", + at[fields_array[i]], + tSeq->creation_date[1], + tSeq->creation_date[2], + tSeq->creation_date[0]); + + if(tSeq->creation_date[3]>=0) + { + if(tSeq->creation_date[4] < 0) + tSeq->creation_date[4] = 0; + if(tSeq->creation_date[5] < 0) + tSeq->creation_date[5] = 0; + sprintf(save_str, "%s%d:%d:%d", + save_str, + tSeq->creation_date[3], + tSeq->creation_date[4], + tSeq->creation_date[5]); + } + } + else if (fields_array[i] == e_probing_date && + tSeq->probing_date[0] != 0 ) + { + sprintf(save_str,"\n%s\t%d/%d/%d ", + at[fields_array[i]], + tSeq->probing_date[1], + tSeq->probing_date[2], + tSeq->probing_date[0]); + + if(tSeq->probing_date[3]>=0) + { + if(tSeq->probing_date[4] < 0) + tSeq->probing_date[4] = 0; + if(tSeq->probing_date[5] < 0) + tSeq->probing_date[5] = 0; + sprintf(save_str, "%s%d:%d:%d", + save_str, + tSeq->probing_date[3], + tSeq->probing_date[4], + tSeq->probing_date[5]); + } + } + else if (fields_array[i] == e_autorad_date && + tSeq->autorad_date[0] != 0 ) + { + sprintf(save_str,"\n%s\t%d/%d/%d ", + at[fields_array[i]], + tSeq->autorad_date[1], + tSeq->autorad_date[2], + tSeq->autorad_date[0]); + + if(tSeq->autorad_date[3]>=0) + { + if(tSeq->autorad_date[4] < 0) + tSeq->autorad_date[4] = 0; + if(tSeq->autorad_date[5] < 0) + tSeq->autorad_date[5] = 0; + sprintf(save_str, "%s%d:%d:%d", + save_str, + tSeq->autorad_date[3], + tSeq->autorad_date[4], + tSeq->autorad_date[5]); + } + } + else if ( fields_array[i] == e_c_elem && + tSeq->c_elem != NULL ) + { + ptr = tSeq->c_elem; + sprintf(save_str,"\n%s\t\"",at[fields_array[i]]); + while ( ptr < tSeq->c_elem + tSeq->seqlen ) + { + if ( ptr != tSeq->c_elem ) + strcat(save_str,"\n"); + strncpy(temp_str, ptr, MIN(60, tSeq->c_elem +tSeq->seqlen-ptr)); + temp_str[MIN(60, tSeq->c_elem+tSeq->seqlen - ptr)] = '\0'; + + /* 
Gurantee strlen(temp_str) chars for the string, + * one for \n, one for ", and one for \0. + */ + while(save_str_size - strlen(save_str) < strlen(temp_str)+3) + { + save_str_size *= 2; + save_str = (char *)Realloc(save_str,save_str_size); + } + strcat(save_str, temp_str); + ptr += 60; + } + strcat(save_str,"\""); + } + else if ( fields_array[i] == e_comments && + tSeq->commentslen != 0) + { + while(save_str_size < 20+tSeq->commentslen) + { + save_str_size *= 2; + save_str = (char *)Realloc(save_str,save_str_size); + } + + strcat(save_str,"\n"); + strcat(save_str,at[fields_array[i]]); + strcat(save_str,"\t\"\n"); + + /* put a \0 at the end of comments. */ + while(tSeq->commentslen + 1 > tSeq->commentsmaxlen) + { + tSeq->commentsmaxlen *= 2; + tSeq->comments = (char *) + Realloc(tSeq->comments, + tSeq->commentsmaxlen); + } + tSeq->comments[tSeq->commentslen] = '\0'; + + /* clean up the leading empty lines.*/ + tt = 0; + while(tSeq->comments[tt] == '\n' || tSeq->comments[tt] == ' ') + tt++; + tSeq->commentslen -= tt; + strcat(save_str,tSeq->comments+tt); + strcat(save_str,"\""); + } + else if (fields_array[i] == e_laneset && tSeq->laneset != -1) + sprintf(save_str,"\n%s\t\t%d", + at[fields_array[i]],tSeq->laneset); + else if (fields_array[i] == e_strandedness && tSeq->strandedness != 0) + sprintf(save_str,"\n%s\t%d", + at[fields_array[i]],tSeq->strandedness); + else if (fields_array[i] == e_direction && tSeq->direction != 0) + sprintf(save_str,"\n%s\t%d", + at[fields_array[i]],tSeq->direction); + else if (fields_array[i] == e_orig_strand && tSeq->orig_strand != 0) + sprintf(save_str,"\n%s\t%d", + at[fields_array[i]],tSeq->orig_strand); + else if (fields_array[i] == e_orig_direction && tSeq->orig_direction != 0) + sprintf(save_str,"\n%s\t%d", + at[fields_array[i]],tSeq->orig_direction); + else if (fields_array[i] == e_offset) + sprintf(save_str,"\n%s\t\t%d", + at[fields_array[i]],tSeq->offset); + else if (fields_array[i] == e_group_number && tSeq->group_number != 0) + 
sprintf(save_str,"\n%s\t%d", + at[fields_array[i]],tSeq->group_number); + else if (fields_array[i] == e_group_ID) + sprintf(save_str,"\n%s\t%d", + at[fields_array[i]],tSeq->group_ID); + else if (fields_array[i] == e_type && tSeq->type[0] != '\0' ) + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->type); + else if (fields_array[i] == e_barcode && tSeq->barcode[0] != '\0' ) + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->barcode); + else if (fields_array[i] == e_name && tSeq->name[0] != '\0' ) + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->name); + else if (fields_array[i] == e_status && tSeq->status[0] != '\0' ) + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->status); + else if (fields_array[i] == e_walk && tSeq->walk[0] != '\0' ) + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->walk); + else if (fields_array[i] == e_sequence_ID && + tSeq->sequence_ID[0] != '\0' ) + sprintf(save_str,"\n%s\t\"%s\"", + at[fields_array[i]],tSeq->sequence_ID); + else if (fields_array[i] == e_creator && tSeq->creator[0] != '\0') + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->creator); + else if (fields_array[i]==e_film && tSeq->film[0]!='\0') + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->film); + else if (fields_array[i] == e_membrane && tSeq->membrane[0] != '\0') + sprintf(save_str,"\n%s\t\"%s\"", + at[fields_array[i]],tSeq->membrane); + else if (fields_array[i] == e_source_ID && tSeq->source_ID[0] != '\0') + sprintf(save_str,"\n%s\t\"%s\"", + at[fields_array[i]],tSeq->source_ID); + else if (fields_array[i] == e_contig && tSeq->contig[0] != '\0') + sprintf(save_str,"\n%s\t\t\"%s\"", + at[fields_array[i]],tSeq->contig); + else if (fields_array[i] == e_baggage && tSeq->baglen != 0) + { + if(save_str_size < tSeq->baglen+2) + { + save_str_size = tSeq->baglen+2; + save_str = (char *)Realloc(save_str,save_str_size); + } + + save_str[0] = '\n'; + save_str[1] = '\0'; + + /* 
put a \0 at the end of baggage. */ + strncat(save_str, tSeq->baggage, tSeq->baglen); + while(save_str[tSeq->baglen-1] == '\n') + { + tSeq->baglen--; + } + save_str[tSeq->baglen] = '\0'; + } + if(save_str[0] != '\0') + { + if (first_field == TRUE) + { + first_field = FALSE; + fprintf(fp,"{"); + } + fprintf(fp,"%s",save_str); + } + } + + if (first_field == FALSE) + { + fprintf(fp,"\n}\n"); + } + + if(all_fields == TRUE && fields_array != NULL) + { + Cfree(fields_array); + fields_array = NULL; + } + if(save_str != NULL) + { + Cfree(save_str); + save_str = NULL; + } + + if (first_field == TRUE) + return 0; + else + return 1; +} + + + +/********* + * + * ReadRecord() reads one record from fp into tSeq. fp remains at + * the finishing position so that next time when ReadRecord() is + * called, it reads the next record. + * + * The caller program should LOCATE MEMORY for the tSeq before calling. + * + * ReadRecord() returns: + * TRUE if no error; + * FALSE if anything is wrong + * -1 if end-of-file is reached + * + **********/ + +int +ReadRecord(fp, tSeq) +FILE *fp; +Sequence *tSeq; +{ + char field_name[20], line[256], orig_line[256]; + int temp_str_size, start, end, l, max_len = 255; + char *fgets_ret, *temp_str, *fgets_ret1; + int start_rec = FALSE; + int need_to_read = TRUE; + char started = 'F'; + void InitRecord(); + void FreeRecord(); + + temp_str = (char *)Calloc(256, 1); + temp_str_size = 256; + + InitRecord(tSeq); + + if(tSeq->c_elem == NULL) + { + tSeq->c_elem = (char *)Calloc(256, 1); + tSeq->seqmaxlen = 256; + } + tSeq->c_elem[0] = '\0'; + + + /* read file line-by-line. */ + while (need_to_read == TRUE && + ((fgets_ret = fgets(line, max_len, fp)) != NULL || + start_rec == TRUE)) + { + strcpy(orig_line, line); + end = strlen(line) -1; + while(end>=0 && (line[end] == ' ' || + line[end] == '\t' || + line[end] == ',' || + line[end] == '\n') ) + end--; + + /* ignore empty lines. 
*/ + if(end == -1) + continue; + + if(line[end] == '{') + started = 'T'; + + /* to ignore the lines between a } and a {. */ + while(started == 'F' && fgets_ret != NULL) + { + fgets_ret = fgets(line, max_len, fp); + strcpy(orig_line, line); + end = strlen(line) -1; + while(end>=0 && (line[end] == ' ' || + line[end] == '\t' || + line[end] == ',' || + line[end] == '\n') ) + end--; + + /* ignore empty lines. */ + if(end == -1) + continue; + + if(line[end] == '{') + started = 'T'; + } + + if(fgets_ret == NULL) + return -1; + + if (end < 0) + { + } + else if ((line[end] == '}') && (end==0)) + { + start_rec = FALSE; + need_to_read = FALSE; + } + else if (line[end] == '{' && end <= 10) + { + start_rec = TRUE; + } + else + { + if (line[end]=='}') + { + need_to_read = FALSE; + start_rec = FALSE; + } + + /* locate the tag. */ + start = 0; + while(line[start] == ' ' || + line[start] == '\t'|| + line[start] == '\n'|| + line[start] == '{' ) + start++; + + end = start +1; + while(line[end] != ' ' && + line[end] != '\t' && + line[end] != '\n' && + line[end] != '\0') + end++; + strncpy(field_name, line+start, end-start); + field_name[end-start] = '\0'; + + /* process the field value. */ + + /* + * creation_date, probing_date, or autorad_date + */ + + if ( strcmp(field_name,"creation-date") == 0) + { + while(!isdigit(line[end])) + end++; + if(strToDate(line + end, tSeq->creation_date) == -1) + { + return FALSE; + } + } + else if (strcmp(field_name,"probing-date") == 0) + { + while(line[end] != '\0' && !isdigit(line[end])) + end++; + + if(line[end] != '\0' && + strToDate(line + end, tSeq->probing_date) == -1) + { + return FALSE; + } + } + else if ( strcmp(field_name,"autorad-date") == 0) + { + while(line[end] != '\0' && !isdigit(line[end])) + end++; + if(line[end] != '\0' && + strToDate(line + end, tSeq->autorad_date) == -1) + { + return FALSE; + } + } + + /* + * sequence or comments. 
+ */ + + else if (strcmp(field_name,"sequence") == 0 || + strcmp(field_name,"comments") == 0 ) + { + temp_str[0] = '\0'; + + /* locate the first ". */ + while(line[end++] != '"'); + start = end; + end = strlen(line); + + /* ---"\n\0. */ + if(line[end-2] == '"') + end -= 2; + else if(line[end-1] == '\n' && + strcmp(field_name,"sequence") == 0) + end--; + + while(temp_str_size < end-start+1 ) + { + temp_str_size *= 2; + temp_str = (char *)Realloc(temp_str, temp_str_size); + } + if(end - start > 0) + strncat(temp_str, line+start, end-start); + + /* Read the second line of the seq. or comments, if any. + end-start<0 is the case that " is the only char this line.*/ + if (line[strlen(line)-2] != '"' || end-start<0) + { + while((fgets_ret1 = fgets(line, max_len, fp)) != NULL) + { + /* IGNORE empty lines. 5/4/92 */ + int empty_line = 0; + while(line[empty_line] == ' ') + empty_line++; + if(line[empty_line] == '\n') + { + continue; + /* strncat(temp_str, line, end); 5/4/92 */ + } + + l = strlen(line) -1; + if(line[l-1] == '"') + end = l-1; + else + end = l; + + if(line[end] == '\n' && + strcmp(field_name,"comments") == 0) + end++; + + /* Gurantee 'end' chars for the string, one for ", + * and one for \0. 
+ */ + while(temp_str_size - strlen(temp_str) < end+3 ) + { + temp_str_size *= 2; + temp_str=(char *)Realloc(temp_str,temp_str_size); + } + strncat(temp_str, line, end); + + if(line[l-1] == '"') + break; + } + if(fgets_ret1 == NULL && need_to_read == TRUE) + { + fprintf(stderr, "ReadRecord(): incomplete record.\n"); + return FALSE; + } + } + + l = strlen(temp_str); + if(strcmp(field_name,"comments") == 0 ) + { + if(tSeq->commentsmaxlen == 0) + { + tSeq->comments = (char *)Calloc(l+1, 1); + tSeq->commentsmaxlen = l+1; + } + else + { + while(tSeq->commentslen+l+1>tSeq->commentsmaxlen) + { + tSeq->commentsmaxlen *= 2; + tSeq->comments = (char *) + Realloc(tSeq->comments, tSeq->commentsmaxlen); + } + } + tSeq->comments[tSeq->commentslen] = '\0'; + strcat(tSeq->comments, temp_str); + tSeq->commentslen += l; + } + else /* it is the sequence. */ + { + if(tSeq->seqmaxlen == 0) + { + tSeq->c_elem = (char *)Calloc(l+1, 1); + } + else if(l+1>tSeq->seqmaxlen) + { + tSeq->c_elem = (char *)Realloc(tSeq->c_elem, l+1); + } + tSeq->seqmaxlen = l+1; + tSeq->seqlen = l; + strcpy(tSeq->c_elem, temp_str); + } + } + + /* + * Integer or String. + */ + + else + { + /* locate the value: a string or an integer. */ + + while(line[end] == ' ' || line[end] == '\t') + end++; + if (line[end] == '"') + { + /* It is a string. */ + end++; + start = end; + while(line[end] != '\0' && line[end] != '"') + end++; + /* + * strncat will not put a \0 at the end of a string + * if the copying string is longer than n. + */ + line[end++] = '\0'; + } + else + { + /* It is an integer. */ + start = end; + while(line[end] != ' ' && + line[end] != '\t' && + line[end] != '\n' && + line[end] != '\0') + end++; + strncpy(temp_str, line+start, end-start+1); /*4/26 add 1*/ + temp_str[end-start] = '\0'; + } + + /* assign to an integer field. 
*/ + if (strcmp(field_name,"laneset") == 0 ) + tSeq->laneset = atoi(temp_str); + else if (strcmp(field_name,"strandedness") == 0 ) + tSeq->strandedness = atoi(temp_str); + else if (strcmp(field_name,"direction") == 0) + tSeq->direction = atoi(temp_str); + else if (strcmp(field_name,"orig_strand") == 0 ) + tSeq->orig_strand = atoi(temp_str); + else if (strcmp(field_name,"orig_direction") == 0 ) + tSeq->orig_direction = atoi(temp_str); + else if (strcmp(field_name,"offset") == 0 ) + tSeq->offset = atoi(temp_str); + else if (strcmp(field_name,"group-number") == 0 ) + tSeq->group_number = atoi(temp_str); + else if (strcmp(field_name,"group-ID") == 0 ) + tSeq->group_ID = atoi(temp_str); + + /* assign to a string field. */ + else if (strcmp(field_name,"type") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->type, line+start, end-start); + tSeq->type[end-start] = '\0'; + } + else if (strcmp(field_name,"barcode") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->barcode, line+start, end-start); + tSeq->barcode[end-start] = '\0'; + } + else if (strcmp(field_name,"name") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->name, line+start, end-start); + tSeq->name[end-start] = '\0'; + } + else if (strcmp(field_name,"status") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->status, line+start, end-start); + tSeq->status[end-start] = '\0'; + } + else if (strcmp(field_name,"walk") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->walk, line+start, end-start); + tSeq->walk[end-start] = '\0'; + } + else if (strcmp(field_name,"sequence-ID") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->sequence_ID, line+start, end-start); + tSeq->sequence_ID[end-start] = '\0'; + } + else if (strcmp(field_name,"creator") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->creator, line+start, end-start); + tSeq->creator[end-start] = '\0'; + } + else if 
(strcmp(field_name,"film") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->film, line+start, end-start); + tSeq->film[end-start] = '\0'; + } + else if (strcmp(field_name,"membrane") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->membrane, line+start, end-start); + tSeq->membrane[end-start] = '\0'; + } + else if (strcmp(field_name,"source-ID") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->source_ID, line+start, end-start); + tSeq->source_ID[end-start] = '\0'; + } + else if (strcmp(field_name,"contig") == 0 ) + { + if(end - start > 31) end = start + 31; + strncpy(tSeq->contig, line+start, end-start); + tSeq->contig[end-start] = '\0'; + } + else + { + if(tSeq->bagmaxlen == 0) + { + tSeq->bagmaxlen = 4*strlen(orig_line); + tSeq->baggage = (char *)Calloc(tSeq->bagmaxlen, 1); + } + else + { + while(tSeq->bagmaxlenbaglen+2+strlen(orig_line)) + { + tSeq->bagmaxlen *= 2; + tSeq->baggage = (char *) + Realloc(tSeq->baggage, tSeq->bagmaxlen); + } + } + if(tSeq->baglen == 0) + { + /* + tSeq->baggage[0] = '\n'; + tSeq->baggage[1] = '\0'; + tSeq->baglen = 1; + */ + tSeq->baggage[0] = '\0'; + } + + /* strcat(tSeq->baggage, "\n");*/ + strcat(tSeq->baggage, orig_line); + tSeq->baglen += strlen(orig_line); + } + } + } + } + + if(temp_str != NULL) + { + Cfree(temp_str); + temp_str = NULL; + } + + if ( start_rec == FALSE && fgets_ret == NULL) + { + /* end of file, did not get a record. */ + return -1; + } + else + return TRUE; +} + + +/********* + * + * Initialize a record. + * + * Note: no memory allocation is performed. 
+ * + **********/ + +void +InitRecord(tSeq) +Sequence *tSeq; +{ + int i; + + strcpy(tSeq->type, "DNA"); + tSeq->barcode[0] = '\0'; + tSeq->name[0] = '\0'; + tSeq->status[0] = '\0'; + strcpy(tSeq->walk, "FALSE"); + tSeq->sequence_ID[0] = '\0'; + + tSeq->c_elem = NULL; + tSeq->seqlen = 0; + tSeq->seqmaxlen = 0; + + for (i = 0; i<6; i++) + { + tSeq->creation_date[i] = 0; + tSeq->probing_date[i] = 0; + tSeq->autorad_date[i] = 0; + } + + tSeq->creator[0] = '\0'; + tSeq->film[0] = '\0'; + tSeq->membrane[0] = '\0'; + tSeq->source_ID[0] = '\0'; + tSeq->contig[0] = '\0'; + tSeq->laneset = -1; + tSeq->direction = 1; /* (1/-1/0),default: 5 to 3. */ + tSeq->strandedness = 1; /* (1/2/0), default: primary.*/ + tSeq->orig_direction= 0; /* (0 unknown, -1:3'->5', 1:5'->3') */ + tSeq->orig_strand = 0; /* (0 unknown, 1:primary, 2:secondary) */ + tSeq->offset = 0; + + tSeq->comments = NULL; + tSeq->commentslen = 0; + tSeq->commentsmaxlen = 0; + + tSeq->baggage = NULL; + tSeq->baglen = 0; + tSeq->bagmaxlen = 0; + tSeq->group_number = 0; + tSeq->group_ID = 0; +} + + + +void +CopyRecord(to, from) +Sequence *from, *to; +{ + int i; + + InitRecord(to); + + strcpy(to->type, from->type); + + strcpy(to->barcode, from->barcode); + strcpy(to->name, from->name); + strcpy(to->status,from->status); + strcpy(to->walk,from->walk); + strcpy(to->sequence_ID, from->sequence_ID); + + if(from->c_elem != NULL) + { + to->seqlen = from->seqlen; + to->seqmaxlen = from->seqmaxlen; + to->c_elem = (char *)Calloc(to->seqmaxlen, 1); + strncpy(to->c_elem, from->c_elem, to->seqlen); + to->c_elem[to->seqlen] = '\0'; + } + + for (i = 0; i<6; i++) + { + to->creation_date[i] = from->creation_date[i]; + to->probing_date[i] = from->probing_date[i]; + to->autorad_date[i] = from->autorad_date[i]; + } + + strcpy(to->creator, from->creator); + strcpy(to->film, from->film); + strcpy(to->membrane, from->membrane); + strcpy(to->source_ID, from->source_ID); + strcpy(to->contig, from->contig); + to->laneset = from->laneset; + 
to->strandedness = from->strandedness; + to->orig_direction = from->orig_direction; + to->orig_strand = from->orig_strand; + to->direction = from->direction; + to->offset = from->offset; + + if(from->comments != NULL) + { + to->commentsmaxlen = from->commentsmaxlen; + to->commentslen = from->commentslen; + to->comments = (char *)Calloc(to->commentsmaxlen, 1); + strncpy(to->comments, from->comments, to->commentslen); + to->comments[to->commentslen] = '\0'; + } + + if(from->baggage != NULL) + { + to->baglen = from->baglen; + to->bagmaxlen = from->bagmaxlen; + to->baggage = (char *)Calloc(to->bagmaxlen, 1); + strncpy(to->baggage, from->baggage, to->baglen); + to->baggage[to->baglen] = '\0'; + } + + to->group_number = from->group_number; + to->group_ID = from->group_ID; +} + + + + +/********* + * + * Clean the contents of a record without changing the memory size. + * + **********/ + +void +CleanRecord(tSeq) +Sequence *tSeq; +{ + int i; + + strcpy(tSeq->type, "DNA"); + tSeq->name[0] = '\0'; + tSeq->barcode[0] = '\0'; + tSeq->status[0] = '\0'; + strcpy(tSeq->walk, "FALSE"); + tSeq->sequence_ID[0] = '\0'; + + if(tSeq->c_elem != NULL) + tSeq->c_elem[0] = '\0'; + tSeq->seqlen = 0; + + for (i = 0; i<6; i++) + { + tSeq->creation_date[i] = 0; + tSeq->probing_date[i] = 0; + tSeq->autorad_date[i] = 0; + } + + tSeq->creator[0] = '\0'; + tSeq->film[0] = '\0'; + tSeq->membrane[0] = '\0'; + tSeq->source_ID[0] = '\0'; + tSeq->contig[0] = '\0'; + tSeq->laneset = -1; + tSeq->strandedness = 1; /* (1/2/0), default. primary. */ + tSeq->direction = 1; /* (1/-1/0),default. 5 to 3. */ + tSeq->orig_direction= 0; + tSeq->orig_strand = 0; + tSeq->offset = 0; + + if(tSeq->comments != NULL) + tSeq->comments[0] = '\0'; + tSeq->commentslen = 0; + + if(tSeq->baggage != NULL) + tSeq->baggage[0] = '\0'; + tSeq->baglen = 0; + tSeq->group_number = 0; + tSeq->group_ID = 0; +} + + + +/********* + * + * Free memory for a record. 
+ * + **********/ + +void +FreeRecord(tSeq) +Sequence **tSeq; +{ + Cfree((*tSeq)->c_elem); + Cfree((*tSeq)->comments); + Cfree((*tSeq)->baggage); + Cfree((*tSeq)); + (*tSeq)->c_elem = NULL; + (*tSeq)->comments = NULL; + (*tSeq)->baggage = NULL; + (*tSeq) = NULL; +} + + +static max_day[2][13] = { +{ 0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, +{ 0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} }; + + + +/*********** + * + * strToDate() locates first six integers and translates them + * into a date. + * + * String should have the format of "mm/dd/yy hh/mn/sc xm", + * with anything except digit as the delimiters. + * + * Order in the date array is (0->5): (yy mm dd hh mn sc). + * + * Returns FALSE if anything is wrong, TRUE otherwise. + * + **********/ + +int +strToDate(str, date) +const char *str; +int date[]; +{ + int leap; + char temp_str[2]; + char longstr[256]; + + /* locate 6 integers. */ + + strcpy(longstr, str); + strcat(longstr, " -1/-1/-1 "); + sscanf(longstr, "%d%*c%d%*c%d%*c%d%*c%d%*c%d%2s", + &date[1],&date[2],&date[0],&date[3], + &date[4],&date[5],temp_str); + + /* verify year. */ + if(date[0] >= 100) + date[0] -= 1900; + + /* verify month. */ + if(date[1] > 12 || date[1] < 1) + { + fprintf(stderr,"invalid month %s\n", str); + return FALSE; + } + + /* verify day. */ + if ((date[0] % 4 == 0 && date[0] % 100 != 0) || + date[0] % 400 == 0) + leap = 1; + else + leap = 0; + + if(date[2] > max_day[leap][date[1]] || + date[2] < 1) + { + fprintf(stderr,"invalid day %s\n", str); + return FALSE; + } + + /* verify time. */ + if (strncmp(temp_str,"pm",2)==0) + date[3] += 12; + if (date[3]<-1 || date[3]>23 || + date[4]<-1 || date[4]>59 || + date[5]<-1 || date[5]>59 ) + { + fprintf(stderr,"invalid time %s\n", str); + return FALSE; + } + + return TRUE; +} + + +/********** + * + * Default_IUPAC_Trans() translates an ASCII IUPAC code into + * an (char) integer. 
+ * + **********/ + +char +Default_IUPAC_Trans(base) +char base; +{ + int i; + char c; + c = base | 32; + + if(c == 'u') + return (char ) 8; + + if(c == 'p') + return (char) 5; + + for(i=0; i<16; i++) + { + if(c == Default_DNA_Trans[i]) + { + return ( (char) i); + } + } + fprintf(stderr, "Character %c is not IUPAC coded.\n", base); + return -1; +} + +char *uniqueID(); + +/*********** + * + * MakeConsensus() takes an array of aligned sequence and an + * initialized 'Sequence' consensus. It modifies the consensus. + * + * The memory that 'consensus' has located will be reused, and + * consensus->seqmaxlen will be modified if necessary. + * + * Returns TRUE if successful, FALSE otherwise. + * + **********/ + +int +MakeConsensus(aligned, numOfAligned, consensus, group) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ +int group; /* Group number (if zero, use all groups) */ +{ + char occurence; + int i, j, index; + int max_cons = INT_MIN; + int min_offset = INT_MAX; + char temp_str[2]; + unsigned char case_bit; + + /* + * Search for the minimun offset. 
+ */ + + for (i=0; ioffset = min_offset; + + if(aligned[0].contig[0] != '\0') + { + strcpy(consensus->name, aligned[0].contig); + strcat(consensus->name, "."); + } + else if(strncmp(aligned[0].name, "cons.", 5) != 0) + { + strcpy(consensus->name, "cons."); + strcat(consensus->name, aligned[0].name); + } + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + for(j=min_offset; j= aligned[i].offset && + j < aligned[i].offset+aligned[i].seqlen) + { + index = j-aligned[i].offset; + + if(aligned[i].c_elem[index] == '-') + case_bit = 32; + else if(case_bit == 0) + case_bit |= (aligned[i].c_elem[index] & 32); + + occurence = occurence | + Default_IUPAC_Trans(aligned[i].c_elem[index]); + + if(occurence != 1 && occurence != 2 && + occurence != 4 && occurence != 8) + case_bit = 32; + /* + printf("%1c", aligned[i].c_elem[index]); + */ + } + /* + else + printf(" "); + */ + } + } + + sprintf(temp_str, "%1c", Default_DNA_Trans[(int) occurence]); + if(case_bit == 0) + temp_str[0] = toupper(temp_str[0]); + + if(InsertElems(consensus, j, temp_str)== FALSE) + return FALSE; + /* + printf(" cons[%d]=%1c\n", j - min_offset, + consensus->c_elem[j - min_offset]); + */ + } + return TRUE; +} + + + +/*********** + * + * MakeScore() takes an array of aligned sequence, and generates + * a consensus. Note, memory for (Sequence* consensus) should be + * located before it is passed to this function. + * + * Returns TRUE if successful, FALSE otherwise. + * + **********/ + +int +MakeScore(aligned, numOfAligned, consensus, group) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ +int group; +{ + int i, j, index, score; + int max_cons = INT_MIN; + int min_offset = INT_MAX; + int As, Cs, Ts, Gs, Ns, tot_in_grp; + char temp_str[2], occurence, base; + int max_occ; + + static char map[17] = "0123456789ABCDEF"; + + /* + * Search for the minimum offset. 
+ */ + + for (i=0; ioffset = min_offset; + + if(aligned[0].contig[0] != '\0') + { + strcpy(consensus->name, aligned[0].contig); + strcat(consensus->name, "."); + } + else if(strncmp(aligned[0].name, "cons.", 5) != 0) + { + strcpy(consensus->name, "cons."); + strcat(consensus->name, aligned[0].name); + } + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + for(j=min_offset; j= aligned[i].offset && + j < aligned[i].offset+aligned[i].seqlen) + { + tot_in_grp++; + index = j-aligned[i].offset; + + /* + occurence = Default_IUPAC_Trans(aligned[i].c_elem[index]); + if((occurence & 01) == 01) + As++; + if((occurence & 02) == 02) + Cs++; + if((occurence & 04) == 04) + Gs++; + if((occurence & 010) == 010) + Ts++; + */ + + base = (aligned[i].c_elem[index]|32); + + if(base == 'a') + As++; + else if(base == 'c') + Cs++; + else if(base == 'g') + Gs++; + else if(base == 't') + Ts++; + else if(base == 'n' || base == '-') + Ns++; + /* + printf("%1c", aligned[i].c_elem[index]); + */ + } + /* + else + printf(" "); + */ + } + } + + max_occ = MAX(As, MAX(Cs, MAX(Gs,Ts))); + + /* socre = [0,E], F:all mismatches are either 'n' or '-' */ + if(Ns != 0 && max_occ+Ns == tot_in_grp) + score = 15; + else + score = max_occ*14/tot_in_grp; + + /* + if( score > 0xF ) + { + if (InsertElems(consensus, j, "F") == FALSE) + { + return FALSE; + } + } + else + { + */ + + sprintf(temp_str,"%1c", map[score]); + if(InsertElems(consensus, j, temp_str) == FALSE) + { + return FALSE; + } + + /* + printf(" %2d-%2d-%2d-%2d %2d cons[%d]=%1c\n", + Ts, Gs, Cs, As, score, j, + consensus->c_elem[j]); + */ + } + return TRUE; +} + + +/*********** + * + * MakePhyloMask() takes an array of aligned sequence, and generates + * a mask that has a '0' for all columns except the columns which contain + * a, c, g, t and u only. + * + * Returns TRUE if successful, FALSE otherwise. 
+ * + **********/ + +int +MakePhyloMask(aligned, numOfAligned, consensus, group, acgtu) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. */ +int acgtu[]; +int group; +{ + int i, j, cnt, max_cons = INT_MIN, min_offset = INT_MAX; + + /* + * Search for the minimum offset. + */ + + for (i=0; ioffset = min_offset; + strcpy(consensus->name, "mask"); + strcpy(consensus->type, "MASK"); + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + consensus->seqlen = max_cons - min_offset; + if(consensus->seqmaxlen == 0) + { + consensus->c_elem = (char *)Calloc(max_cons - min_offset+5, 1); + consensus->seqmaxlen = max_cons - min_offset + 5; + } + else if(consensus->seqmaxlen < max_cons - min_offset) + { + consensus->seqmaxlen = max_cons - min_offset + 5; + consensus->c_elem = (char *)Realloc(consensus->c_elem, + max_cons - min_offset + 5); + } + + cnt = 0; + for(j=min_offset; jc_elem[j-min_offset] = '1'; + for(i=0; i= aligned[i].offset+aligned[i].seqlen || + acgtu[aligned[i].c_elem[j-aligned[i].offset]] == 0) + { + consensus->c_elem[j-min_offset] = '0'; + cnt++; + break; + } + } + } + } + fprintf(stderr, "\nNumber of 1s in mask: %d\n", max_cons-min_offset-cnt); + fprintf(stderr, "Number of 0s in mask: %d\n\n", cnt); + return TRUE; +} + + +/*********** + * + * MajorityCons() takes an array of aligned sequence, and generates + * a MAJORITY consensus. + * Note, memory for (Sequence* consensus) should be + * located before it is passed to this function. + * + * Returns TRUE if successful, FALSE otherwise. + * + **********/ + +int +MajorityCons(aligned, numOfAligned, consensus, group, major_perc) +Sequence aligned[]; /* input. */ +int numOfAligned; /* input. */ +Sequence *consensus; /* input and output. 
*/ +int group, major_perc; +{ + int i, j, index, score, ii, base, max; + int max_cons = INT_MIN; + int min_offset = INT_MAX; + char temp_str[2], occurence; + int *cnts, tot_in_grp; + unsigned char case_bit; + + cnts = (int *)Calloc(16, sizeof(int)); + + /* + * Search for the minimum offset. + */ + + for (i=0; ioffset = min_offset; + + if(aligned[0].contig[0] != '\0') + { + strcpy(consensus->name, aligned[0].contig); + strcat(consensus->name, "."); + } + else if(strncmp(aligned[0].name, "cons.", 5) != 0) + { + strcpy(consensus->name, "cons."); + strcat(consensus->name, aligned[0].name); + } + strcpy(consensus->sequence_ID, uniqueID()); + strcpy(consensus->contig, aligned[0].contig); + + for(j=min_offset; j= aligned[i].offset && + j < aligned[i].offset+aligned[i].seqlen) + { + tot_in_grp++; + index = j-aligned[i].offset; + + if(aligned[i].c_elem[index] == '-') + case_bit = 32; + else if(case_bit == 0) + case_bit |= (aligned[i].c_elem[index] & 32); + + occurence |= + Default_IUPAC_Trans(aligned[i].c_elem[index]); + cnts[(int)Default_IUPAC_Trans(aligned[i].c_elem[index])]++; + + if(case_bit == 0 && + occurence != 1 && occurence != 2 && + occurence != 4 && occurence != 8) + case_bit = 32; + } + } + } + + max = 0; + for(ii = 0; ii < 16; ii++) + { + if(cnts[ii] > max) + { + max = cnts[ii]; + base = ii; + } + } + if(max*100/tot_in_grp >= major_perc) + { + /* follow the majority rule. */ + sprintf(temp_str,"%1c", Default_DNA_Trans[base]); + } + else + { + /* use IUPAC code. */ + sprintf(temp_str,"%1c", + Default_DNA_Trans[(int) occurence]); + } + + if(case_bit == 0) + temp_str[0] = toupper(temp_str[0]); + + if(InsertElems(consensus, j, temp_str) == FALSE) + { + return FALSE; + } + } + return TRUE; +} + + +/*********** + * + * ReadGDEtoHGL() reads a GDE formated file into an array of HGL structure. + * + * Return -1 if anything is wrong, number_of_sequence otherwise. 
+ * + ***********/ + +int +ReadGDEtoHGL(fp, tSeq_arr) +FILE *fp; +Sequence **tSeq_arr; +{ + char line[MAXLINELEN]; + int ptr, num_seq, max_num_seq = 20; + int seq_len = 200; + char *newline; + + (*tSeq_arr) = (Sequence *)Calloc(max_num_seq, sizeof(Sequence)); + num_seq = -1; + while(fgets(line, MAXLINELEN-2, fp) != NULL) /* spaces for \n\0 */ + { + /* ptr points to the last char. */ + ptr = strlen(line)-1; + + /* clear up the tail. */ + while(ptr>=0 && (line[ptr] == '\n' || + line[ptr] == ' ' || + line[ptr] == '\t')) + ptr--; + line[ptr+1] = '\0'; + + if(ptr <= 0) + { + /* it is an empty line. */ + } + else if(line[0] == '#') + { + if(++num_seq == max_num_seq) + { + max_num_seq *= 2; + /* printf("max_num_seq = %d\n", max_num_seq); */ + (*tSeq_arr) = (Sequence *)Realloc((*tSeq_arr), + max_num_seq*sizeof(Sequence)); + } + + InitRecord((*tSeq_arr)[num_seq]); + + if (line[ptr] == '<') + { + (*tSeq_arr)[num_seq].direction = 2; /* 3to5 */ + line[ptr] = '\0'; + } + else if (line[ptr] == '>') + { + (*tSeq_arr)[num_seq].direction = 1; /* 5to3 */ + line[ptr] = '\0'; + } + strcpy((*tSeq_arr)[num_seq].sequence_ID, line+1); + } + else + { + ptr = 0; + if((*tSeq_arr)[num_seq].seqlen == 0) + { + /* determine the offset. 
*/ + while(line[ptr] != '\0' && line[ptr] == '-') + { + ptr++; + } + (*tSeq_arr)[num_seq].offset += ptr; + } + + if(line[ptr] != '\0') + { + newline = line + ptr; + + if((*tSeq_arr)[num_seq].seqmaxlen == 0) + { + (*tSeq_arr)[num_seq].c_elem = + (char *)Calloc(seq_len, 1); + (*tSeq_arr)[num_seq].c_elem[0] = '\0'; + (*tSeq_arr)[num_seq].seqmaxlen = seq_len; + } + else + { + while((*tSeq_arr)[num_seq].seqlen + strlen(newline) + 1 + > (*tSeq_arr)[num_seq].seqmaxlen) + { + seq_len *= 2; + (*tSeq_arr)[num_seq].c_elem = (char *) + Realloc((*tSeq_arr)[num_seq].c_elem, seq_len); + (*tSeq_arr)[num_seq].seqmaxlen = seq_len; + } + } + strcat((*tSeq_arr)[num_seq].c_elem, newline); + (*tSeq_arr)[num_seq].seqlen = strlen((*tSeq_arr)[num_seq].c_elem); + } + } + } + + return (num_seq + 1); +} + + + + +/******** + * + * InsertElems returns TRUE if successful, FALSE otherwise. + * + ********/ + +int +InsertElems(seq,pos,c) +Sequence *seq; /* Sequence */ +int pos; /* Position (in respect to the master consensus) + * to insert BEFORE + * always move string to the right. */ +char c[]; /*Null terminated array of elements to insert */ +{ + int dashes, j,len; + + len = strlen(c); + + if(seq->seqlen == 0) + { + /* get rid of '-'s at right. */ + /* + dashes = len-1; + while(dashes >= 0 && c[dashes] == '-') + dashes--; + if(dashes < 0) + { + seq->offset = pos; + return TRUE; + } + c[dashes+1] = '\0'; + */ + + /* clear out '-'s at left. */ + dashes = 0; + /* + while(c[dashes] == '-') + dashes++; + + c += dashes; + len = strlen(c); + pos += dashes; + */ + + if(seq->seqmaxlen == 0) + { + seq->c_elem = (char *)Calloc(len+1, 1); + seq->seqmaxlen = len + 1; + } + else if(len+1 >= seq->seqmaxlen) + { + seq->c_elem = (char *)Realloc(seq->c_elem, len+1); + seq->seqmaxlen = len+1; + } + + strcpy(seq->c_elem, c); + seq->seqlen = len; + seq->offset = pos; + return TRUE; + } + + /* to make sure there is a space for '\0'. 
*/ + if(seq->seqlen > seq->seqmaxlen) + { + fprintf(stderr, + "InsertElems(): seqlen>seqmaxlen. Something is wrong.\n"); + return FALSE; + } + else + { + while(seq->seqlen+1 >= seq->seqmaxlen) + { + seq->seqmaxlen *= 2; + seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } + } + seq->c_elem[seq->seqlen] = '\0'; + + if(pos < seq->offset) /* insert to the left of the seq. */ + { + /* ignore the dashes at the left. */ + dashes = 0; + /* + while(dashes < len && c[dashes] == '-') + dashes++; + if(c[dashes] == '\0') + { + seq->offset += len; + return TRUE; + } + c += dashes; + len -= dashes; + */ + + if(seq->seqlen + len + seq->offset - pos > seq->seqmaxlen) + { + seq->seqmaxlen = seq->seqlen+len+seq->offset-pos+256; + seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } + + /* copy the old string including the last '\0'. */ + for(j=seq->seqlen; j>=0; j--) + seq->c_elem[j+len+seq->offset-pos] = seq->c_elem[j]; + + /* insert dashes. */ + for(j=len; joffset-pos; j++) + seq->c_elem[j] = '-'; + + /* copy the inserted string. */ + for(j=0; jc_elem[j] = c[j]; + + /* detector. */ + if(c[j] != '\0') + fprintf(stderr, "InsertElems: Problem.....\n"); + + seq->seqlen = strlen(seq->c_elem); + + /* seq->offset = pos; commented on 6-3-91 */ + seq->offset = pos + dashes; + if(dashes > 0) + printf("\nInsertElems(): dashes is not zero.\n\n"); + } + + else if(pos - seq->offset >= seq->seqlen) /* insert to the right. */ + { + /* ignore the dashes at the right. */ + /* + dashes = len -1; + while(dashes >= 0 && c[dashes] == '-') + dashes--; + if(dashes < 0) + return TRUE; + len = dashes+1; + c[len] = '\0'; + */ + + if(pos - seq->offset + len > seq->seqmaxlen) + { + seq->seqmaxlen = pos - seq->offset + len + 256; + seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } + + /* insert dashes. */ + for(j=seq->seqlen; joffset; j++) + seq->c_elem[j] = '-'; + + /* copy the inserted string. 
*/ + for(j=0; jc_elem[pos - seq->offset + j] = c[j]; + seq->c_elem[pos-seq->offset+len] = '\0'; + + /* detector. */ + if(c[j] != '\0') + fprintf(stderr, "InsertElems: Problem too .....\n"); + + seq->seqlen = strlen(seq->c_elem); + } + else /* insert into the seq. */ + { + if(seq->seqlen + len > seq->seqmaxlen) + { + seq->seqmaxlen = seq->seqlen + len + 256; + seq->c_elem = (char *)Realloc(seq->c_elem, seq->seqmaxlen); + } + + /* move the bottom part of the older string including the last '\0'. */ + for(j=seq->seqlen; j>=pos-seq->offset; j--) + seq->c_elem[j+len] = seq->c_elem[j]; + + /* copy the inserted string. */ + for(j=0; jc_elem[pos - seq->offset + j] = c[j]; + + /* detector. */ + if(c[j] != '\0') + fprintf(stderr, "InsertElems: Problem too too .....\n"); + + seq->seqlen = strlen(seq->c_elem); + } + + return TRUE; +} + + + + +/****************************************************************** + * + * int GetArgs(argArray, numArgs) + * Arg *argArray; + * int numArgs; + * + * Return TRUE if successful, FALSE otherwise. + * + ******************************************************************/ + +#define MAX_ARGS 50 /* maximum args this can process */ + +int +GetArgs(argArray, numArgs, argc, argv) +Args *argArray; +int numArgs; +int argc; +char **argv; +{ + int i, j; + Args *curarg; + int noArgOK = TRUE; + + if ((argArray == NULL) || (numArgs == 0) || (numArgs > MAX_ARGS)) + { + fprintf(stderr, "GetArgs: Invalid number of args.\n"); + return FALSE; + } + + /* + * Test if all are either 'default' or 'optional'. + */ + curarg = argArray; + for (i=0; istrvalue[0] == '\0' && curarg->optional == 'F') + { + noArgOK = FALSE; + break; + } + } + + /* + * show usage if some arg is required but no arg is + * supllied on command line. 
+ */ + if(noArgOK == FALSE && argc == 1) + { + fprintf(stderr, "\n%s arguments:\n\n", argv[0]); + curarg = argArray; + + for (i = 0; i < numArgs; i++, curarg++) + { + fprintf(stderr, " -%c %s ", curarg->tag, curarg->prompt); + if (curarg->optional == 'T') + fprintf(stderr, " [Optional]"); + fprintf(stderr, "\n"); + if (curarg->strvalue[0] != '\0') + fprintf(stderr, " default = %s\n", curarg->strvalue); + } + fprintf(stderr, "\n"); + return FALSE; + } + + /* + * Process + */ + for (i = 1; i < argc; i++) + { + if (argv[i][0] != '-') + { + fprintf(stderr, "Arguments must start with -"); + return FALSE; + } + + /* check the tag. */ + curarg = argArray; + for (j = 0; j < numArgs; j++, curarg++) + { + if ((argv[i][1]|32) == (curarg->tag|32)) + break; + } + if (j == numArgs) + { + fprintf(stderr, "Invalid argument tag in %s\n", argv[i]); + return FALSE; + } + + strcpy(curarg->strvalue, argv[i]+2); + if(curarg->strvalue[0] == '\'' + && curarg->strvalue[strlen(curarg->strvalue)-1] == '\'') + { + char ttmm[256]; + strcpy(ttmm, curarg->strvalue+1); + ttmm[strlen(ttmm)-1] = '\0'; + strcpy(curarg->strvalue, ttmm); + } + } + return TRUE; +} + + +/********* + * + * GetCond interprets the -c argument, the condition. + * + * The condition will be set to NULL if no condition is specified, + * that is, if you pass '&p' as the address of a cond* structure, + * p will be set to NULL if no condition [(p == NULL) = TRUE]. + * + * Return TRUE if successful, FALSE otherwise. + * + *********/ + +int +GetCond(arg, cond) +char *arg; +str_cond **cond; +{ + int start, end, i, found; + char message_buf[1000]; + + if ( strcmp(arg, "null")==0) + { + (*cond) = NULL; + return TRUE; + } + else + { + (*cond) = (str_cond *)Calloc(1, sizeof(str_cond)); + + start = end = 0; + + /* find the field name. */ + while (('a'<= arg[end] && arg[end]<='z') || + ('A'<= arg[end] && arg[end]<='Z') || + arg[end] == '-' ) + end++; + + found = FALSE; + for (i=0; ifield = i; /* condition on field &at[i]. 
*/ + found = TRUE; + break; + } + } + if (found == FALSE) + { + strncpy(message_buf, arg, end-start); + message_buf[end-start] = '\0'; + fprintf(stderr, "Field %s not found.\n", message_buf); + return FALSE; + } + + start = end; + end++; + while (arg[end] == '=' || + arg[end] == '!' || + arg[end] == '>' || + arg[end] == '<' ) + end++; + strncpy((*cond)->symbol, arg+start, end-start); + (*cond)->symbol[end-start] = '\0'; + if (strlen((*cond)->symbol)>2 || + strlen((*cond)->symbol)<1 || + (strlen((*cond)->symbol)==1 && + *((*cond)->symbol) !='>' && + *((*cond)->symbol) != '<') || + (strlen((*cond)->symbol)==2 && + (strncmp((*cond)->symbol,"!=",2)!= 0 ) && + (strncmp((*cond)->symbol,"==",2)!= 0 ) && + (strncmp((*cond)->symbol,">=",2)!= 0 ) && + (strncmp((*cond)->symbol,"<=",2)!= 0 ) + ) + ) + { + fprintf(stderr, "Invalid condition.\n"); + return FALSE; + } + + if(arg[end] == '"' && arg[strlen(arg) - 1] == '"') + { + end++; + arg[strlen(arg) - 1] = '\0'; + } + + (*cond)->value = (char *)Calloc(strlen(arg) - end + 2, 1); + strcpy((*cond)->value, arg+end); + } + return TRUE; +} + + +/********* + * + * GetFields interprets the -f arguments, the fields list. + * + * Returns number of selected fields, 0 if anything is wrong. + * + *********/ + +int +GetFields(arg, selected_fields) +char *arg; +int selected_fields[]; +{ + int start, end, i, found, list_done, i_selected; + char message_buf[1000]; + + if ( strcmp(arg, "all") == 0 ) + { + selected_fields[0] = -1; + return NUM_OF_FIELDS; + } + else + { + start = end = 0; + list_done = FALSE; + i_selected = 0; + + while ( list_done == FALSE ) + { + while (arg[end] != '\0' && arg[end] != ',') + { + end++ ; + } + if (arg[end] == '\0') + { + list_done = TRUE; + } + found = FALSE; + for (i=0; i= pl && + strncmp(string+i, pattern, pl) == 0) + num_app++; + } + + return num_app; +} + + +/******* + * + * FindPatternNC() searches string for pattern , CASE INSENSITIVE. + * Returns the number of appearences. 
+ * + *******/ + +int +FindPatternNC(string, pattern) +const char *string; +const char *pattern; +{ + int i, j, sl, pl, num_app = 0; + + if(string == NULL || (sl = strlen(string)) == 0) + return 0; + + pl = strlen(pattern); + + for(i = 0; i <= sl-pl; i++) + { + j = 0; + while(j < pl && (string[i+j]|32) == (pattern[j]|32)) + j++; + + if(j == pl) + num_app++; + } + + return num_app; +} + + +/******* + * + * Complementary() CHANGES the given DNA/RNA string to its complementary, + * and returns TRUE. Returns FALSE if anything is wrong and keep the + * given string unchanged. + * + *******/ + +int +Complementary(sequence, type) +char *sequence; +char type; +{ + int i, l; + char *temp_str; + + l = strlen(sequence); + temp_str = (char *)Calloc(l+1, sizeof(char)); + if( type == 'D' || type == 'd') + type = 0; + else if(type == 'R' || type == 'r') + type = 1; + else + { + fprintf(stderr, + "Complementary(): type unknown. Type is D/d/R/r\n"); + return (int) NULL; + } + + for(i=0; i 1) + { + fprintf(stderr, + "%s has 15 repatitive base(s) %s\n", + PossibleOligo, subseq); + i++; + BadOligo = TRUE; + } + } + */ + + /* + * To ensure that the probe is not going to hybridize + * with itself: + */ + for(PO_index = 0; + BadOligo==FALSE && PO_index<=PO_len-no_repeat_len; + PO_index++) + { + SubStr(PossibleOligo, PO_index, no_repeat_len, subseq); + strcpy(scd_str, subseq); + Complementary(scd_str, 'd'); + Reverse(scd_str); + + if(FindPattern(PossibleOligo, scd_str) > 0) + { + fprintf(stderr, + "%s may hybridize with itself: %s vs. 
%s.\n", + PossibleOligo, subseq, scd_str); + i++; + BadOligo = TRUE; + } + } + + for(PO_index = 0; + BadOligo == FALSE && PO_index <= PO_len-2*check_len; + PO_index++) + { + SubStr(PossibleOligo, PO_index, check_len, subseq); + Complementary(subseq, 'd'); + strcpy(scd_str, subseq); + Reverse(scd_str); + + /* + if(FindPattern2(PossibleOligo,subseq,PO_index)>0) + { + fprintf(stderr, "%s has self-compl %s\n", + PossibleOligo, subseq); + i += PO_index+1; + BadOligo = TRUE; + } + else + */ + + if(FindPattern2(PossibleOligo,scd_str,PO_index)>0) + { + fprintf(stderr, "%s has 2nd struct %s\n", + PossibleOligo, scd_str); + i += PO_index+1; + BadOligo = TRUE; + } + } + if(BadOligo == FALSE) + { + seq_set[seq_cnt] = (char *) + Calloc(strlen(PossibleOligo)+1, sizeof(char)); + strcpy(seq_set[seq_cnt], PossibleOligo); + + if(++seq_cnt == max_num_probe) + { + max_num_probe *= 2; + seq_set = (char **) + Realloc(seq_set, max_num_probe*sizeof(char *)); + } + i++; + } + } /* end of l. */ + } /* end of i. */ + + seq_set[seq_cnt] = NULL; + + if(seq_cnt == 0) + return NULL; + + return seq_set; +} + + + +/* ALWAYS COPY the result from uniqueID() to a char[32], + * (strlen(hostname)+1+10). Memory is lost when the function + * is finished. + */ +char vname[32]; +char *uniqueID() +{ + char hname[32],/* vname[32], rtm 18.III.98 */ tstr[32]; + time_t *tp; + static cnt = 0; + int ll; + + tp = (time_t *)Calloc(1, sizeof(time_t)); + + if(gethostname(hname, 32) == -1) + { + fprintf(stderr, "UniqueID(): Failed to get host name.\n"); + exit(1); + } + + time(tp); + sprintf(tstr, ":%d:%ld", cnt, *tp); + if((ll = strlen(tstr)) > 31) + { + strncpy(vname, tstr, 31); + vname[31] = '\0'; + } + else + { + ll = strlen(hname)-(31-ll); + if(ll < 0) + ll = 0; + sprintf(vname, "%s%s", hname+ll, tstr); + } + cnt++; + Cfree(tp); + return(vname); +} + + + +/* return the percentage of GCcontents. 
*/ + +int GCcontent(seq) +char *seq; +{ + int l, gc=0, j; + + l = strlen(seq); + + for (j=0; jcomments, tSeq->c_elem); +} + + + +Find2(string,key) + char *key,*string; + /* + * Like find, but returns the index of the leftmost + * occurence, and -1 if not found. + * Note in this program, T==U, and case insensitive. + */ +{ + int i,j,len1,len2,dif,flag = FALSE; + char *target; + + if(string == NULL || string[0] == '\0') + return -1; + + len2 = strlen(string); + target = (char *) Calloc(len2+1, 1); + for(i = 0; i0) + for(j=0;jsequence_ID); + } + else if(temp_line[0] == '#') + { + strncpy(seq->name, temp_line+1, 31); + seq->name[31] = '\0'; + ii = 0; + while(ii < strlen(seq->name) && + seq->name[ii] != ' ' && + seq->name[ii] != '\n') + ii++; + seq->name[ii] = '\0'; + + seq->seqmaxlen = 256; + seq->c_elem=(char *)Calloc(seq->seqmaxlen,1); + seq->seqlen = 0; + while(fgets(temp_line, 1000, fp) != NULL) + { + l1 = strlen(temp_line); + + if(temp_line[l1 - 1] == '\n') + { + l1--; + temp_line[l1] = '\0'; + } + + while(seq->seqmaxlen < + seq->seqlen + strlen(temp_line) + 1) + { + seq->seqmaxlen *= 2; + seq->c_elem = (char *) + Realloc(seq->c_elem, seq->seqmaxlen); + } + + strcat(seq->c_elem, temp_line); + seq->seqlen += strlen(temp_line); + } + + if(seq->seqlen == 0) + { + fprintf(stderr, "\n%s\n","Sequence is empty."); + return FALSE; + } + } + } + return -1; +} + + +void heapify(seq_set, seq_size, heap_size, elem, Pkey, Skey, order) +int seq_size, elem, heap_size, **order; +char Pkey[], Skey[]; +Sequence *seq_set; +{ + int l, r, temp, largest; + + l = 2*elem+1; + r = 2*elem+2; + + if(l <= heap_size && + CompKey(seq_set[(*order)[l]], seq_set[(*order)[elem]], + Pkey, Skey) > 0) + largest = l; + else + largest = elem; + + if(r <= heap_size && + CompKey(seq_set[(*order)[r]], seq_set[(*order)[largest]], + Pkey, Skey) > 0) + largest = r; + + if(largest != elem) + { + temp = (*order)[elem]; + (*order)[elem] = (*order)[largest]; + (*order)[largest] = temp; + 
heapify(seq_set,seq_size,heap_size,largest,Pkey,Skey,order); + } +} + + +heapsort(seq_set, seq_size, Pkey, Skey, order) +int seq_size, **order; +char Pkey[], Skey[]; +Sequence *seq_set; +{ + int ii, temp, heap_size; + + /* + * build_heap(seq_set, seq_size, &heap_size, order); + */ + heap_size = seq_size-1; + + for(ii = (seq_size-1)/2; ii>=0; ii--) /* (L-1)/2-1?? */ + { + heapify(seq_set, seq_size, heap_size, ii,Pkey,Skey,order); + } + + for(ii = seq_size-1; ii>0; ii--) + { + temp = (*order)[0]; + (*order)[0] = (*order)[ii]; + (*order)[ii] = temp; + heap_size--; + heapify(seq_set, seq_size, heap_size, 0, Pkey,Skey,order); + } +} + + + + +/* + * Return >0, ==0, <0. + */ + +int CompKey(seq1, seq2, Pkey, Skey) + Sequence seq1, seq2; + char Pkey[], Skey[]; +{ + int ii, jj, Pret; + char b1[32], b2[32]; + + if(strcmp(Pkey, "type") == 0) + { + Pret = strcmp(seq1.type, seq2.type); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "name") == 0) + { + Pret = strcmp(seq1.name, seq2.name); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "sequence-ID") == 0) + { + Pret = strcmp(seq1.sequence_ID, seq2.sequence_ID); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "creator") == 0) + { + Pret = strcmp(seq1.creator, seq2.creator); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "offset") == 0) + { + Pret = seq1.offset - seq2.offset; + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "group-ID") == 0) + { + Pret = seq1.group_ID - seq2.group_ID; + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "barcode") == 0) + { + if(seq1.barcode[0] == 'P') + strcpy(b1, seq1.barcode+2); + else + strcpy(b1, seq1.barcode); + + if(seq2.barcode[0] == 'P') + strcpy(b2, seq2.barcode+2); + else + strcpy(b2, seq2.barcode); + + Pret = strcmp(b1, b2); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "seqlen") == 0) + { + Pret = 
seq1.seqlen - seq2.seqlen; + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "creation-date") == 0) + { + seq1.creation_date[0] %= 100; + seq2.creation_date[0] %= 100; + Pret = seq1.creation_date[0]*10000 + + seq1.creation_date[1]*100 + + seq1.creation_date[2] + - seq2.creation_date[0]*10000 + - seq2.creation_date[1]*100 + - seq2.creation_date[2]; + if(Pret == 0) + { + Pret = seq1.creation_date[3]*10000 + + seq1.creation_date[4]*100 + + seq1.creation_date[5] + - seq2.creation_date[3]*10000 + - seq2.creation_date[4]*100 + - seq2.creation_date[5]; + } + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "probing-date") == 0) + { + seq1.probing_date[0] %= 100; + seq2.probing_date[0] %= 100; + Pret = seq1.probing_date[0]*10000 + + seq1.probing_date[1]*100 + + seq1.probing_date[2] + - seq2.probing_date[0]*10000 + - seq2.probing_date[1]*100 + - seq2.probing_date[2]; + if(Pret == 0) + { + Pret = seq1.probing_date[3]*10000 + + seq1.probing_date[4]*100 + + seq1.probing_date[5] + - seq2.probing_date[3]*10000 + - seq2.probing_date[4]*100 + - seq2.probing_date[5]; + } + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "autorad_date") == 0) + { + seq1.autorad_date[0] %= 100; + seq2.autorad_date[0] %= 100; + Pret = seq1.autorad_date[0]*10000 + + seq1.autorad_date[1]*100 + + seq1.autorad_date[2] + - seq2.autorad_date[0]*10000 + - seq2.autorad_date[1]*100 + - seq2.autorad_date[2]; + if(Pret == 0) + { + Pret = seq1.autorad_date[3]*10000 + + seq1.autorad_date[4]*100 + + seq1.autorad_date[5] + - seq2.autorad_date[3]*10000 + - seq2.autorad_date[4]*100 + - seq2.autorad_date[5]; + } + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "film") == 0) + { + Pret = strcmp(seq1.film, seq2.film); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else if(strcmp(Pkey, "membrane") == 0) + { + Pret = strcmp(seq1.membrane, seq2.membrane); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + else 
if(strcmp(Pkey, "contig") == 0) + { + Pret = strcmp(seq1.contig, seq2.contig); + if(Pret != 0 || Skey[0] == '\0') return Pret; + } + + else + { + fprintf(stderr,"CompKey(): Invalid primary key %s.\n",Pkey); + exit(1); + } + + if(strcmp(Skey, "type") == 0) + { + return (strcmp(seq1.type, seq2.type)); + } + else if(strcmp(Skey, "name") == 0) + { + return (strcmp(seq1.name, seq2.name)); + } + else if(strcmp(Skey, "sequence-ID") == 0) + { + return (strcmp(seq1.sequence_ID, seq2.sequence_ID)); + } + else if(strcmp(Skey, "creator") == 0) + { + return (strcmp(seq1.creator, seq2.creator)); + } + else if(strcmp(Skey, "offset") == 0) + { + return (seq1.offset - seq2.offset); + } + else if(strcmp(Skey, "group-ID") == 0) + { + return (seq1.group_ID - seq2.group_ID); + } + else if(strcmp(Skey, "barcode") == 0) + { + if(seq1.barcode[0] == 'P') + strcpy(b1, seq1.barcode+2); + else + strcpy(b1, seq1.barcode); + + if(seq2.barcode[0] == 'P') + strcpy(b2, seq2.barcode+2); + else + strcpy(b2, seq2.barcode); + + return (strcmp(b1, b2)); + } + else if(strcmp(Skey, "seqlen") == 0) + { + return(seq1.seqlen - seq2.seqlen); + } + else if(strcmp(Skey, "creation-date") == 0) + { + seq1.creation_date[0] %= 100; + seq2.creation_date[0] %= 100; + Pret = seq1.creation_date[0]*10000 + + seq1.creation_date[1]*100 + + seq1.creation_date[2] + - seq2.creation_date[0]*10000 + - seq2.creation_date[1]*100 + - seq2.creation_date[2]; + if(Pret != 0) + return Pret; + + return(seq1.creation_date[3]*10000 + + seq1.creation_date[4]*100 + + seq1.creation_date[5] + - seq2.creation_date[3]*10000 + - seq2.creation_date[4]*100 + - seq2.creation_date[5]); + } + else if(strcmp(Skey, "probing-date") == 0) + { + seq1.probing_date[0] %= 100; + seq2.probing_date[0] %= 100; + Pret = seq1.probing_date[0]*10000 + + seq1.probing_date[1]*100 + + seq1.probing_date[2] + - seq2.probing_date[0]*10000 + - seq2.probing_date[1]*100 + - seq2.probing_date[2]; + if(Pret != 0) + return Pret; + + return(seq1.probing_date[3]*10000 + + 
seq1.probing_date[4]*100 + + seq1.probing_date[5] + - seq2.probing_date[3]*10000 + - seq2.probing_date[4]*100 + - seq2.probing_date[5]); + } + else if(strcmp(Skey, "autorad_date") == 0) + { + seq1.autorad_date[0] %= 100; + seq2.autorad_date[0] %= 100; + Pret = seq1.autorad_date[0]*10000 + + seq1.autorad_date[1]*100 + + seq1.autorad_date[2] + - seq2.autorad_date[0]*10000 + - seq2.autorad_date[1]*100 + - seq2.autorad_date[2]; + if(Pret != 0) + return Pret; + + return(seq1.autorad_date[3]*10000 + + seq1.autorad_date[4]*100 + + seq1.autorad_date[5] + - seq2.autorad_date[3]*10000 + - seq2.autorad_date[4]*100 + - seq2.autorad_date[5]); + } + else if(strcmp(Skey, "film") == 0) + { + return(strcmp(seq1.film, seq2.film)); + } + else if(strcmp(Skey, "membrane") == 0) + { + return(strcmp(seq1.membrane, seq2.membrane)); + } + else if(strcmp(Skey, "contig") == 0) + { + return(strcmp(seq1.contig, seq2.contig)); + } + else + { + fprintf(stderr, "CompKey(): Invalid secondary key %s.\n",Skey); + exit(1); + } +} + + + +int Lock(fname) + char *fname; +{ + char buffer[1024]; + FILE *fp; + int wait = 0; + + while((fp = fopen(fname, "r")) == NULL) + { + sleep(1); + if(++wait == 30) + { + fprintf(stderr, "File %s not available, Try later.\n\n", fname); + return FALSE; + } + } + fclose(fp); + sprintf(buffer, "mv %s %s.locked", fname, fname); + system(buffer); + return TRUE; +} + + +void Unlock(fname) +char *fname; +{ + char buffer[1024]; + sprintf(buffer, "mv %s.locked %s", fname, fname); + system(buffer); +} + + +AppendComments(seq, str) +Sequence *seq; +char *str; +{ + int ii, jj, kk; + + kk = strlen(str); + + if(seq->commentsmaxlen == 0) + { + seq->comments = (char *)Calloc(kk+1, 1); + seq->commentsmaxlen = kk+1; + seq->commentslen = 0; + } + else if(seq->commentslen+kk+1>seq->commentsmaxlen) + { + seq->commentsmaxlen += 2*(kk+1); + seq->comments = (char *) + Realloc(seq->comments, seq->commentsmaxlen); + } + seq->comments[seq->commentslen] = '\0'; + seq->comments[seq->commentslen] = 
'\0'; + strcat(seq->comments, str); + seq->commentslen = strlen(seq->comments); +} diff --git a/HGL_SRC/HGLfuncs.o b/HGL_SRC/HGLfuncs.o new file mode 100755 index 0000000..05f85b1 Binary files /dev/null and b/HGL_SRC/HGLfuncs.o differ diff --git a/HGL_SRC/MAP_ChooseFile.c b/HGL_SRC/MAP_ChooseFile.c new file mode 100755 index 0000000..debfef9 --- /dev/null +++ b/HGL_SRC/MAP_ChooseFile.c @@ -0,0 +1,703 @@ +/* +Copyright (c) 1989-1990, University of Illinois board of trustees. All +rights reserved. Written by Michael Maciukenas at the Center for Prokaryote +Genome Analysis. Design and implementation guidance by Steven Smith, Carl +Woese. +*/ +/* File picker by Mike Maciukenas +** Allows the user to search up and down the directory tree, and choose a +** file. +** "Open" descends down into a directory, or chooses a file (depending ** on what is selected). The user may also press return after choosing +** a file or directory, to do the same thing. +** "Up Dir" ascends to the parent directory. +** "Cancel" cancels the operation. +** The user may also type a directory into the "Directory:" field. When the +** user presses return (or tab, or newline), the contents of the new directory +** will be shown. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define GBUFSIZ 256 /* buffer size, remove when adding to Steve's code */ + +#define FL_VIEW_H 15 /* # of files to show in one page, originally */ + + +/* structure for a linked list that allows sorting of filenames */ +typedef struct namedata {char *FileN; /* file name */ + int type; /* flag: 1 if directory '/' + ** 2 if executable '*' + ** 3 if symbolic link '@' + ** 4 if socket '=' + ** 0 if normal */ + struct namedata *Next; /* next in list */ + } NameData; + +Frame fl_getframe = XV_NULL; /* frame, is set to XV_NULL by free_mem(), + ** load_file() checks this to see if it should + ** destroy an existing frame */ +Scrollbar fl_scroll; /* the scrollbar for the file list canvas */ +Canvas fl_FileList; /* the file list canvas */ +Panel_item fl_DirText; /* the text item that displays the directory */ +Panel fl_Getpanel; /* the panel, contains buttons, and DirText */ +GC fl_gc; /* gc to use for drawing file names, just the default GC with + ** the frame's font copied in. */ +int fl_current_picked, fl_current_len; /* the current item picked in the file + ** list, and the current number of items + ** in the file list */ +int fl_cell_h, fl_width, fl_ascent; /* the height of the font, the width of the + ** canvas, and the default ascent of the + ** font, all used for drawing into the file + ** list canvas */ +Xv_opaque data; + +extern int set_offset, *matrix; +/* extern BuildMatrix(); */ + +NameData *fl_start; /* the root node for the linked list of filenames */ + +Frame load_file(Parentframe, x, y, passdata) +/* pick a file for loading. 
*/ +Frame Parentframe; +int x, y; +Xv_opaque passdata; +{ + + /* callback procedures */ + int fl_open_btn_lf(), fl_up_dir_btn(), lf_cancel_btn(); + void fl_show_list_lf(); + void fl_list_select_lf(); + Panel_setting fl_dir_typed(); + /* interposed destroy function */ + Notify_value fl_free_mem(); + + char dirname[GBUFSIZ]; + Display *display; + Xv_screen screen; + int screen_no; + Xv_Font font; + XFontStruct *font_data; + + data=passdata; + + /* create the frame */ + fl_getframe = xv_create(Parentframe, FRAME, + FRAME_LABEL, "Choose File", + FRAME_SHOW_RESIZE_CORNER, FALSE, + XV_X, x, + XV_Y, y, + NULL); + notify_interpose_destroy_func(fl_getframe, fl_free_mem); + + /* get font characteristics */ + font = xv_get(fl_getframe, XV_FONT); + fl_cell_h = xv_get(font, FONT_DEFAULT_CHAR_HEIGHT); + fl_width = 50*xv_get(font, FONT_DEFAULT_CHAR_WIDTH); + font_data = (XFontStruct *)xv_get(font, FONT_INFO); + fl_ascent = font_data->ascent; + + /* create the panel and panel buttons */ + fl_Getpanel = xv_create(fl_getframe, PANEL, + NULL); + (void) xv_create(fl_Getpanel, PANEL_BUTTON, + PANEL_LABEL_STRING, "Open", + PANEL_NOTIFY_PROC, fl_open_btn_lf, + NULL); + (void) xv_create(fl_Getpanel, PANEL_BUTTON, + PANEL_LABEL_STRING, "Up Dir", + PANEL_NOTIFY_PROC, fl_up_dir_btn, + NULL); + (void) xv_create(fl_Getpanel, PANEL_BUTTON, + PANEL_LABEL_STRING, "Cancel", + PANEL_NOTIFY_PROC, lf_cancel_btn, + NULL); + /* create the "Directory:" field, initialized to the current working dir */ + getcwd(dirname, GBUFSIZ); + fl_DirText = xv_create(fl_Getpanel, PANEL_TEXT, + PANEL_LABEL_STRING,"Directory:", + XV_X, xv_col(fl_Getpanel, 0), + XV_Y, xv_row(fl_Getpanel, 1), + PANEL_VALUE_STORED_LENGTH, GBUFSIZ, + PANEL_VALUE_DISPLAY_LENGTH, 30, + PANEL_VALUE, dirname, + PANEL_NOTIFY_LEVEL, PANEL_SPECIFIED, + PANEL_NOTIFY_STRING, "\n\r\t", + PANEL_NOTIFY_PROC, fl_dir_typed, + NULL); + + window_fit(fl_Getpanel); + + /* create the file list canvas, below the above panel */ + fl_FileList = 
xv_create(fl_getframe, CANVAS, + XV_X, 0, + WIN_BELOW, fl_Getpanel, + XV_WIDTH, fl_width, + XV_HEIGHT, FL_VIEW_H*fl_cell_h+7, + CANVAS_REPAINT_PROC, fl_show_list_lf, + CANVAS_AUTO_EXPAND, FALSE, + CANVAS_AUTO_SHRINK, FALSE, + CANVAS_WIDTH, fl_width, + CANVAS_HEIGHT, fl_cell_h, + CANVAS_RETAINED, FALSE, + OPENWIN_AUTO_CLEAR, FALSE, + NULL); + fl_scroll = xv_create(fl_FileList, SCROLLBAR, + SCROLLBAR_DIRECTION, SCROLLBAR_VERTICAL, + SCROLLBAR_PIXELS_PER_UNIT, fl_cell_h, + SCROLLBAR_VIEW_LENGTH, fl_view_h(), + SCROLLBAR_PAGE_LENGTH, fl_view_h(), + NULL); + xv_set(canvas_paint_window(fl_FileList), + WIN_EVENT_PROC, fl_list_select_lf, + WIN_CONSUME_EVENTS, WIN_MOUSE_BUTTONS, LOC_DRAG, WIN_ASCII_EVENTS, NULL, + NULL); + xv_set(fl_Getpanel, XV_WIDTH, xv_get(fl_FileList, XV_WIDTH), NULL); + + /* set up the gc for drawing into the file list */ + display = (Display *)xv_get(fl_getframe, XV_DISPLAY); + screen = (Xv_screen)xv_get(fl_getframe, XV_SCREEN); + screen_no = (int)xv_get(screen, SCREEN_NUMBER); + fl_gc = XCreateGC(display, RootWindow(display, screen_no), + 0, NULL); + XCopyGC(display, DefaultGC(display, DefaultScreen(display)), + 0xFFFFFFFF, fl_gc); + XSetFont(display, fl_gc, xv_get(font, XV_XID)); +/* +* Added S.Smith 2/5/91 +*/ + XSetForeground(display,fl_gc,BlackPixel(display,DefaultScreen(display))); + XSetBackground(display,fl_gc,WhitePixel(display,DefaultScreen(display))); + + + /* set up the extra trailing node for the linked list, makes insertion + ** into the list easier */ + fl_start = (NameData *)calloc(1, sizeof(NameData)); + fl_start->FileN = (char *)NULL; + fl_start->Next = NULL; + + /* make the list, showing files in the application`s current directory + */ + (void) fl_make_list(); + + window_fit(fl_getframe); + xv_set(fl_getframe, XV_SHOW, TRUE, NULL); + return(fl_getframe); +} + + +int fl_open_btn_lf(item, event) +/* callback procedure for the open button. 
If it's a directory, switch to +** the new directory, otherwise return the filename +*/ +Panel_item item; +Event *event; +{ + int i, end, r; + char namebuf[GBUFSIZ], thestr[GBUFSIZ]; + NameData *current; + Frame top_frame; + + if(fl_current_picked != -1) /* then an item is selected. Work with it */ + { + /* find item in list */ + current = fl_start; + for(i=0; iNext; + strcpy(namebuf, current->FileN); + if(current->type == 1) /* then it's a directory, so switch to it */ + { + if(fl_checkdir(namebuf)) + { + chdir(namebuf); + (void) fl_make_list(); + fl_set_dirtext(fl_DirText); + return XV_OK; + } + } + else /* it's a file name, so return it */ + { + if(fl_checkdir(xv_get(fl_DirText, PANEL_VALUE))) /* then valid dir */ + { + if(current->type != 0) /* then it's not a regular file, so strip off + ** the extra type character: *, =, /, or @ */ + namebuf[strlen(namebuf)-1]='\0'; + /* create the file string (with full directory path) */ + getcwd(thestr, GBUFSIZ); + if(thestr[strlen(thestr)-1] != '/') + strcat(thestr, "/"); + strcat(thestr, namebuf); + + /* call load data procedure. */ + + top_frame = (Frame) xv_get( + (Frame) xv_get( + (Panel) xv_get(item,XV_OWNER), + XV_OWNER), + XV_OWNER); + + xv_set(top_frame, FRAME_LABEL, thestr, NULL); + +/******** + if(strcmp(thestr+(strlen(thestr)-3), "GDE") == 0) + { + LoadGDEData(thestr); + set_offset = 0; + } + else if(strcmp(thestr+(strlen(thestr)-3), "HGL") == 0) + { +*********/ + if((r = LoadHGLData(thestr)) == -1) + { + return -1; + } + set_offset = r; +/******** + } + else + printf("File format unknown. 
Should be .HGL or .GDE \n"); +********/ + +/* + rect = (Rect *)xv_get(canvas,CANVAS_VIEWABLE_RECT,view_win); + XClearArea(display,xwin, + rect->r_left, rect->r_top, + rect->r_width, rect->r_height, + 0); + (void)xv_set(view_win, + CANVAS_HEIGHT, max_lines*max_pbl+margin, + CANVAS_WIDTH, max_dots/min_scale+margin, + NULL); +*/ + xv_destroy_safe(fl_getframe); + return XV_OK; + } + else + { /* invalid directory, so show notice*/ + int result; + Panel panel = (Panel)xv_get(fl_FileList, PANEL_PARENT_PANEL); + + result = notice_prompt(panel, NULL, + NOTICE_MESSAGE_STRINGS, "Invalid Directory specified.", NULL, + NOTICE_FOCUS_XY, event_x(event), event_y(event), + NOTICE_BUTTON_YES, "Change Directory", + NULL); + } + } + } +} + +int fl_up_dir_btn(item, event) +/* go up one directory */ +Panel_item item; +Event *event; +{ + char dirname[GBUFSIZ]; + + /* pretty simple, just go up, show it, and change the "Directory:" field */ + (void) chdir(".."); + (void) fl_make_list(); + fl_set_dirtext(fl_DirText); + return XV_OK; +} + +Panel_setting fl_dir_typed(item, event) +/* handle when user types return, newline, or tab in the "Directory:" field. 
+** if it's a valid directory, it moves to it, otherwise, display a notice +*/ +Panel_item item; +Event *event; +{ + int error; + char dirname[GBUFSIZ]; + + switch (event_action(event)) + { + case '\n': + case '\r': + case '\t': + { + if(fl_checkdir(xv_get(fl_DirText, PANEL_VALUE))) + { /* valid directory, chdir to it and show it */ + chdir(xv_get(fl_DirText, PANEL_VALUE)); + fl_make_list(); + fl_set_dirtext(fl_DirText); + } + else + { /* invalid directory, so show notice */ + int result; + Panel panel = (Panel)xv_get(fl_FileList, PANEL_PARENT_PANEL); + + result = notice_prompt(panel, NULL, + NOTICE_MESSAGE_STRINGS, "Invalid Directory specified.", NULL, + NOTICE_FOCUS_XY, event_x(event), event_y(event), + NOTICE_BUTTON_YES, "Change Directory", + NULL); + } + return PANEL_NONE; + }; + /* if it wasn't \n, \t, or \r, pass event on to standard + ** panel_text handler + */ + default: + return(panel_text_notify(item, event)); + } +} + +int lf_cancel_btn(item, event) +/* handle the cancel button. Just destroys the frame and returns +*/ +Panel_item item; +Event *event; +{ + + LoadHGLData(""); + xv_destroy_safe(fl_getframe); + return XV_OK; +} + +fl_readln(file, buf) +FILE *file; +char *buf; +{ + int i; + int ch; + + ch = getc(file); + if(ch==EOF) + { + buf[0]='\0'; + return; + } + i=0; + do + { + buf[i++]=ch; + ch = getc(file); + } while(ch!='\n'); + buf[i]='\0'; +} + +int fl_make_list() +/* Creates a list of files, out of the current working directory. It then +** tells the file list canvas to refresh itself. The list sits attached to +** fl_start, for reading by the show_list() routine. 
+*/ +{ + FILE *dirp; /* for directory data */ + int i, list_len, cur_pos; + char dirname[GBUFSIZ], tempbuf[GBUFSIZ]; + NameData *current, *temp; /* structures for reading + ** and sorting file names */ + int notdone; + struct stat statbuf; /* for checking if a file + ** name is a directory */ + int pid = getpid(); /* for creation of temp + ** file for directory list */ + char tmpcmd[GBUFSIZ]; /* for holding ls command */ + char tmpname[GBUFSIZ]; /* for holding file names */ + + + getcwd(dirname, GBUFSIZ); + sprintf(tmpcmd, "cd %s;ls -aF > /usr/tmp/.svlffil%d", dirname, pid); + sprintf(tmpname, "/usr/tmp/.svlffil%d", pid); + system(tmpcmd); + dirp = fopen(tmpname, "r"); + if (dirp == NULL) /* just a check to make sure */ + { + fprintf(stderr, "fl_make_list was passed bad directory name\n"); + return(-1); + } + else + { + /* free up the old list, to build a new one */ + for(current = fl_start; current->FileN != (char *)NULL; i++) + { + temp = current; + current = current->Next; + free(temp->FileN); + free(temp); + }; + /* set up the linked list for sorting */ + fl_start = (NameData *)calloc(1, sizeof(NameData)); + fl_start->FileN = (char *)NULL; + fl_start->Next = NULL; + /* read through the directory entries */ + list_len = 0; + for(fl_readln(dirp, tempbuf); tempbuf[0] != '\0'; fl_readln(dirp, tempbuf)) + { + /* don't include "." and ".." 
in the list */ + if((strcmp(tempbuf,"./")!=0)&& + (strcmp(tempbuf,"../")!=0)) + { + /* find the right spot in the list to insert the new name */ + current = fl_start; + notdone = 1; + while(notdone) + if(current->FileN == NULL) + notdone = 0; + else if(strcmp(tempbuf, current->FileN)>0) + current = current->Next; + else + notdone = 0; + /* insert the new name */ + temp = (NameData *)calloc(1, sizeof(NameData)); + temp->FileN = current->FileN; + temp->type = current->type; + temp->Next = current->Next; + ++list_len; + current->Next = temp; + /* set flag for file type */ + switch(tempbuf[strlen(tempbuf)-1]) + { + case '/': /* directory */ + { + current->type = 1; + break; + } + case '@': /* symbolic link */ + { + current->type = 3; + break; + } + case '=': /* socket */ + { + current->type = 4; + break; + } + case '*': /* executable */ + { + current->type = 2; + break; + } + default: + { + current->type = 0; + break; + } + } + current->FileN = (char *)calloc(1, strlen(tempbuf)+1); + strcpy(current->FileN,tempbuf); + }; + } + fclose(dirp); + sprintf(tmpcmd, "rm %s", tmpname); + system(tmpcmd); + + /* adjust the Canvas size, and refresh it */ + fl_current_len = list_len; + cur_pos = xv_get(fl_scroll, SCROLLBAR_VIEW_START); + xv_set(fl_FileList, CANVAS_HEIGHT, + (list_len+fl_view_h()+1)*fl_cell_h, + NULL); + /* scrollbars bomb with zero-length objects */ + if(list_len == 0) ++list_len; + /* reset scrollbar */ + xv_set(fl_scroll, SCROLLBAR_VIEW_START, 0, + SCROLLBAR_OBJECT_LENGTH, list_len, + NULL); + /* refresh canvas */ + wmgr_refreshwindow(canvas_paint_window(fl_FileList)); + fl_current_picked = -1; + return(0); + } +} + +fl_set_dirtext(fl_DirText) +/* sets the "Directory:" field according to the current directory +** fl_DirText is the Xview pointer to the fl_DirText Panel Item +*/ +Panel_item fl_DirText; +{ + char dirbuf[GBUFSIZ]; + + getcwd(dirbuf, GBUFSIZ); + xv_set(fl_DirText, PANEL_VALUE, dirbuf, NULL); + +} + +int fl_checkdir(dirname) +/* check if a directory can 
be opened. directory can be specified by +** full root name or by current name. returns true if it can be opened. +*/ +char *dirname; +{ + DIR *dirp; + + dirp = opendir(dirname); + if(dirp == NULL) /* not available, user cannot enter */ + return(0); + else + { + closedir(dirp); /* must close it */ + return(1); + } +} + +void fl_show_list_lf(canvas, paint_window, repaint_area) +/* repaint procedure for the file list canvas. Repaints all file names in +** the damaged area */ +Canvas canvas; +Xv_Window paint_window; +Rectlist *repaint_area; +{ + NameData *current; + int i; + int start_draw, end_draw; + Display *dpy; + Window xwin; + + + /* make sure AUTO_CLEAR is off, this routine will do it itself */ + while(xv_get(fl_FileList, OPENWIN_AUTO_CLEAR)!=FALSE) + { + fprintf(stderr, "lf:found bug--OPENWIN_AUTO_CLEAR still TRUE"); + xv_set(fl_FileList, OPENWIN_AUTO_CLEAR, FALSE, NULL); + } + /* make sure RETAINED is off, this routine will repaint itself */ + while(xv_get(fl_FileList, CANVAS_RETAINED)!=FALSE) + { + fprintf(stderr, "lf:found bug--CANVAS_RETAINED still TRUE"); + xv_set(fl_FileList, CANVAS_RETAINED, FALSE, NULL); + } + /* get display and window */ + dpy = (Display *)xv_get(paint_window, XV_DISPLAY); + xwin = (Window)xv_get(paint_window, XV_XID); + + /* clear the area given us by Xview, for simplicity, we clear the + ** smallest rectangle that encloses all of the destroyed areas, the + ** rl_bound rectangle */ + XClearArea(dpy, xwin, + repaint_area->rl_bound.r_left, + repaint_area->rl_bound.r_top, + repaint_area->rl_bound.r_width, + repaint_area->rl_bound.r_height, + 0); + /* the next 3 lines calculate which file names must be drawn, by where the + ** top and bottom of the rl_bound rectangle lie */ + start_draw = repaint_area->rl_bound.r_top; + end_draw = (repaint_area->rl_bound.r_height + start_draw - 1) / fl_cell_h; + start_draw = (start_draw - 1) / fl_cell_h; + + /* find the first element to draw in the list */ + current = fl_start; + for(i = 0; (iNext != 
NULL); i++) + current = current->Next; + /* now start drawing them */ + for(; (i<=end_draw) && (current->Next != NULL); i++) + { + XDrawString(dpy, xwin, fl_gc, 5, i*fl_cell_h+fl_ascent, current->FileN, + strlen(current->FileN)); + /* add a box if we are drawing the currently picked one */ + if(i==fl_current_picked) + { + XDrawRectangle(dpy, xwin, fl_gc, + 2, i*fl_cell_h, + xv_get(canvas, XV_WIDTH)-11-xv_get(fl_scroll, XV_WIDTH), + fl_cell_h); + } + current = current->Next; + } +} + +void fl_list_select_lf(paint_window, event) +/* callback procedure for events that happen in the file list canvas. Checks +** mouse button press or drag, and for when the user types return */ +Xv_window paint_window; +Event *event; +{ + int picked, cur_pos; + Window xwin = (Window)xv_get(paint_window, XV_XID); + Display *dpy; + + dpy = (Display *)xv_get(paint_window, XV_DISPLAY); + /* get the current position of the scrollbar for future reference */ + cur_pos = xv_get(fl_scroll, SCROLLBAR_VIEW_START); + + /* first, check for user picking a file name */ + if((event_action(event) == ACTION_SELECT)|| + (event_action(event) == LOC_DRAG)) + { + picked = (event_y(event) - 1) / fl_cell_h; + /* make sure the file picked is on screen. if it is not, + ** we just ignore it. 
this avoids wierd stuff, like being + ** able to pick files that aren't shown on screen */ + if((picked >= cur_pos)&& + (picked < cur_pos+fl_view_h())&& + (picked < fl_current_len)) + { + /* efficiency: ignore if it is already picked */ + if(picked != fl_current_picked) + { + XSetFunction(dpy, fl_gc, GXclear); + XDrawRectangle(dpy, xwin, fl_gc, + 2, fl_current_picked*fl_cell_h, + xv_get(fl_FileList, XV_WIDTH)-11- + xv_get(fl_scroll, XV_WIDTH), + fl_cell_h); + XSetFunction(dpy, fl_gc, GXcopy); + XDrawRectangle(dpy, xwin, fl_gc, + 2, picked*fl_cell_h, + xv_get(fl_FileList, XV_WIDTH)-11- + xv_get(fl_scroll, XV_WIDTH), + fl_cell_h); + fl_current_picked = picked; + } + } + } + /* user may have pressed return, then just call the open button + ** callback procedure. PANEL_FIRST_ITEM gets the pointer to the + ** open button itself, since it happens to be the first item on + ** the panel. fl_open_btn doesn't really use this parameter, but + ** just in case it ever does, we include it. */ + else if((event_is_ascii(event))&&(event_action(event) == '\r')) + fl_open_btn_lf(xv_get(fl_Getpanel, PANEL_FIRST_ITEM), event); + else + return; +} +int fl_view_h() +/* returns the current height (in # of file names displayed) of the file list */ +{ + return (((int)xv_get(fl_FileList, XV_HEIGHT))/fl_cell_h); +} + +Notify_value +fl_free_mem(client, status) +/* clean up when the frame is destroyed. 
Frees up the memory used in the +** linked list of file names, and sets the Frame variable (getframe) to null */ +Notify_client client; +Destroy_status status; +{ + NameData *current, *temp; + int i; + +switch (status) + { + case DESTROY_CHECKING: + return NOTIFY_DONE; + case DESTROY_CLEANUP: + { + for(current = fl_start; current->FileN != (char *)NULL; i++) + { + temp = current; + current = current->Next; + free(temp->FileN); + free(temp); + }; + fl_getframe = XV_NULL; + return notify_next_destroy_func(client, status); + } + default: + return NOTIFY_DONE; + } +} diff --git a/HGL_SRC/MAP_ChooseFile.o b/HGL_SRC/MAP_ChooseFile.o new file mode 100755 index 0000000..2bbac68 Binary files /dev/null and b/HGL_SRC/MAP_ChooseFile.o differ diff --git a/HGL_SRC/MakeCons b/HGL_SRC/MakeCons new file mode 100755 index 0000000..a1777f5 Binary files /dev/null and b/HGL_SRC/MakeCons differ diff --git a/HGL_SRC/MakeCons.c b/HGL_SRC/MakeCons.c new file mode 100755 index 0000000..01b4241 --- /dev/null +++ b/HGL_SRC/MakeCons.c @@ -0,0 +1,178 @@ +#include +#include +#include "global_defs.h" + +main(ac,av) + int ac; + char *av[]; +{ + Sequence cons; /* master alignment */ + Sequence *master; /* Current Walking sets to add */ + int cursize, maxsize = 10, ii; + char str[2], cons_type; + FILE *file, *consout_fp, *maskout_fp; + int conserved_color, variable_color, partial_color, major_perc; + + if(ac == 1) + { + fprintf(stderr, "Usage:\n"); + fprintf(stderr, + "%s %s\n\t%s\n\t%s\n\t%s\n\t%s\n\t%s\n\t%s\n\t%s\n", + av[0], + "sequence-file", + "[-iupac] IUPAC consensus. 
Default", + "[-majority percent] Majority consensus (default percent: 75)", + "[-maskv colorv] Variable position color", + "[-maskc colorc] Conserved position color", + "[-maskp colorp] Partially conserved color", + "[-consout output-consensus] Default: stdout", + "[-maskout output-mask]"); + exit (0); + } + + InitRecord(&cons); + + if((file = fopen(av[1],"r")) == NULL) + { + fprintf(stderr, "Can't open sequence-file %s.\n", av[1]); + exit(1); + } + + + master = (Sequence*)Calloc(maxsize,sizeof(Sequence)); + + cursize = 0; + while(ReadRecord(file,&(master[cursize])) != -1) + { + SeqNormal(&master[cursize]); + if(++cursize == maxsize) + { + maxsize *= 2; + master = (Sequence*) + Realloc(master, maxsize*sizeof(Sequence)); + } + + master[cursize].group_number = 99999; + } + + fclose(file); + + cons_type = ' '; /* 'i':IUPAC, 'm':majority, 'k':mask */ + consout_fp = stdout; + maskout_fp = NULL; + conserved_color = 8; /* black */ + variable_color = 3; /* red */ + + ii = 2; + while(ii < ac) + { + if(strcmp(av[ii], "-iupac") == 0) + { + cons_type = 'i'; + } + else if(strcmp(av[ii], "-majority") == 0) + { + cons_type = 'm'; + ii++; + major_perc = atoi(av[ii]); + } + else if(strcmp(av[ii], "-maskv") == 0) + { + variable_color = atoi(av[++ii]); + if(cons_type == ' ') + cons_type = 'k'; + } + else if(strcmp(av[ii], "-maskc") == 0) + { + conserved_color = atoi(av[++ii]); + if(cons_type == ' ') + cons_type = 'k'; + } + else if(strcmp(av[ii], "-maskp") == 0) + { + partial_color = atoi(av[++ii]); + if(cons_type == ' ') + cons_type = 'k'; + } + else if(strcmp(av[ii], "-consout") == 0) + { + if((consout_fp = fopen(av[++ii], "w")) == NULL) + { + fprintf(stderr, "Can't open output file %s.\n",av[ii]); + consout_fp = stdout; + } + } + else if(strcmp(av[ii], "-maskout") == 0) + { + if((maskout_fp = fopen(av[++ii], "w")) == NULL) + { + fprintf(stderr, "Can't open output file %s.\n",av[ii]); + } + } + else + { + fprintf(stderr, "Invalid flag %s\n", av[ii]); + } + ii++; + } + + 
if(cons_type == ' ') + cons_type = 'i'; + + if(cons_type != 'k') + { + if(maskout_fp != NULL) + { + /* Useful only when output to GDE. */ + fprintf(maskout_fp, "length:%d\n", cons.seqlen); + fprintf(maskout_fp, "start:\n"); + } + + if((cons_type == 'i' && + MakeConsensus(master,cursize,&cons,0,cons_type)==FALSE) || + (cons_type == 'm' && + MajorityCons(master,cursize,&cons,0, major_perc) == FALSE)) + { + fprintf(stderr, "Failed to make consensus.\n"); + exit(1); + } + WriteRecord(consout_fp, &cons, NULL, 0); + } + else + { + if(MakeScore(master,cursize,&cons,0) == FALSE) + { + fprintf(stderr, "Failed to make consensus.\n"); + exit(1); + } + + /*WriteRecord(stdout, &cons, NULL, 0); + printf("\n\n");*/ + + if(maskout_fp == NULL) + { + maskout_fp = stdout; + } + + fprintf(maskout_fp, "length:%d\n", cons.seqlen); + fprintf(maskout_fp, "start:\n"); + + for(ii = 0; ii < cons.seqlen; ii++) + { + switch(cons.c_elem[ii]) + { + case 'F': + fprintf(maskout_fp, "%d\n", partial_color); + break; + case 'E': + fprintf(maskout_fp, "%d\n", conserved_color); + break; + default: + fprintf(maskout_fp, "%d\n", variable_color); + break; + } + } + } + fclose(consout_fp); + fclose(maskout_fp); +} diff --git a/HGL_SRC/Makefile b/HGL_SRC/Makefile new file mode 100755 index 0000000..95a04e1 --- /dev/null +++ b/HGL_SRC/Makefile @@ -0,0 +1,43 @@ + +CC = cc +#FLAGS = -g +OPENWINHOME = /usr/openwin +MFILE = +INCDIR = -I$(OPENWINHOME)/include +LIBDIR = -L$(OPENWINHOME)/lib +LIBS = -lxview -lolgx -lX11 + +libs.o = Alloc.o HGLfuncs.o + +all: mapview MakeCons Consto01mask PrintStrat Translate heapsortHGL DotPlotTool + +Alloc.o: Alloc.c + $(CC) $(FLAGS) -c Alloc.c + +HGLfuncs.o: HGLfuncs.c + $(CC) $(FLAGS) -c HGLfuncs.c + +MAP_ChooseFile.o: MAP_ChooseFile.c + $(CC) $(FLAGS) -c MAP_ChooseFile.c $(INCDIR) + +mapview: mapview.c MAP_ChooseFile.o $(libs.o) + $(CC) $(FLAGS) -o $@ $@.c MAP_ChooseFile.o $(libs.o) $(INCDIR) $(LIBDIR) $(LIBS) + +DotPlotTool: DotPlotTool.c MAP_ChooseFile.o $(libs.o) + $(CC) 
$(FLAGS) -o $@ $@.c MAP_ChooseFile.o $(libs.o) \ +$(INCDIR) $(LIBDIR) $(LIBS) + +MakeCons: MakeCons.c $(libs.o) $(MKFILE) + $(CC) $(FLAGS) -o $@ $@.c $(libs.o) + +Translate: Translate.c $(libs.o) $(MKFILE) + $(CC) $(FLAGS) -o $@ $@.c $(libs.o) + +heapsortHGL: heapsortHGL.c $(libs.o) $(MKFILE) + $(CC) $(FLAGS) -o $@ $@.c $(libs.o) + +PrintStrat: PrintStrat.c $(libs.o) $(MKFILE) + $(CC) $(FLAGS) -o $@ $@.c $(libs.o) + +Consto01mask: Consto01mask.c $(libs.o) $(MKFILE) + $(CC) $(FLAGS) -o $@ $@.c $(libs.o) diff --git a/HGL_SRC/PrintStrat b/HGL_SRC/PrintStrat new file mode 100755 index 0000000..0087fa7 Binary files /dev/null and b/HGL_SRC/PrintStrat differ diff --git a/HGL_SRC/PrintStrat.c b/HGL_SRC/PrintStrat.c new file mode 100755 index 0000000..17ade0b --- /dev/null +++ b/HGL_SRC/PrintStrat.c @@ -0,0 +1,195 @@ +#include "global_defs.h" + +main(ac,av) + int ac; + char **av; +{ + Sequence *data; + int i,j,k,numseqs=0,maxlen = 0,minlen=999999999; + int lines_printed, Success, maxsize, ss; + int width,scale = 0; + int len[1000]; + FILE *infile; + char a,b, style[32]; + int WIDTH; + + if(ac == 1) + { + fprintf(stderr,"Usage:%s\n", av[0]); + fprintf(stderr, " -in alignment_file [-width max_line_width (50)]\n"); + fprintf(stderr, " [-scale scale (1)] [-style comp|poster (comp)]\n"); + exit(0); + } + + for(i = 1; i < ac; i+=2) + { + if(av[i][0] != '-') + { + fprintf(stderr, "\nInvalid flag %s.\n", av[i]); + fprintf(stderr, "Type %s for usage.\n\n", av[0]); + exit(1); + } + if(i+1 == ac) + { + fprintf(stderr, "\nMissing value for flag %s.\n\n", av[i]); + exit(1); + } + } + + i = 1; + scale = 1; + WIDTH = 50; + infile = NULL; + strcpy(style, "comp"); + while(i < ac) + { + if(strcmp(av[i], "-in") == 0) + { + if((infile = fopen(av[++i],"r")) == NULL) + { + fprintf(stderr,"Cannot open %s\n",av[i]); + exit(1); + } + } + else if(strcmp(av[i], "-scale") == 0) + { + sscanf(av[++i],"%d",&scale); + if(scale == 0) + scale = 1; + } + else if(strcmp(av[i], "-width") == 0) + { + 
sscanf(av[++i], "%d", &WIDTH); + } + else if(strcmp(av[i], "-style") == 0) + { + strcpy(style, av[++i]); + } + else + { + fprintf(stderr, "\nUnknow flag %s\n\n", av[i]); + exit(1); + } + i++; + } + + if(infile == NULL) + { + fprintf(stderr, "\nWhat do you want to print.%c\n\n", 7); + exit(1); + } + + /* + * Read in alignment... + */ + maxsize = 64; + data = (Sequence *)Calloc(maxsize, sizeof(Sequence)); + for(numseqs = 0;Success != -1;numseqs++) + { + Success = ReadRecord(infile,&(data[numseqs])); + if(numseqs == maxsize-1) + { + maxsize *= 2; + data = (Sequence*)Realloc(data, + maxsize*sizeof(Sequence)); + } + + for(j=0; j 0 || strcmp(style, "poster") == 0) + { + minlen = MIN(minlen,data[k].offset); + maxlen = MAX(maxlen,data[k].seqlen+data[k].offset); + } + } + + for(j=minlen;j j+width || + data[i].offset+data[i].seqlen=data[i].offset)) + if(scale == 1) + putchar(data[i].c_elem[k-data[i].offset]); + else + putchar('-'); + else putchar(' '); + } + } + } + if(lines_printed) + { + printf("\n "); + ss = j+1; + while(ss < MIN(j+WIDTH*scale, maxlen+1)) + { + printf("|---------"); + ss += 10*scale; + } + + printf("\n "); + ss = j+1; + while(ss < MIN(j+WIDTH*scale, maxlen+1)) + { + printf("%-10d",ss); + ss += 10*scale; + } + printf("\n"); + } + if(strcmp(style, "poster") == 0) + printf("\f"); + } + putchar('\n'); + exit(0); +} + + +int indx(pos,seq) + int pos; + Sequence *seq; +{ + int j,count=0; + if(pos < seq->offset) + return (0); + if(pos>seq->offset+seq->seqlen) + pos = seq->offset+seq->seqlen; + pos -= seq->offset; + for(j=0;jc_elem[j] != '-') + if(seq->c_elem[j] != '~') + count++; + return (count); +} diff --git a/HGL_SRC/Translate b/HGL_SRC/Translate new file mode 100755 index 0000000..91e68fc Binary files /dev/null and b/HGL_SRC/Translate differ diff --git a/HGL_SRC/Translate.c b/HGL_SRC/Translate.c new file mode 100755 index 0000000..7406d39 --- /dev/null +++ b/HGL_SRC/Translate.c @@ -0,0 +1,387 @@ +#include +#include +#include +#include "global_defs.h" + +char 
vert_mito[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Ter", "AGC","Ser", +"AGG","Ter", "AGT","Ser", "ATA","Met", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Leu", "CTC","Leu", +"CTG","Leu", "CTT","Leu", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" + }, +mycoplasma[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Arg", "AGC","Ser", +"AGG","Arg", "AGT","Ser", "ATA","Ile", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Leu", "CTC","Leu", +"CTG","Leu", "CTT","Leu", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" }, +universal[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Arg", "AGC","Ser", +"AGG","Arg", "AGT","Ser", "ATA","Ile", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", 
"CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Leu", "CTC","Leu", +"CTG","Leu", "CTT","Leu", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Ter", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" }, +yeast[512][4] = +{ +"AAA","Lys", "AAC","Asn", "AAG","Lys", "AAT","Asn", "ACA","Thr", +"ACC","Thr", "ACG","Thr", "ACT","Thr", "AGA","Arg", "AGC","Ser", +"AGG","Arg", "AGT","Ser", "ATA","Met", "ATC","Ile", "ATG","Met", +"ATT","Ile", "CAA","Gln", "CAC","His", "CAG","Gln", "CAT","His", +"CCA","Pro", "CCC","Pro", "CCG","Pro", "CCT","Pro", "CGA","Arg", +"CGC","Arg", "CGG","Arg", "CGT","Arg", "CTA","Thr", "CTC","Thr", +"CTG","Thr", "CTT","Thr", "GAA","Glu", "GAC","Asp", "GAG","Glu", +"GAT","Asp", "GCA","Ala", "GCC","Ala", "GCG","Ala", "GCT","Ala", +"GGA","Gly", "GGC","Gly", "GGG","Gly", "GGT","Gly", "GTA","Val", +"GTC","Val", "GTG","Val", "GTT","Val", "TAA","Ter", "TAC","Tyr", +"TAG","Ter", "TAT","Tyr", "TCA","Ser", "TCC","Ser", "TCG","Ser", +"TCT","Ser", "TGA","Trp", "TGC","Cys", "TGG","Trp", "TGT","Cys", +"TTA","Leu", "TTC","Phe", "TTG","Leu", "TTT","Phe" +}; + + +char three_to_one[23][5] = { +"AlaA", "ArgR", "AsnN", "AspD", +"AsxB", "CysC", "GlnQ", "GluE", +"GlxZ", "GlyG", "HisH", "IleI", +"LeuL", "LysK", "MetM", "PheF", +"ProP", "SerS", "ThrT", "TrpW", +"TyrY", "ValV", "Ter*" +}; + + + +main(ac,av) +int ac; +char **av; +{ + int Success = TRUE,cursize,tbl,i,j,k,maxsize = 10,frame=1, + min_frame = 0,ltrs=0, sep=0, tmp_num_frame, print_comp=FALSE; + Sequence temp,*seqs; + FILE *file; + char number[5],tr_tbl[512][4]; + extern char *Realloc(); + extern 
Translate_NA_AA(); + + + if(ac == 1) + { + fprintf(stderr, + "Options:[-tbl codon_table] [-frame #] [-min_frame #] [-3] [-sep] GDEfile\n"); + fprintf(stderr," 1=universal 1=first frame Shortest AA Three letter don't seperate\n"); + fprintf(stderr," 2=mycoplasma 2=second frame sequence codes groups\n"); + fprintf(stderr," 3=yeast 3=third frame to translate\n"); + fprintf(stderr," 4=Vert. mito. 6=All six\n"); + exit(0); + } + for(j=1;jname, 80); + temp=(char*)Calloc(seq->seqlen+1,sizeof(char)); + for(i=0;iseqlen;i++) + temp[i] = '-'; + + if(letter_code == 1) + { +/* +* Triple letter codes +*/ + strcpy(seq->type,"TEXT"); + } + else + { +/* +* Single Letter Codes +*/ + strcpy(seq->type , "PROT"); + } + +/* +* Skip over r_frame valid characters (skip ' ','-','~') +*/ + for(true_start=0,i=0; iseqlen;true_start++) + { + c=seq->c_elem[true_start]; + if(index(" -~",c) == NULL) + i++; + } + for(pos=true_start;posseqlen;pos++) + { + c=seq->c_elem[pos]; + if(index(" -~",c) == NULL) + { + c &= (255-32); /*upper case*/ + if(c == 'U') c = 'T'; +/* +* We have a valid character... +*/ + if(fptr == 0) + start = pos; + + codon[fptr++] = c; + /* +* Translate the codon... +*/ + if(fptr == 3) + { + /* +* Place default code 'X' in case translation fails +*/ + temp[start] = 'X'; + for(i=0;i<512;i+=2) + if(strcmp(codon,table[i]) == 0) + { + if(letter_code == 1) + { + temp[start] = table[i+1][0]; + temp[start+1] = table[i+1][1]; + temp[start+2] = table[i+1][2]; + } + else + temp[start] = ThreeToOne + (table[i+1]); + i = 512; + } + fptr = 0; +/* +* Check to see if it is a valid ORF +*/ + if((strncmp("Ter",&(temp[start]),3) == 0) || + (temp[start] == '*')) + { +/* +* If the ORF is too small, clear it out... +*/ + if(codon_count < min_frame) + { +/* + * Should we seperate the groups out, or not?!?! 
+ * + */ + if (!sep) + { +/* +* If reading from stop to stop, leave the elading stop codon +*/ + if(temp[last_start]=='*' || + strncmp("Ter",&(temp[last_start]),3) == 0) + for(i=last_start+3;iseqlen;i++) + temp[i] = '-'; + } + } + else + { + if (sep) + { + sprintf(strtmp, "_%d", grp++); + strncat(seq->name, strtmp, 20); + save_c_elem = seq->c_elem; + seq->c_elem = temp; + WriteRecord(stdout,seq,NULL,0); + strncpy(seq->name, save_name, 80); + seq->c_elem = save_c_elem; + for(i=0;iseqlen;i++) + temp[i] = '-'; + } + } + codon_count = 0; + last_start = start; +/* + * Don't bother continuing if too close to end to meat min_frame requirements + * + */ + if ((pos + (3 * min_frame)) > seq->seqlen) + break; + } + else + codon_count++; + } + } + } + Cfree(seq->c_elem); + if (!sep) + { + seq->c_elem = temp; + WriteRecord(stdout,seq,NULL,0); + } + Cfree(temp); + return; +} + + +char ThreeToOne(s) +char s[]; +{ + extern char three_to_one[][5]; + int j; + + for(j=0;j<23;j++) + if(strncmp(s,three_to_one[j],3) == 0) + return(three_to_one[j][3]); + Warning("ThreeToOne, code not found"); + return('*'); +} + + + diff --git a/HGL_SRC/global_defs.h b/HGL_SRC/global_defs.h new file mode 100755 index 0000000..26f784e --- /dev/null +++ b/HGL_SRC/global_defs.h @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include + +#define const /* const is not defined in non-ansi C, + * so it does not affect anything. + * Take this define off when using + * ANSI C compiler. + */ + +#define NUM_OF_FIELDS 25 /* number of fields in "struct Sequence" other + than *len and *maxlen fields. + update this number when changing at[] list. */ + +/* C style T/F definitions. */ +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef MAX +#define MAX(a,b) ( (a) > (b) ? (a) : (b)) +#endif + +#ifndef MIN +#define MIN(a,b) ( (a) < (b) ? 
(a) : (b)) +#endif + +typedef struct +{ + char type[32]; /* DNA, RNA, AA/PROTEIN, TEXT, SCORE(0-F) */ + char status[32]; /* unmade, pending, unsolved, solved. */ + char name[64]; + char sequence_ID[32]; + + char *c_elem; + int seqlen; + int seqmaxlen; + + int creation_date[6]; /* yy/mm/dd/hh/mn/sc */ + char creator[32]; + char film[32]; + char membrane[32]; + int laneset; + char source_ID[32]; + char contig[32]; + int strandedness; /* 0: unspecified, 1:pri(default), 2:sec */ + int direction; /* 0: unspecified, 1:5to3>(default), -1:3to5< */ + int offset; + char *comments; + int commentslen; + int commentsmaxlen; + char *baggage; + int baglen; + int bagmaxlen; + int group_number; + int group_ID; + char barcode[16]; + int orig_direction; /* 0: unknown, 1:5'->3', 0:3'->5'*/ + int orig_strand; /* 0: unknown, 1:primary, 0:secondary */ + int probing_date[6]; + int autorad_date[6]; + char walk[32]; /* "TRUE", "FALSE" or whatever */ +} Sequence; + + +/*** + *** Elements in at[] and e_tags should be IN THE SAME ORDER. 
+ ***/ + +static char *at[NUM_OF_FIELDS] = { + "type", + "status", + "name", + "sequence-ID", + "sequence" , + "creation-date" , + "creator" , + "film" , + "membrane" , + "laneset" , + "source-ID" , + "contig" , + "strandedness" , + "direction" , + "offset" , + "comments" , + "baggage", + "group-number", + "barcode", + "orig_direction", + "orig_strand", + "probing-date", + "autorad-date", + "group-ID", + "walk" +}; + + +enum e_tags { e_type, /*0*/ + e_status, + e_name, /*2*/ + e_sequence_ID , /*3*/ + e_c_elem , + e_creation_date, + e_creator, /*6*/ + e_film, + e_membrane, + e_laneset, + e_source_ID, + e_contig, + e_strandedness, + e_direction, + e_offset, /*14*/ + e_comments, + e_baggage, + e_group_number, /*17*/ + e_barcode, /*18*/ + e_orig_direction, + e_orig_strand, /*20*/ + e_probing_date, + e_autorad_date, + e_group_ID, /*23*/ + e_walk + }; + +typedef struct { + char symbol[2]; + int field; + char *value; +} str_cond ; + +typedef struct { + char prompt[20]; + int optional; /* 'T' or 'F' */ + char tag; + char strvalue[100]; /* default value or fill in by this program. */ +} Args; + diff --git a/HGL_SRC/global_defs.h.orig b/HGL_SRC/global_defs.h.orig new file mode 100755 index 0000000..7f0422d --- /dev/null +++ b/HGL_SRC/global_defs.h.orig @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +#define const /* const is not defined in non-ansi C, + * so it does not affect anything. + * Take this define off when using + * ANSI C compiler. + */ + +#define NUM_OF_FIELDS 24 /* number of fields in "struct Sequence" other + than *len and *maxlen fields. + update this number when changing at[] list. */ + +/* c style T/F definitions. */ +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef MAX +#define MAX(a,b) ( (a) > (b) ? (a) : (b)) +#endif + +#ifndef MIN +#define MIN(a,b) ( (a) < (b) ? 
(a) : (b)) +#endif + +typedef struct +{ + char type[32]; /* DNA, RNA, AA/PROTEIN, TEXT, SCORE(0-F) */ + char status[32]; /* unmade, pending, unsolved, solved. */ + char name[64]; + char sequence_ID[32]; + + char *c_elem; + int seqlen; + int seqmaxlen; + + int creation_date[6]; /* yy/mm/dd/hh/mn/sc */ + char creator[32]; + char film[32]; + char membrane[32]; + int laneset; + char source_ID[32]; + char contig[32]; + int strandedness; /* 0: unspecified, 1:pri(default), 2:sec */ + int direction; /* 0: unspecified, 1:5to3>(default), -1:3to5< */ + int offset; + char *comments; + int commentslen; + int commentsmaxlen; + char *baggage; + int baglen; + int bagmaxlen; + int group_number; + int group_ID; + char barcode[16]; + int orig_direction; /* 0: unknown, 1:5'->3', 0:3'->5'*/ + int orig_strand; /* 0: unknown, 1:primary, 0:secondary */ + int probing_date[6]; + int autorad_date[6]; +} Sequence; + + +/*** + *** Elements in at[] and e_tags should be IN THE SAME ORDER. + ***/ + +static char *at[NUM_OF_FIELDS] = { + "type", + "status", + "name", + "sequence-ID", + "sequence" , + "creation-date" , + "creator" , + "film" , + "membrane" , + "laneset" , + "source-ID" , + "contig" , + "strandedness" , + "direction" , + "offset" , + "comments" , + "baggage", + "group-number", + "barcode", + "orig_direction", + "orig_strand", + "probing-date", + "autorad-date", + "group-ID" +}; + + +enum e_tags { e_type, /*0*/ + e_status, + e_name, /*2*/ + e_sequence_ID , /*3*/ + e_c_elem , + e_creation_date, + e_creator, /*6*/ + e_film, + e_membrane, + e_laneset, + e_source_ID, + e_contig, + e_strandedness, + e_direction, + e_offset, /*14*/ + e_comments, + e_baggage, + e_group_number, /*17*/ + e_barcode, /*18*/ + e_orig_direction, + e_orig_strand, /*20*/ + e_probing_date, + e_autorad_date, + e_group_ID /*23*/ + }; + +typedef struct { + char symbol[2]; + int field; + char *value; +} str_cond ; + +typedef struct { + char prompt[20]; + int optional; /* 'T' or 'F' */ + char tag; + char strvalue[100]; /* 
default value or fill in by this program. */ +} Args; + diff --git a/HGL_SRC/heapsortHGL b/HGL_SRC/heapsortHGL new file mode 100755 index 0000000..b692971 Binary files /dev/null and b/HGL_SRC/heapsortHGL differ diff --git a/HGL_SRC/heapsortHGL.c b/HGL_SRC/heapsortHGL.c new file mode 100755 index 0000000..a28daf5 --- /dev/null +++ b/HGL_SRC/heapsortHGL.c @@ -0,0 +1,79 @@ +#include +#include "global_defs.h" + +main(argc, argv) +int argc; +char **argv; +{ + int ii, seq_size, seq_maxsize, *order; + char Pkey[32], Skey[32]; + Sequence *seq_set; + FILE *fp; + + if(argc == 1) + { + fprintf(stderr, "\n%s\n%s\n%s\n%s\n%s\n%s\n", + "Description:", + " Sorts HGL records by primary and secondary keys, output the", + " result to stdout. Sort in descending order if 'decs' is specified.", + " Valid keys are:", + " type name sequence-ID creator offset barcode group-ID", + " seqlen film membrane contig probing-date creation-date", + " autorad-date"); + + fprintf(stderr, "\nUsage: \n"); + fprintf(stderr,"heapsortHGL filename primaryKey [secondaryKey] [decs]\n\n"); + exit(0); + } + + if(argc == 2) + { + fprintf(stderr, "Primary Key is Required.\n"); + exit(1); + } + strcpy(Pkey, argv[2]); + Skey[0] = '\0'; + if(argc > 3 && strcmp(argv[3], "decs") != 0) + { + strcpy(Skey, argv[3]); + } + seq_size = 0; + seq_maxsize = 64; + seq_set = (Sequence *)Calloc(seq_maxsize, sizeof(Sequence)); + + if((fp=fopen(argv[1], "r")) == NULL) + { + fprintf(stderr, "Can't open file %s.\n", argv[1]); + exit(1); + } + + while(ReadRecord(fp, &(seq_set[seq_size])) != -1) + { + SeqNormal(&(seq_set[seq_size])); + if(++seq_size == seq_maxsize) + { + seq_maxsize *= 2; + seq_set = (Sequence *) + Realloc(seq_set, seq_maxsize*sizeof(Sequence)); + } + } + + order = (int *)Calloc(seq_size, sizeof(int)); + for(ii = 0; ii= 0; ii--) + WriteRecord(stdout, &(seq_set[order[ii]]), NULL, 0); + } + else + { + /* output in ascending order. 
*/ + for(ii = 0; ii +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _GLOBAL_DEFS_H +#include "global_defs.h" +#define _GLOBAL_DEFS_H +#endif + +#define min_pbl 1 +#define max_pbl 15 +#define min_scale 1 +#define max_scale 1000 +#define min_lwidth 1 +#define max_lwidth 5 +#define margin 100 + +#define WHITE 15 +#define BLACK 8 +#define RED 3 +#define MASK_S2 5 +#define MASK_S1 7 +#define MASK_S0 10 + +Frame frame, prop_subframe; +Canvas canvas; +Xv_Window paint_win; +Display *display; +GC gc; +Window xwin; +Cms cms; +Scrollbar h_scrollbar; +Panel_item scale_slider, pbl_slider, lwidth_slider, color_chooser; +Menu color_menu; + +int pbl = 10; +int scale = 2; +int lwidth = 2; +int line_space= 12; +char FROM_RESIZE = 'F'; + +int max_dots = 1000; +/* This value will be dynamically assigned. + * It is used to decide the paint window size. + */ + +int drawarea_min_y, drawarea_max_y; +unsigned long *colors; +char clear_mark = 'F'; + +int mark_x=0; /* record the x,y of the paint window, */ +int mark_y=0; /* not the canvas or the view window. */ +int set_offset;/* The offset of the sequences being displayed. */ +int save_for_mark_y; + +/* wrapping up. */ +int canvas_h, line_p_page; + +typedef struct +{ + char name[32]; + int direction, strandedness, size, max_size; + int orig_strand, orig_direction; + int *dots; /* Dynamic 2D array. */ +} INFO; + +INFO *info; +int info_size; + +/* synchronization. */ +char Lsync_fname[128]; +char Lsync_YesNo = 'N'; /*Location synchronization Yes or No. */ +char Csync_fname[128]; +char Csync_YesNo = 'N'; /*Color synchronization Yes or No. 
*/ + +main(argc, argv) + int argc; + char *argv[]; +{ + Panel panel; + int LoadHGLData(); + extern exit_proc(); + extern scale_proc(); + extern pbl_proc(); + extern footer_proc(); + extern canvas_repaint_proc(); + extern canvas_resize_proc(); + extern Frame load_file(); + extern Load(); + extern Lsync_proc(); +/* extern Csync_proc();*/ + extern show_prop_frame(); + extern lwidth_proc(); + extern l_arrow(), r_arrow(); + int i; + + static Xv_singlecolor cms_colors[] = { + {0,128,0}, + {255,192,0}, + {255,0,255}, + {225,0,0}, + {0,192,192}, + {0,192,0}, /* green */ + {0,0,255}, + {128,0,255}, /* purple */ + {0,0,0}, + {36,36,36}, + {72,72,72}, + {109,109,109}, + {145,145,145}, + {182,182,182}, + {218,218,218}, + {255,255,255} + }; + + char filename[128]; + FILE *fp; + Rect *rect; + + /* malloc_debug(2); */ + + for(i = 1; imax_scale || scalemax_pbl || pbl1) + { + xv_set(frame,FRAME_LABEL, filename, NULL); + + rect = (Rect *)xv_get(canvas,CANVAS_VIEWABLE_RECT,paint_win); + XClearArea(display, xwin, + rect->r_left, rect->r_top, + rect->r_width, rect->r_height, + 0); + + if((set_offset = LoadHGLData(filename)) == -1) + { + exit(1); + } + } + +/* line_p_page = (canvas_h -line_space + pbl/2) / line_space; */ + + xv_main_loop(frame); +} + + + +/*** + * + * LoadHGLData() reads HGL format data from fp_name, stores + * the information in a global struct array 'info[]'. Return minumn + * offset if successful, -1 if anything is wrong. 
+ * + ***/ + +int + LoadHGLData(name_str) +char *name_str; +{ + Sequence *tSeq; + int iSeq, iSeg; + int dot_cnt; + int min_offset = INT_MAX; + FILE *fp_fname; + static int info_max_size = 0; + + if(strcmp(name_str, "") == 0) + return -1; + + if((fp_fname = fopen(name_str,"r")) == NULL) + { + fprintf(stderr,"File not found: %s\n", name_str); + exit(1); + } + + max_dots = 1000; + mark_x = mark_y = 0; + + iSeq = 0; + tSeq = (Sequence *)Calloc(1, sizeof(Sequence)); + + if(info_max_size == 0) + { + info_max_size = 256; + info = (INFO *)Calloc(info_max_size, sizeof(INFO)); + } + + while(ReadRecord(fp_fname, tSeq) != -1) + { + SeqNormal(tSeq); + + if(iSeq == info_max_size) + { + info_max_size *= 2; + info = (INFO *)Realloc(info, sizeof(INFO)*info_max_size); + } + strcpy(info[iSeq].name, tSeq->name); + + info[iSeq].direction = tSeq->direction; + info[iSeq].strandedness = tSeq->strandedness; + info[iSeq].orig_strand = tSeq->orig_strand; + info[iSeq].orig_direction = tSeq->orig_direction; + if(tSeq->orig_strand == 2) + info[iSeq].orig_direction *= -1; + + min_offset = MIN(min_offset, tSeq->offset); + max_dots = MAX(max_dots, tSeq->offset+tSeq->seqlen); + + if(info[iSeq].max_size == 0) + { + info[iSeq].max_size = 8; + info[iSeq].dots=(int *)Calloc(info[iSeq].max_size*2, + sizeof(int)); + } + + iSeg = 0; + POS2(info[iSeq].dots, iSeg, 0) = tSeq->offset; + + dot_cnt = 0; + while(dot_cnt < tSeq->seqlen) + { + if(tSeq->c_elem[dot_cnt] != '-' && + (tSeq->c_elem[dot_cnt+1] == '-' || + dot_cnt+1 == tSeq->seqlen)) + { + POS2(info[iSeq].dots, iSeg++, 1) = tSeq->offset+dot_cnt; + if(iSeg == info[iSeq].max_size) + { + info[iSeq].max_size *= 2; + info[iSeq].dots = + (int *)Realloc(info[iSeq].dots, + sizeof(int)*2*info[iSeq].max_size); + } + } + else if(tSeq->c_elem[dot_cnt] == '-' && + tSeq->c_elem[dot_cnt+1] != '-') + { + POS2(info[iSeq].dots, iSeg, 0) = tSeq->offset + dot_cnt+1; + } + dot_cnt++; + } + + info[iSeq].size = iSeg; + iSeq++; + } + info_size = iSeq; + + fclose(fp_fname); + 
FreeRecord(&tSeq); + + if (min_offset != 0) + { + max_dots -= min_offset; + for(iSeq = 0; iSeqr_top; + drawarea_max_y = drawarea_min_y + rect->r_height; + } + /* else: this is to redraw the cleared area. + * drawarea_min_y and drawarea_max_y have been set by + * the caller procedure, so don't reset they. + */ + + /* + * Set drawarea_min_x, drawarea_max_x. + */ + + rect=(Rect *)xv_get(canvas,CANVAS_VIEWABLE_RECT,paint_win); + drawarea_min_x = rect->r_left -1; + drawarea_max_x = rect->r_left + rect->r_width; + canvas_h = rect->r_height; + + y = 0; + + for(iSeq = 0; iSeq canvas_h) ? + line_space : y + line_space; + + /* + if(info[iSeq].orig_strand == 1) + XSetForeground(display,gc,colors[MASK_S1]); + else if(info[iSeq].orig_strand == 2 ) + XSetForeground(display,gc,colors[MASK_S2]); + else + */ + XSetForeground(display,gc,colors[MASK_S0]); + + /*if(info[iSeq].direction == -1 || info[iSeq].direction == 2 )*/ + if(info[iSeq].orig_direction == -1) + { + l_arrow(lwidth, info[iSeq].orig_strand, + POS2(info[iSeq].dots,0,0)/scale,y); + } + else if( info[iSeq].orig_direction == 1 ) + { + r_arrow(lwidth, info[iSeq].orig_strand, + POS2(info[iSeq].dots, + info[iSeq].size -1, 1)/scale,y); + } + + for(iSeg=0; iSeg= drawarea_min_x && + drawarea_min_y <= y && y <= drawarea_max_y) + { + for(i=0; i last_Lsync_time) + { + Lsync_fp = fopen(Lsync_fname, "r"); + loc_name[0] = '\0'; + while(fgets(line, 256, Lsync_fp) != NULL) + { + if(strncmp(line, "Col:", 4) == 0) + loc_col = atoi(line+4); + else if(strncmp(line, "SeqID:", 6) == 0) + { + strcpy(loc_name, line+6); + loc_name[strlen(loc_name)-1] = '\0'; + } + } + fclose(Lsync_fp); + + if(loc_name[0] == ' ' || loc_col == INT_MAX) + { + fprintf(stderr, "Bad status file.\n"); + } + else + { + iSeq = 0; + while(iSeqr_width/2, + (int)xv_get(h_scrollbar, + SCROLLBAR_OBJECT_LENGTH) + - (int)xv_get(canvas, XV_WIDTH))), + NULL); + last_Lsync_time = stbuf.st_mtime; + } + } + } + } + } + else if(event_id(event) == LOC_WINEXIT) + { + 
win_refuse_kbd_focus(canvas); + /* win_refuse_kbd_focus(paint_win); */ + if(Lsync_YesNo == 'Y') + { + /* update the status file. */ + ii = 0; + while(ii<20 && (Lsync_fp = fopen(Lsync_fname, "r+")) == NULL) + { + ii++; + } + if(ii == 20) + { + fprintf(stderr, "Can't open status file for updating: %s\n", + Lsync_fname); + } + else + { + char *temp_file; + int file_maxlen = 256, file_len = 0; + + temp_file = (char *)Calloc(file_maxlen, 1); + while(fgets(line, 256, Lsync_fp) != NULL) + { + if(strncmp(line, "Col:", 4) == 0) + { + sprintf(line, "Col:%d\n", mark_x / scale); + } + else if(strncmp(line, "SeqID:", 6) == 0) + { + sprintf(line, "SeqID:%s\n", + info[print_line-1].name); + } + + file_len += strlen(line); + if(file_len+1 >=file_maxlen) + { + file_maxlen *= 2; + temp_file = (char *)Realloc(temp_file, file_maxlen); + } + strcat(temp_file , line); + } + fseek(Lsync_fp, 0L, 0); + fprintf(Lsync_fp, "%s", temp_file); + Cfree(temp_file); + } + fclose(Lsync_fp); + } + } + + if (event_is_down(event)) + { + if(event_action(event)==ACTION_SELECT) + { + mark_x = event_x(event)*scale + 0.5*scale; + mark_y = event_y(event); + need_to_paint = 'T'; + } + else if(event_is_key_right(event) || + event_is_ascii(event)) + { + int return_int; + + return_int = XLookupString(event->ie_xevent,/*not used */ + return_str, /*not used*/ + return_len, /*not used*/ + &keysym,NULL ); + + switch(keysym) + { + case XK_F31: /* center, key 5 on the right keyboard. */ + rect = (Rect *) + xv_get(canvas,CANVAS_VIEWABLE_RECT,paint_win); + + (void)xv_set(h_scrollbar, + SCROLLBAR_VIEW_START, + MAX(0, MIN(mark_x/scale - rect->r_width/2, + (int)xv_get(h_scrollbar, SCROLLBAR_OBJECT_LENGTH) + - (int)xv_get(canvas, XV_WIDTH))), + NULL); + break; + case XK_F27: + /* the home key. 
*/ + (void)xv_set(h_scrollbar, SCROLLBAR_VIEW_START,0, NULL); + break; + case XK_Left: + if(mark_x - scale >1) mark_x -= scale; + need_to_paint = 'T'; + break; + case XK_Right: + mark_x += scale; + need_to_paint = 'T'; + break; + case XK_Up: + if (mark_y>= 2*line_space - pbl/2) + mark_y -= line_space; + else + mark_y = line_p_page*line_space + lwidth; + need_to_paint = 'T'; + break; + case XK_Down: + if((mark_y - 2*line_space+pbl/2)/line_space +1 POS2(info[iSeq].dots,iSeg,1)) + { + accu_pos += POS2(info[iSeq].dots,iSeg,1) - + POS2(info[iSeq].dots, iSeg ,0)+1; + iSeg++; + } + else if(mark_x>= POS2(info[iSeq].dots,iSeg,0)) + { + if(position != INT_MIN) + { + /* ambiguous location. *** Set Foreground?? */ + sprintf(buf, + "Mouse location: Line = ? Position = ? Column = %d", + set_offset + mark_x + 1 ); + xv_set(frame,FRAME_LEFT_FOOTER, buf, NULL); + xv_set(frame,FRAME_RIGHT_FOOTER,"Sequence: ",NULL); + return XV_OK; + } + position = accu_pos + mark_x - POS2(info[iSeq].dots, iSeg, 0) + 1; + print_line = iSeq+1; + break; + } + else + { + break; + } + } + iSeq += line_p_page; + } + + if(position != INT_MIN) + { + sprintf(buf, + "Mouse location: Line = %d Position = %d Column = %d", + print_line, position, set_offset + mark_x + 1 ); + xv_set(frame,FRAME_LEFT_FOOTER, buf, NULL); + sprintf(buf,"Sequence: %s\n",info[print_line-1].name); + save_for_mark_y = print_line; + } + else + { + sprintf(buf, + "Mouse location: Line = Position = Column = %d", + set_offset+mark_x+1); + xv_set(frame,FRAME_LEFT_FOOTER, buf, NULL); + sprintf(buf,"Sequence: \n"); + } + + xv_set(frame,FRAME_RIGHT_FOOTER, buf, NULL); + } + return XV_OK; +} + + + +Load(item,event) + Panel_item item; + Event *event; +{ + extern Frame load_file(); /* rtm 18.III.98 */ + (void)load_file(frame,300,150,NULL); + return XV_OK; +} + + + +canvas_resize_proc(canvas, canvas_width, canvas_height) +Canvas canvas; +int canvas_width, canvas_height; +{ + int tt; + FROM_RESIZE = 'T'; + canvas_h = canvas_height; + line_p_page = 
(canvas_h -line_space + pbl/2) / line_space; + + tt = save_for_mark_y % line_p_page; + if(tt == 0) + tt = line_p_page; + + mark_y = tt*line_space; +} + + + +scale_proc(item, i_scale, event) + Panel_item item; + int i_scale; + Event *event; +{ + scale = i_scale; + + (void)xv_set(h_scrollbar, + SCROLLBAR_OBJECT_LENGTH, MAX((int)xv_get(canvas, XV_WIDTH), + max_dots/scale+margin), + SCROLLBAR_VIEW_START, MAX(0, MIN(mark_x/scale - 100, + max_dots/scale+margin - + (int)xv_get(canvas, XV_WIDTH))), + NULL); + + (void)xv_set(paint_win, XV_WIDTH, + MAX((int)xv_get(canvas,XV_WIDTH),max_dots/scale+margin), + NULL); + + XClearWindow(display, xwin); + + canvas_repaint_proc(canvas,paint_win,display, xwin, NULL); + return XV_OK; +} + + + +pbl_proc(item, i_pbl, event) + Panel_item item; + int i_pbl; + Event *event; +{ + int tt; + + pbl = i_pbl; + line_space = pbl + lwidth; + line_p_page = (canvas_h -line_space + pbl/2) / line_space; + + tt = save_for_mark_y % line_p_page; + if(tt == 0) + tt = line_p_page; + + mark_y = tt*line_space; + + XClearWindow(display, xwin); + + canvas_repaint_proc(canvas,paint_win,display, xwin, NULL); + return XV_OK; +} + + + +lwidth_proc(item, i_lw, event) + Panel_item item; + int i_lw; + Event *event; +{ + int tt; + + lwidth = i_lw; + line_space = pbl + lwidth; + line_p_page = (canvas_h -line_space + pbl/2) / line_space; + + tt = save_for_mark_y % line_p_page; + if(tt == 0) + tt = line_p_page; + + mark_y = tt*line_space; + + XClearWindow(display, xwin); + + canvas_repaint_proc(canvas,paint_win,display, xwin, NULL); + return XV_OK; +} + + + +exit_proc(item, event) + Panel_item item; + Event *event; +{ + if (event_action(event) == ACTION_SELECT) + { + xv_destroy_safe(frame); + return(XV_OK); + } + else + return(XV_ERROR); +} + + + +Lsync_proc(item, value, event) +Panel_item item; +int value; +Event *event; +{ + if(value == 0) + Lsync_YesNo = 'N'; + else if(value == 1) + Lsync_YesNo = 'Y'; + else + fprintf(stderr, "Lsync_proc: value= %d\n", value); +} + + 
+show_prop_frame(item, event) +Frame item; +Event *event; +{ + xv_set(prop_subframe, XV_SHOW, TRUE, NULL); +} + + + +r_arrow(size, strand, loc_x, loc_y) +int size, strand, loc_x, loc_y; +{ + int ii; + + if(size == 1) + { + if(strand < 2) + XDrawLine(display,xwin,gc,loc_x, loc_y,loc_x-2,loc_y-2); + if(strand == 0 || strand == 2) + XDrawLine(display,xwin,gc,loc_x, loc_y,loc_x-2,loc_y+2); + } + + /* draw the wings. */ + for(ii=1; ii <= size/2+1; ii++) + { + if(strand < 2) + XDrawLine(display,xwin,gc, + loc_x-ii, loc_y-ii, loc_x-ii, loc_y-1); + if(strand == 0 || strand == 2) + XDrawLine(display,xwin,gc, + loc_x-ii, loc_y+size-1+ii, loc_x-ii, + loc_y+size); /* actually, loc_y+size-1+1.*/ + } + + /* draw the tip. */ + for(ii = 1; strand == 0 && ii < (size+1)/2; ii++) + { + XDrawLine(display,xwin,gc, + loc_x+ii, loc_y+ii, loc_x+ii, loc_y+size-1-ii); + } + + for(ii=1; strand==1 && ii < size; ii++) + { + XDrawLine(display,xwin,gc, + loc_x+ii, loc_y+ii, loc_x+ii, loc_y+size-1); + } + + for(ii=1; strand==2 && ii < size; ii++) + { + XDrawLine(display,xwin,gc, + loc_x+ii, loc_y, loc_x+ii, loc_y+size-1-ii); + } +} + + + +l_arrow(size, strand, loc_x, loc_y) +int size, strand, loc_x, loc_y; +{ + int ii; + + if(size == 1) + { + if(strand < 2) + XDrawLine(display,xwin,gc,loc_x, loc_y,loc_x+2,loc_y-2); + if(strand == 0 || strand == 2) + XDrawLine(display,xwin,gc,loc_x, loc_y,loc_x+2,loc_y+2); + } + + /* draw the wings. */ + for(ii=1; ii <= size/2+1; ii++) + { + if(strand < 2) + XDrawLine(display,xwin,gc, + loc_x+ii, loc_y-ii, loc_x+ii, loc_y-1); + if(strand == 0 || strand == 2) + XDrawLine(display,xwin,gc, + loc_x+ii, loc_y+size-1+ii, loc_x+ii, + loc_y+size); /* actually, loc_y+size-1+1.*/ + } + + /* draw the tip. 
*/ + for(ii = 1; strand == 0 && ii < (size+1)/2; ii++) + { + XDrawLine(display,xwin,gc, + loc_x-ii, loc_y+ii, loc_x-ii, loc_y+size-1-ii); + } + + for(ii=1; strand==1 && ii < size; ii++) + { + XDrawLine(display,xwin,gc, + loc_x-ii, loc_y+ii, loc_x-ii, loc_y+size-1); + } + + for(ii=1; strand==2 && ii < size; ii++) + { + XDrawLine(display,xwin,gc, + loc_x-ii, loc_y, loc_x-ii, loc_y+size-1-ii); + } +} + + + +cross(size, loc_x, loc_y) + int size, loc_x, loc_y; +{ + int ii; + + if(size == 1) + { + XDrawLine(display,xwin,gc, loc_x-2,loc_y, loc_x+2,loc_y); + XDrawLine(display,xwin,gc, loc_x, loc_y-2,loc_x, loc_y+2); + } + else if(size == 2) + { + for(ii=0; ii<2; ii++) + { + XDrawLine(display,xwin,gc,loc_x-2,loc_y+ii,loc_x+3,loc_y+ii); + XDrawLine(display,xwin,gc,loc_x+ii,loc_y-2,loc_x+ii,loc_y+3); + } + } + else + { + for(ii= -1; ii<2; ii++) + { + XDrawLine(display,xwin,gc,loc_x-4,loc_y+ii,loc_x+4,loc_y+ii); + XDrawLine(display,xwin,gc,loc_x+ii,loc_y-4,loc_x+ii,loc_y+4); + } + } +} + diff --git a/HGL_SRC/plot.icon b/HGL_SRC/plot.icon new file mode 100755 index 0000000..90badc0 --- /dev/null +++ b/HGL_SRC/plot.icon @@ -0,0 +1,33 @@ +/* Format_version=1, Width=64, Height=61, Depth=1, Valid_bits_per_item=16 + */ + 0x7fff,0xffff,0xffff,0xfffe,0x7ffd,0xffff,0xffff,0xfffe, + 0x7fff,0xffff,0xffff,0xfffe,0x7fff,0xffff,0xffff,0xfffe, + 0x7fff,0xffff,0xffff,0xf9fe,0x7fff,0xffff,0xbfff,0xfffe, + 0x7fff,0xfffc,0xffff,0xdffe,0x7f7f,0xfff1,0xbdff,0x9ffe, + 0x7f7f,0xffc1,0xffff,0xfffe,0x7fff,0xfd1f,0xffff,0xfffe, + 0x7fff,0xf84f,0xf183,0xfffe,0x7fff,0xfddf,0xbbff,0xfffe, + 0x7ffe,0xf1fe,0x06ef,0xfffe,0x7fff,0xf3e1,0xffff,0xfffe, + 0x7ffe,0xe31f,0xffff,0xbffe,0x7fff,0xf8ff,0xebff,0xcffa, + 0x7ff9,0xc7ff,0x9cff,0xfffa,0x7fff,0xdeef,0xff3f,0xfdfa, + 0x7ffd,0x3fe5,0xffdf,0xfffe,0x7fff,0x7ff3,0xbfe3,0xff7e, + 0x7ffe,0x6ed7,0xffff,0xfffe,0x7ffd,0x6cdf,0xf7fe,0x7ffe, + 0x7f79,0xfedf,0x8bff,0xff7e,0x7ffb,0x7dd4,0x117f,0xffbe, + 0x7f3b,0x5de4,0x083f,0xdffe,0x7f9c,0x5de0,0x1f3f,0xdf7e, + 
0x7ffc,0x76e1,0x2987,0xfffe,0x7fdc,0x6fe0,0x0fcf,0xfffe, + 0x7ffe,0x6fc8,0x25f3,0xeffe,0x7fef,0x2fc0,0x12fd,0xeffe, + 0x7efb,0x9888,0x0df5,0xfffe,0x7ffe,0xdf22,0x0fbe,0xfffe, + 0x7fff,0x27f0,0x1bfa,0xfffe,0x7fff,0xf006,0xb0ff,0x6ffe, + 0x7fff,0xe7fc,0x1d7a,0xfffe,0x7fff,0xfbc6,0x1c78,0xfffe, + 0x7fdf,0xfebf,0xf9cb,0xeffe,0x7fef,0xbfe1,0xf3fa,0xfffe, + 0x7fe7,0xdfff,0xf7fa,0xfffe,0x7ff7,0xcfff,0xcffd,0xbffe, + 0x7ffb,0xfd9d,0x0cbb,0xfffe,0x7fff,0xfe06,0x7fbb,0xfffe, + 0x7fff,0xeff0,0xbda7,0xfff6,0x7fff,0x3fdd,0xfbe7,0xfff6, + 0x7fff,0x9d3f,0xffef,0xfff6,0x7fff,0xffff,0xffdf,0xfffe, + 0x7fff,0xfffe,0x7fbf,0xfffe,0x7fff,0xffff,0xff7f,0xfffe, + 0x7fff,0xe3ff,0xfeff,0xfffe,0x7fff,0xfc2f,0xe1ff,0xfffe, + 0x7fff,0xfff1,0x3fff,0xfefe,0x7fdf,0xffff,0xffff,0xfffe, + 0x7eff,0xffff,0xffff,0xfffe,0x7fff,0xffff,0xffff,0xfffe, + 0x7fff,0xffff,0xffff,0xfffe,0x7fff,0xefff,0xffff,0xfffe, + 0x7fff,0xffff,0xffef,0xfffe,0x7fff,0xffff,0xff9f,0xfdfe, + 0x7ffd,0xffff,0xe7ff,0xfbfe,0x7fff,0xffff,0xffff,0xfffe, + 0x7fff,0xffff,0xffff,0xfffe diff --git a/ZUKER/.reg b/ZUKER/.reg new file mode 100755 index 0000000..e69de29 diff --git a/ZUKER/GDEmenus.DNA b/ZUKER/GDEmenus.DNA new file mode 100755 index 0000000..0216547 --- /dev/null +++ b/ZUKER/GDEmenus.DNA @@ -0,0 +1,32 @@ +item:MFOLD +itemmethod:shelltool (tr 'a-z' 'A-Z' < seqGB > .GDE.tmp.caps; ZUKERGDE.sh .GDE.tmp.caps $CT $GDE_HELP_DIR/ZUKER/ > out1 && $METHOD < out1; Zuk_to_gen < $CT >file.gen; gde file.gen& textedit RegionTable; /bin/rm -f RegionTable out1 seqGB* .GDE.tmp.caps)&itemhelp:MFOLD.help + +in:seqGB +informat:genbank +insave: + +arg:METHOD +argtype:chooser +arglabel:RNA type +argchoice:Fold Linear RNA:lrna +argchoice:Fold Circular RNA:crna + +arg:CT +argtype:text +arglabel:ct File Name +argtext:ctFile + +item:Draw Secondary structure +itemmethod:(LoopTool $TEMPLATE in1 ; /bin/rm -f in1) & +itemhelp:LoopTool.help + +arg:TEMPLATE +argtype:chooser +arglabel:Use template file ./loop.temp? 
+argchoice:No: +argchoice:Yes:-t loop.temp + +in:in1 +informat:genbank +insave: + diff --git a/ZUKER/Makefile b/ZUKER/Makefile new file mode 100755 index 0000000..c16303c --- /dev/null +++ b/ZUKER/Makefile @@ -0,0 +1,59 @@ +# +# SHELL = /bin/sh +# can add -O2 to FFLAGS to get maximum optimization +# use -Bstatic to compile with static library. +# + +#CFLAGS = -e -Bstatic +CFLAGS = -g +FC = g77 + +all: lrna crna + +lrna: lrna.o lin.o lmrna1.o lmrna2.o lsort.o formid.o multid.o + $(FC) $(CFLAGS) -o lrna lrna.o lin.o lmrna1.o lmrna2.o lsort.o formid.o multid.o + +crna: crna.o circ.o cmrna1.o cmrna2.o csort.o formid.o multid.o + $(FC) $(CFLAGS) -o crna crna.o circ.o cmrna1.o cmrna2.o csort.o formid.o multid.o + +formid.o: formid.f + $(FC) $(CFLAGS) -c -o formid.o formid.f + +multid.o: multid.f + $(FC) $(CFLAGS) -c -o multid.o multid.f + +lrfd.inc: lin.inc + cp lin.inc rfd.inc + +lrna.o: rna.f lrfd.inc + $(FC) $(CFLAGS) -c -o lrna.o rna.f + +lin.o: lin.f lrfd.inc + $(FC) $(CFLAGS) -c -o lin.o lin.f + +lmrna1.o: mrna1.f lrfd.inc + $(FC) $(CFLAGS) -c -o lmrna1.o mrna1.f + +lmrna2.o: mrna2.f lrfd.inc + $(FC) $(CFLAGS) -c -o lmrna2.o mrna2.f + +lsort.o: sort.f lrfd.inc + $(FC) $(CFLAGS) -c -o lsort.o sort.f + +crfd.inc: circ.inc + cp circ.inc rfd.inc + +crna.o: rna.f crfd.inc + $(FC) $(CFLAGS) -c -o crna.o rna.f + +circ.o: circ.f crfd.inc + $(FC) $(CFLAGS) -c -o circ.o circ.f + +cmrna1.o: mrna1.f crfd.inc + $(FC) $(CFLAGS) -c -o cmrna1.o mrna1.f + +cmrna2.o: mrna2.f crfd.inc + $(FC) $(CFLAGS) -c -o cmrna2.o mrna2.f + +csort.o: sort.f crfd.inc + $(FC) $(CFLAGS) -c -o csort.o sort.f diff --git a/ZUKER/Makefile.DEC b/ZUKER/Makefile.DEC new file mode 100755 index 0000000..0bd4d70 --- /dev/null +++ b/ZUKER/Makefile.DEC @@ -0,0 +1,57 @@ +# +# SHELL = /bin/sh +# can add -O2 to FFLAGS to get maximum optimization +# use -Bstatic to compile with static library. 
+# + +CFLAGS = -extend_source + +all: lrna crna + +lrna: lrna.o lin.o lmrna1.o lmrna2.o lsort.o formid.o multid.o + f77 $(CFLAGS) -o lrna lrna.o lin.o lmrna1.o lmrna2.o lsort.o formid.o multid.o + +crna: crna.o circ.o cmrna1.o cmrna2.o csort.o formid.o multid.o + f77 $(CFLAGS) -o crna crna.o circ.o cmrna1.o cmrna2.o csort.o formid.o multid.o + +formid.o: formid.f + f77 $(CFLAGS) -c -o formid.o formid.f + +multid.o: multid.f + f77 $(CFLAGS) -c -o multid.o multid.f + +lrfd.inc: lin.inc + cp lin.inc rfd.inc + +lrna.o: rna.f lrfd.inc + f77 $(CFLAGS) -c -o lrna.o rna.f + +lin.o: lin.f lrfd.inc + f77 $(CFLAGS) -c -o lin.o lin.f + +lmrna1.o: mrna1.f lrfd.inc + f77 $(CFLAGS) -c -o lmrna1.o mrna1.f + +lmrna2.o: mrna2.f lrfd.inc + f77 $(CFLAGS) -c -o lmrna2.o mrna2.f + +lsort.o: sort.f lrfd.inc + f77 $(CFLAGS) -c -o lsort.o sort.f + +crfd.inc: circ.inc + cp circ.inc rfd.inc + +crna.o: rna.f crfd.inc + f77 $(CFLAGS) -c -o crna.o rna.f + +circ.o: circ.f crfd.inc + f77 $(CFLAGS) -c -o circ.o circ.f + +cmrna1.o: mrna1.f crfd.inc + f77 $(CFLAGS) -c -o cmrna1.o mrna1.f + +cmrna2.o: mrna2.f crfd.inc + f77 $(CFLAGS) -c -o cmrna2.o mrna2.f + +csort.o: sort.f crfd.inc + f77 $(CFLAGS) -c -o csort.o sort.f diff --git a/ZUKER/Makefile.sun b/ZUKER/Makefile.sun new file mode 100755 index 0000000..373037b --- /dev/null +++ b/ZUKER/Makefile.sun @@ -0,0 +1,57 @@ +# +# SHELL = /bin/sh +# can add -O2 to FFLAGS to get maximum optimization +# use -Bstatic to compile with static library. 
+# + +CFLAGS = -e -Bstatic + +all: lrna crna + +lrna: lrna.o lin.o lmrna1.o lmrna2.o lsort.o formid.o multid.o + f77 $(CFLAGS) -o lrna lrna.o lin.o lmrna1.o lmrna2.o lsort.o formid.o multid.o -lV77 + +crna: crna.o circ.o cmrna1.o cmrna2.o csort.o formid.o multid.o + f77 $(CFLAGS) -o crna crna.o circ.o cmrna1.o cmrna2.o csort.o formid.o multid.o -lV77 + +formid.o: formid.f + f77 $(CFLAGS) -c -o formid.o formid.f -lV77 + +multid.o: multid.f + f77 $(CFLAGS) -c -o multid.o multid.f -lV77 + +lrfd.inc: lin.inc + cp lin.inc rfd.inc + +lrna.o: rna.f lrfd.inc + f77 $(CFLAGS) -c -o lrna.o rna.f -lV77 + +lin.o: lin.f lrfd.inc + f77 $(CFLAGS) -c -o lin.o lin.f -lV77 + +lmrna1.o: mrna1.f lrfd.inc + f77 $(CFLAGS) -c -o lmrna1.o mrna1.f -lV77 + +lmrna2.o: mrna2.f lrfd.inc + f77 $(CFLAGS) -c -o lmrna2.o mrna2.f -lV77 + +lsort.o: sort.f lrfd.inc + f77 $(CFLAGS) -c -o lsort.o sort.f -lV77 + +crfd.inc: circ.inc + cp circ.inc rfd.inc + +crna.o: rna.f crfd.inc + f77 $(CFLAGS) -c -o crna.o rna.f -lV77 + +circ.o: circ.f crfd.inc + f77 $(CFLAGS) -c -o circ.o circ.f -lV77 + +cmrna1.o: mrna1.f crfd.inc + f77 $(CFLAGS) -c -o cmrna1.o mrna1.f -lV77 + +cmrna2.o: mrna2.f crfd.inc + f77 $(CFLAGS) -c -o cmrna2.o mrna2.f -lV77 + +csort.o: sort.f crfd.inc + f77 $(CFLAGS) -c -o csort.o sort.f -lV77 diff --git a/ZUKER/README b/ZUKER/README new file mode 100755 index 0000000..f52e0a5 --- /dev/null +++ b/ZUKER/README @@ -0,0 +1,114 @@ + MFOLD - Prediction of RNA secondary structure by free energy + minimization. + - Version 2.0 + - Michael Zuker and John Jaeger + - lrna : folds linear RNA sequences + - crna : folds circular RNA sequences + +The original version (1.0) was designed by Michael Zuker and +programmed by Eric Nelson in the summer of 1987 in the Division of +Biological Sciences at the National Research Council of Canada. John +Jaeger added the tetraloop bonus energy feature and created the +BATGEN program for batch file generation. 
+ +Version 2.0 corrects a number of small bugs from the original +program. These were added to version 1 and itemized in the +ERRATA.LIST file that was distributed along with version 1. The major +improvements of version 2 are : +1. During the generation of suboptimal foldings, the number of new +base pairs that are sufficiently different from base pairs that have +already been found must be greater than the WINDOW parameter. This +feature was added during the summer of 1989, and was made part of +version 1 (item 11 in the ERRATA.LIST file distributed with this +version). The effect is to eliminate structures that contain just a +few new base pairs. +2. Temperature dependent folding. This was added in the fall of 1989 +and was never a feature of version 1. +3. The multiple sequence option will now generate suboptimal foldings +of all the sequences in an input file. This can be used (for +example), to compute all foldings within 10% of the minimum energy +for all the 5S RNAs contained in a single file. + +The programs were adapted to run in a UNIX environment during the +winter of 1990. M. Zuker ported all the code except for the dotplt +subroutine. Roland Gaboury created dotplt 'de novo' using the IRIS +GL. + +METHOD : A dynamic programming algorithm is used to find optimal and +suboptimal foldings of an RNA molecule starting from linear sequence +data. Auxiliary information can be used to constrain the folding. + +Energy data from : +S.M. Freier et al., Proc. Natl. Acad. Sci. USA, 83, 9373-9377, 1986. +D.H. Turner et al., Cold Spring Harbor Symposia on Quantitative Biology, +52, 123-133, 1987. +D.H. Turner et al., Annu. Rev. Biophys. Biophys. Chem 17, 167-192 (1988). +This last reference has all the dangling end and terminal mismatch data. + +References : +M. Zuker +On Finding All Suboptimal Foldings of an RNA Molecule. +Science, 244, 48-52, (1989) + +J. A. Jaeger, D. H. Turner and M. Zuker +Improved Predictions of Secondary Structures for RNA. +Proc. Natl. Acad. 
Sci. USA, BIOCHEMISTRY, 86, 7706-7710, (1989) + +J. A. Jaeger, D. H. Turner and M. Zuker +Predicting Optimal and Suboptimal Secondary Structure for RNA. +in "Molecular Evolution: Computer Analysis of Protein and +Nucleic Acid Sequences", R. F. Doolittle ed. +Methods in Enzymology, 183, 281-306 (1989) + +see also: +M. Zuker +Computer Prediction of RNA Structure. +in "RNA Processing", J. E. Dahlberg and J. N. Abelson eds. +Methods in Enzymology, 180, 262-288, (1989) + +M. Zuker +The Use of Dynamic Programming Algorithms in RNA Secondary +Structure Prediction. +in "Mathematical Methods for DNA Sequences", M. S. Waterman ed. +CRC Press, Inc., 159-184, (1989) + +A.B. Jacobson, M. Zuker and A. Hirashima +Comparative Studies on the Secondary Structure of the RNAs of +Related RNA Coliphages. +in "Molecular Biology of RNA: New Perspectives", M. Inouye and +B. S. Dudock eds. +Academic Press, Inc., 331-354, (1987) + + +Use : + +lrna and crna are ready to use. They can be run by issuing the +commands : +lrna + or +crna + +To have these programs run in their own shell, you can use the +commands : +zsh lrna +or +zsh crna + +lrna and crna have many files in common. However, the include file +'rfd.inc' is different for lrna and crna. lrna uses lin.inc as its +rfd.inc, and crna uses circ.inc. This is taken care of automatically +in Makefile. The folding dimensions can be altered by changing +lin.inc (linear RNA folding) or circ.inc (circular RNA folding). If +lin.inc or circ.inc is changed, then ALL subroutines used by lrna or +crna must be recompiled with the exception of formid and multid. This +is done automatically by Makefile. + +files.list contains a list and description of all files used for +folding. Note the sample runs of the ALU sequence and the batch input +files (bat1.com and bat2.com) that produced them. + +mfold.user is a user's manual. + +mfold.doc is a glossary of functions, subroutines and variables. 
#!/bin/csh
#
# ZUKERGDE.sh - print a canned list of one-line responses on stdout.
#
# Arguments:
#   $1  echoed as the fourth response line
#   $2  echoed after the "n" / "y" responses
#   $3  directory whose *.dat files are copied into /tmp
#
# NOTE(review): the output is presumably piped into lrna/crna to answer
# their interactive prompts in order (sequence file, energy files, ...);
# confirm against the caller before reordering or adding lines.
#
# The arguments are quoted so that names containing blanks or glob
# characters are passed through unchanged instead of being word-split
# or expanded by the shell.

echo " "
echo 0
echo 2
echo "$1"
echo " "
echo " "
echo " "

# Stage the energy tables under /tmp; the fixed /tmp paths echoed below
# rely on this copy having been made.
cp "$3"/*.dat /tmp
echo /tmp/dangle.dat
echo /tmp/loop.dat
echo /tmp/stack.dat
echo /tmp/tstack.dat
echo /tmp/tloop.dat
echo /tmp/miscloop.dat

echo n
echo y
echo "$2"
echo y
echo RegionTable
echo 8
echo " "
c     ERG - energy of the loop closed by the base-pair I,J.
c
c     MODE selects the kind of loop (dispatched by the computed GOTO):
c       1  read the energy files (calls ERGREAD) and return 0
c       2  stacked pair: I,J stacks on IP,JP
c       3  bulge or interior loop closed by I,J and IP,JP
c       4  hairpin loop
c       5  multi-branch loop
c       6  dangling base: IP dangles on I,J; JP = 1 or 2 selects the
c          dangle table
c       7  terminal stack or mismatch
c
c     The result is INFINITY when I or J is a prohibited base
c     (FORCE = 1) or when an accessibility constraint (FORCE = 3)
c     rules the loop out.  For MODE < 6 the bonus EPARAM(9) is added
c     once if either base is forced (FORCE = 2) or FCE(I,J) is true,
c     and a second time if both bases are forced.
c
      function erg(mode,i,j,ip,jp)
      include 'rfd.inc'
      dimension e(4)
      integer*2 tlink,tlptr
      logical fce

100   if (mode.eq.1) then
c     Read energy files
        call ergread
        erg = 0
        return
      endif

      erg = 0
c     Do not allow prohibited bases to pair.
      if (force(i).eq.1.or.force(j).eq.1) then
        erg = infinity
        return
      endif

      if (mode.lt.6) then
c     Add bonus energy to force base-pairs.
        if (force(i).eq.2.or.force(j).eq.2.or.fce(i,j)) then
          erg = erg + eparam(9)
          if (force(i).eq.2.and.force(j).eq.2) erg = erg + eparam(9)
        endif
      endif

c     Dispatch on MODE.  MODE = 1 has already returned above, so the
c     first label is never taken from here.
c     NOTE(review): a MODE outside 1..7 is not rejected; a computed
c     GOTO with an out-of-range selector continues with the next
c     statement (label 200) - confirm callers never pass one.
      goto (100,200,300,400,500,600,700),mode

c     Nucleotide accessibility option.
200   if (force(i).eq.3.or.force(jp).eq.3) then
        erg = infinity
        return
      endif
c     Stacking energy.
      erg = erg + stack(numseq(i),numseq(j),numseq(ip),numseq(jp))
     .          + eparam(1)
      return

c     SIZE1 and SIZE2 are the numbers of unpaired bases on the two
c     sides of the loop; a bulge has one side empty.
300   size1 = ip - i - 1
      size2 = j - jp - 1
      if (size1.eq.0.or.size2.eq.0) then
c     Check for nucleotide accessibility.
        if (size1.eq.0.and.force(i).eq.3) then
          erg = infinity
          return
        endif
        if (size2.eq.0.and.force(jp).eq.3) then
          erg = infinity
          return
        endif
        size = size1+size2
c     Bulge loop.  A bulge of one base keeps the stacking energy of
c     the two flanking pairs; bulges longer than 30 are extrapolated
c     from BULGE(30) with the logarithmic increment LOGINC.
        if (size.eq.1) then
          erg = erg + stack(numseq(i),numseq(j),numseq(ip),numseq(jp))
     .              + bulge(1) + eparam(2)
        else if (size.gt.30) then
          loginc = int(prelog*log((float(size)/30.0)))
          erg = erg + bulge(30) + loginc + eparam(2)
        else
          erg = erg + bulge(size) + eparam(2)
        endif
        return
      else
        size = size1+size2
c     LOPSID measures the asymmetry of the loop; the asymmetry
c     penalty below is capped at MAXPEN.
        lopsid = abs((size1-size2))
c     Interior loop.  Both closing pairs contribute a terminal stack
c     term; sizes above 30 are extrapolated from INTER(30).
        if (size.gt.30) then
          loginc = int(prelog*log((float(size)/30.0)))
          erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1))
     .      + tstk(numseq(jp),numseq(ip),numseq(jp+1),numseq(ip-1))
     .      + inter(30) + loginc + eparam(3)
     .      + min0(maxpen,(lopsid*poppen(min0(4,size1,size2))))
        else
          erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1))
     .      + tstk(numseq(jp),numseq(ip),numseq(jp+1),numseq(ip-1))
     .      + inter(size) + eparam(3)
     .      + min0(maxpen,(lopsid*poppen(min0(4,size1,size2))))
        endif
        return
      endif

400   size = j-i-1
c     Hairpin loop.
      if ((size.eq.3).and.fce(i,j).and.seq(hstnum(i+1)).eq.' ') then
c     Closed excision.  The result is set to the bonus alone,
c     replacing whatever was accumulated above.
        erg = eparam(9)
        return
      endif
      if (size.gt.30) then
        loginc = int(prelog*log((float(size)/30.0)))
        erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1))
     .            + hairpin(30) + loginc + eparam(4)
      else if (size.lt.4) then
c     Hairpins shorter than 4 get no terminal stack term.
        erg=erg+hairpin(size)+eparam(4)
      else
c     Loops of exactly 4 bases are looked up in the TLOOP table:
c     KEY packs the four loop bases (base 8) and TLINK receives the
c     matching entry from column 2, or stays 0 if none matches.
c     NOTE(review): Fortran does not guarantee short-circuit .and.,
c     so TLOOP(TLPTR,1) may be referenced with
c     TLPTR = NUMOFTLOOPS+1; confirm this stays within MAXTLOOPS.
        tlink=0
        if (size.eq.4) then
          key=((numseq(i+4)*8+numseq(i+3))*8+numseq(i+2))*8+numseq(i+1)
          tlptr=1
          do while ((tlptr.le.numoftloops).and.(tloop(tlptr,1).ne.key))
            tlptr=tlptr+1
          enddo
          if (tlptr.le.numoftloops) tlink=tloop(tlptr,2)
        endif
        erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1))
     .            + hairpin(size) + eparam(4) + tlink
      endif
      return

c     Multi-branch loop.
c     E(1)..E(4) hold the best energy for the four dangling-end cases.
500   do 501 ii = 1,4
501   e(ii) = infinity

c     IND1 and IND2 are offsets into WST such that WST(IND1+K) is
c     W(I+1,K) and WST(IND2+K) is W(I+2,K).  Rows beyond N wrap back
c     to the start of the circular sequence, which is why the last
c     two rows are handled separately.
      if (i.le.n-2) then

        ind1 = (n-1)*i
        ind2 = (n-1)*(i+1)

      else if (i.eq.n-1) then

        ind1 = (n-1)*i
        ind2 = -n

      else

        ind1 = -n
        ind2 = -1

      endif


c     K is the split point between the two fragments; the right-hand
c     fragment is read from the WORK copy of the W columns.
      do k = i+2,j-3
c     EPARAM(6) is the energy penalty for each single-stranded base
c     in a multi-loop. EPARAM(10) is the energy penalty for each
c     base-pair closing a multi-loop.
c     No dangling ends next to the I,J base-pair.
        e(1) = min0(e(1),wst(ind1+k)+work(k+1,mod(j-1,3)))
c     I+1 dangles on the I,J base-pair.
        e(2) = min0(e(2),dangle(numseq(i),numseq(j),numseq(i+1),1)
     .         + wst(ind2+k) + work(k+1,mod(j-1,3)) + eparam(6))
c     J-1 dangles on the I,J base-pair.
        e(3) = min0(e(3),dangle(numseq(i),numseq(j),numseq(j-1),2)
     .         + wst(ind1+k) + work(k+1,mod(j-2,3)) + eparam(6))
c     Both I+1 and J-1 dangle on the I,J base-pair.
        e(4) = min0(e(4),dangle(numseq(i),numseq(j),numseq(i+1),1)
     .         + dangle(numseq(i),numseq(j),numseq(j-1),2)
     .         + wst(ind2+k) + work(k+1,mod(j-2,3)) + 2*eparam(6))
      enddo
c     EPARAM(5) is the energy penalty for closing a multi-loop.
      erg = erg + eparam(5) + eparam(10) + min0(e(1),e(2),e(3),e(4))
      return

c     Dangling base stacking energy. IP dangles over the I,J
c     base-pair. 3' or 5' dangle if JP = 1 or 2 respectively.
600   erg = erg + dangle(numseq(i),numseq(j),numseq(ip),jp)
      return

700   if (force(i).eq.3.or.force(jp).eq.3) then
        erg = infinity
        return
      endif
c     Terminal stack or mismatch energy.
      erg = erg + tstk(numseq(i),numseq(j),numseq(ip),numseq(jp))
      return
      end
c     Accessor for V(I,J).  The values live in the packed array VST at
c     offset (N-1)*(row-1)+column; when I lies beyond N both indices
c     are first moved back by N.
      function v(i,j)
      include 'rfd.inc'

      if (i.le.n) then
         row = i
         col = j
      else
         row = i - n
         col = j - n
      endif
      v = vst((n-1)*(row-1)+col)
      return
      end
c     Accessor for W(I,J).  The values live in the packed array WST at
c     offset (N-1)*(row-1)+column; when I lies beyond N both indices
c     are first moved back by N.
      function w(i,j)
      include 'rfd.inc'

c     WRAP is the amount subtracted from both indices: N for the
c     wrapped half of the doubled sequence, otherwise 0.
      wrap = 0
      if (i.gt.n) wrap = n
      w = wst((n-1)*(i-wrap-1)+j-wrap)
      return
      end
+ i = i + 1 + j = j - 1 + e = v(i,j) + endif +c Check for stem closing a multi-loop. + if (e.eq.v(i,j)+eparam(10)) e = v(i,j) + + if (e.ne.v(i,j)) then +c Cannot chop away at ends any more and still the ends do not +c base-pair with one another. Structure MUST bifurcate (OPEN). + k = i+1 +200 if (k.eq.j) then +c Structure will not split. Error + ii = hstnum(i) + ji = hstnum(j) + error = 10 + return + endif + if (e.eq.w(i,k) + w(k+1,j)) then +c Best structure on I,J splits into best structures on I,K and +c K+1,J. Push these fragments on to the stack. + call push(i,k,w(i,k),0) + call push(k+1,j,w(k+1,j),0) + goto 100 + else + k = k + 1 + goto 200 + endif + endif + +c Base-pair found. Base-pairs are stored in the range 1 <= I < J <= N. +300 if (j.le.n) then + basepr(i) = j + basepr(j) = i + else if (i.gt.n) then + basepr(i-n) = j-n + basepr(j-n) = i-n + i = i - n + j = j - n + else + basepr(j-n) = i + basepr(i) = j-n + endif +c Check if this is a forced base-pair. + if (force(i).eq.2.or.force(j).eq.2.or.fce(i,j)) + . nforce = nforce + 1 + if (force(i).eq.2.and.force(j).eq.2) nforce = nforce + 1 +c Perhaps I,J stacks over I+1,J-1? + if (e.eq.erg(2,i,j,i+1,j-1) + . + v(i+1,j-1)) then + i = i + 1 + j = j - 1 + e = v(i,j) + goto 300 + endif +c Perhaps I,J closes a hairpin loop? + if (e.eq.erg(4,i,j,i,j)) goto 100 + +c Define E' ( EP in the program ) to be E corrected by a +c possible bonus energy for forced base-pairing. +c + ep = e + if (force(i).eq.2.or.force(j).eq.2.or.fce(i,j)) + . ep = ep - eparam(9) + if (force(i).eq.2.and.force(j).eq.2) ep = ep - eparam(9) + + k = i+2 +c Perhaps I,J closes a multi-loop? +400 if (k.ge.j-3) goto 500 + if (ep.eq.w(i+1,k) + w(k+1,j-1) + eparam(10) + eparam(5)) then +c Multi-loop. No dangling ends on I,J. + call push(i+1,k,w(i+1,k),0) + call push(k+1,j-1,w(k+1,j-1),0) + goto 100 + else if (ep.eq.erg(6,i,j,i+1,1)+w(i+2,k)+w(k+1,j-1)+eparam(10)+ + . eparam(6)+eparam(5)) then +c Multi-loop. I+1 dangles over I,J base-pair. 
c     PUTCONT - write the results of a SAVE run to unit 30 so that a
c     CONTINUATION run can reload them.  Eight unformatted records are
c     written; GETCONT reads them back in the same order, so the
c     record order and the item lists must not be changed here alone.
c     NOTE(review): unit 30 is assumed to be opened by the caller.
      subroutine putcont
      include 'rfd.inc'

c     Number of energy-array cells written out.
      ncell = n*n

c     Problem size, saved range, minimum energy, list size and label.
      write(30) n,nsave,vmin,listsz,seqlab
c     Energy tables and parameters.
      write(30) stack,tstk,dangle,hairpin,bulge,inter,eparam
c     Packed V and W energy arrays.
      write(30) (vst(k),k=1,ncell)
      write(30) (wst(k),k=1,ncell)
c     Sequence characters in the range NSAVE(1)..NSAVE(2).
      write(30) (seq(k),k=nsave(1),nsave(2))
c     The LIST table, column by column.
      write(30) ((list(k,m),k=1,100),m=1,4)
c     TLOOP table and its entry count.
      write(30) tloop,numoftloops
c     Loop-asymmetry penalties and the extrapolation factor.
      write(30) (poppen(k),k=1,4),maxpen,prelog
      return
      end
c     GETCONT - read back from unit 30 the results stored by PUTCONT
c     for a CONTINUATION run.  The eight unformatted records are read
c     in the order PUTCONT wrote them.
c
c     Every READ traps both end-of-file and read errors, so a short or
c     damaged save file always ends in ERRMSG(40,0,0) rather than a
c     run-time abort.  Previously the first six records trapped only
c     END= and the last two only ERR=.
c     NOTE(review): unit 30 is assumed to be opened by the caller.
      subroutine getcont
      include 'rfd.inc'

      read(30,end=10,err=10) n,nsave,vmin,listsz,seqlab
c     VST and WST are dimensioned MAXN*MAXN; refuse a size that would
c     overrun them when N*N cells are read below.
      if (n.gt.maxn) goto 10
      read(30,end=10,err=10) stack,tstk,dangle,hairpin,bulge,inter,
     .                       eparam
      read(30,end=10,err=10) (vst(i),i=1,n*n)
      read(30,end=10,err=10) (wst(i),i=1,n*n)
      read(30,end=10,err=10) (seq(i),i=nsave(1),nsave(2))
      read(30,end=10,err=10) ((list(i,j),i=1,100),j=1,4)
      read(30,end=10,err=10) tloop,numoftloops
      read(30,end=10,err=10) (poppen(i),i=1,4),maxpen,prelog
      goto 11

c     Incomplete or unreadable save file.
10    call errmsg(40,0,0)

11    return
      end
dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog + + character*1 seq(maxsiz) +c character*5 inbuf + character*10 progtitle + character*30 seqlab + common /seq/ seq,seqlab + data progtitle/'crna'/ + + dimension list(100,4) + common /list/ list,listsz + common /nm/ n,vmin + + dimension basepr(maxn) + common /traceback/ basepr + + dimension heapi(sortmax+1),heapj(sortmax+1) + common /heap/ heapi,heapj,num + + integer*2 marks(mxbits),force2(mxbits) + common/bits/marks,force2 + + integer*2 tloop(maxtloops,2),numoftloops + common/tloops/tloop,numoftloops diff --git a/ZUKER/ctFile b/ZUKER/ctFile new file mode 100755 index 0000000..efcf9c1 --- /dev/null +++ b/ZUKER/ctFile @@ -0,0 +1,77 @@ + 76 ENERGY = -19.6 BOVTRF2 + 1 G 0 2 72 1 + 2 C 1 3 71 2 + 3 C 2 4 70 3 + 4 G 3 5 69 4 + 5 A 4 6 68 5 + 6 A 5 7 67 6 + 7 A 6 8 66 7 + 8 U 7 9 65 8 + 9 A 8 10 0 9 + 10 G 9 11 0 10 + 11 C 10 12 0 11 + 12 U 11 13 0 12 + 13 C 12 14 0 13 + 14 A 13 15 0 14 + 15 G 14 16 0 15 + 16 U 15 17 64 16 + 17 U 16 18 63 17 + 18 G 17 19 62 18 + 19 G 18 20 61 19 + 20 G 19 21 60 20 + 21 A 20 22 59 21 + 22 G 21 23 56 22 + 23 A 22 24 55 23 + 24 G 23 25 54 24 + 25 C 24 26 53 25 + 26 G 25 27 0 26 + 27 U 26 28 0 27 + 28 U 27 29 0 28 + 29 A 28 30 0 29 + 30 G 29 31 48 30 + 31 A 30 32 47 31 + 32 C 31 33 46 32 + 33 U 32 34 45 33 + 34 G 33 35 0 34 + 35 A 34 36 0 35 + 36 A 35 37 0 36 + 37 G 36 38 0 37 + 38 A 37 39 0 38 + 39 U 38 40 0 39 + 40 C 39 41 0 40 + 41 U 40 42 0 41 + 42 A 41 43 0 42 + 43 A 42 44 0 43 + 44 A 43 45 0 44 + 45 G 44 46 33 45 + 46 G 45 47 32 46 + 47 U 46 48 31 47 + 48 C 47 49 30 48 + 49 C 48 50 0 49 + 50 C 49 51 0 50 + 51 U 50 52 0 51 + 52 G 51 53 0 52 + 53 G 52 54 25 53 + 54 U 53 55 24 54 + 55 U 54 56 23 55 + 56 C 55 57 22 56 + 57 G 56 58 0 57 + 58 A 57 59 0 58 + 59 U 58 60 21 59 + 60 C 59 61 20 60 + 61 C 60 62 19 61 + 62 C 61 63 18 62 + 63 G 62 64 17 63 + 64 G 63 65 16 64 + 65 G 64 66 8 65 + 66 U 65 67 7 66 + 67 U 66 68 6 67 + 68 U 67 69 5 68 + 69 C 68 70 4 69 + 70 G 69 71 3 70 + 
71 G 70 72 2 71 + 72 C 71 73 1 72 + 73 A 72 74 0 73 + 74 C 73 75 0 74 + 75 C 74 76 0 75 + 76 A 75 0 0 76 diff --git a/ZUKER/dangle.dat b/ZUKER/dangle.dat new file mode 100755 index 0000000..7e7e373 --- /dev/null +++ b/ZUKER/dangle.dat @@ -0,0 +1,89 @@ + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + AX AX AX AX + A C G U + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . . . . . . -0.8 -0.5 -0.8 -0.6 + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + CX CX CX CX + A C G U + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . . -1.7 -0.8 -1.7 -1.2 . . . . + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + GX GX GX GX + A C G U + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . -1.1 -0.4 -1.3 -0.6 . . . . -0.8 -0.5 -0.8 -0.6 + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + UX UX UX UX + A C G U + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' +-0.7 -0.1 -0.7 -0.1 . . . . -1.2 -0.5 -1.2 -0.7 . . . . 
+ + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + A A A A + AX CX GX UX + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . . . . . . -0.3 -0.1 -0.2 -0.2 + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + C C C C + AX CX GX UX + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . . -0.2 -0.3 -0.0 -0.0 . . . . + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + G G G G + AX CX GX UX + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . -0.5 -0.2 -0.2 -0.1 . . . . -0.2 -0.2 -0.2 -0.2 + + + X X X X +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + U U U U + AX CX GX UX + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' +-0.3 -0.3 -0.4 -0.2 . . . . -0.2 -0.2 -0.2 -0.2 . . . . 
+ diff --git a/ZUKER/file.gen b/ZUKER/file.gen new file mode 100755 index 0000000..3041066 --- /dev/null +++ b/ZUKER/file.gen @@ -0,0 +1,13 @@ +HELIX +Sequence +ZZZZZZZZZZ +LOCUS HELIX TEXT 76 BP +ORIGIN + 1 [[[[[[[[-- -----[[[[[ [[[[[----[ [[[------- ----]]]]-- --]]]]--]] + 61 ]]]]]]]]]] ]]---- +// +LOCUS Sequence RNA 76 BP +ORIGIN + 1 GCCGAAAUAG CUCAGUUGGG AGAGCGUUAG ACUGAAGAUC UAAAGGUCCC UGGUUCGAUC + 61 CCGGGUUUCG GCACCA +// diff --git a/ZUKER/files.list b/ZUKER/files.list new file mode 100755 index 0000000..9d524be --- /dev/null +++ b/ZUKER/files.list @@ -0,0 +1,301 @@ + Directory of files + + Suboptimal RNA folding package - M. Zuker, J. A. Jaeger, D. H. Turner + + Version 2.0 + +Makefile - Standard UNIX makefile for RNA folding programs. + +alu.ct - A concatenation of 15 ct files containing part of the output + produced by executing lrna using bat2.com as input. + ( lrna ' occurring in column 1 of a +c record. The sequence itself start 2 lines down from the record +c containing '>'. + elseif (reclin(1:1).eq.'>') then + found = .true. + stype = 'PIR ' +c DO WHILE (.NOT.ENDFIL) + 630 if (reclin(1:1).eq.'>') then + idcnt = idcnt + 1 + sline(idcnt) = line + 1 + seqids(idcnt) = reclin(5:25) + endif + line = line + 1 + read(66,120,end = 410,err=991) reclin +c ENDDO + if (.not.endfil) goto 630 +c EMBL (european) format, recognized by the key phrase ID in the +c first column of the record. + elseif (reclin(1:5).eq.'ID ') then + found = .true. 
+ stype = 'EMBL ' +c scrolling through to find the key phrase 'SQ Sequence' because +c the line after this key phrase occurrance is where the sequence +c occurs in the file; +c DO WHILE (.NOT.ENDFIL) + 640 if (reclin(1:5).eq.'ID ') then + idcnt = idcnt + 1 + seqids(idcnt) = reclin(6:index(reclin,' ')) + elseif (reclin(1:13).eq.'SQ Sequence') then + sline(idcnt) = line + endif + line = line + 1 + read(66,120,end = 410,err=991) reclin +c ENDDO + if (.not.endfil) goto 640 +c NRC format, recognized by '(' occurring in the first column of +c a record. The record line following this line holds the number +c of elements in the sequence and the name of the sequence. The +c following line signals the beginning of the sequence. +c The sequence itself is read in by using variable format +c described by the '(' record line. This format statement on the +c record containing '(' ,must also be retrieved. + elseif (reclin(1:1).eq.'(') then + found = .true. + stype = 'NRC ' +c DO WHILE (.NOT.ENDFIL) + 650 if (reclin(1:1).eq.'(') then + idcnt = idcnt + 1 + fmtseq(idcnt) = reclin(1:index(reclin,')')) + line = line + 1 + read(66,140,end = 410,err = 991) + . seqnum(idcnt),seqids(idcnt) + sline(idcnt) = line + endif + line = line + 1 + read(66,120,end = 410,err=991) reclin +c ENDDO + 140 format(i4,5x,a30) + if (.not.endfil) goto 650 + endif +c keep scrolling through the file until a key phrase signalling +c a format type is recognized or the end of file is found + line = line + 1 +c ENDDO + if (.not.found) goto 600 +c if a format type has not been found then this cannot be a +c sequence file and return to main. + 410 if (.not.found) then + write(6,105) ' No sequence identifiers found in this file ' + 105 format(a50) + return + endif +c if this is a valid format type proceed + used = .true. 
+ endif +c IF USED this call before start at listing identifiers +c listing the sequence identifiers +c allow the user to input the number requested +c or control d to finish here and return to +c entering a new filename + valid = .false. +c keep outputting the list of sequence identifiers +c while NOT VALID choice of sequence identifier is inputted +c DO WHILE (.NOT.VALID) + 660 write(6,150) 'Available sequences in ',filnam + 150 format(/,1x,a23,a50) + do 525 k = 1,idcnt,2 + if (sline(k).ne.0) then + if (k .eq. idcnt) then + write (6,151) k,'.',seqids(k) + else + write (6,191) k,'.',seqids(k),k+1,'.',seqids(k+1) + endif + 151 format(1x,i3,a1,1x,a30) + 191 format(1x,i3,a1,1x,a30,3x,i3,a1,1x,a30) + endif + 525 continue + choice = ' ' + seqid = ' ' + pointr = 0 + write(6,152) + . 'Choose sequence by number or name , or ? for relist; ' + write(6,152) + . ' defaults to the first one, / for new file.' + 152 format(1x,a60) + read(5,153,end=10) choice + 153 format(a30) +c error checking of the inputted CHOICE and determining +c if this input is a number, a default value, a relist command +c or the name of the sequence identifier + i = 1 + do while (choice(i:i).eq.' '.and.i.lt.6) + i = i + 1 + enddo + if ((choice(i:i).ge.'1').and.(choice(i:i).le.'9')) then +c using COLLATING sequence to convert character to number value + pointr = ichar(choice(i:i)) - ichar('0') + if (choice(i+1:i+1).ne.' ') then + pointr = ichar(choice(i+1:i+1)) - ichar('0') + 10 * pointr + endif + if (choice(i+2:i+2).ne.' ') then + pointr = ichar(choice(i+2:i+2)) - ichar('0') + 10 * pointr + endif + if (pointr.gt.idcnt) then + write(6,154) ' NUMBER CHOICE BETWEEN 1 AND ',idcnt + 154 format(a30,i2) + else + seqid = seqids(pointr) + valid = .true. + endif + elseif (ichar(choice(6:6)).eq.32) then + pointr = 1 + seqid = seqids(1) + valid = .true. + elseif (choice(1:1).eq.'?') then + continue + else +c find out if the inputted choice is in the list of seq identifiers. 
+ pointr = 0 + do 535 k = 1,idcnt + if (seqids(k).eq.choice) then + seqid = seqids(k) + pointr = k + valid = .true. + endif + 535 continue + if (pointr.eq.0) then + write(6,105) 'Does not match any in given list' + endif + endif +c ENDDO + if (.not.valid) goto 660 +c having obtained a valid sequence choice or default of +c one available sequence, rewind the file and retrieve the +c sequence + rewind(66,err = 992) +c having retrieved the line number in the file where this +c identifier occurs from SLINE(POINTR) scroll through the +c file until this line is reached +c +c SUN WARNING: brancd into block. + do 550 i = 1,sline(pointr) + read(66,120,end = 410,err=991) reclin + 550 continue +c if nrc type then read sequence according to format type + if (stype.eq.'NRC ') then + nseq = seqnum(pointr) +c if the number in the NRC sequence is greater than +c the maximum number sent down from main, then +c truncate to NMAX and output a message to the user. + if (nseq.gt.nmax) then + write(6,160) ' Sequence truncated to ',nseq + 160 format(1x,a30,i5) + nseq = nmax + endif + read(66,fmtseq(pointr),end = 420,err=991) + . (seq(k),k=1,seqnum(pointr)) +c Else if not type NRC then find the sequence by taking +c each letter of the next records until end-of-sequence +c indicator found or number in the sequence NMAX is reached + else + n = 1 + found = .false. +c DO WHILE (.NOT.FOUND) + 670 read(66,120,end = 420,err=991) reclin +c there is a need for the two positions to start reading the +c sequence; to accommodate the GENBANK format and the STANFORD +c end-of-sequence checks. + if (stype.eq.'GENBANK ') then + start = 10 + else + start = 1 + endif + do 565 i = start,80 +c if the number in the sequence is less than the desired +c number, NMAX sent down from main, then retrieve sequence + if (n.le.nmax) then + if (.not.found) then + seq(n) = reclin(i:i) +c check if an early end of sequence + if ((seq(n).eq.'1').or.(seq(n).eq.'2').or. + . 
(reclin(1:1).eq.'/').or.(seq(n).eq.'*')) then + seq(n) = ' ' + nseq = n - 1 + found = .true. +c if not an end-of-sequence character but have gone far +c enough, then truncate to NMAX + elseif (n.eq.nmax) then + nseq = nmax + found = .true. + write(6,160) ' Sequence truncated to ',nseq +c if not a end-of-sequence character, check to see if it is +c not a blank character. Blank characters will not be added +c to the sequence. + elseif (seq(n).ne.' ') then + n = n + 1 + endif + endif +c if the number of sequence characters found is Greater than +c NMAX then all of the sequence has been found. + else + found = .true. + nseq = n + write(6,160) + . ' Sequence truncated to ',nseq + endif + 565 continue +c ENDDO + if (.not.found) goto 670 + endif + 420 return +c 991 stop ' ERROR IN READING FILE ' +c 992 stop ' ERROR IN REWINDING FILE' +c 999 stop ' END of SESSION...GOOD BYE' + 991 call exit(1) + 992 call exit(1) + 999 call exit(1) + end diff --git a/ZUKER/install.csh b/ZUKER/install.csh new file mode 100755 index 0000000..f246683 --- /dev/null +++ b/ZUKER/install.csh @@ -0,0 +1,2 @@ +make +cp lrna crna ZUKERGDE.sh ../bin diff --git a/ZUKER/lin.f b/ZUKER/lin.f new file mode 100755 index 0000000..cb90dd4 --- /dev/null +++ b/ZUKER/lin.f @@ -0,0 +1,837 @@ +c Energy funtion. +c ERG is the energy of a loop closed by I,J (new numbering). +c IP,JP is the other closing base-pair when MODE = 2 or 3. +c The ends of the sequence cannot be contained in a hairpin, bulge +c or interior loop. By convention, the ends of the sequence are +c put into a special kind of multi-loop. This can be called an +c exterior loop or an open multi-loop. +c +c 1/2 Asym. loop correction +c Extrapolate loops with dG(n)=dG(30)+1.75*ln(n/30) +c +c* Hairpins of 3 have no terminal stack. +c + function erg(mode,i,j,ip,jp) + include 'rfd.inc' + dimension e(4) + integer*2 tlink,tlptr + logical fce +c + +100 if (mode.eq.1) then +c Read energy files. 
+ call ergread + erg = 0 + return + endif + + erg = 0 +c Do not allow prohibited base to pair. + if (force(i).eq.1.or.force(j).eq.1) then + erg = infinity + return + endif + + if (mode.lt.6) then +c Add bonus energy to force base-pairs. + if ((force(i).eq.2).or.(force(j).eq.2).or.fce(i,j)) + . then + erg = erg + eparam(9) + if ((force(i).eq.2).and.(force(j).eq.2)) + . erg = erg + eparam(9) + endif + + + endif + + goto (100,200,300,400,500,600,700),mode + +c Nucleotide accessibility option. +200 if (force(i).eq.3.or.force(jp).eq.3) then + erg = infinity + return + endif +c Molecule is not circular. N is not covalently bonded to N+1. + if (i.eq.n.or.j.eq.n+1) then + erg = infinity + return + endif +c Stacking energy. + erg = erg + stack(numseq(i),numseq(j),numseq(ip),numseq(jp)) + . + eparam(1) + return + +300 if ((i.le.n.and.ip.gt.n).or.(jp.le.n.and.j.gt.n)) then +c Loop is not allowed to contain the ends of the sequence. + erg = infinity + return + endif +c + + size1 = ip - i - 1 + size2 = j - jp - 1 + if (size1.eq.0.or.size2.eq.0) then +c Check for nucleotide accessibility. + if (size1.eq.0.and.force(i).eq.3) then + erg = infinity + return + endif + if (size2.eq.0.and.force(jp).eq.3) then + erg = infinity + return + endif + size = size1+size2 +c Bulge loop energy. + if (size.eq.1) then + erg = erg + stack(numseq(i),numseq(j),numseq(ip),numseq(jp)) + . + bulge(size) + eparam(2) + elseif (size.gt.30) then + loginc = int(prelog*log((float(size)/30.0))) + erg = erg + bulge(30) + loginc + eparam(2) + else + erg = erg + bulge(size) + eparam(2) + endif + return + else + size = size1+size2 + lopsid = abs((size1-size2)) +c Interior loop. + if (size.gt.30) then + loginc = int(prelog*log((float(size)/30.0))) + erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1)) + . + tstk(numseq(jp),numseq(ip),numseq(jp+1),numseq(ip-1)) + . + inter(30) + loginc + eparam(3) + . 
+ min0(maxpen,(lopsid*poppen(min0(4,size1,size2)))) + else + erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1)) + . + tstk(numseq(jp),numseq(ip),numseq(jp+1),numseq(ip-1)) + . + inter(size) + eparam(3) + . + min0(maxpen,(lopsid*poppen(min0(4,size1,size2)))) + endif + return + endif + +400 if (i.le.n.and.j.gt.n) then +c Hairpin loop must not contain the ends of the sequence. + erg = infinity + return + endif +c + size = j-i-1 + if ((size.eq.3).and.fce(i,j).and.seq(hstnum(i+1)).eq.' ') then +c Closed excision + erg = eparam(9) + return + endif + if (size.gt.30) then + loginc = int(prelog*log((float(size)/30.0))) + erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1)) + . + hairpin(30) + loginc + eparam(4) + else if (size .lt. 4) then +c +c* Special case for hairpin of 3 +c + erg = erg + hairpin(size) + eparam(4) + else +c + tlink=0 + if (size.eq.4) then + key=((numseq(i+4)*8+numseq(i+3))*8+numseq(i+2))*8+numseq(i+1) + tlptr=1 + do while ((tlptr.le.numoftloops).and.(tloop(tlptr,1).ne.key)) + tlptr=tlptr+1 + enddo + if (tlptr.le.numoftloops) tlink=tloop(tlptr,2) + endif + erg = erg + tstk(numseq(i),numseq(j),numseq(i+1),numseq(j-1)) + . + hairpin(size) + eparam(4) + tlink + endif + return + +c Multi-branch (or multi-) loop closed by I,J. +500 do 501 ii = 1,4 +501 e(ii) = infinity + + if (i+2.gt.j-3) then +c There are at most 3 bases between I and J. The fragment from +c I to J inclusive contains the origen. + e(1) = 0 + if (i.ne.n) e(2) = dangle(numseq(i),numseq(j),numseq(i+1),1) + if (j.ne.n+1) e(3) = dangle(numseq(i),numseq(j),numseq(j-1),2) + if (i.ne.n.and.j.ne.n+1) then + e(4) = dangle(numseq(i),numseq(j),numseq(i+1),1) + + . dangle(numseq(i),numseq(j),numseq(j-1),2) + endif + else if (i.ge.n-1) then +c I is at or next to the end of the sequence. + e(1) = w2(n+1,j-1) + if (i.ne.n) then + e(2) = dangle(numseq(i),numseq(j),numseq(i+1),1) + w2(n+1,j-1) + e(4) = dangle(numseq(i),numseq(j),numseq(i+1),1) + + . 
dangle(numseq(i),numseq(j),numseq(j-1),2) + w2(n+1,j-2) + endif + e(3) = dangle(numseq(i),numseq(j),numseq(j-1),2) + w2(n+1,j-2) + else if (j.eq.n+1.or.j.eq.n+2) then +c J is at or next to the end of the sequence. + e(1) = wst2((n-1)*i+n) + e(2) = dangle(numseq(i),numseq(j),numseq(i+1),1) + + . wst2((n-1)*(i+1)+n) + if (j.ne.n+1) then + e(3) = dangle(numseq(i),numseq(j),numseq(j-1),2)+wst2((n-1)*i+n) + e(4) = dangle(numseq(i),numseq(j),numseq(i+1),1) + + . dangle(numseq(i),numseq(j),numseq(j-1),2) + wst2((n-1)*(i+1)+n) + endif + else + ind1 = (n-1)*i + ind2 = ind1 + n - 1 + do k = i+2,j-3 + if (k.eq.n) then +c When K = N, the structure splits into two disconnected +c pieces. This open multi-loop ( exterior loop ) is not given the +c usual EPARAM(5),EPARAM(6) and EPARAM(10) destabilizing energies. +c + ind3 = -n +c No dangling ends next to the I,J base-pair. +c e(1) = min0(e(1),wst2(ind1+k)+wst2(ind3+j-1)) + e(1) = min(e(1),wst2(ind1+k)+wst2(ind3+j-1)) +c I+1 dangles on the I,J base-pair. + e(2) = min0(e(2),dangle(numseq(i),numseq(j),numseq(i+1),1) + . + wst2(ind2+k) + wst2(ind3+j-1)) +c J-1 dangles on the I,J base-pair. + e(3) = min0(e(3),dangle(numseq(i),numseq(j),numseq(j-1),2) + . + wst2(ind1+k) + wst2(ind3+j-2)) +c Both I+1 and J-1 dangle on the I,J base-pair. + e(4) = min0(e(4),dangle(numseq(i),numseq(j),numseq(i+1),1) + . + dangle(numseq(i),numseq(j),numseq(j-1),2) + . + wst2(ind2+k) + wst2(ind3+j-2)) + else +c When K is not N, the ends of the sequence are not in the +c loop. This is a proper multi-loop with an energy of EPARAM(6) +c for each single-stranded base, an energy of EPARAM(10) for +c each closing base-pair, plus and extra energy of EPARAM(5). +c No dangling ends next to the I,J base-pair. + e(1) = min0(e(1),wst1(ind1+k)+work1(k+1,mod(j-1,3))+eparam(5) + . +eparam(10)) +c I+1 dangles on the I,J base-pair. + e(2) = min0(e(2),dangle(numseq(i),numseq(j),numseq(i+1),1) + . + wst1(ind2+k) + work1(k+1,mod(j-1,3)) + eparam(5) + . 
+ eparam(6) + eparam(10) ) +c J-1 dangles on the I,J base-pair. + e(3) = min0(e(3),dangle(numseq(i),numseq(j),numseq(j-1),2) + . + wst1(ind1+k) + work1(k+1,mod(j-2,3)) + eparam(5) + . + eparam(6) + eparam(10) ) +c Both I+1 and J-1 dangle on the I,J base-pair. + e(4) = min0(e(4),dangle(numseq(i),numseq(j),numseq(i+1),1) + . + dangle(numseq(i),numseq(j),numseq(j-1),2) + . + wst1(ind2+k) + work1(k+1,mod(j-2,3)) + eparam(5) + . + 2*eparam(6) + eparam(10) ) + endif + enddo + endif + + erg = erg + min0(e(1),e(2),e(3),e(4)) + return + +c Dangling base stacking energy. IP dangles over the I,J base-pair. +c 3' or 5' dangle if JP = 1 or 2 respectively. +600 erg = erg + dangle(numseq(i),numseq(j),numseq(ip),jp) + return + +700 if (force(i).eq.3.or.force(jp).eq.3) then + erg = infinity + return + endif +c Terminal stack or mismatch energy. + erg = erg + tstk(numseq(i),numseq(j),numseq(ip),numseq(jp)) + return + end function erg + + + + + + subroutine fill +c This subroutine computes the arrays of optimal energies. + include 'rfd.inc' + dimension inc(5,5),e1(5),e2(5) + data loop/3/,inc/0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0 + ./ + + vmin = infinity + if (n.le.80) then + pinc = 5 + elseif (n.le.100) then + pinc = 2 + else + pinc = 1 + endif + pcnt = pinc + crit = n*n*n/50 + + do j = 1,2*n-1 +c How far along is the computation? + if (n.gt.10) then + if (j.le.n) then + if (j**3.ge.pcnt*crit) then + write (6,1000) pcnt + pcnt = pcnt + pinc + endif + else + if ((2*n-j)**3.le.(100-pcnt)*crit) then + write (6,1000) pcnt + pcnt = pcnt + pinc + endif + endif + endif +1000 format ('+',5x,i4,'%') + + do i = min0(j,n),max0(1,j-n+1),-1 + vij = infinity + w1ij = infinity + w2ij = 0 + if (j.le.n) then + if (j-i.le.loop) goto 300 + else + if (i.eq.n.or.j.eq.n+1) goto 100 + endif +c Test for a prohibited base-pair or a pair which cannot form. + if (vst((n-1)*(i-1)+j).eq.1.or.inc(numseq(i),numseq(j)).eq.0) + . 
goto 200 +c Compute VIJ, the minimum energy of the fragment from I to J +c inclusive where I and J base-pair with one another. +c Perhaps I,J closes a hairpin loop. + vij = min0(vij,erg(4,i,j,i,j)) + if (j-i-1.ge.loop+2.or.j.gt.n) then +c Perhaps I,J stacks over I+1,J-1. + vij = min0(vij,erg(2,i,j,i+1,j-1)+v(i+1,j-1)) + endif +c Search for the best bulge or interior loop closed by I,J. + if (j-i-1.ge.loop+3.or.j.gt.n) then + do d = j-i-3,1,-1 + do ip = i+1,j-1-d + jp = d+ip + if (j-i-2-d.gt.eparam(7)) goto 100 + if (abs(ip-i+jp-j).le.eparam(8)) then + if (ip.gt.n) then + vij = min0(vij,erg(3,i,j,ip,jp)+vst((n-1)* + . (ip-n-1)+jp-n)) + else + vij = min0(vij,erg(3,i,j,ip,jp)+vst((n-1)* + . (ip-1)+jp)) + endif + endif + enddo + enddo + endif + +100 if (j-i-1.ge.2*loop+4.or.j.gt.n) then +c Search for the best multi-loop closed by I,J. + vij=min0(vij,erg(5,i,j,i,j)) + endif +c Compute W1IJ and W2IJ. +c A multi-loop containing N and 1 (ie. N+1) as single-stranded +c bases is called an exterior loop. W1IJ is the minimum folding +c energy of a non-empty folding on I to J inclusive where an +c exterior loop is given an energy of EPARAM(5) plus EPARAM(6) +c per single-stranded exterior base plus EPARAM(10) per +c double-stranded exterior base-pair in addition to +c possible dangling base energies. W2IJ is similarly +c defined except that the folding can be empty and that an +c exterior loop is given no energy other than possible dangling +c base energies. +200 do ii = 1,5 + e1(ii) = infinity + e2(ii) = infinity + enddo + if (i.ne.n) then +c Add single-stranded I to an optimal structure containing +c the base-pair I,J. + e1(1) = v(i+1,j) + eparam(10) + eparam(6) + erg(6,j,i+1,i,2) + e1(4) = w1(i+1,j) + eparam(6) + e2(1) = v(i+1,j) + erg(6,j,i+1,i,2) + e2(4) = w2(i+1,j) + endif + if (j.ne.n+1) then +c Add single-stranded J to an optimal structure containing +c the base-pair I,J. 
+ e1(2) = v(i,j-1) + eparam(10) + eparam(6) + erg(6,j-1,i,j,1) + e1(5) = w1(i,j-1) + eparam(6) + e2(2) = v(i,j-1) + erg(6,j-1,i,j,1) + e2(5) = w2(i,j-1) + endif + if (i.ne.n.and.j.ne.n+1) then +c Add single-stranded I and J to an optimal structure containing +c the base-pair I+1,J-1. + e1(3) = v(i+1,j-1) + eparam(10) + 2*eparam(6) + + . erg(6,j-1,i+1,i,2) + erg(6,j-1,i+1,j,1) + e2(3) = v(i+1,j-1) + erg(6,j-1,i+1,i,2) + . + erg(6,j-1,i+1,j,1) + endif + + w1ij = min0(eparam(10)+vij,e1(1),e1(2),e1(3),e1(4),e1(5)) + w2ij = min0(vij,w2ij,e2(1),e2(2),e2(3),e2(4),e2(5)) + + if (j-i-1.gt.2*loop+2.or.j.gt.n) then + index = (n-1)*(i-1) +c Search for an open bifurcation. + do k = i,j-1 + if (k.eq.n) then + w1ij = min0(w1ij,wst2(index+k)+work2(k+1)) + else + w1ij = min0(w1ij,wst1(index+k)+work1(k+1,mod(j,3))) + w2ij = min0(w2ij,wst2(index+k)+work2(k+1)) + endif + enddo + endif + +c Store VIJ, W1IJ and W2IJ. They can be regarded as elements +c V(I,J), W1(I,J) and W2(I,J) in two dimensional arrays. They +c are actually stored in one dimensional arrays VST, WST1 and +c WST2 is position (N-1)*(I-1) + J. +c Columns J,J-1 and J-2 of W1 are also stored in the work array, +c WORK1. Column J of W2 is stored again in the work array WORK2. +c This is done to reduce virtual memory swaps. +300 vst((n-1)*(i-1)+j) = vij + wst1((n-1)*(i-1)+j) = w1ij + wst2((n-1)*(i-1)+j) = w2ij + work1(i,mod(j,3)) = w1ij + work2(i) = w2ij + if (j.gt.n) then +c VMIN is the minimum folding energy of the entire sequence. +c vmin = min0(vmin,vst((n-1)*(i-1)+j)+vst((n-1)*(j-n-1)+i)) + vmin = min(vmin,vst((n-1)*(i-1)+j)+vst((n-1)*(j-n-1)+i)) + endif + enddo + if (j.ge.n) then + do k = j+1,n+1,-1 +c Fill in some work array values before beginning on +c the next column. + work1(k,mod(j+1,3)) = wst1((k-n-1)*(n-1)+j+1-n) + work2(k) = wst2((k-n-1)*(n-1)+j+1-n) + enddo + endif + enddo + return + end +c Used to recall values of V which are actually stored in VST. 
+ function v(i,j) + include 'rfd.inc' + + if (i.gt.n) then + v = vst((n-1)*(i-n-1)+j-n) + else + v = vst((n-1)*(i-1)+j) + endif + return + end + +c Used to recall values of W1 which are actually stored in WST1. + function w1(i,j) + include 'rfd.inc' + + if (i.gt.n) then + w1 = wst1((n-1)*(i-n-1)+j-n) + else + w1 = wst1((n-1)*(i-1)+j) + endif + return + end + +c Used to recall values of W2 which are actually stored in WST2. + function w2(i,j) + include 'rfd.inc' + + if (i.gt.n) then + w2 = wst2((n-1)*(i-n-1)+j-n) + else + w2 = wst2((n-1)*(i-1)+j) + endif + return + end + + + + + +c Computes an optimal structure on the subsequence from II to JI +c where II and JI must base-pair with each other. ERROR = 0 +c indicates a normal termination. +c NFORCE is the number of forced base-pairs encountered during the +c traceback. +c Base-pair information is stored in the array BASEPR. + subroutine trace(ii,ji,nforce,error) + include 'rfd.inc' + logical fce + + error = 0 + +c Zero the appropriate region of BASEPR. + if (ji.le.n) then + do k=ii,ji + basepr(k) = 0 + enddo + else + do k=1,ji-n + basepr(k) = 0 + enddo + do k = ii,n + basepr(k) = 0 + enddo + endif +c Initialize the stack of outstanding base-pairs and push II,JI, +c V(II,JI) and 0 on to the stack. + call initst + call push(ii,ji,v(ii,ji),0) + nforce = 0 + +100 i = j + do while (i.eq.j) +c Pull a fragment ( I to J ) and its expected energy ( E ) from +c the stack. OPENL = 1 indicates that the free bases are part of +c an exterior loop. OPENL = 0 (ie. closed) indicates that the +c free bases are part of a multi-loop. + stz = pull(i,j,e,openl) + if (stz.ne.0) return + enddo +c Do I and J base-pair with one another? + if (e.eq.v(i,j)) goto 300 + + if (openl.eq.0) then + + do while (e.eq.w1(i+1,j)+eparam(6)) +c Whittle away from the 5' end. + i = i + 1 + e = w1(i,j) + if (i.ge.j) goto 100 + enddo + do while (e.eq.w1(i,j-1)+eparam(6)) +c Whittle away from the 3' end. 
+ j = j - 1 + e = w1(i,j) + if (i.ge.j) goto 100 + enddo + + if (e.eq.v(i+1,j)+eparam(10)+eparam(6)+erg(6,j,i+1,i,2)) then +c I dangles over I+1,J. + i = i + 1 + e = v(i,j) + elseif (e.eq.v(i,j-1)+eparam(10)+eparam(6)+erg(6,j-1,i,j,1)) then +c J dangles over I,J-1. + j = j - 1 + e = v(i,j) + elseif (e.eq.v(i+1,j-1) + eparam(10) + 2*eparam(6) + + . erg(6,j-1,i+1,i,2) + erg(6,j-1,i+1,j,1) ) then +c Both I and J dangle over I+1,J-1. + i = i + 1 + j = j - 1 + e = v(i,j) + endif +c Check for stem closing a multi-loop. + if (e.eq.v(i,j)+eparam(10)) e = v(i,j) + + else + + do while (e.eq.w2(i+1,j)) +c Whittle away at the 5' end. + i = i + 1 + if (i.ge.j) goto 100 + enddo + do while (e.eq.w2(i,j-1)) +c Whittle away at the 3' end. + j = j - 1 + if (i.ge.j) goto 100 + enddo + + if (e.eq.v(i+1,j)+erg(6,j,i+1,i,2)) then +c I dangles over I+1,J. + i = i + 1 + e = v(i,j) + elseif (e.eq.v(i,j-1) + erg(6,j-1,i,j,1)) then +c J dangles over I,J-1. + j = j - 1 + e = v(i,j) + elseif (e.eq.v(i+1,j-1)+erg(6,j-1,i+1,i,2)+erg(6,j-1,i+1,j,1)) + . then +c Bothe I and J dangle over I+1,J-1. + i = i + 1 + j = j - 1 + e = v(i,j) + endif + endif + + if (e.ne.v(i,j)) then +c Cannot chop away at the ends any more and still the ends do not +c base-pair with one another. Structure MUST bifucate (OPENL). + k = i +200 if (k.eq.j) then +c Structure will not split. Error + ii = hstnum(i) + ji = hstnum(j) + error = 10 + return + endif + if (openl.eq.0.and.e.eq.w1(i,k) + w1(k+1,j)) then +c Best structure on I,J splits into best structures on I,K +c and K+1,J. Push these fragments on to the stack. (OPENL = 0) + call push(i,k,w1(i,k),0) + call push(k+1,j,w1(k+1,j),0) + goto 100 + else if (openl.eq.1.and.e.eq.w2(i,k) + w2(k+1,j)) then +c Best structure on I,J splits into best structures on I,K +c and K+1,J. Push these fragments on to the stack. (OPENL = 1) + call push(i,k,w2(i,k),1) + call push(k+1,j,w2(k+1,j),1) + goto 100 + else + k = k + 1 + goto 200 + endif + endif + +c Base-pair found. 
All base-pairs are stored in the range 1 <= I < J <= N. +c If I and J form a base-pair, then BASEPR(I) = J and BASEPR(J) = I. +300 if (j.le.n) then + basepr(i) = j + basepr(j) = i + else if (i.gt.n) then + basepr(i-n) = j-n + basepr(j-n) = i-n + i = i - n + j = j - n + else + basepr(j-n) = i + basepr(i) = j-n + endif + + openl = 0 + +c Check if this is a forced base-pair. + if (force(i).eq.2.or.force(j).eq.2.or.fce(i,j)) + . nforce = nforce + 1 + if (force(i).eq.2.and.force(j).eq.2) nforce = nforce + 1 +c Perhaps I,J stacks over I+1,J-1? + if (i.ne.n.and.j.ne.n+1) then + if (e.eq.erg(2,i,j,i+1,j-1) + . + v(i+1,j-1)) then + i = i + 1 + j = j - 1 + e = v(i,j) + goto 300 + endif + endif + +c Perhaps I,J closes a hairpin loop? + if (e.eq.erg(4,i,j,i,j)) goto 100 + +c E' ( EP in the program ) is E corrcted for possible forced +c base-pairs. +c + ep = e + if (force(i).eq.2.or.force(j).eq.2.or.fce(i,j)) + . ep = ep - eparam(9) + if (force(i).eq.2.and.force(j).eq.2) ep = ep - eparam(9) + + if (i+2.gt.j-3) then +c Tidy up loose ends (trivial). + if (ep.eq.0.or.(i.ne.n.and.ep.eq.erg(6,i,j,i+1,1))) then + + goto 100 + + elseif (j.ne.n+1.and.ep.eq.erg(6,i,j,j-1,2)) then + + goto 100 + + elseif (i.ne.n.and.j.ne.n+1.and. + . ep.eq.erg(6,i,j,i+1,1) + erg(6,i,j,j-1,2)) then + + goto 100 + + else + ii = hstnum(i) + ji = hstnum(j) + error = 12 + return + endif + + else if (i.ge.n-1) then +c Up to one base hanging on to I. + if (ep.eq.w2(n+1,j-1)) then + call push(n+1,j-1,w2(n+1,j-1),1) + goto 100 + elseif (i.ne.n) then + + if (ep.eq.erg(6,i,j,i+1,1) + w2(n+1,j-1)) then + call push(n+1,j-1,w2(n+1,j-1),1) + goto 100 + else if (ep.eq.erg(6,i,j,i+1,1)+erg(6,i,j,j-1,2) + w2(n+1,j-2)) + . 
then + call push(n+1,j-2,w2(n+1,j-2),1) + goto 100 + endif + + elseif (ep.eq.erg(6,i,j,j-1,2) + w2(n+1,j-2)) then + call push(n+1,j-2,w2(n+1,j-2),1) + goto 100 + + else + ii = hstnum(i) + ji = hstnum(j) + error = 12 + return + endif + + else if (j.eq.n+1.or.j.eq.n+2) then +c Up to one base hanging on to J. + if (ep.eq.w2(i+1,n)) then + call push(i+1,n,w2(i+1,n),1) + goto 100 + + elseif (ep.eq.erg(6,i,j,i+1,1) + w2(i+2,n)) then + call push(i+2,n,w2(i+2,n),1) + goto 100 + + elseif (j.ne.n+1) then + + if (ep.eq.erg(6,i,j,j-1,2)+w2(i+1,n)) then + call push(i+1,n,w2(i+1,n),1) + goto 100 + elseif (ep.eq.erg(6,i,j,i+1,1) + erg(6,i,j,j-1,2) + w2(i+2,n)) + . then + call push(i+2,n,w2(i+2,n),1) + goto 100 + endif + + else + ii = hstnum(i) + ji = hstnum(j) + error = 12 + return + endif + + else + + k = i+2 +c Perhaps I,J closes a multi-loop? +400 do while (k.le.j-3) + if (k.ne.n) then + if (ep.eq.w1(i+1,k) + w1(k+1,j-1) + eparam(10) + eparam(5)) + . then +c Multi-loop. No dangling ends on I,J. + call push(i+1,k,w1(i+1,k),0) + call push(k+1,j-1,w1(k+1,j-1),0) + goto 100 + else if (ep.eq.erg(6,i,j,i+1,1)+w1(i+2,k)+w1(k+1,j-1) + + . eparam(10) + eparam(6) + eparam(5)) then +c Multi-loop. I+1 dangles over the I,J base-pair. + call push(i+2,k,w1(i+2,k),0) + call push(k+1,j-1,w1(k+1,j-1),0) + goto 100 + else if (ep.eq.erg(6,i,j,j-1,2)+w1(i+1,k)+w1(k+1,j-2) + + . eparam(10) + eparam(6) + eparam(5)) then +c Multi-loop. J-1 dangles over the I,J base-pair. + call push(i+1,k,w1(i+1,k),0) + call push(k+1,j-2,w1(k+1,j-2),0) + goto 100 + else if (ep.eq.erg(6,i,j,i+1,1)+erg(6,i,j,j-1,2)+w1(i+2,k) + . +w1(k+1,j-2)+eparam(10)+2*eparam(6)+eparam(5)) then +c Multi-loop. Both I+1 and J-1 dangle over the I,J base-pair. + call push(i+2,k,w1(i+2,k),0) + call push(k+1,j-2,w1(k+1,j-2),0) + goto 100 + endif + else + if (ep.eq.w2(i+1,k) + w2(k+1,j-1)) then +c Exterior loop. No ends dangling on I,J. 
+ call push(i+1,k,w2(i+1,k),1) + call push(k+1,j-1,w2(k+1,j-1),1) + goto 100 + else if (ep.eq.erg(6,i,j,i+1,1)+w2(i+2,k)+w2(k+1,j-1)) then +c Exterior loop. I+1 dangles over the I,J base-pair. + call push(i+2,k,w2(i+2,k),1) + call push(k+1,j-1,w2(k+1,j-1),1) + goto 100 + else if (ep.eq.erg(6,i,j,j-1,2)+w2(i+1,k)+w2(k+1,j-2)) then +c Exterior loop. J-1 dangles over the I,J base-pair. + call push(i+1,k,w2(i+1,k),1) + call push(k+1,j-2,w2(k+1,j-2),1) + goto 100 + else if (ep.eq.erg(6,i,j,i+1,1)+ + . erg(6,i,j,j-1,2)+w2(i+2,k)+w2(k+1,j-2)) then +c Exterior loop. Both I+1 and J-1 dangle over the I,J base-pair. + call push(i+2,k,w2(i+2,k),1) + call push(k+1,j-2,w2(k+1,j-2),1) + goto 100 + endif + endif + k = k + 1 + enddo + + endif + +c None of the above work. I,J MUST close a bulge or interior loop. +500 do d = j-i-3,1,-1 + do ip = i+1,j-1-d + jp = d+ip + if (j-i-2-d.gt.eparam(7)) then +c Error, bulge or interior loop not found. + ii = hstnum(i) + ji = hstnum(j) + error = 11 + return + endif + if (abs(ip-i+jp-j).le.eparam(8)) then + if (e.eq.erg(3,i,j,ip,jp)+v(ip,jp)) then + i = ip + j = jp + e = v(i,j) + goto 300 + endif + endif + enddo + enddo +c Error, bulge or interior loop not found. + ii = hstnum(i) + ji = hstnum(j) + error = 11 + return + end + + + + +c Store results of a SAVE run for a continuation run. + subroutine putcont + include 'rfd.inc' + + write(30) n,nsave,vmin,listsz,seqlab + write(30) stack,tstk,dangle,hairpin,bulge,inter,eparam + write(30) (vst(i),i=1,n*n) + write(30) (wst1(i),i=1,n*n) + write(30) (wst2(i),i=1,n*n) + write(30) (seq(i),i=nsave(1),nsave(2)) + write(30) ((list(i,j),i=1,listsz),j=1,4) + write(30) tloop,numoftloops + write(30) (poppen(i),i=1,4),maxpen,prelog + return + end +c Read results from a SAVE run for a CONTINUATION run. 
+ subroutine getcont + include 'rfd.inc' + + read(30,err=10) n,nsave,vmin,listsz,seqlab + read(30,err=10) stack,tstk,dangle,hairpin,bulge,inter,eparam + read(30,err=10) (vst(i),i=1,n*n) + read(30,err=10) (wst1(i),i=1,n*n) + read(30,err=10) (wst2(i),i=1,n*n) + read(30,err=10) (seq(i),i=nsave(1),nsave(2)) + read(30,err=10) ((list(i,j),i=1,listsz),j=1,4) + read(30,err=10) tloop,numoftloops + read(30,err=10) (poppen(i),i=1,4),maxpen,prelog + goto 11 + +10 call errmsg(40,0,0) + +11 return + end + + diff --git a/ZUKER/lin.inc b/ZUKER/lin.inc new file mode 100755 index 0000000..bab54f7 --- /dev/null +++ b/ZUKER/lin.inc @@ -0,0 +1,60 @@ + implicit integer (a-z) + parameter (maxn=1500,maxn2=3000) + parameter (fldmax=maxn2) + parameter (infinity=16000,sortmax=30000) + parameter (mxbits=(maxn*(maxn+1)+31)/32) + parameter (maxtloops=40) + parameter (maxsiz=10000) + + integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn) + integer poppen(4),maxpen + real prelog + + dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2), + . work1(maxn2,0:2),work2(maxn2), + . stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30) + dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2) +c common /main/ newnum,hstnum,force,work1,work2, + common /main/ newnum,hstnum,force,work1,work2, + . stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n, + . 
numseq,poppen,prelog,maxpen,vst,wst1,wst2 + + character*1 seq(maxsiz) +c character*5 inbuf + character*10 progtitle + character*30 seqlab + common /seq/ seq,seqlab + dimension list(100,4) + common /list/ list,listsz + common /nm/ vmin + data progtitle/'lrna'/ + + dimension basepr(maxn) + common /traceback/ basepr + + dimension heapi(sortmax+1),heapj(sortmax+1) + common /heap/ heapi,heapj,num + + integer*2 marks(mxbits),force2(mxbits) + common/bits/marks,force2 + + integer*2 tloop(maxtloops,2),numoftloops + common/tloops/tloop,numoftloops + + + + + + + + + + + + + + + + + + diff --git a/ZUKER/loop.dat b/ZUKER/loop.dat new file mode 100755 index 0000000..8ca7974 --- /dev/null +++ b/ZUKER/loop.dat @@ -0,0 +1,34 @@ +DESTABILIZING ENERGIES BY SIZE OF LOOP (INTERPOLATE WHERE NEEDED) +hp3 ave calc no tmm;hp4 ave calc with tmm; ave all bulges +SIZE INTERNAL BULGE HAIRPIN +------------------------------------------------------- +1 . 3.9 . +2 4.1 3.1 . +3 4.5 3.5 4.5 +4 4.9 4.2 5.5 +5 5.3 4.8 4.9 +6 5.7 5.0 5.1 +7 5.9 5.2 5.2 +8 6.0 5.3 5.5 +9 6.1 5.4 5.8 +10 6.3 5.5 5.9 +11 6.4 5.7 6.0 +12 6.4 5.7 6.1 +13 6.5 5.8 6.2 +14 6.6 5.9 6.3 +15 6.7 6.0 6.4 +16 6.8 6.1 6.4 +17 6.8 6.1 6.5 +18 6.9 6.2 6.5 +19 6.9 6.2 6.6 +20 7.0 6.3 6.7 +21 7.1 6.3 6.7 +22 7.1 6.4 6.8 +23 7.1 6.4 6.8 +24 7.2 6.5 6.9 +25 7.2 6.5 6.9 +26 7.3 6.5 6.9 +27 7.3 6.6 7.0 +28 7.4 6.7 7.0 +29 7.4 6.7 7.1 +30 7.4 6.7 7.1 diff --git a/ZUKER/looprna.ct b/ZUKER/looprna.ct new file mode 100755 index 0000000..dfe2383 --- /dev/null +++ b/ZUKER/looprna.ct @@ -0,0 +1,29 @@ + 28 ENERGY = 1600.0 looprna + 1 a 0 2 2 1 + 2 a 1 3 1 2 + 3 a 2 4 0 3 + 4 u 3 5 0 4 + 5 a 4 6 0 5 + 6 u 5 7 0 6 + 7 g 6 8 0 7 + 8 g 7 9 0 8 + 9 a 8 10 0 9 + 10 a 9 11 0 10 + 11 u 10 12 0 11 + 12 u 11 13 0 12 + 13 a 12 14 0 13 + 14 g 13 15 0 14 + 15 a 14 16 0 15 + 16 g 15 17 0 16 + 17 u 16 18 0 17 + 18 a 17 19 0 18 + 19 u 18 20 0 19 + 20 u 19 21 0 20 + 21 c 20 22 0 21 + 22 c 21 23 0 22 + 23 a 22 24 0 23 + 24 u 23 25 0 24 + 25 a 24 26 0 25 + 26 u 25 
27 0 26 + 27 u 26 28 0 27 + 28 u 27 0 0 28 diff --git a/ZUKER/looprna.out b/ZUKER/looprna.out new file mode 100755 index 0000000..5e45895 Binary files /dev/null and b/ZUKER/looprna.out differ diff --git a/ZUKER/looprna.reg b/ZUKER/looprna.reg new file mode 100755 index 0000000..75d632a --- /dev/null +++ b/ZUKER/looprna.reg @@ -0,0 +1 @@ + ( 1) 1 2 3 1600.0 diff --git a/ZUKER/mfold.doc b/ZUKER/mfold.doc new file mode 100755 index 0000000..bf9e51a --- /dev/null +++ b/ZUKER/mfold.doc @@ -0,0 +1,338 @@ + Glossary of subroutines, functions and some of the variables + used by the RNA folding programs + +Global variables: {implicit integer} + + (data defined) + + infinity - large positive integer used to indicate impossible basepairs + fldmax - largest value of 2*n + sortmax - largest size of heap in sort module ( + 1) + mxbits - largest size of marks and force2 + maxn - size of largest processed fragment that can be folded + fldmax - twice maxn - the program actually folds a doubled up fragment + subject to certain constraints + maxtloops- the maximum number of distinguished tetra-loops that can be used + + (dynamic valued) + + basepr(maxn) + Output array for traceback. + + bulge(30) + Table of bulge loop energies.
+ + cntrl(10) + Control Parameters: + (1) run type + 0 normal + 1 save + 2 continuation + (2) output type + 1 lineout + 2 ct file + 3 region table + 4 lineout + ct file + 5 lineout + region table + 6 ct file + region table + 7 lineout + ct file + region table + (3) lineout record length + (4) lineout unit number + (5) depending on run mode + run mode = 0 + not used (this is terminal type in VAX/VMS version) + run mode = 1 + not used + run mode = 2 + number of sequences to be folded + (6) depending on run mode + run mode = 0 + minimum vector size for dotplt + run mode = 1 or 2 + maximum number of tracebacks + (7) run mode + 0 sub-optimal plot + 1 n-best + 2 multiple foldings + (8) percentage for sort + (9) window for dotplt and sortout routines + (10) reserved for future expansion + + dangle(5,5,5,2) + Table of dangling end energies. + + eparam(10) + Energy Parameters: + (1) Extra stack energy + (2) Extra bulge energy + (3) Extra loop energy (interior) + (4) Extra loop energy (hairpin) + (5) Extra loop energy (multi) + (6) Multi loop energy/single-stranded base + (7) Maximum size of interior loop + (8) Maximum lopsidedness of an interior loop + (9) Bonus Energy used to force base pairs + (10) Multi loop energy/closing base-pair + + force(2 * maxn) + Single force array. + + force2(maxbits) {int*2} + Double force array. Accessed through fce and sfce. (bit addressing) + + hairpin(30) + Table of hairpin loop energies. + + hstnum(2 * maxn) + Array used for conversion of new to old numbering of sequence fragment. + + i + Mainly used to designate the 5' end of a segment. + + ip + i' - used mainly to designate the 3' end of a segment beginning + with i ( i' > i ). + + inter(30) + Table of interior loop energies. + + j + Mainly used to designate the 3' end of a segment. + + jp + j' - used mainly to designate the 5' end of a segment beginning + with j ( j' < j ). + + list(100,4) + List of options selected in MENU. Up to 100 can be selected. 
+ + listsz + The number of options selected from the MENU. + + maxn + Maximum size of a fragment which can be folded. + + marks(maxbits) {int*2} + Pair marking array for preventing identical tracebacks. + Accessed through mark and smark. A base pair i.j is mapped + into a single bit of this array. Any base pair which occurs + in a structure which is computed is "set" from 0 to 1. In + addition, base pairs close to these base pairs (within a + distance of 'window') are also "set". Base pairs which are + "set" will not be chosen for computing new foldings. + + n + The size of the fragment to be folded after processing. The + fragment is doubled in size for computational purposes. + + newnum(5000) + newnum(i) is the numbering of the Ith element in the original + sequence in the fragment to be folded. If the ith element is + not in the chosen fragment, then newnum(i) = 0. + + nsave(2) + 5' and 3' ends of the fragment to be folded (historical numbering). + + numseq(2 * maxn) + Holds sequence converted from characters to integers. + + seq(5000) {char} + original sequence returned from formid. + + seqlab {char*30} + Sequence label. + + stack(5,5,5,5) + Table of stacking energies. + + tstk(5,5,5,5) + Table of terminal stacking energies. + + vst(maxn squared) {int*2} + Energy array v(i,j) is stored as vst((n-1)*(i-1)+j). v(i,j) is + the minimum folding energy of the segment closed by the base + pair i.j. + + work(2 * maxn,0:2) + Holds columns of wst to minimize paging during multi-loop + searches. In the linear version there is work1(2*maxn,0:2) + and work2(2*maxn). + + wst(maxn squared) {int*2} + Energy array mapped as vst is. In the linear version this + array is split into wst1(maxn sq) and wst2(maxn sq). The + energy array W(i,j) is stored as WST((n-1)*(i-1)+j). W(i,j) is + the minimum folding energy of the segment from i to j. + wst1 penalizes all exterior single-stranded bases and base + pairs as if they were in an interior loop. wst2 does not do this.
+ + +Routines: + + build_heap(err) + Reads in all the v energies within a user-specified percentage + (up to an upper limit built into the program) and sorts them + into inverse partial order (a heap with the lowest value at the + top). + var heap(sortmax) : heap array + + convt(str) + Returns the value of the integer held in the character string + str. + + ct(r) + Puts the results of trace into a ct file. + + device + Gets run information from user. + + digit(row,column,pos,bmax,b) + Adds the sequence numbers to the linout result. + + dotplt(iret,jret,jump)@@@ + Sub-optimal plot option. + + ene(i,j) + Returns v(i,j) + v(j,i+n). + + enefiles + Reads in the appropriate energy file names. + + erg(mode,i,j,ip,jp) + Returns the value from the energy tables indicated by the + parameter values. + var e(4) : (linear only) array for minimum energies + + ergread + Reads in the appropriate energy rules from a file. + + errmsg(err,i,j) + Prints error message number (err) and stops if appropriate. + i and j are numbers passed to be included in some error + messages. + + fill + Uses the energy function (erg) to fill the matrices v and w + (or w1 and w2) with the folding energies. The matrices are + accessed by the rest of the program by either the functions + v(i,j) and w(i,j) {or w1,w2}, or by vst(k) and wst(k) {wst1, + wst2} where k = (n-1)*(i-1)+j. + var inc(5,5) : base pair possibilities - 1,2,3,4,5 correspond + to A,C,G,U and X (other) respectively. inc(i,j) = 1 if and only + if base types i and j can form a base pair. Thus the program + allows one to define G.G as a base pair if desired. + + find(unit,len,str) + Searches the file under unit number unit until it finds the + string str (of length len). + + fce(ii,ji) + Returns .true. if sfce has been called on (i,j), .false. otherwise. + This is how information on forced base pairs is used. + + formid(seqid,seq,nseq,nmax,used) + Standard routine for reading in sequences. + + getcont + Reads a save file. 
+ + heap_sort + Sorts the results of build_heap into linear order. + var sort(sortmax) : the heap is sorted into this array + + initst + Initializes the program stack. The stack is + four integers wide and up to 50 levels deep. + The stack is used to keep track of outstanding + fragments during the traceback. + + linout(n1,n2,energy,iret,jret,error) + Puts the results of trace into line printer format. + + listout + Prints the options selected already by the user. Called from + menu. + + mark(i,j) + Returns .true. if smark has been called on (i,j), .false. otherwise. + Used to determine which base pairs have occurred in structures + already computed or are close to such base pairs. + + menu + Gets run options required by user. + + mseq(i) + On first call, opens a sequence file and returns the number of + sequences as i. On subsequent calls, reads in sequence number i + using multid. + + multid(seqid,seq,nseq,nmax,used,rnum) + Modified formid for use with multiple folding. + + out + Debug routine, not used during normal program execution. + Will print out the energy tables in a semi-readable format. + + outputs + Gets output options desired by user (enables some or all of + lineout, region table, ct file). + + process + Converts the initial rna data into forms useable by the + program. + + pull(a,b,c,d) + Pulls the four parameters off the stack. Returns 0 if the + stack was not empty. + + push(a,b,c,d) + Pushes the four parameters onto the stack. + + putcont + Saves the program information into a "save file" after fill + is called. + + regtab + Puts the results of trace into a region table. + + sfce(ii,ji) + Sets a mark on a point i,j so that the program will include + it in any folding by giving this base pair a bonus energy of + eparam(9). + + smark(i,j) + Sets a mark on a point (base pair) i.j so that the program + knows the point has been included in an already computed + structure or is "close" to such a point. + + sortout(i,j,rep,err) + Used for "N Best" run. 
On the first pass this routine will + call build_heap and heap_sort, and return the (i,j) pair + with the lowest energy. From then on the routine will return + the (i,j) pair with the lowest energy that hasn't been marked + by smark. + + stest(stack,sname) + Tests the stack tables for symmetry, used after ergread. + + swap(i,j) + Puts (i = j) and (j = i) + + trace(ii,ji,nforce,error) + Computes the best folding containing the basepair ii.ji. + The output is put into the basepr(k) array. If basepr(k) = 0, + then k is single-stranded. If basepr(k) = k' > 0, then k + pairs with k'. + + vector (i,j,vopt,vinc,ina) {dotplt module} + Prints out all the stacking regions within vinc energy of vopt + along the diagonal starting with the point (i,j) {along axis, + top right corner is origin} + + v(i,j) + Returns the v-energy value of (i,j) mapped into vst. + + w(i,j) + Returns the w-energy value of (i,j) mapped into wst. In the + linear version of the program, this routine is split into + w1(i,j) and w2(i,j) which map into wst1 and wst2. diff --git a/ZUKER/mfold.flow b/ZUKER/mfold.flow new file mode 100755 index 0000000..8a545bd --- /dev/null +++ b/ZUKER/mfold.flow @@ -0,0 +1,74 @@ + MFOLD + +Flow chart for RNA folding - main +--------------------------------- + -----> call device ------------. + ^ | | + | | read energy files + ^ (cont run) | | + | | (reg) (save) + ^ call cdump--. | | + | call outputs | + ^ | '--------- call menu + | |-----------------------' ** + ^ | | + | mrep = 1 --(multiple)-- read MREPth sequence + err msg | | + ^ zero bit arrays <---------------' + | call process + (n*2.gt.fldmax)----------' + | + ----(n ok)----- + | | + (save or reg run) (cont run) + | | + call fill--(reg)-->| + | | + putcont<-----(save run) | + | | + stop. rep = 1 + jump = 1 + flag = .true. 
+ | + |<------ * + | + (mult or n best)----------------(plot run) + | | | + sortout(rep) (jump.eq.1) (jump.eq.2) + | | | | + (err) (no err) dotplt(start) dotplt(cross-hairs) + | | '-> jump = 2 <-' +error message->rep++ | + mrep++ | + / \ | + (reps done) (more to go) | + | | | + flag = .false.------------------------------->(not flag)->. + (flag) | + | | + <----(err) <---trace1 | + | | | + errmsg (no err) | + | | | + -----------------|------->(not flag)->| + (flag) | + trace2 | + <----(err) <---' | | + errmsg (no err) | + | | | + '-----------------|------->(not flag)->| + | | + calculate energy | + | | | + (plot or n-best) (mult) | + | | | + mark basepairs | | + | | | + '-- call selected outputs | + |<-----------' + ------------------ + | | + (plot or n-best) (mult)------ + | | | | + (flag) (not flag) (done) (not done) + goto * '-> stop <-' goto ** diff --git a/ZUKER/mfold.user b/ZUKER/mfold.user new file mode 100755 index 0000000..705a937 --- /dev/null +++ b/ZUKER/mfold.user @@ -0,0 +1,358 @@ + + + + Sub-optimal RNA Folding Program Users Manual + -------------------------------------------- + + Michael Zuker, Eric Nelson and John Jaeger + + + +Start: + +Initially, the following menu appears + + Enter run type + 0 Regular run (default) + 1 Save run + 2 Continuation run + +In a regular run the program takes an RNA sequence as input, computes +the energy matrix for the molecule, and produces various foldings as +output. Since the computation of the energy matrix uses a great deal +of time and resourses, the matrix can be saved before any output is +generated (a save run) and later used to produce output (a +continuation run). + +Regular or Continuation run -> step b + + +Step a: + +At this point a prompt will appear asking for the name of the file +into which the save matrix can be stored. 
+ +-> step f + +Step b: + +The following menu will be displayed + + Enter run mode + 0 Sub-optimal plot (default) + 1 N best + 2 Multiple molecules + +If the program is run in 'Sub-optimal plot' ("dot plot") mode, the +energy matrix will be displayed graphically after it is computed. In +'N-best' mode the program will generate the suboptimal foldings +within a certain percentage of the minimum energy. If +'Multiple molecules' ("multi") mode is chosen the program will +run the N-best mode with every complete sequence in a file. This last +option MUST be done in a regular run mode. + +N best or multi mode -> step d + + +Step c: + +A prompt for the minimum number of points 'in a row' that will appear +on the energy dot plot. Helices that are smaller than this number +will not appear on the dot plot. + +-> step e + + +Step d: + +Two prompts asking for values of 'N-best' parameters now appear: the +percentage above the optimal energy which foldings must be within, and +N. + + +Step e: + +A prompt for the window parameter. The distance between any pair of +computed foldings must be more than window. A simpler distance +function is defined in: +1. Zuker M + On Finding All Suboptimal Foldings of an RNA Molecule. + Science, 244, 48-52, (1989) +2. Zuker M + The Use of Dynamic Programming Algorithms in RNA Secondary + Structure Prediction. + in "Mathematical Methods for DNA Sequences", M. S. Waterman ed. + CRC PRESS, INC., 159-184, (1989) + +The new definition of distance requires that any two computed +foldings must contain more than 'window' base pairs that are in one +folding and not in the other. + +Continuation run -> step h + + +Step f: + +At this point a prompt for the name of a file containing one or more +sequences (in Stanford, Genbank, EMBL, PIR, or NRC format) will +appear. If the program is being run in 'multi' mode all of the +sequences in the file will be folded, otherwise the program will ask +for a selection from the file's contents (a portion of a sequence). 
+Sequence data must be in upper case. The program recognizes A, C, G, +and T or U. The characters B, Z, H, and V or W are recognized as A, +C, G, and T or U respectively; but they are flagged by the program as +being accessible to nuclease cleavage. A flagged base can pair only +if its 3' neighbor is single stranded. + + +Step g: + +Six files containing energy information are needed to run the +program, and the names of these files are now requested. The +default energy files are organized as follows: + +dangle.dat - single base stacking +loop.dat - hairpin, bulge and interior loops +stack.dat - base pair stacking energies +tstack.dat - stacking energies for terminal mismatched pairs in + interior and hairpin loops +tloop.dat - a list of distinguished tetra-loops and the bonus energies + given to them. If you do not want to use this file, create + a dummy file containing a few blank lines and use it instead. +miscloop.dat - some miscellaneous energies (see files.list). + +These files can be replaced by dangle.025, loop.025, stack.025 etc. +for folding at (for example) 25 deg. + +-> step i + + +Step h: + +For a continuation run, a file previously created by a save run needs +to be read in at this point. A prompt will appear asking for +identification of this file. After the file is read, the energy rules +and parameters used during the save run are output either to a file or +the screen. + +Step i: + +Three different types of folding output formats can be produced: +printer (which shows the secondary structure in a rough, but directly +readable format), ct file, and Region table (both ct files and region +tables can be used as input to certain other programs). Prompts will +appear asking which types of output need to be produced.
+ + +Step j: + +Main menu (see appendix A) + +Save run -> halt +N-best and multi mode -> produce folding output + + +Step k: + +Enter Dotplot section (see appendix B) + + + + + Appendix A + Main Menu + + +The following menu will appear: + + + 1 Energy Parameter 6 Single Prohibit + 2 Single Force 7 Double Prohibit + 3 Double Force 8 Begin Folding + 4 Closed Excision 9 Show current + 5 Open Excision 10 Clear current + +Selections 2 through 7 provide a way for the user to directly alter +the possible secondary structure by forcing or prohibiting particular +base-pairs. Each time one of these parameters is chosen, it is added +to a list held in memory - selection 9 will print the list and 10 will +erase the list. If '8' is chosen from the menu the program will +continue past this section. + +NB : Options 2 and 3 force base pairs to occur. +Base pairs are forced by giving them a bonus energy (EPARAM(9) in the +program code). These energies are subtracted during the traceback +algorithm so that the computed structures have the correct energies. +Unfortunately, there is no way to subtract the bonus energies from +the energy dot plots. Moreover, each forced base pair contains two +bonus energies because of the nature of the algorithm. For example, +suppose that an optimal folding of an RNA contains 3 forced base +pairs ( default bonus energy is 50.0 kcal per forced base pair ) and +that the correct folding energy is -180.0 kcal/mole. Internally, the +energy will be -180.0 - (3+1) x 50.0 = -380.0 kcal/mole. To find +foldings within 10% of the correct energy, one needs to compute +foldings to within 18.0 kcal of -180.0 - 3 x 50.0 = -330.0 kcal/mole. +This comes out to -312.0 kcal/mole. The ratio of -312.0 to -380.0 is +82%, so that one would request the 18% level of suboptimality! This +confusion only exists when base pairs are forced. Each closed +excision counts as one forced base pair.
+ +Choosing '1' from the above menu will result in the following (when +the default 37 deg. energy files have been chosen) : + + + Energy Parameters (10ths kcal/mole) + + 1 Extra stack energy [ 0] + 2 Extra bulge energy [ 0] + 3 Extra loop energy (interior) [ 0] + 4 Extra loop energy (hairpin) [ 0] + 5 Extra loop energy (multi) [ 46] + 6 Multi loop energy/single-stranded base [ 4] + 7 Maximum size of interior loop [ 30] + 8 Maximum lopsidedness of an interior loop [ 30] + 9 Bonus Energy [ -500] + 10 Multi loop energy/closing base-pair [ 1] + + +The energy parameters (along with the energy rules, which are read in +from files) decide what a given folding will look like. For example, +one could reduce the probability of a bulge loop by increasing +parameter 2. + +Note that parameters 7 and 8 limit the maximum size and lopsidedness +of bulge and interior loops. The default values of 30 should be +sufficient for folding at 37 deg or less. If you wish to fold at high +temperatures, it would be wise to increase these parameters to 60 or +even 100. Note that this will increase folding times! + + + + + + + Appendix B + Dotplot on the IRIS + + + + When the DOTPLOT is chosen in a regular or continuation run, +a non-resizable, non-movable window is created on which the +triangular energy dot plot is displayed, along with some other +useful information. All energy values are displayed in +kilocalories/mole, and the i,j basepair locations are displayed in +actual historical numbers (from the original sequence). Energy +increments are integers in 10ths of a kcal/mole. + +POPUP MENUS + In this version of DOTPLOT, all interaction with the program +(except for point picking...see below) is done with popup menus. To cause +the popup menu to be displayed, press the right mouse button. To select an +item from the popup menu, drag the crosshairs over the item that you want +to select, and release the mouse button. 
+ +OPTIMAL SCORE + + This number represents the lowest possible energy for an i,j +pair. This is the minimum RNA folding energy. If you are in +multicolor mode (see COLORS below), the points whose scores are +equal to the optimal will ALWAYS be displayed as black filled +rectangles. + +ENERGY INCREMENT + + This represents the highest possible deviation in energy (in +kcal) for which a point will be plotted. All base pairs that are in +foldings within this increment from the minimum folding energy will +be plotted. This increment can be changed by selecting "Enter new +increment" from the popup menu. A one-line window will be displayed +at the bottom of the screen prompting you for a new energy +increment, entered in 10ths of a kcal. After entering a valid +number and pressing RETURN, the screen will redraw with the new +energy increment. Note that points that have already been found in +previous computed structures (as well as points within WINDOW of +these base pairs) will NOT be replotted when the energy dot plot is +redrawn. This allows the user to select base pairing regions +different from those that have already been found. + +POINT PICKING + + One of the features of DOTPLOT is the ability to select a +base pair by picking a point using the crosshairs. To do this, just +click with the left or middle mouse button on the point that you +want. DOTPLOT will optimize this selection by looking at the eight +points surrounding the point picked, and use the point with minimum +energy, not necessarily the exact point picked. After you have +clicked on a point, the historical numbering will be displayed as an +(i,j) basepair. + +COMPUTING THE STRUCTURE + + After you have selected a valid i,j basepair, you can +compute the best folding containing that base pair by +selecting "Compute structure for last i,j" from the popup menu. +After computing the structure, the program will automatically return +to DOTPLOT without you ever knowing that it had left.
NOTE : If the +computed structure contains fewer than WINDOW new base pairs that +are sufficiently different from base pairs already computed, the +structure will not be output. + +THE TEXTPORT + + If you had selected the output of foldings to go to the screen, you +can use the textport to view them. Just select "Toggle textport on/off" +from the popup menu and the textport will appear. Although this is the +same window that you ran the DOTPLOT-calling program from, YOU CANNOT ENTER +SHELL COMMANDS IN THE TEXTPORT (it will not respond to text input...it is +simply a text output window). + +COLORS + + DOTPLOT has the ability to display the plot in up to seven colors +(including black). Select the number of colors desired by moving the +crosshairs over the "Colors ->" entry on the main popup menu and then +moving the cursor to the right. This will activate a "rollover" menu from +which you can then select the number of colors. DOTPLOT determines the +color of the point to be plotted (except for optimal points, which are +always black) by dividing the difference between the minimum energy +and the minimum energy plus the energy increment into n-1 regions, +where n is the number of colors. Black is reserved for optimal base +pairs only. Each region has an associated color, and a point falling +in that region will be plotted in that color. The order of the +colors in decreasing optimality is: + + BLACK (optimal) + RED + GREEN + YELLOW + BLUE + MAGENTA + CYAN + +P-NUM PLOT + + DOTPLOT allows you to plot the number of base pairs that the +ith base can form ( P-num(i) ) versus i (historical numbering). +P-num(i) is the ordinate versus all i's in the segment (abscissa). +Select this from the popup menu and a red rubber-band window will +attach itself to the cross-hairs. Drag this window out to the +desired size, and the plot will be drawn for the already defined +energy increment. If the energy increment changes, the plot will be +redrawn.
To get rid of the P-num plot, you can click on the +top-right "close box" on the window, select "close" from the +window-margin popup menu, or select "Toggle P-num plot" from the +main popup menu. Also, you may iconify the p-num plot window by +clicking on the top left "iconify" box. + +NOTE: Although iconifying the p-num plot window will not affect the DOTPLOT +routine or its parent program, it MAY stop control of the program until the +window is un-iconified/closed (why this happens is yet unknown and may be +corrected in the future). + +QUITTING + + To exit the program running DOTPLOT (and the DOTPLOT routine +itself), just select "quit" from the popup menu. + + + diff --git a/ZUKER/miscloop.dat b/ZUKER/miscloop.dat new file mode 100755 index 0000000..2fbc2e9 --- /dev/null +++ b/ZUKER/miscloop.dat @@ -0,0 +1,12 @@ +FREE ENERGY BY SIZE OF LOOP +misc. loop functions + > internal, bulge or hairpin loops > 30: dS(T)=dS(30)+param*ln(n/30) + 1.079 +asymmetric internal loops: the ninio equation + > the maximum correction + 3.0 + > the f(m) array (see ninio for details) + .4 .3 .2 .1 +multibranched loops + > offset, free base penalty, helix penalty + 4.6 .4 .1 diff --git a/ZUKER/mrna1.f b/ZUKER/mrna1.f new file mode 100755 index 0000000..016d6fb --- /dev/null +++ b/ZUKER/mrna1.f @@ -0,0 +1,668 @@ + subroutine process +c Process RNA sequence to be folded. + include 'rfd.inc' + +c Selected fragment is from NSAVE(1) to NSAVE(2) in historical +c numbering. + do i = nsave(1),nsave(2) + newnum(i) = 0 + enddo +c LIST contains information on excisions, and on forced or prohibited +c base-pairs. + ptr = 0 +100 if (ptr.eq.listsz) goto 400 + ptr = ptr + 1 + if (list(ptr,1).eq.4) goto 200 + if (list(ptr,1).eq.5) goto 300 + goto 100 +c Closed excision beween LIST(PTR,2) and LIST(PTR,3) ( historical +c numbering ) . +200 do i = list(ptr,2)+4,list(ptr,3)-1 + newnum(i) = 1 + enddo + goto 100 +c Open excision beween LIST(PTR,2) and LIST(PTR,3) ( historical +c numbering ) . 
+300 do i = list(ptr,2),list(ptr,3) + newnum(i) = 1 + enddo + goto 100 + +400 n = 0 + do k = nsave(1),nsave(2) +c Generate new numbering of fragment ( 1 to N ). + if (newnum(k).eq.0) then + n = n+1 + newnum(k) = n + else +c An excised base gets a new numbering of 0. + newnum(k) = 0 + endif + enddo + + if (n*2.gt.fldmax) goto 700 + +c Zero the FORCE and VST arrays. + do i = 1,n + force(i) = 0 + if (cntrl(1).ne.2) then + do j = i,i+n-1 + vst((n-1)*(i-1)+j) = 0 + enddo + endif + enddo + + do k = nsave(1),nsave(2) + i = newnum(k) + if (i.gt.0) then +c Non-excised bases are examined to determine their type. +c A - type 1 +c B - an A accessible to nuclease cleavage +c C - type 2 +c Z - a C accessible to nuclease cleavage +c G - type 3 +c H - a G accessible to nuclease cleavage +c U/T - type 4 +c V/W - a U/T accessible to nuclease cleavage +c anything else - type 5 +c HSTNUM stores historical numbering +c NUMSEQ stores nucleotide type. + hstnum(i) = k + numseq(i) = 5 + if (seq(k) .eq. 'A') numseq(i) = 1 + if (seq(k) .eq. 'B') then + numseq(i) = 1 + force(i) = 3 + endif + if (seq(k) .eq. 'C') numseq(i) = 2 + if (seq(k) .eq. 'Z') then + numseq(i) = 2 + force(i) = 3 + endif + if (seq(k) .eq. 'G') numseq(i) = 3 + if (seq(k) .eq. 'H') then + numseq(i) = 3 + force(i) = 3 + endif + if (seq(k) .eq. 'U'.or.seq(k).eq.'T') numseq(i) = 4 + if (seq(k) .eq. 'V'.or.seq(k).eq.'W') then + numseq(i) = 4 + force(i) = 3 + endif + endif + enddo + + ptr = 0 +500 if (ptr.eq.listsz) goto 600 + ptr = ptr + 1 + i = list(ptr,2) + j = list(ptr,3) + k = list(ptr,4) + if (list(ptr,1).eq.2.or.list(ptr,1).eq.6) k = j + goto (500,520,530,540,500,560,570),list(ptr,1) +c Force bases I to I+K-1 to be double-stranded. +520 do x = i,i+k-1 + force(newnum(x)) = 2 + enddo + goto 500 +c Force base-pairs I.J , I+1.J-1 , ... I+K-1.J-K+1. +530 do x = 0,k-1 + call sfce(newnum(i+x),newnum(j-x)) + enddo + goto 500 +c Force the ends of a closed excision to base-pair. 
+540 call sfce(newnum(i),newnum(j)) + do ii = i+1,i+3 + seq(ii) = ' ' + enddo + goto 500 +c Prohibit bases I to I+K-1 from base-pairing. +560 do ii = i,i+k-1 + force(newnum(ii)) = 1 + enddo + goto 500 +c Prohibit the base-pairs I.J , I+1,J-1 , ... I+K-1.J-K+1. +570 if (cntrl(1).ne.2) then + do x = 0,k-1 + vst((n-1)*(newnum(i+x)-1)+newnum(j-x)) = 1 + vst((n-1)*(newnum(j-x)-1)+newnum(i+x)+n) = 1 + enddo + endif + goto 500 +c Double up the sequence. +600 do i = 1,n + hstnum(i+n) = hstnum(i) + force(i+n) = force(i) + numseq(i+n) = numseq(i) + enddo +700 return + end + + +c Used in reading the energy files. + function convt(str) + implicit integer (a-z) + character*5 str + logical neg + + neg = .false. + place = 0 + convt = 0 + + do i = 5,1,-1 + if (str(i:i).eq.'-') then + neg = .true. + else + if (str(i:i).ge.'0'.and.str(i:i).le.'9') then + convt = convt + 10**place * (ichar(str(i:i)) - ichar('0')) + place = place+1 + endif + endif + enddo + if (neg) convt = convt * -1 + return + end + +c Reads energy file names and open the files for reading. + subroutine enefiles + character*40 filen + +10 write (6,*) 'Enter dangle energy file name (default dangle.dat)' + read (5,100,end=1) filen + if (filen.eq.' ') filen = 'dangle.dat' + open(10,file=filen,status='OLD',err=10) + +20 write (6,*) 'Enter loop energy file name (default loop.dat)' + read (5,100,end=1) filen + if (filen.eq.' ') filen = 'loop.dat' + open(11,file=filen,status='OLD',err=20) + +30 write (6,*) 'Enter stack energy file name (default stack.dat)' + read (5,100,end=1) filen + if (filen.eq.' ') filen = 'stack.dat' + open(12,file=filen,status='OLD',err=30) + +40 write (6,*) 'Enter tstack energy file name (default tstack.dat)' + read (5,100,end=1) filen + if (filen.eq.' ') filen = 'tstack.dat' + open(13,file=filen,status='OLD',err=40) + +50 write (6,*) 'Enter tloop energy file name (default tloop.dat)' + read (5,100,end=1) filen + if (filen.eq.' 
') filen = 'tloop.dat' + open(29,file=filen,status='OLD',err=50) + +60 write (6,*) 'Enter misc. loop energy file name (default + . miscloop.dat)' + read (5,100,end=1) filen + if (filen.eq.' ') filen = 'miscloop.dat' + open(32,file=filen,status='OLD',err=60) + +100 format(a40) + goto 2 +1 call exit(1) +2 return + end + +c Error message subroutine. + subroutine errmsg(err,i,j) + include 'rfd.inc' + + if (err.eq.10) write (6,10) i,j + if (err.eq.11) write (6,11) i,j + if (err.eq.12) write (6,12) i,j + if (err.eq.20) then + write (6,20) i,j + stop + endif + if (err.eq.21) then + write (6,21) + err = 0 + endif + if (err.eq.30) write (6,30) i + if (err.eq.31) then + write (6,31) sortmax,i,j + err = 0 + endif + if (err.eq.40) then + write (6,40) + stop + endif + return + +10 format(' Open bifurcation not found between ',i4,' and ',i4) +11 format(' Bulge or interior loop closed by (',i4,',',i4, + .') not found') +12 format(' Closed bifurcation not found between ',i4,' and ',i4) +20 format(' Base pair between ',i3,' and ',i3,' conflicts with ', + . 'at least one other pair') +21 format(' Buffer overflow in lineout') +30 format(' End reached at traceback ',i4) +31 format(' More than ',i5,' basepairs in sort at (',i4,',',i4,')') +40 format(' Premature end of save file') + end +c Initialize the stack. + subroutine initst + implicit integer (a-z) + dimension stk(50,4) + common /stk/ stk,sp + + sp = 0 + return + end +c Add A,B,C,D to the bottom of the stack. + subroutine push(a,b,c,d) + implicit integer (a-z) + dimension stk(50,4) + common /stk/ stk,sp + + sp = sp + 1 + if (sp.gt.50) then + write (6,*) 'ERROR - STACK OVERFLOW' + stop + endif + stk(sp,1) = a + stk(sp,2) = b + stk(sp,3) = c + stk(sp,4) = d + return + end +c Retrieve A,B,C,D from the bottom of the stack and decrease the +c stack size by one. 
+ function pull(a,b,c,d) + implicit integer (a-z) + dimension stk(50,4) + common /stk/ stk,sp + + if (sp.eq.0) then + pull = 1 + return + endif + a = stk(sp,1) + b = stk(sp,2) + c = stk(sp,3) + d = stk(sp,4) + sp = sp - 1 + pull = 0 + return + end + + +c Line printer output of a secondary structure. + subroutine linout(n1,n2,energy,iret,jret,error) +c + include 'rfd.inc' + character array(6,900),dash,bl,dot + real energy + integer unit +c + data dash/'-'/,bl/' '/,dot/'.'/,amax/900/ + + print *,' Output of a secondary structure...' +c +c WRITE SEQUENCE LABEL AND ENERGY +c + unit = cntrl(4) + if(unit.eq.0) then + print *,' NO PRINTER OUTPUT SELECTED...' + print *,' ...exiting subroutine outputs.' + return + endif + print *,'unit is ',unit,'.' + hstn1 = hstnum(n1) + hstn2 = hstnum(n2) + write(unit,103) hstn1,hstn2,seqlab,energy +c +c INITIALIZE TRACEBACK +c + call initst + call push(n1,n2,0,0) + nstem = 0 + go to 3 +c +c OUTPUT PORTION OF STRUCTURE +c +5 do while (1.eq.1) + write(unit,106) + ll = countr + if(cntrl(3).lt.ll) ll = cntrl(3) + do k = 1,6 + if(unit.eq.6) write(unit,105) (array(k,i),i = 1,ll) + if(unit.ne.6) write(unit,104) (array(k,i),i = 1,ll) + enddo + if(countr.le.cntrl(3)) go to 3 + do k = 1,5 + do j = 1,6 + array(j,k) = bl + array(j,k+5) = bl + enddo + array(2,k+5) = dot + array(5,k+5) = dot + enddo + k = 10 + ll = cntrl(3)+1 + do i = ll,countr + k = k+1 + do j = 1,6 + array(j,k) = array(j,i) + enddo + enddo + countr = k + enddo + +c +3 do k = 1,amax + do j = 1,6 + array(j,k) = bl + enddo + enddo +c +c FILL IN OUTPUT MATRIX +c + nstem = pull(i,j,countr,xx) + if (nstem.ne.0) go to 99 +c +c LOOK FOR DANGLING ENDS +c +12 ip = i + jp = j + do while (basepr(ip).eq.0) + ip = ip+1 + if(ip.ge.j) go to 16 + enddo + do while (basepr(jp).eq.0) + jp = jp-1 + enddo + k = ip-i + if(j-jp.gt.k) k = j-jp + if(k.eq.0) go to 17 + ii = ip + jj = jp + pos = countr+k+1 + if(pos.gt.amax) then + error = 21 + go to 99 + endif + do kk = 1,k + pos = pos-1 + ii = ii-1 + jj = 
jj+1 + if(ii.ge.i) then + i2 = hstnum(ii) + array(2,pos) = seq(i2) + if(10*(i2/10).eq.i2) call digit(1,i2,pos,amax,array) + else + array(2,pos) = dash + endif + if(jj.le.j) then + j2 = hstnum(jj) + array(5,pos) = seq(j2) + if(10*(j2/10).eq.j2) call digit(6,j2,pos,amax,array) + else + array(5,pos) = dash + endif + enddo + countr = countr+k + go to 17 +c +c HAIRPIN LOOP +c +16 if(i.ge.j) go to 5 + half = (j-i+2)/2 + ii = i-1 + jj = j+1 + do k = 1,half + ii = ii+1 + jj = jj-1 + countr = countr+1 + if(countr.gt.amax) then + error = 21 + go to 99 + endif + if(seq(hstnum(ii)).eq.' ') go to 40 + i2 = hstnum(ii) + j2 = hstnum(jj) + if(10*(i2/10).eq.i2.and.ii.lt.jj) + . call digit(1,i2,countr,amax,array) + if(10*(j2/10).eq.j2) call digit(6,j2,countr,amax,array) + if(k.ne.half) then + array(2,countr) = seq(i2) + array(5,countr) = seq(j2) + else + if(ii.lt.jj) array(3,countr) = seq(i2) + array(4,countr) = seq(j2) + endif +22 enddo + go to 5 +c +c 'CLOSED' EXCISION FOUND +c +40 array(3,countr) = dash + array(4,countr) = dash + go to 5 +c +c STACKING OR BIFURCATION +c +17 i = ip + j = jp + if(basepr(i).eq.j) go to 24 +c +c CHECK FOR KNOT +c + if(basepr(i).ge.basepr(j).or.i.ge.basepr(i).or.basepr(j).ge.j) + . then + iret = hstnum(i) + jret = hstnum(basepr(i)) + error = 20 + go to 99 + endif +c +c BIFURCATION MUST OCCUR +c + countr = countr+2 + if(countr.gt.amax) then + error = 21 + go to 99 + endif + call push(basepr(i)+1,j,countr,0) + j = basepr(i) +c +c STACKING REGION +c +24 countr = countr+1 + if(countr.gt.amax) then + error = 21 + go to 99 + endif + ii = hstnum(i) + jj = hstnum(j) + array(3,countr) = seq(ii) + array(4,countr) = seq(jj) + if(10*(ii/10).eq.ii) call digit(1,ii,countr,amax,array) + if(10*(jj/10).eq.jj) call digit(6,jj,countr,amax,array) + if(i.eq.iret.and.j.eq.jret) then + array(2,countr) = '|' + array(5,countr) = '^' + end if + i = i+1 + j = j-1 + if(basepr(i)-j) 12,24,12 + +99 return + +103 format(' FOLDING BASES ',i4,' TO ',i4,' OF ',a30,/ + .' 
ENERGY = ',f8.1) +104 format(220a1) +105 format(' ',220a1) +106 format(' ') + end + +c Puts the number COLUMN in row ROW and column POS of the array B. +c The least significant digit ends up in column POS. If the number +c is too large to fit, a period is put in column POS and row ROW. + subroutine digit(row,column,pos,bmax,b) + implicit integer (a-z) + integer pos,column,bmax,d(10) + character*1 b(6,bmax),bl,c(10),dot + data bl/' '/,c/'0','1','2','3','4','5','6','7','8','9'/,dot/'.'/ +c + size=1 + n=column +1 p=n/10 + q=n-10*p + d(size)=q + if(p.eq.0) go to 2 + n=p + size=size+1 + go to 1 +2 if(pos-size.lt.0) go to 3 + do k=1,size + q=pos-k+1 + if(b(row,q).ne.bl) go to 3 + enddo + p=pos + do 4 k=1,size + q=d(k) + b(row,p)=c(q+1) +4 p=p-1 + return +3 b(row,pos)=dot + return + end + +c Generates a region table for the Shapiro and Maizel DRAW program. + subroutine regtab + include 'rfd.inc' + real r + + k = 1 + region = 1 + do while (k.lt.n) + r = 0.0 + regsz = 1 + kst = k + if (k.lt.basepr(k)) then + do while (basepr(k+1).eq.basepr(k)-1.and.k.lt.n) + regsz = regsz + 1 + r = r + float(erg(2,k,basepr(k),k+1,basepr(k+1))) / 10.0 + k = k + 1 + enddo + write (22,100) region,hstnum(kst),hstnum(basepr(kst)),regsz,r + region = region + 1 + endif + k = k + 1 + enddo + return + +100 format(' (',i5,')',3x,3(i5,3x),f7.1) + end + +c Generates a CT file. (Richard Feldmann) + subroutine ct(r) + include 'rfd.inc' + real r + + write(21,100) n,r,seqlab + do k = 1,n + k1 = k+1 + if (k.eq.n) k1 = 0 + write (21,200) k, seq(hstnum(k)),k-1,k1,basepr(k),hstnum(k) + enddo + return + +100 format(i5,1x,'ENERGY = ',f7.1,4x,a30) +200 format(i5,1x,a1,3x,4i5) + end + + + +c Menu subroutine for RNA folding program. +c Allows the user to set energy parameters and to +c add auxiliary information. 
+ subroutine menu + + include 'rfd.inc' +c data listsz/0/ rtm 11.II.99 + listsz=0 + + +10 if (listsz.ge.100) goto 800 + write (6,900) +50 write (6,901) + read (5,*,end=1,err=1) choice + if (choice.lt.1.or.choice.gt.10) goto 50 + goto (100,200,300,400,400,200,300,800,60,70),choice + +60 call listout(6) + goto 10 + +70 listsz = 0 + goto 10 +1 call exit(1) + + +100 write (6,1000) (eparam(i),i=1,10) +101 write (6,1001) + read (5,1002,end=10,err=10) parm + if (parm.lt.1.or.parm.gt.10) goto 10 + write (6,1003) + read (5,*,end=10,err=10) val + eparam(parm) = val + goto 100 +1000 format(/, + . 10x,' Energy Parameters (10ths kcal/mole)',//, + . 10x,' 1 Extra stack energy [',i5,']',/, + . 10x,' 2 Extra bulge energy [',i5,']',/, + . 10x,' 3 Extra loop energy (interior) [',i5,']',/, + . 10x,' 4 Extra loop energy (hairpin) [',i5,']',/, + . 10x,' 5 Extra loop energy (multi) [',i5,']',/, + . 10x,' 6 Multi loop energy/single-stranded base [',i5,']',/, + . 10x,' 7 Maximum size of interior loop [',i5,']',/, + . 10x,' 8 Maximum lopsidedness of an interior loop [',i5,']',/, + . 10x,' 9 Bonus Energy [',i5,']',/, + . 10x,'10 Multi loop energy/closing base-pair [',i5,']',//) +1001 format(' Enter Parameter to be changed ( for main menu) ' + .,$) +1002 format(i6) +1003 format(' Enter new value ',$) + +200 write (6,2001) + read (5,*,end=10,err=10) i,k + listsz = listsz + 1 + list(listsz,1) = choice + list(listsz,2) = i + list(listsz,3) = k + list(listsz,4) = -1 + goto 10 +2001 format(' Enter base and length ',$) + +300 write (6,3001) + read (5,*,end=10,err=10) i,j,k + listsz = listsz + 1 + list(listsz,1) = choice + list(listsz,2) = i + list(listsz,3) = j + list(listsz,4) = k + goto 10 +3001 format(' Enter base pair and length ',$) + +400 write (6,4001) + read (5,*,end=10,err=10) i,j + listsz = listsz + 1 + list(listsz,1) = choice + list(listsz,2) = i + list(listsz,3) = j + list(listsz,4) = -1 + goto 10 +4001 format(' Enter begining and end ',$) + +800 return + +900 format(/, + . 
10x,'1 Energy Parameter 6 Single Prohibit',/, + . 10x,'2 Single Force 7 Double Prohibit',/, + . 10x,'3 Double Force 8 Begin Folding ',/, + . 10x,'4 Closed Excision 9 Show current ',/, + . 10x,'5 Open Excision 10 Clear current ',//) +901 format(' Enter Choice ',$) + end diff --git a/ZUKER/mrna2.f b/ZUKER/mrna2.f new file mode 100755 index 0000000..618798c --- /dev/null +++ b/ZUKER/mrna2.f @@ -0,0 +1,743 @@ + subroutine listout(u) +c This subroutine lists current choices on excisions and on +c forced or prohibited base-pairs. + integer u + common /list/ list,listsz + dimension list(100,4) + character*20 choices(7) + data choices/'Energy Parameter ','Single Force ', + .'Double Force ','Closed Excision ','Open Excision ', + .'Single Prohibit ','Double Prohibit '/ + + if (listsz.eq.0) then + write(u,*) ' No choices currently defined' + else + write(u,*) ' ' + write(u,*) ' Current Choices' + do 100 i = 1,listsz + if (list(i,1).eq.3.or.list(i,1).eq.7) then + write(u,1000) choices(list(i,1)),(list(i,k),k = 2,4) + else + write(u,1001) choices(list(i,1)),(list(i,k),k = 2,3) + endif +100 continue + write(u,*) ' ' + endif + return +1000 format(10x,a20,': (',i4,',',i4,') ',i4) +1001 format(10x,a20,': ',i4,',',i4) + end + +c Control subroutine for RNA folding. + subroutine device + include 'rfd.inc' + character*40 sfile,str + logical used + used = .false. + +c What kind of run is this? ( regular, save or continuation ) + write (6,2000) + read (5,2001,end=1) cntrl(1) + write (6,*) ' ' + if (cntrl(1).lt.0.or.cntrl(1).gt.2) cntrl(1) = 0 + + if (cntrl(1).eq.1) then + cntrl(7) = 1 + else +c What mode is the program to be run in? +c dot plot, automatic sorted tracebacks of one sequence +c fragment or suboptimal foldings of every complete +c sequence in a file. 
+9 write (6,1002) + read (5,2001,end=1) cntrl(7) + if (cntrl(7).lt.0.or.cntrl(7).gt.2) cntrl(7) = 0 + if (cntrl(1).eq.2.and.cntrl(7).eq.2) then + write (6,*) + .'Combination of continuation run and multiple foldings disallowed' + write (6,*) ' ' + goto 9 + endif + write (6,*) ' ' + endif +c Folding multiple sequences is treated as a sort run. +c Find total number of sequences to be folded in a multiple +c sequence run. + + if (cntrl(7).eq.2) call mseq(cntrl(5)) + + if (cntrl(7).eq.0) then +c Prompt for terminal type. NOTE: NOT NEEDED ON IRIS VERSION +c$$$ write (6,1000) +c$$$ read (5,2001,end=7,err=7) cntrl(5) +c$$$7 if (cntrl(5).lt.1.or.cntrl(5).gt.3) cntrl(5) = 2 + write (6,1001) + elseif (cntrl(1).ne.1) then +c Prompt for controls on sort. + write (6,1004) + read (5,2001,end=1) cntrl(8) + write (6,1003) + endif + if (cntrl(1).ne.1) then + read (5,2001,end=1) cntrl(6) + if (cntrl(6).lt.1) cntrl(6) = 1 + + write (6,1005) + read (5,2001,end=1) cntrl(9) + write (6,*) ' ' + endif +c Prompt for SAVE file name for a save/continuation run. + if (cntrl(1).ne.0) then +4 write (6,3000) + read (5,3001,end=1) sfile + if (cntrl(1).eq.1) then + str = 'unknown' + else + str = 'OLD' + endif + if (sfile.eq.' ') sfile= 'fold.sav' + open(30,err=2,file=sfile,status=str,form='UNFORMATTED') + goto 3 +2 if (cntrl(1).eq.2) goto 4 +3 if (cntrl(1).eq.2) call getcont + endif +c Obtain sequence. Original length is N. +c A fragment from NSAVE(1) to NSAVE(2) is selected. +c After PROCESS, N becomes the length of the processed sequence +c to be folded. + if (cntrl(1).ne.2.and.cntrl(7).ne.2) then + call formid(seqlab,seq,n,maxsiz,used) + write(6,4000) seqlab,n + write (6,*) 'Enter start of fragment (default 1)' + read (5,4001,end=1) nsave(1) + if (nsave(1).le.0) nsave(1) = 1 + write (6,4002) n + read (5,4001,end=1) nsave(2) + if (nsave(2).le.0) nsave(2) = n + endif + + +1000 format(1x,'Enter terminal type',/,5x,'1 VGT100', + . 
/,5x,'2 Visual 102 (default)',/,5x,'3 Tektronics 4105') +1001 format(/,' Enter minimum vector size for plot (default 1) ',$) +1002 format(1x,'Enter run mode',/,5x,'0 Sub-optimal plot (default)', + . /,5x,'1 N Best',/,5x,'2 Multiple Molecules') +1003 format(/,' Enter number of tracebacks (default 1) ',$) +1004 format(/,' Enter percentage for sort (default 0) ',$) +1005 format(/,' Enter window size (default 0) ',$) +2000 format(1x,'Enter run type',/,5x,'0 Regular run (default)', + . /,5x,'1 Save run',/,5x,'2 Continuation run') +2001 format(i6) +3000 format(' Enter save file name (default fold.sav)') +3001 format(a30) +4000 format(/,' ',a30,5x,i5,' nucleotides',/) +4001 format(i10) +4002 format(1x,'Enter end of fragment (default ',i5,')') + return +1 call exit(1) + end + +c Obtain multiple sequences from a sequence file using MULTID. + subroutine mseq(i) + include 'rfd.inc' + logical used + data used/.false./ + + if (.not.used) then + call multid(seqlab,seq,n,maxsiz,used,i) + write (6,*) ' ' + else + call multid(seqlab,seq,n,maxsiz,used,i) + write(6,4000) seqlab,n + endif + nsave(1) = 1 + nsave(2) = n + return +4000 format(/,' ',a30,5x,i5,' nucleotides',/) + end + +c Set up output units and files for RNA folding. + subroutine outputs + include 'rfd.inc' + character*40 str,dstr + character*1 in + + str(1:1) = ' '; + +c Examine sequence label to get default names for output files. + + k = 1 + do while ((seqlab(k:k).lt.'A'.or.seqlab(k:k).gt.'Z').and. + . (seqlab(k:k).lt.'a'.or.seqlab(k:k).gt.'z')) + k = k + 1 + enddo + slen = min0(30,25+k) + do while (seqlab(slen:slen).eq.' ') + slen = slen - 1 + enddo + j = 0 + do i = k,slen + j = j + 1 + if ((seqlab(i:i).ge.'A'.and.seqlab(i:i).le.'Z').or. + . (seqlab(i:i).ge.'a'.and.seqlab(i:i).le.'z').or. + . (seqlab(i:i).ge.'0'.and.seqlab(i:i).le.'9')) then + + dstr(j:j) = seqlab(i:i) + else + dstr(j:j) = '_' + endif + enddo + slen = j + +c Line printer output. Get name and open file for write. 
+      cntrl(2) = 0
+      write (6,5010)
+      read (5,5000,end=1) in
+      if (in.ne.'N'.and.in.ne.'n') then
+         cntrl(2) = 1
+         write (6,5011)
+         read (5,5000,end=1) in
+         if (in.eq.'N'.or.in.eq.'n') then
+            dstr(slen+1:slen+4) = '.out'
+51          write (6,5012) dstr(1:slen+4)
+            read (5,5001) str
+            if (str(1:1).eq.' ') str = dstr(1:slen+4)
+            cntrl(4) = 20
+c           open(20,file=str,recl=255,status='unknown',err=51)
+            open(20,file=str,status='unknown',err=51)
+c vf90: Warning, line 203: RECL with ACCESS=SEQUENTIAL could not be translated.
+c vf90: Warning, line 203: Specifier removed, but may not yield same results. (RECL)
+         else
+            cntrl(4) = 6
+         endif
+         write (6,5013)
+         read (5,5014,end=1) cntrl(3)
+         if (cntrl(3).eq. 0) cntrl(3) = 80
+      endif
+
+c     CT file output. Get name and open file for write.
+      write (6,5020)
+      read (5,5000,end=1) in
+      if (in.eq.'Y'.or.in.eq.'y') then
+         cntrl(2) = 2 + 2*cntrl(2)
+         dstr(slen+1:slen+3) = '.ct'
+52       write (6,5021) dstr(1:slen+3)
+         read (5,5001) str
+         if (str(1:1).eq.' ') str = dstr(1:slen+3)
+         open(21,file=str,status='unknown',err=52)
+      endif
+
+c     Region table output. Get name and open file for write.
+      write (6,5030)
+      read (5,5000,end=1) in
+      if (in.eq.'Y'.or.in.eq.'y') then
+         if (cntrl(2).eq.1.or.cntrl(2).eq.2) cntrl(2) = cntrl(2) + 1
+         cntrl(2) = cntrl(2) + 3
+         dstr(slen+1:slen+4) = '.reg'
+53       write (6,5031) dstr(1:slen+4)
+         read (5,5001) str
+         if (str(1:1).eq.' ') str = dstr(1:slen+4)
+c     rtm 11.II.98 : 52 below -> 53
+         open(22,file=str,status='unknown',err=53)
+      endif
+      write (6,*) ' '
+      return
+1     call exit(1)
+
+5000  format(a1)
+5001  format(a40)
+5010  format(' Do you want printer output? (Y,n) ',$)
+5011  format(' Output to terminal? (Y,n) ',$)
+5012  format(' Enter output file name (default ',a,')')
+5013  format(' Enter number of columns on printer (default 80) ',$)
+5014  format(i10)
+5020  format(' Do you want ct file? (y,N) ',$)
+5021  format(' Enter ct file name (default ',a,')')
+5030  format(' Do you want region table? (y,N) ',$)
+5031  format(' Enter region table file name (default ',a,')')
+
+      end
+
+c     Reads energy files.
+c
+c     Fills the energy tables declared in rfd.inc.  Nothing is opened
+c     here; the routine only reads from these units:
+c       unit 29 - tetraloop table (read through GETTLOOPS)
+c       unit 32 - misc. loop data: PRELOG, MAXPEN, POPPEN(1:4) and
+c                 EPARAM(5), EPARAM(6), EPARAM(10)
+c       unit 10 - dangling end energies          -> DANGLE
+c       unit 11 - interior, bulge, hairpin loops -> INTER,BULGE,HAIRPIN
+c       unit 12 - stacking energies              -> STACK
+c       unit 13 - terminal stacking energies     -> TSTK
+c     Real values read from unit 32 are multiplied by 10 and, except
+c     for PRELOG, truncated to integers.  EPARAM(1:4), EPARAM(7:9)
+c     get fixed defaults here.  Table entries are 5 character fields
+c     converted with CONVT; a field holding only a '.' means
+c     INFINITY (for DANGLE such a field leaves the entry at 0).
+c     Units 10-13 are closed on success.  Any missing marker or
+c     premature end of file stops the program.
+      subroutine ergread
+
+      include 'rfd.inc'
+      logical endfile
+      logical find
+      character*80 inrec
+      character*5 temp
+      real a,b,c,d
+      integer*2 convt
+
+c     TLoop INFORMATION IN
+      call gettloops
+
+c     Get misc loop info.  FIND returns .true. when the marker was
+c     NOT found before end of file, so every section is checked.
+      if (find(32,3,' > ')) goto 50
+      read (32,*) prelog
+      prelog=prelog*10
+      if (find(32,3,' > ')) goto 50
+      read (32,*) a
+      maxpen=int(a*10)
+      if (find(32,3,' > ')) goto 50
+      read (32,*) a,b,c,d
+      poppen(1)=int(a*10)
+      poppen(2)=int(b*10)
+      poppen(3)=int(c*10)
+      poppen(4)=int(d*10)
+      if (find(32,3,' > ')) goto 50
+c     Set default values of eparam.
+      eparam(1) = 0
+      eparam(2) = 0
+      eparam(3) = 0
+      eparam(4) = 0
+      eparam(7) = 30
+      eparam(8) = 30
+      eparam(9) = -500
+      read (32,*) a,b,c
+      eparam(5)=int(a*10)
+      eparam(6)=int(b*10)
+      eparam(10)=int(c*10)
+
+c     DANGLE IN
+c     Integer indices are used for the initialisation loops; the
+c     real variables a,b,c,d are only input buffers for unit 32.
+
+      do ia = 1,5
+        do ib = 1,5
+          do ic = 1,5
+            do id = 1,2
+              dangle(ia,ib,ic,id) = 0
+            enddo
+          enddo
+        enddo
+      enddo
+      endfile = find(10,3,'<--')
+      if (.not.endfile) then
+        do var4 = 1,2
+          do var1 = 1,4
+            if (endfile) goto 150
+            read(10,100,end=150) inrec
+c           one record holds 4 x 4 fields of 5 characters
+            do var2 = 1,4
+              do var3 = 1,4
+                j = 0
+                tstart = (var2-1)*20 + (var3-1)*5 + 1
+                temp = inrec(tstart:tstart+4)
+                do i = 2,4
+                  if (temp(i-1:i+1).eq.' . ') j = infinity
+                enddo
+                if (temp(1:1).eq.'.'.or.temp(5:5).eq.'.') j = infinity
+                if (j.eq.0) j = convt(temp)
+                if(j.ne.infinity) dangle(var1,var2,var3,var4) = j
+              enddo
+            enddo
+            endfile = find(10,3,'<--')
+          enddo
+        enddo
+      else
+        write (6,*) 'ERROR - DANGLE ENERGY FILE NOT FOUND'
+        stop
+      endif
+
+100   format(a80)
+      goto 200
+
+50    stop 'Premature end of MISCLOOP.DAT'
+
+150   write (6,*) 'ERROR - PREMATURE END OF DANGLE ENERGY FILE'
+      stop
+
+
+c     INTERNAL,BULGE AND HAIRPIN IN
+c     Up to 30 records, each with three values (interior, bulge,
+c     hairpin); reading stops early at end of file.
+
+200   endfile = find(11,5,'-----')
+      i = 1
+201   read(11,100,end=300) inrec
+      j = -1
+      do ii = 1,3
+        j = j + 6
+        do while (inrec(j:j).eq.' ')
+          j = j + 1
+        enddo
+        temp = inrec(j:j+4)
+        k = 0
+        do jj = 2,4
+          if (temp(jj-1:jj+1).eq.' . ') k = infinity
+        enddo
+        if (temp(1:1).eq.'.'.or.temp(5:5).eq.'.') k = infinity
+        if (k.eq.0) k = convt(temp)
+        if (ii.eq.1) inter(i) = k
+        if (ii.eq.2) bulge(i) = k
+        if (ii.eq.3) hairpin(i) = k
+      enddo
+      i = i + 1
+      if (i.le.30) goto 201
+
+c     STACK IN
+
+300   do ia = 1,5
+        do ib = 1,5
+          do ic = 1,5
+            do id = 1,5
+              stack(ia,ib,ic,id) = infinity
+            enddo
+          enddo
+        enddo
+      enddo
+      endfile = find(12,3,'<--')
+      if (.not.endfile) then
+        do var1 = 1,4
+          do var3 = 1,4
+            if (endfile) goto 350
+            read(12,100,end=350) inrec
+            do var2 = 1,4
+              do var4 = 1,4
+                j = 0
+                tstart = (var2-1)*20 + (var4-1)*5 + 1
+                temp = inrec(tstart:tstart+4)
+                do i = 2,4
+                  if (temp(i-1:i+1).eq.' . ') j = infinity
+                enddo
+                if (temp(1:1).eq.'.'.or.temp(5:5).eq.'.') j = infinity
+                if (j.eq.0) j = convt(temp)
+                stack(var1,var2,var3,var4) = j
+              enddo
+            enddo
+          enddo
+          endfile = find(12,3,'<--')
+        enddo
+      else
+        write (6,*) 'ERROR - STACK ENERGY FILE NOT FOUND'
+        stop
+      endif
+      call stest(stack,'STACK ')
+
+      goto 400
+
+350   write (6,*) 'ERROR - PREMATURE END OF STACK ENERGY FILE'
+      stop
+
+c     TSTACK IN
+c     Same layout as the STACK file, read from unit 13.  Errors in
+c     this section are reported at label 450 under the TSTACK name.
+
+400   do ia = 1,5
+        do ib = 1,5
+          do ic = 1,5
+            do id = 1,5
+              tstk(ia,ib,ic,id) = infinity
+            enddo
+          enddo
+        enddo
+      enddo
+      endfile = find(13,3,'<--')
+      if (.not.endfile) then
+        do var1 = 1,4
+          do var3 = 1,4
+            if (endfile) goto 450
+            read(13,100,end=450) inrec
+            do var2 = 1,4
+              do var4 = 1,4
+                j = 0
+                tstart = (var2-1)*20 + (var4-1)*5 + 1
+                temp = inrec(tstart:tstart+4)
+                do i = 2,4
+                  if (temp(i-1:i+1).eq.' . ') j = infinity
+                enddo
+                if (temp(1:1).eq.'.'.or.temp(5:5).eq.'.') j = infinity
+                if (j.eq.0) j = convt(temp)
+                tstk(var1,var2,var3,var4) = j
+              enddo
+            enddo
+          enddo
+          endfile = find(13,3,'<--')
+        enddo
+      else
+        write (6,*) 'ERROR - TSTACK ENERGY FILE NOT FOUND'
+        stop
+      endif
+c**   CALL STEST(TSTK,'TSTACK')
+
+      close(10)
+      close(11)
+      close(12)
+      close(13)
+      goto 500
+
+450   write (6,*) 'ERROR - PREMATURE END OF TSTACK ENERGY FILE'
+      stop
+
+500   return
+      end
+c     Symmetry test on stacking and terminal stacking energies.
+c     For all i,j,k,l between 1 and 4, STACK(i,j,k,l) MUST equal
+c     STACK(l,k,j,i). If this fails at some i,j,k,l; these numbers
+c     are printed out and the program grinds to an abrupt halt!
+c
+c     STACK - 5x5x5x5 integer energy table to check
+c     SNAME - 6 character table name used in the error report
+      subroutine stest(stack,sname)
+      integer stack(5,5,5,5),a,b,c,d
+      character*6 sname
+
+      do a = 1,4
+        do b = 1,4
+          do c = 1,4
+            do d = 1,4
+              if (stack(a,b,c,d).ne.stack(d,c,b,a)) then
+                write (6,*) 'SYMMETRY ERROR'
+                write (6,101) sname,a,b,c,d,stack(a,b,c,d)
+                write (6,101) sname,d,c,b,a,stack(d,c,b,a)
+                stop
+              endif
+            enddo
+          enddo
+        enddo
+      enddo
+      return
+101   format(5x,a6,'(',3(i1,','),i1,') = ',i10)
+      end
+
+c     Writes out the numbers in the energy arrays of the folding program.
+c
+c     U - Fortran unit that receives the whole listing.
+c     Dumps DANGLE, TSTK, STACK, the INTER/BULGE/HAIRPIN columns and
+c     the tetraloop table.  Entries equal to INFINITY print as '****'.
+c     Values are written without advancing ($ edit descriptor), so
+c     the row terminators (formats 103/104) must go to the same unit
+c     as the values; they are written to U, not to unit 6.
+      subroutine out(u)
+      include 'rfd.inc'
+      integer*2 tlptr,bptr,nbase
+      integer key
+      character*4 tlbuf
+
+c     used for testing contents of energy arrays only
+c     (called from CDUMP when a listing of energy files is requested)
+
+      write (u,100) 'DANGLE'
+      do var4 = 1,2
+        do var1 = 1,4
+          do var2 = 1,4
+            do var3 = 1,4
+              o = dangle(var1,var2,var3,var4)
+              if (o.ne.infinity) then
+                write (u,101) o
+              else
+                write (u,102)
+              endif
+            enddo
+          enddo
+          write (u,103)
+        enddo
+        write (u,104)
+      enddo
+
+
+      write (u,100) 'TSTACK'
+      do var1 = 1,4
+        do var3 = 1,4
+          do var2 = 1,4
+            do var4 = 1,4
+              o = tstk(var1,var2,var3,var4)
+              if (o.ne.infinity) then
+                write (u,101) o
+              else
+                write (u,102)
+              endif
+            enddo
+          enddo
+          write (u,103)
+        enddo
+        write (u,104)
+      enddo
+
+      write (u,100) 'STACK'
+      do var1 = 1,4
+        do var3 = 1,4
+          do var2 = 1,4
+            do var4 = 1,4
+              o = stack(var1,var2,var3,var4)
+              if (o.ne.infinity) then
+                write (u,101) o
+              else
+                write (u,102)
+              endif
+            enddo
+          enddo
+          write (u,103)
+        enddo
+        write (u,104)
+      enddo
+
+      write (u,200) 'INTER','BULGE','HAIRPIN'
+      do i = 1,30
+        write (u,201) i,inter(i),bulge(i),hairpin(i)
+      enddo
+
+c     Each tetraloop key packs four bases, 3 bits per base, first
+c     base in the low bits (see GETTLOOPS); unpack it back to text.
+      write (u,100) 'TLoops'
+      do tlptr=1,numoftloops
+        key=tloop(tlptr,1)
+        do bptr=1,4
+          nbase=mod(key, 8)
+          key=int(key/8)
+          if (nbase.eq.1) then
+            tlbuf(bptr:bptr)='A'
+          elseif (nbase.eq.2) then
+            tlbuf(bptr:bptr)='C'
+          elseif (nbase.eq.3) then
+            tlbuf(bptr:bptr)='G'
+          else
+            tlbuf(bptr:bptr)='U'
+          endif
+        enddo
+        write (u,205) tlbuf,tloop(tlptr,2)
+      enddo
+      return
+100   format(//,a40,//)
+101   format('+',i4,1x,$)
+102   format('+',4('*'),1x,$)
+103   format(' ')
+104   format(/)
+200   format(3a20,/,60('-'),/)
+201   format(i4,i16,2i20)
+205   format(a4,2x,i8)
+      end
+
+c     Used in reading the energy files.
+c     Locates markers in the energy files so that data can be read
+c     properly.
+c
+c     UNIT - unit to read from
+c     LEN  - number of significant characters in STR (LEN >= 1)
+c     STR  - marker text; only STR(1:LEN) is compared
+c     Reads 80 column records from UNIT until one contains the
+c     marker and leaves the file positioned just after that record.
+c     NOTE the sense of the result: FIND is .false. when the marker
+c     WAS found and .true. when end of file was reached first.
+c     STR is assumed-length so that short literals such as '<--' can
+c     be passed without reading past the actual argument.
+      function find(unit,len,str)
+      implicit integer (a-z)
+      logical find
+      character*(*) str
+      character*80 inrec
+
+      find = .false.
+10    read(unit,100,end=200) inrec
+      if (index(inrec,str(1:len)).eq.0) goto 10
+      return
+100   format(a80)
+200   find = .true.
+      return
+      end
+
+
+c     Dumps the state read from a continuation file: the current
+c     force/prohibit/excision list (LISTOUT), the energy parameters
+c     and, on request, the energy tables (OUT).  Output goes to the
+c     terminal (unit 6) or to a user named file on unit 31.
+c     End of file on either prompt ends the program via exit(1).
+      subroutine cdump
+      include 'rfd.inc'
+      character*40 name
+      character yn
+
+      write (6,*)
+     . 'Enter file name for continuation dump (return for terminal)'
+      read (5,100,end=1) name
+      if (name.eq.' ') then
+         u = 6
+      else
+         u = 31
+         open(31,status='unknown',file=name)
+      endif
+      call listout(u)
+      write (u,101) 'Energy Parameters'
+      write (u,1000) eparam
+      write (6,*) 'Listing of energy files? (y/N)'
+      read(5,102,end=1) yn
+      if (yn.eq.'Y'.or.yn.eq.'y') then
+         call out(u)
+      endif
+      return
+1     call exit(1)
+100   format(a30)
+101   format(a20,/)
+102   format(a1)
+1000  format(/,
+     . 10x,' 1 Extra stack energy [',i5,']',/,
+     . 10x,' 2 Extra bulge energy [',i5,']',/,
+     . 10x,' 3 Extra loop energy (interior) [',i5,']',/,
+     . 10x,' 4 Extra loop energy (hairpin) [',i5,']',/,
+     . 10x,' 5 Extra loop energy (multi) [',i5,']',/,
+     . 10x,' 6 Multi loop energy/single-stranded base [',i5,']',/,
+     . 10x,' 7 Maximum size of interior loop [',i5,']',/,
+     . 10x,' 8 Maximum lopsidedness of an interior loop [',i5,']',/,
+     . 10x,' 9 Bonus Energy [',i5,']',/,
+     . 10x,'10 Multi loop energy/closing base-pair [',i5,']',//)
+      end
+      subroutine gettloops
+c
+c*    Read in TLoop sequences, convert to numeric form, and
+c*    convert energy to an integer (*10)
+c
+c*    Input is unit 29.  The first two records are skipped as a
+c*    header; every following non-blank record holds a four base
+c*    sequence and a numeric field of at most four characters.
+c*    Results go to common /tloops/:
+c*      tloop(n,1) = packed sequence, 3 bits per base, first base
+c*                   in the low bits (codes from TONUM)
+c*      tloop(n,2) = CONVT of the numeric field
+c*    Blank records are skipped.  More than MAXTLOOPS entries, or a
+c*    record too short to hold both fields, stops the program
+c*    instead of indexing outside TLOOP or INBUF.
+c
+      integer maxtloops,bufsiz
+      parameter (maxtloops=40,bufsiz=80)
+      integer*2 ptr,tloop(maxtloops,2),nseq(4)
+      integer*2 numoftloops
+      integer*2 convt
+      character*5 buffa
+      character*80 inbuf
+c
+      common /tloops/tloop,numoftloops
+c
+      numoftloops=0
+c
+c*    Throw out header
+c
+      read (29,1)
+c
+c*    Read a line and convert to numeric sequence and energy until EOF
+c
+10    read (29,2,end=99) inbuf
+      if (inbuf.eq.' ') goto 10
+      if (numoftloops.ge.maxtloops) then
+         write (*,6) maxtloops
+         stop
+      endif
+      ptr=1
+      do while ((ptr.lt.bufsiz).and.
+     2          (inbuf(ptr:ptr).eq.' '))
+         ptr=ptr+1
+      enddo
+c     the four bases must fit inside the buffer
+      if (ptr+3.gt.bufsiz) goto 90
+      numoftloops=numoftloops+1
+c     Only take first four characters, since they're TETRAloops
+      buffa(1:4)=inbuf(ptr:ptr+3)
+      buffa(5:5)=' '
+      call tonum(buffa,nseq)
+      tloop(numoftloops,1)=((nseq(4)*8+nseq(3))*8+nseq(2))*8+nseq(1)
+      ptr=ptr+4
+      if (ptr.gt.bufsiz) goto 90
+      do while ((ptr.lt.bufsiz).and.
+     2          (inbuf(ptr:ptr).eq.' '))
+         ptr=ptr+1
+      enddo
+      if (ptr+3.gt.bufsiz) goto 90
+c     Simple error check: warn when the numeric field is longer
+c     than the four characters that are converted.
+      if (ptr+4.le.bufsiz) then
+         if (inbuf(ptr+4:ptr+4).ne.' ') write (*,5) inbuf
+      endif
+      buffa(1:4)=inbuf(ptr:ptr+3)
+      buffa(5:5)=' '
+      tloop(numoftloops,2)=convt(buffa)
+      goto 10
+c
+c*    Record too short to hold a sequence and an energy
+c
+90    write (*,7) inbuf
+      stop
+c
+c*    Normal ending
+c
+99    close(unit=29,status='KEEP')
+      return
+1     format(/)
+2     format (a)
+5     format (1x,'Too many characters in numeric field of this ',
+     1        'line of',/,1x,'tloop.dat file: ',a)
+6     format (1x,'Too many entries in tloop.dat file, maximum is ',
+     1        i4)
+7     format (1x,'Malformed line in tloop.dat file: ',a)
+      end
+
+      subroutine tonum(tloopseq,numeric)
+c
+c*    Convert TLoopSeq to numeric format in Numeric.
+c + character*5 tloopseq + integer*2 i,numeric(4) +c + do i=1,4 + if (tloopseq(i:i).eq.'A') then + numeric(i)=1 + elseif (tloopseq(i:i).eq.'C') then + numeric(i)=2 + elseif (tloopseq(i:i).eq.'G') then + numeric(i)=3 + elseif (tloopseq(i:i).eq.'U') then + numeric(i)=4 + elseif (tloopseq(i:i).eq.'T') then + numeric(i)=4 + else + write (*,1) tloopseq(i:i) +1 format (1x,'Unknown base in TLOOP file: ',a1) + stop + endif + enddo + return + end diff --git a/ZUKER/multid.f b/ZUKER/multid.f new file mode 100755 index 0000000..7d6e211 --- /dev/null +++ b/ZUKER/multid.f @@ -0,0 +1,354 @@ + subroutine multid(seqid,seq,nseq,nmax,used,rnum) +c REVISED VERSION OF FORMID -- TO WORK WITH MULTIPLE ALIGNMENTS +c RESEARCHER: M. ZUKER +c JUN 1986 +c DS TUDHOPE +c WRITTEN TO WORK ON VAX-11-750 UNDER VMS4.3 USING STANDARD F-77 +c #### NOTE #### USE OF CARRIAGE CONTROL LINE FEED SUPPRESSION +c IS USED IN THIS PROGRAM AND IS NOT STANDARD +c FORTRAN-77 +c SEE '$' IN FORMAT STATEMENT 102 + implicit integer (a-z) + logical found,endfil,valid,used + integer sline(500),seqnum(500),nseq,nmax,rnum + character*8 stype + character*30 seqids(500) +c character*30 choice,seqid + character*30 seqid + character*50 filnam,fmtseq(500) + character*80 reclin + character*1 seq(nmax) + data found/.false./, + . endfil/.false./, + . valid/.false./ +c ************************************************************** +c subroutine MULTID +c ************************************************************** +c PURPOSE: +c subroutine used to extract sequences from various +c format type files including +c STANFORD,GENBANK,EMBL,PIR,and,NRC +c This revised version of FORMID is shorter. Another +c variable RNUM is passed first into the subroutine on +c the first call to return the number of sequences in the +c file. The same variable is then used to request specific +c sequences in do_loops of the main program. +c This eliminates the task of having to select the sequences. 
+c ************************************************************** +c variable list table: +c * -- sent down from main returned unchanged +c ** - returned to main from subroutine +c ***- sent down from main and returned changed +c INTEGERS:IDCNT -- to keep track of the number of sequence +c identifiers found in the file +c I,K -- loop counters +c LINE -- to point to the record line number of a file +c N -- counter to extract the correct number of +c sequence elements +c * NMAX -- maximum length of sequence expected by user +c ** NSEQ -- length of the sequence retrieved from file +c POINTR -- pointer to point to sequence identifier +c chosen from array of identifiers +c RNUM -- the number of identified sequences returned +c to main if first use of the subroutine, +c the number sent down from main, identifying +c a sequence if not the first time used +c SEQNUM -- an ARRAY of the length of the sequences +c for the sequence identifiers found in the +c NRC-format type files +c SLINE -- an ARRAY of the record-line numbers where +c a sequence starts in a file +c START -- defines what column of a record to start +c reading the sequence in. +c CHARACTERS:CHOICE-- character string of length 20 to read in +c choice for sequence identifier to retrieve +c FILNAM -- character string of length 50 to read in +c the filename. +c FMTSEQ -- an ARRAY of characters, each length 50, +c describing how the sequence is to be read +c RECLIN -- a record of a file, length 80 characters +c ** SEQ -- an ARRAY of characters each length 1 to +c store the sequence +c ** SEQID -- retrieved name of the sequence +c SEQIDS -- an ARRAY of retrieved names of sequences +c STYPE -- character string of length 8 defining the +c format type of the file. 
+c LOGICALS:FOUND -- logical variable used in looping until +c something retrieved +c *** USED -- logical variable used in determining if the +c subroutine has been previously called +c VALID -- logical variable used in looping until +c some input valid +c +c +c if the subroutine has NOT been USED then must +c input the filename and do error checking on filename +c else branch to listing the sequences available in this file + if (.not.used) then +10 found = .false. + valid = .false. + used = .false. +c initialize for a new file by setting variables back +c to zero and blanking out old names + if (nmax.eq.0) nmax = 9999 + nseq = 0 + stype = ' ' + seqid = ' ' + filnam= ' ' + do 500 i = 1,100 + sline(i) = 0 + seqnum(i)= 0 + seqids(i)=' ' + fmtseq(i)= ' ' +500 continue +111 write(6,102) 'Input sequence file name (/ to end) ' +102 format(1x,a38,$) + read(5,110,end=999,err=111) filnam + if (filnam(1:1).eq.'/') goto 999 +110 format(a50) +c open the file only after a valid filename has been retrieved +c error in filename results in prompting for the input again + open(66,file=filnam,status='OLD',err=10) +c find sequence file format type and the sequence identifiers + idcnt = 0 + line = 1 +c DO WHILE (.NOT.FOUND) +600 read(66,120,end = 410,err = 991) reclin +120 format(a80) +c STANFORD format, recognized by the ';' in the first column +c of the record + if (reclin(1:1).eq.';') then +c DO WHILE (.NOT.ENDFIL) +610 line = line + 1 + read(66,120,end = 410,err=991) reclin +c to find the next sequence identifer scroll through the +c file until the first character in the line is not ';' + if (reclin(1:1).ne.';') then + found = .true. + stype ='STANFORD' + idcnt = idcnt + 1 + sline(idcnt)= line + seqids(idcnt) = reclin(1:30) + line = line + 1 + read(66,120,end = 410,err=991) reclin +c DO WHILE ((INDEX(RECLIN,'1').EQ.0).AND. +c . 
(INDEX(RECLIN,'2').EQ.0)) +c ENDDO +615 if (index(reclin,'1').eq.0) then + if (index(reclin,'2').eq.0) then + line = line + 1 + read(66,120,end = 410,err=991) reclin + goto 615 + endif + endif +c assume at least one line not a sequence identifier occurs +c after last sequence read in to get around the CTRL-L problem. +c therefore, after reading in the record containing the end +c of sequence identifier, read in another record + line = line + 1 + read(66,120,end = 410,err=991) reclin + endif +c ENDDO + if (.not.endfil) goto 610 +c GENBANK format, recognized by the word LOCUS starting in the +c first position of the record + elseif (reclin(1:5).eq.'LOCUS') then + found = .true. + stype = 'GENBANK ' + idcnt = 1 + seqids(idcnt) = reclin(13:27) +c DO WHILE (.NOT.ENDFIL) +620 line = line + 1 + read(66,120,end = 410,err=991) reclin +c scrolling through to find the key phrase ORIGIN because +c the line after this key phrase occurrance is where the sequence +c occurs in the file; +c read through the file obtaining other sequences by scrolling +c through to the '//' which signals the end of sequence +c Then start looking for another sequence identifier. + if (reclin(1:6).eq.'ORIGIN') then + sline(idcnt) = line +c DO WHILE (RECLIN(1:2).NE.'//') +625 if (reclin(1:2).ne.'//') then + line = line + 1 + read(66,120,end = 410,err=991) reclin + goto 625 +c ENDDO + endif + elseif (reclin(1:5).eq.'LOCUS') then + idcnt = idcnt + 1 + seqids(idcnt) = reclin(13:27) + endif +c ENDDO + if (.not.endfil) goto 620 +c PIR format, recognized by the '>' occurring in column 1 of a +c record. The sequence itself start 2 lines down from the record +c containing '>'. + elseif (reclin(1:1).eq.'>') then + found = .true. 
+ stype = 'PIR ' +c DO WHILE (.NOT.ENDFIL) +630 if (reclin(1:1).eq.'>') then + idcnt = idcnt + 1 + sline(idcnt) = line + 1 + seqids(idcnt) = reclin(5:25) + endif + line = line + 1 + read(66,120,end = 410,err=991) reclin +c ENDDO + if (.not.endfil) goto 630 +c EMBL (european) format, recognized by the key phrase ID in the +c first column of the record. + elseif (reclin(1:5).eq.'ID ') then + found = .true. + stype = 'EMBL ' +c scrolling through to find the key phrase 'SQ Sequence' because +c the line after this key phrase occurrance is where the sequence +c occurs in the file; +c DO WHILE (.NOT.ENDFIL) +640 if (reclin(1:5).eq.'ID ') then + idcnt = idcnt + 1 + seqids(idcnt) = reclin(6:index(reclin,' ')) + elseif (reclin(1:13).eq.'SQ Sequence') then + sline(idcnt) = line + endif + line = line + 1 + read(66,120,end = 410,err=991) reclin +c ENDDO + if (.not.endfil) goto 640 +c NRC format, recognized by '(' occurring in the first column of +c a record. The record line following this line holds the number +c of elements in the sequence and the name of the sequence. The +c following line signals the beginning of the sequence. +c The sequence itself is read in by using variable format +c described by the '(' record line. This format statement on the +c record containing '(' ,must also be retrieved. + elseif (reclin(1:1).eq.'(') then + found = .true. + stype = 'NRC ' +c DO WHILE (.NOT.ENDFIL) +650 if (reclin(1:1).eq.'(') then + idcnt = idcnt + 1 + fmtseq(idcnt) = reclin(1:index(reclin,')')) + line = line + 1 + read(66,140,end = 410,err = 991) + . 
seqnum(idcnt),seqids(idcnt) + sline(idcnt) = line + endif + line = line + 1 + read(66,120,end = 410,err=991) reclin +c ENDDO +140 format(i4,5x,a30) + if (.not.endfil) goto 650 + endif +c keep scrolling through the file until a key phrase signalling +c a format type is recognized or the end of file is found + line = line + 1 +c ENDDO + if (.not.found) goto 600 +c if a format type has not been found then this cannot be a +c sequence file and return to main. +410 if (.not.found) then + write(6,105) ' No sequence identifiers found in this file ' +105 format(a50) + return + endif +c if this is a valid format type proceed + used = .true. +c if this is the first time used then return to main +c with RNUM, the number of identified sequences in the file + rnum = idcnt + endif +c IF USED this call before retreive a sequence using RNUM + seqid = ' ' + pointr = rnum + if ((pointr.lt.1).or.(pointr.gt.idcnt)) then +c stop ' ERROR IN MULTID, SEQUENCE REQUESTED NOT FOUND' + call exit(1) + else + seqid = seqids(pointr) + endif +c having obtained a valid sequence choice or default of +c one available sequence, rewind the file and retrieve the +c sequence + rewind(66,err = 992) +c having retrieved the line number in the file where this +c identifier occurs from SLINE(POINTR) scroll through the +c file until this line is reached + do 550 i = 1,sline(pointr) + read(66,120,end = 410,err=991) reclin +550 continue +c if nrc type then read sequence according to format type + if (stype.eq.'NRC ') then + nseq = seqnum(pointr) +c if the number in the NRC sequence is greater than +c the maximum number sent down from main, then +c truncate to NMAX and output a message to the user. + if (nseq.gt.nmax) then + write(6,160) ' Sequence truncated to ',nseq +160 format(1x,a30,i4) + nseq = nmax + endif + read(66,fmtseq(pointr),end = 420,err=991) + . 
(seq(k),k=1,seqnum(pointr)) +c Else if not type NRC then find the sequence by taking +c each letter of the next records until end-of-sequence +c indicator found or number in the sequence NMAX is reached + else + n = 1 + found = .false. +c DO WHILE (.NOT.FOUND) +670 read(66,120,end = 420,err=991) reclin +c there is a need for the two positions to start reading the +c sequence; to accommodate the GENBANK format and the STANFORD +c end-of-sequence checks. + if (stype.eq.'GENBANK ') then + start = 10 + else + start = 1 + endif + do 565 i = start,80 +c if the number in the sequence is less than the desired +c number, NMAX sent down from main, then retrieve sequence + if (n.le.nmax) then + if (.not.found) then + seq(n) = reclin(i:i) +c check if an early end of sequence + if ((seq(n).eq.'1').or.(seq(n).eq.'2').or. + . (reclin(1:1).eq.'/').or.(seq(n).eq.'*')) then + seq(n) = ' ' + nseq = n - 1 + found = .true. +c if not an end-of-sequence character but have gone far +c enough, then truncate to NMAX + elseif (n.eq.nmax) then + nseq = nmax + found = .true. + write(6,160) ' Sequence truncated to ',nseq +c if not a end-of-sequence character, check to see if it is +c not a blank character. Blank characters will not be added +c to the sequence. + elseif (seq(n).ne.' ') then + n = n + 1 + endif + endif +c if the number of sequence characters found is Greater than +c NMAX then all of the sequence has been found. + else + found = .true. + nseq = n + write(6,160) + . 
' Sequence truncated to ',nseq + endif +565 continue +c ENDDO + if (.not.found) goto 670 + endif +420 return +c 991 stop ' ERROR IN READING FILE ' +c 992 stop ' ERROR IN REWINDING FILE' +c 999 stop ' END of SESSION...GOOD BYE' +991 call exit(1) +992 call exit(1) +999 call exit(1) + end diff --git a/ZUKER/rfd.inc b/ZUKER/rfd.inc new file mode 100755 index 0000000..bab54f7 --- /dev/null +++ b/ZUKER/rfd.inc @@ -0,0 +1,60 @@ + implicit integer (a-z) + parameter (maxn=1500,maxn2=3000) + parameter (fldmax=maxn2) + parameter (infinity=16000,sortmax=30000) + parameter (mxbits=(maxn*(maxn+1)+31)/32) + parameter (maxtloops=40) + parameter (maxsiz=10000) + + integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn) + integer poppen(4),maxpen + real prelog + + dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2), + . work1(maxn2,0:2),work2(maxn2), + . stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30) + dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2) +c common /main/ newnum,hstnum,force,work1,work2, + common /main/ newnum,hstnum,force,work1,work2, + . stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n, + . numseq,poppen,prelog,maxpen,vst,wst1,wst2 + + character*1 seq(maxsiz) +c character*5 inbuf + character*10 progtitle + character*30 seqlab + common /seq/ seq,seqlab + dimension list(100,4) + common /list/ list,listsz + common /nm/ vmin + data progtitle/'lrna'/ + + dimension basepr(maxn) + common /traceback/ basepr + + dimension heapi(sortmax+1),heapj(sortmax+1) + common /heap/ heapi,heapj,num + + integer*2 marks(mxbits),force2(mxbits) + common/bits/marks,force2 + + integer*2 tloop(maxtloops,2),numoftloops + common/tloops/tloop,numoftloops + + + + + + + + + + + + + + + + + + diff --git a/ZUKER/rna.f b/ZUKER/rna.f new file mode 100755 index 0000000..5e5be0b --- /dev/null +++ b/ZUKER/rna.f @@ -0,0 +1,353 @@ +c MFOLD - Prediction of RNA secondary structure by free energy +c minimization. 
+c - Version 2.0
+c - Michael Zuker and John Jaeger
+c - LRNA : folds linear RNA sequences
+c - CRNA : folds circular RNA sequences
+c
+c The original version (1.0) was designed by Michael Zuker and
+c programmed by Eric Nelson in the summer of 1987 in the Division of
+c Biological Sciences at the National Research Council of Canada. John
+c Jaeger added the tetraloop bonus energy feature and created the
+c BATGEN program for batch file generation.
+c
+c Version 2.0 corrects a number of small bugs from the original
+c program. These corrections were also applied to version 1 and
+c itemized in the ERRATA.LIST file that was distributed along with
+c version 1. The major
+c improvements of version 2 are : 1. During the generation of
+c suboptimal foldings, the number of new base pairs that are
+c sufficiently different from base pairs that have already been found
+c must be greater than the WINDOW parameter. This feature was added
+c during the summer of 1989, and was made part of version 1 (item 11 in
+c the ERRATA.LIST file distributed with this version). The effect is to
+c eliminate structures that contain just a few new base pairs. 2.
+c Temperature dependent folding. This was added in the fall of 1989 and
+c was never a feature of version 1.
+c
+c METHOD : A dynamic programming algorithm is used to find optimal and
+c suboptimal foldings of an RNA molecule starting from linear sequence
+c data. Auxiliary information can be used to constrain the folding.
+c
+c NB : Base pairs are forced by giving them a bonus energy (EPARAM(9)
+c in the program code). These energies are subtracted during the
+c traceback algorithm so that the computed structures have the correct
+c energies. Unfortunately, there is no way to subtract the bonus
+c energies from the energy dot plots. Moreover, each forced base pair
+c contains two bonus energies because of the nature of the algorithm.
+c For example, suppose that an optimal folding of an RNA contains 3
+c forced base pairs ( default bonus energy is 50.0 kcal per forced base
+c pair ) and that the correct folding energy is -180.0 kcal/mole.
+c Internally, the energy will be -180.0 - (3+1) x 50.0 = -380.0
+c kcal/mole. To find foldings within 10% of the correct energy, one
+c needs to compute foldings to within 18.0 kcal of -180.0 - 3 x 50.0 =
+c -330.0 kcal/mole. This comes out to -312.0 kcal/mole. The ratio of
+c -312.0 to -380.0 is 82%, so that one would request the 18% level of
+c suboptimality! This confusion only exists when base pairs are
+c forced. Each closed excision counts as one forced base pair.
+c
+c Energy data from :
+c S.M. Freier et al., Proc. Natl. Acad. Sci. USA, 83, 9373-9377, 1986.
+c D.H. Turner et al., Cold Spring Harbor Symposia on Quantitative Biology,
+c 52, 123-133, 1987.
+c D.H. Turner et al., Annu. Rev. Biophys. Biophys. Chem 17, 167-192 (1988).
+c This last reference has all the dangling end and terminal mismatch data.
+c
+c References :
+c M. Zuker
+c On Finding All Suboptimal Foldings of an RNA Molecule.
+c Science, 244, 48-52, (1989)
+c
+c J. A. Jaeger, D. H. Turner and M. Zuker
+c Improved Predictions of Secondary Structures for RNA.
+c Proc. Natl. Acad. Sci. USA, BIOCHEMISTRY, 86, 7706-7710, (1989)
+c
+c J. A. Jaeger, D. H. Turner and M. Zuker
+c Predicting Optimal and Suboptimal Secondary Structure for RNA.
+c in "Molecular Evolution: Computer Analysis of Protein and
+c Nucleic Acid Sequences", R. F. Doolittle ed.
+c Methods in Enzymology, 183, 281-306 (1989)
+c
+c Main program.  Order of work, as driven by the calls below:
+c   banner (BEGIN), run setup (DEVICE), energy input (ENEFILES/ERG)
+c   or continuation dump (CDUMP), output setup (OUTPUTS), MENU,
+c   sequence preparation (PROCESS), energy fill (FILL), then a loop
+c   that selects a base pair (SORTOUT), traces back the included and
+c   excluded fragments (TRACE) and writes the folding
+c   (LINOUT / REGTAB / CT).
+c CNTRL(1) selects the run type: 1 is a save run, 2 is a
+c continuation run (see the tests on CNTRL(1) below).
+      include 'rfd.inc'
+      real energy
+      logical flag,mark
+
+c Fill screen with author and reference data.
+      call begin
+c Initial setup for run.
+5     call device
+      if (cntrl(1).ne.2) then
+c Read energy information if this is not a continuation run.
+        call enefiles
+        call erg(1,0,0,0,0)
+      else
+c dump out information read in from continuation file
+        call cdump
+      endif
+c Determine output specifications if this is not a save run.
+      if (cntrl(1).ne.1) call outputs
+c Call the menu if this is not a continuation run.
+      if (cntrl(1).ne.2) call menu
+      mrep = 1
+c CNTRL(7) = 0 - suboptimal dot plot
+c            1 - N best sorted by energy
+c            2 - best folding for all sequences in a file
+c Label 10 is the re-entry point for the next sequence in a
+c multiple sequence run (see the GOTO 10 at the end of the program).
+10    if (cntrl(7).eq.2) call mseq(mrep)
+c Process sequence before folding.
+c Both bit tables (traceback marks and forced pairs) are cleared
+c for every sequence.
+      do i = 1,mxbits
+        marks(i) = 0
+        force2(i) = 0
+      enddo
+      call process
+      if (n*2.gt.fldmax) then
+        tt = fldmax/2
+c Fragment is too long. Try again.
+c (The program reports the limit and stops; there is no retry here.)
+        write (6,*) 'Segment larger than ',tt
+        stop
+      endif
+      if (cntrl(1).ne.2) then
+c Fill the optimal energy arrays except in a continuation run.
+        call fill
+      endif
+
+      if (cntrl(1).eq.1) then
+c Save the results from FILL in a SAVE run and then stop.
+        call putcont
+        stop
+      endif
+
+
+c REP counts the foldings requested so far, FLAG keeps the
+c traceback loop running, ERR carries the status of the last call.
+      rep = 1
+      jump = 1
+      flag = .true.
+      err = 0
+
+      do while (flag)
+        if (cntrl(7).eq.0) then
+c Interactive dot plot returns IRET, JRET (new numbering).
+c Zuker comments out call to dotplt : do not choose this option
+C If you do, the program stops dead here.
+c call dotplt(iret,jret,jump)
+c The test below is always true, so this branch always stops.
+          if(1.eq.1) stop ' Energy dot plot disabled.'
+          jump = 2
+        else
+c Automatic sort returns IRET,JRET (new numbering).
+          print *,'traceback'
+          call sortout(iret,jret,rep,err)
+c SORTOUT returns ERR = 30 when it has no unmarked candidate left:
+c report it with the number of foldings produced and end the loop.
+          if (err.eq.30) then
+            flag = .false.
+            call errmsg(err,rep-1,0)
+            err = 0
+          endif
+          rep = rep + 1
+        endif
+c First traceback yields the best structure on the included fragment
+c from IRET to JRET.
+        if (flag) call trace(iret,jret,nforc1,err)
+        if (err.ne.0) call errmsg(err,iret,jret)
+        if (flag) then
+          it = iret+n
+c Second traceback yields the best structure on the excluded fragment
+c from IRET to JRET.
+          call trace(jret,it,nforc2,err)
+          if (err.ne.0) then
+            call errmsg(err,jret,it)
+          else
+c The energy of the best structure containing the base-pair IRET,
+c JRET is the sum of the energies of the optimal foldings on
+c the included and excluded fragments. A correction is made for
+c forced base-pairs.
+c* CALL EFN(ENE,1,N)
+c* WRITE (6,*) 'NEW ENERGY ',ENE
+c (The next statement is kept at column 7 so that it ends before
+c column 72 of the fixed source form.)
+      ene = v(iret,jret) + v(jret,iret+n) - eparam(9) * (nforc1+nforc2)
+c ENE is divided by 10 to obtain the value handed to the output
+c routines (presumably energies are stored in tenths of a
+c kcal/mole - confirm against the energy input code).
+            energy = float(ene) / 10.0
+c Count the number of new base pairs not within WINDOW
+c of existing base pairs.
+            numbp = 0
+            do k = 1,n
+              if(k.lt.basepr(k)) then
+                if(.not.mark(k,basepr(k))) numbp = numbp + 1
+              endif
+            enddo
+            do k = 1,n
+              if(k.lt.basepr(k)) then
+c Mark "traced-back" base pairs and also base-pairs
+c which are close (within WINDOW = CNTRL(9) ).
+                call smark(k,basepr(k))
+                if(cntrl(9).gt.0) then
+                  do k1 = -cntrl(9),cntrl(9)
+                    do k2 = -cntrl(9),cntrl(9)
+c Only neighbours that still satisfy 0 < I < J <= N are marked.
+                      if(k+k1.gt.0.and.k+k1.lt.basepr(k)+k2.and.
+     1              basepr(k)+k2.le.n) call smark(k+k1,basepr(k)+k2)
+                    enddo
+                  enddo
+                endif
+              endif
+            enddo
+c A folding with no more than WINDOW new base pairs is dropped:
+c REP is wound back so it is not counted, and the jump to 900
+c bypasses both the output and the CNTRL(6) limit test.
+            if(numbp.le.cntrl(9)) then
+              rep = rep - 1
+              go to 900
+            endif
+            write (6,1010) rep - 1
+c 1010 format('+',i5$)
+1010        format('+',i5)
+            if (cntrl(2).ne.2.and.cntrl(2).ne.3.and.cntrl(2).ne.6) then
+c Line printer output.
+              call linout(1,n,energy,iret,jret,err)
+            endif
+            if (err.ne.0) then
+              call errmsg(err,iret,jret)
+            else
+              if (cntrl(2).ge.3.and.cntrl(2).ne.4) then
+c Region table output.
+                call regtab
+              endif
+              if (mod(cntrl(2),2).eq.0.or.cntrl(2).eq.7) then
+c CT file output.
+                call ct(energy)
+              endif
+            endif
+          endif
+        endif
+c In the "N best" mode stop once REP has passed CNTRL(6).
+        if (cntrl(7).eq.1.and.rep.gt.cntrl(6)) flag = .false.
+900     continue
+      enddo
+c
+c Multiple sequence option (CNTRL(7) = 2)
+c If sequence number (MREP) is < total number of sequences
+c (CNTRL(5)), go get another sequence.
+c
+      if (cntrl(7).eq.2.and.mrep.lt.cntrl(5)) then
+        mrep = mrep + 1
+        goto 10
+      endif
+      stop
+      end
+
+c Marks a base-pair I,J.
+c Assumes that 1 <= I <= J <= N.
+c The information is stored in a single bit in the MARKS
+c array.
+c The conversion from double dimension to single is through the
+c transformation I,J ==> (J-1)*J/2 + I .
+c Sixteen consecutive positions share one element of MARKS:
+c WORD = (POSN+15)/16 selects the element and BIT = MOD(POSN,16)
+c the bit inside it, so a position that is a multiple of 16 uses
+c bit 0 of its word.  MARK reads the table with the same mapping.
+c MARKS comes from rfd.inc; its declaration is not visible here
+c (presumably a 16-bit integer array - confirm in rfd.inc).
+      subroutine smark(i,j)
+      include 'rfd.inc'
+      integer*2 bit
+
+      posn = (((j-1)*j)/2) + i
+      word = (posn+15) / 16
+      bit = mod(posn,16)
+c marks(word) = iibset(marks(word),bit)
+      marks(word) = ibset(marks(word),bit)
+      return
+      end
+
+c Marks a forced base-pair I,J.
+c The incoming base-pair II,JI is processed to an I,J
+c base-pair satisfying 1 <= I <= J <= N.
+c   II > N          : both indices are shifted down by N
+c   JI > N only     : the pair becomes (JI-N, II)
+c   otherwise       : the pair is used unchanged
+c The information is stored in a single bit in the FORCE2
+c array.
+c The conversion from double dimension to single is through the
+c transformation I,J ==> (J-1)*J/2 + I , with the same word/bit
+c split as SMARK.  FCE reads the table with the same mapping.
+      subroutine sfce(ii,ji)
+      include 'rfd.inc'
+      integer*2 bit
+
+      if (ii.gt.n) then
+        i = ii - n
+        j = ji - n
+      elseif (ji.gt.n) then
+        i = ji-n
+        j = ii
+      else
+        i = ii
+        j = ji
+      endif
+
+
+      posn = (((j-1)*j)/2) + i
+      word = (posn+15) / 16
+      bit = mod(posn,16)
+
+c force2(word) = iibset(force2(word),bit)
+      force2(word) = ibset(force2(word),bit)
+      return
+      end
+
+c Retrieves information on whether or not the base-pair I,J
+c has been marked by a traceback passing through or close to
+c this pair.
+c Returns .TRUE. when the bit written by SMARK for I,J is set.
+c I,J are used as given (no reduction by N is done here).
+      logical function mark(i,j)
+      include 'rfd.inc'
+      integer*2 bit,one
+
+      one = 1
+      posn = (((j-1)*j)/2) + i
+      word = (posn+15) / 16
+      bit = mod(posn,16)
+
+c set = iibits(marks(word),bit,one)
+c Extract the single bit at position BIT.
+      set = ibits(marks(word),bit,one)
+      mark = .false.
+      if (set.ne.0) mark = .true.
+      return
+      end
+
+
+c Retrieves information on whether or not the base-pair I,J
+c has been forced.
+c II,JI are first reduced to 1 <= I <= J <= N exactly as in SFCE,
+c then the bit written by SFCE is tested.
+      logical function fce(ii,ji)
+      include 'rfd.inc'
+      integer*2 bit,one
+
+      if (ii.gt.n) then
+        i = ii - n
+        j = ji - n
+      elseif (ji.gt.n) then
+        i = ji-n
+        j = ii
+      else
+        i = ii
+        j = ji
+      endif
+
+      one = 1
+      posn = (((j-1)*j)/2) + i
+      word = (posn+15) / 16
+      bit = mod(posn,16)
+
+c set = iibits(force2(word),bit,one)
+      set = ibits(force2(word),bit,one)
+      fce = .false.
+      if (set.ne.0) fce = .true.
+      return
+      end
+
+c fills screen with author and reference information
+c Copies the file begin.dat (unit 3) to unit 6 line by line, then
+c prompts on unit 6 and waits for one character on unit 5.
+c If begin.dat cannot be opened a notice is written instead and
+c the same prompt is issued.
+      subroutine begin
+      character*1 ans
+      character*80 record
+      open(3,file='begin.dat',status='old',err=5)
+      write(6,1010)
+1010  format(' ')
+1     read(3,1020,end=2) record
+1020  format(a80)
+      write(6,1030) record
+c1030 format(' ',a80)
+1030  format(a80)
+      go to 1
+c End of file: release unit 3 before prompting so the unit does
+c not stay attached to begin.dat for the rest of the run.
+2     close(3)
+      write(6,1040)
+c 1040 format(' Press to continue ...'$)
+c 1040 format(' Press to continue ...'$)
+c NOTE(review): the prompt text looks as if a word is missing
+c after "Press"; it is kept exactly as found - confirm upstream.
+1040  format(' Press to continue ...',$)
+      read(5,1050) ans
+1050  format(a1)
+      return
+c Open failed: nothing to close on this path.
+5     write(6,1060)
+1060  format(//' Author and reference file not available.'//)
+c
+c C.Wang copied the next 2 lines here. 6/12/91
+c to keep consistency.
+ 3    write(6,1040)
+      read(5,1050) ans
+      return
+      end
diff --git a/ZUKER/sort.f b/ZUKER/sort.f
new file mode 100755
index 0000000..0b53a3e
--- /dev/null
+++ b/ZUKER/sort.f
@@ -0,0 +1,134 @@
+      subroutine sortout(i,j,rep,err)
+c Returns in I,J the next base pair to fold around.
+c   I,J  (out) heap entry chosen; left untouched when ERR = 30
+c   REP  (in)  1 on the first call for a sequence
+c   ERR  (out) 0 on success, 30 when no unmarked candidate is left
+c The candidate list is (re)built when REP = 1 or, in the
+c multiple sequence mode CNTRL(7) = 2, on every call.  CNTR is the
+c current position in the list; it starts at NUM and moves toward
+c 1, and is expected to keep its value between calls
+c (presumably through rfd.inc - confirm).
+      include 'rfd.inc'
+      logical mark
+c The first time in (REP = 1), valid I,J base-pairs are
+c sorted by energy.
+      err = 0
+      if (rep.eq.1.or.cntrl(7).eq.2) then
+        call build_heap
+        call heap_sort
+        cntr = num
+      endif
+c An empty candidate list (CNTR < 1) is reported with the same
+c code as an exhausted one, before HEAPI/HEAPJ are indexed.
+      if (cntr.lt.1) then
+        err = 30
+        return
+      endif
+c Select the next valid unmarked base-pair
+      do while (mark(heapi(cntr),heapj(cntr)))
+        if (cntr.eq.1) then
+          err = 30
+          return
+        endif
+        cntr = cntr - 1
+      enddo
+c The base-pair I,J will be used to create a folding.
+      i = heapi(cntr)
+      j = heapj(cntr)
+
+      return
+      end
+
+
+c Add I,J to HEAPI and HEAPJ if the best energy of a folding containing
+c I,J is no greater than a given percent ( CNTRL(8) ) of the minimum
+c folding energy.
+      subroutine build_heap
+c Collects candidate base pairs in HEAPI/HEAPJ (count in NUM) and
+c orders them as a binary heap with the smallest ENE at index 1.
+c Pairs I,J with 1 <= I < J <= N are scanned row by row; a pair is
+c accepted when ENE(I,J) <= CRIT.  After an accepted pair the scan
+c skips CNTRL(9) further columns of the same row.
+c If SORTMAX pairs are already stored, error 31 is reported through
+c ERRMSG, the scan stops, and the heap is built from what was
+c collected (the zero fill of the unused tail is skipped then).
+      include 'rfd.inc'
+
+      crit = vmin + abs(vmin)*cntrl(8)/100
+
+      num = 0
+      i = 1
+      j = 2
+      do while (i.lt.n)
+        if (ene(i,j).le.crit) then
+          if (num.eq.sortmax) then
+            err = 31
+            call errmsg(err,hstnum(i),hstnum(j))
+            goto 10
+          endif
+          num = num + 1
+          heapi(num) = i
+          heapj(num) = j
+          j = j + cntrl(9) + 1
+          if (j.gt.n) then
+            i = i + 1
+            j = i + 1
+          endif
+        else
+          j = j +1
+          if (j.gt.n) then
+            i = i + 1
+            j = i + 1
+          endif
+        endif
+      enddo
+
+c Clear the unused tail.  NOTE(review): this writes element
+c SORTMAX+1, so HEAPI/HEAPJ are assumed to have at least that
+c many elements - confirm in rfd.inc.
+      do i = num+1,sortmax+1
+        heapi(i) = 0
+        heapj(i) = 0
+      enddo
+
+c Sift each new element up toward the root.  The bound UP >= 1 is
+c tested on its own before HEAPI(UP) is used: Fortran may evaluate
+c both operands of .AND., so a combined test would read element 0
+c once the root is reached.  UP = 0 is used to leave the loop as
+c soon as the parent is not larger.
+10    do q = 2,num
+        cur = q
+        up = cur/2
+        do while (up.ge.1)
+          if (ene(heapi(cur),heapj(cur)).lt.
+     .        ene(heapi(up),heapj(up))) then
+            call swap(heapi(cur),heapi(up))
+            call swap(heapj(cur),heapj(up))
+            cur = up
+            up = cur/2
+          else
+            up = 0
+          endif
+        enddo
+      enddo
+
+      return
+      end
+
+
+c Efficient sort of heap.
+c Repeatedly moves the root (smallest ENE) to the end of the
+c still-unsorted part and restores the heap on elements 1..IR, so
+c the list ends up with the largest ENE at index 1 and the
+c smallest at index NUM.
+      subroutine heap_sort
+      include 'rfd.inc'
+
+c The loop runs down to IR = 1 so that the last two elements are
+c exchanged as well; stopping at IR = 2 would leave elements 1 and
+c 2 in heap order (smaller first) instead of sorted order.
+      do ir = num-1,1,-1
+        call swap(heapi(ir+1),heapi(1))
+        call swap(heapj(ir+1),heapj(1))
+
+        up = 1
+        c = 2
+        do while (c.le.ir)
+c Pick the smaller of the two children when both are inside 1..IR.
+          if (c.ne.ir) then
+            if (ene(heapi(c+1),heapj(c+1)).lt.ene(heapi(c),heapj(c)))
+     .      then
+              c = c + 1
+            endif
+          endif
+          if (ene(heapi(c),heapj(c)).lt.ene(heapi(up),heapj(up))) then
+            call swap(heapi(c),heapi(up))
+            call swap(heapj(c),heapj(up))
+            up = c
+            c = 2 * c
+          else
+c Heap property holds again: force the loop to end.
+            c = ir+1
+          endif
+        enddo
+      enddo
+      return
+      end
+
+c ENE(I,J) is V(I,J) + V(J,I+N): the minimum energy of a folding
+c containing the base-pair I,J (included plus excluded fragment).
+c No correction for forced base pairs is applied here.
+      function ene(i,j)
+      include 'rfd.inc'
+
+      ene = v(i,j)+ v(j,i+n)
+
+      return
+      end
+
+
+
+c Exchanges the values of I and J (implicitly typed arguments).
+      subroutine swap(i,j)
+
+      k = i
+      i = j
+      j = k
+
+      return
+      end
diff --git a/ZUKER/stack.dat b/ZUKER/stack.dat
new file mode 100755
index 0000000..3c48602
--- /dev/null
+++ b/ZUKER/stack.dat
@@ -0,0 +1,76 @@
+Data Arangement:
+
+ Y
+ ------------------
+ (X) A C G U
+ ------------------
+ 5' ==> 3'
+ AX
+ AY
+ 3' <== 5'
+ (A) . . . .
+ (C) . . . .
+ (G) . . . .
+ (U) -0.7 -0.1 -0.7 -0.1 + +-------------------------------- +STACKING ENERGIES : + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + AX AX AX AX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . . . . . . . . . -0.9 + . . . . . . . . . . . . . . -2.1 . + . . . . . . . . . . . . . -1.7 . -0.5 + . . . . . . . . . . . . -0.9 . -0.7 . + + + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + CX CX CX CX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . . . . . -1.8 . . . . + . . . . . . . . . . -2.9 . . . . . + . . . . . . . . . -2.0 . -1.5 . . . . + . . . . . . . . -1.7 . -1.5 . . . . . + + + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + GX GX GX GX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . . . . . -2.3 . . . . . . . -0.5 + . . . . . . -3.4 . . . . . . . -1.9 . + . . . . . -2.9 . -1.3 . . . . . -1.5 . -0.5 + . . . . -2.1 . -1.9 . . . . . -0.7 . -0.5 . + + + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + UX UX UX UX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . -1.1 . . . . . . . -0.7 . . . . + . . -2.3 . . . . . . . -1.3 . . . . . + . -1.8 . -0.7 . . . . . -1.5 . -0.6 . . . . +-0.9 . -0.5 . . . . . -0.5 . -0.5 . . . . . 
+ diff --git a/ZUKER/tloop.dat b/ZUKER/tloop.dat new file mode 100755 index 0000000..4221f80 --- /dev/null +++ b/ZUKER/tloop.dat @@ -0,0 +1,10 @@ + Seq Energy + ----------- + GAAA -2.0 + GCAA -2.0 + GAGA -2.0 + GUGA -2.0 + GGAA -2.0 + UUCG -2.0 + UACG -2.0 + GCGA -2.0 diff --git a/ZUKER/tstack.dat b/ZUKER/tstack.dat new file mode 100755 index 0000000..202b5a2 --- /dev/null +++ b/ZUKER/tstack.dat @@ -0,0 +1,76 @@ +Data Arangement: + + Y + ------------------ + (X) A C G U + ------------------ + 5' ==> 3' + AX + AY + 3' <== 5' + (A) . . . . + (C) . . . . + (G) . . . . + (U) -0.7 -0.1 -0.7 -0.1 + +-------------------------------- +STACKING ENERGIES : TERMINAL MISMATCHES AND BASE-PAIRS. + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + AX AX AX AX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . -1.0 . . . -0.7 . . . -1.1 -0.8 -1.0 -1.0 -1.0 + . . -1.1 . . . -1.1 . . . -1.6 . -0.7 -0.7 -0.7 -0.7 + . -1.9 . -1.5 . -1.0 . -0.9 . -1.9 . -1.5 -0.8 -1.0 -1.0 -1.0 +-0.8 . -0.8 . -0.7 . -0.7 . -0.8 . -0.8 . -0.8 -0.8 -0.8 -0.8 + + + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + CX CX CX CX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . -0.8 . . . -0.6 -1.9 -2.0 -1.9 -1.9 . . . -0.6 + . . -1.3 . . . -0.6 . -1.0 -1.1 -1.0 -0.8 . . -0.8 . + . -2.0 . -1.4 . -1.1 . -0.9 -1.9 -1.9 -1.9 -1.9 . -1.5 . -1.1 +-1.0 . -1.0 . -0.7 . -0.7 . -1.4 -1.5 -1.4 -1.2 -0.8 . -0.8 . 
+ + + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + GX GX GX GX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' + . . . -1.1 -1.1 -1.3 -1.3 -1.3 . . . -1.2 -0.8 -1.0 -1.0 -1.0 + . . -1.3 . -1.1 -0.6 -0.6 -0.5 . . -1.4 . -0.7 -0.7 -0.7 -0.7 + . -1.9 . -1.5 -1.6 -1.5 -1.4 -1.5 . -1.9 . -1.6 -0.8 -1.0 -1.0 -1.0 +-1.0 . -1.0 . -0.8 -0.8 -0.8 -0.7 -1.0 . -1.0 . -0.8 -0.8 -0.8 -0.8 + + + + Y Y Y Y +------------------ ------------------ ------------------ ------------------ + A C G U A C G U A C G U A C G U +------------------ ------------------ ------------------ ------------------ + 5' --> 3' 5' --> 3' 5' --> 3' 5' --> 3' + UX UX UX UX + AY CY GY UY + 3' <-- 5' 3' <-- 5' 3' <-- 5' 3' <-- 5' +-1.0 -0.8 -1.1 -0.9 . . . -0.5 -1.5 -1.4 -1.5 -1.4 . . . -0.5 +-0.7 -0.6 -0.3 -0.5 . . -0.5 . -0.9 -0.9 -0.7 -0.7 . . -0.7 . +-1.1 -0.9 -1.2 -0.9 . -0.8 . -0.7 -1.5 -1.4 -1.6 -1.4 . -1.2 . -0.9 +-0.3 -0.6 -0.3 -0.5 -0.7 . -0.7 . -0.9 -1.1 -0.9 -0.9 -0.8 . -0.8 . + diff --git a/ZUKER/xtermxtermmake b/ZUKER/xtermxtermmake new file mode 100755 index 0000000..e69de29 diff --git a/bin/CAP2 b/bin/CAP2 new file mode 100755 index 0000000..e239ab7 Binary files /dev/null and b/bin/CAP2 differ diff --git a/bin/Install.csh b/bin/Install.csh new file mode 100755 index 0000000..9845227 --- /dev/null +++ b/bin/Install.csh @@ -0,0 +1,53 @@ +#/bin/csh + +mkdir bin + +#echo "Making blast..." +#cd BLAST +#Install.sh +#cd .. + +echo "Making clustal..." +cd CLUSTAL +make +cd .. + +echo "Making core GDE editor" +cd CORE +install.csh +cd .. + +echo "Making FASTA" +cd FASTA +install.csh +cd .. + +echo "Making Harvard Genome Lab functions" +cd HGL_SRC +install.csh +cd .. + +echo "Making looptool" +cd LOOPTOOL +make +cd .. + +echo "Making PHYLIP" +cd PHYLIP +install.csh +cd .. 
+ +echo "Making ReadSeq" +cd READSEQ +install.csh +cd .. + +echo "Making other support programs" +cd SUPPORT +make +cd .. + +echo "Making Zuker MFOLD" +cd ZUKER +install.csh +cd .. diff --git a/bin/LoopTool b/bin/LoopTool new file mode 100755 index 0000000..3b32028 Binary files /dev/null and b/bin/LoopTool differ diff --git a/bin/Restriction b/bin/Restriction new file mode 100755 index 0000000..adcceec Binary files /dev/null and b/bin/Restriction differ diff --git a/bin/Zuk_to_gen b/bin/Zuk_to_gen new file mode 100755 index 0000000..4916b60 Binary files /dev/null and b/bin/Zuk_to_gen differ diff --git a/bin/count b/bin/count new file mode 100755 index 0000000..a9eeec2 Binary files /dev/null and b/bin/count differ diff --git a/bin/fasta2VESPA.pl b/bin/fasta2VESPA.pl new file mode 100755 index 0000000..38c836e --- /dev/null +++ b/bin/fasta2VESPA.pl @@ -0,0 +1,47 @@ +#!/usr/bin/perl + + +############################################################## +# @author : Wagied Davids +# @progname : fasta2snap.pl +# @proglang : Perl script +# @purpose : Fasta to SNAP format converter +# @input : Fasta format files +# @output : SNAP format files +# @date : 05.08.2001 +# @version : 0.001 +############################################################## + + + +use strict; + +my ($fileIN,$fileOUT); +my ($de,@seq); +my $seq; + +$fileIN= "infile"; +#$fileOUT=" "; + +open( FHIN, "$fileIN") || die "Error:$!"; +#open( FHOUT, ">$fileOUT" ) || die "Error:$!"; + +$/="%"; #input record seperator + +while(){ + + ($de,@seq)=split; + $seq=join("",@seq); + $seq= uc($seq); + + + $seq= substr($seq,0,-1); #> remaining at the end + print "$de\t\t$seq\n"; + #print FHOUT "$counter$de$seq\n"; + #print FHOUT "$de$seq\n"; + + +} +close(FHIN); +#close(FHOUT); + diff --git a/bin/fasta2VESPA.pl~ b/bin/fasta2VESPA.pl~ new file mode 100755 index 0000000..f384694 --- /dev/null +++ b/bin/fasta2VESPA.pl~ @@ -0,0 +1,47 @@ +#!/usr/bin/perl + + +############################################################## +# 
@author : Wagied Davids +# @progname : fasta2snap.pl +# @proglang : Perl script +# @purpose : Fasta to SNAP format converter +# @input : Fasta format files +# @output : SNAP format files +# @date : 05.08.2001 +# @version : 0.001 +############################################################## + + + +use strict; + +my ($fileIN,$fileOUT); +my ($de,@seq); +my $seq; + +$fileIN= "infile"; +#$fileOUT=" "; + +open( FHIN, "$fileIN") || die "Error:$!"; +#open( FHOUT, ">$fileOUT" ) || die "Error:$!"; + +$/="%"; #input record seperator + +while(){ + + ($de,@seq)=split; + $seq=join("",@seq); + $seq= uc($seq); + + + $seq= substr($seq,0,-1); #> remaining at the end + print "$de\t$seq\n"; + #print FHOUT "$counter$de$seq\n"; + #print FHOUT "$de$seq\n"; + + +} +close(FHIN); +#close(FHOUT); + diff --git a/bin/fasta2snap.pl b/bin/fasta2snap.pl new file mode 100755 index 0000000..76b358d --- /dev/null +++ b/bin/fasta2snap.pl @@ -0,0 +1,47 @@ +#!/usr/bin/perl + + +############################################################## +# @author : Wagied Davids +# @progname : fasta2snap.pl +# @proglang : Perl script +# @purpose : Fasta to SNAP format converter +# @input : Fasta format files +# @output : SNAP format files +# @date : 05.08.2001 +# @version : 0.001 +############################################################## + + + +use strict; + +my ($fileIN,$fileOUT); +my ($de,@seq); +my $seq; + +$fileIN= "infile"; +#$fileOUT=" "; + +open( FHIN, "$fileIN") || die "Error:$!"; +#open( FHOUT, ">$fileOUT" ) || die "Error:$!"; + +$/="#"; #input record seperator + +while(){ + + ($de,@seq)=split; + $seq=join("",@seq); + $seq= uc($seq); + + + $seq= substr($seq,0,-1); #> remaining at the end + print "$de\t$seq\n"; + #print FHOUT "$counter$de$seq\n"; + #print FHOUT "$de$seq\n"; + + +} +close(FHIN); +#close(FHOUT); + diff --git a/bin/fasta2snap.pl~ b/bin/fasta2snap.pl~ new file mode 100755 index 0000000..38f5ca0 --- /dev/null +++ b/bin/fasta2snap.pl~ @@ -0,0 +1,47 @@ +#!/usr/bin/perl + + 
+############################################################## +# @author : Wagied Davids +# @progname : fasta2snap.pl +# @proglang : Perl script +# @purpose : Fasta to SNAP format converter +# @input : Fasta format files +# @output : SNAP format files +# @date : 05.08.2001 +# @version : 0.001 +############################################################## + + + +use strict; + +my ($fileIN,$fileOUT); +my ($de,@seq); +my $seq; + +$fileIN= "infile"; +#$fileOUT=" "; + +open( FHIN, "$fileIN") || die "Error:$!"; +#open( FHOUT, ">$fileOUT" ) || die "Error:$!"; + +$/=">"; #input record seperator + +while(){ + + ($de,@seq)=split; + $seq=join("",@seq); + $seq= uc($seq); + + + $seq= substr($seq,0,-1); #> remaining at the end + print "$de\t$seq\n"; + #print FHOUT "$counter$de$seq\n"; + #print FHOUT "$de$seq\n"; + + +} +close(FHIN); +#close(FHOUT); + diff --git a/bin/findall b/bin/findall new file mode 100755 index 0000000..7f200bc Binary files /dev/null and b/bin/findall differ diff --git a/bin/gde b/bin/gde new file mode 100755 index 0000000..3d64405 Binary files /dev/null and b/bin/gde differ diff --git a/bin/installBLASTDB.pl b/bin/installBLASTDB.pl new file mode 100755 index 0000000..7deadc0 --- /dev/null +++ b/bin/installBLASTDB.pl @@ -0,0 +1,52 @@ +#!/usr/bin/perl -w +use strict; + +my $newFileName; +my $line; + +my $sourceFile = shift; +my $menuName = shift; + +print("mv -f ./$sourceFile.* /usr/local/bio/db/\n"); +print("cp -f ./$sourceFile /usr/local/bio/db/\n"); + + + +print system("mv -f ./$sourceFile.* /usr/local/bio/db/"); +# or die ("cannot copy files\n"); +print system("cp -f ./$sourceFile /usr/local/bio/db/") ; +#or die ("cannot copy file\n"); + + +open(MENUFILE, "/usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/bio/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^arg:BLASTDBDNA/){ + print "FOUND\n"; + while (){ + print NEWFILE; + if 
(/^argchoice:/){ + print NEWFILE "argchoice:$menuName:/usr/local/bio/db/$sourceFile\n"; + last READLOOP; + } + } + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); +print "new file: $newFileName\n"; +system("cp $newFileName /usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/installBLASTDB.pl~ b/bin/installBLASTDB.pl~ new file mode 100755 index 0000000..977e53b --- /dev/null +++ b/bin/installBLASTDB.pl~ @@ -0,0 +1,52 @@ +#!/usr/bin/perl -w +use strict; + +my $newFileName; +my $line; + +my $sourceFile = shift; +my $menuName = shift; + +print("mv -f ./$sourceFile.* /usr/local/biotools/db/\n"); +print("cp -f ./$sourceFile /usr/local/biotools/db/\n"); + + + +print system("mv -f ./$sourceFile.* /usr/local/biotools/db/"); +# or die ("cannot copy files\n"); +print system("cp -f ./$sourceFile /usr/local/biotools/db/") ; +#or die ("cannot copy file\n"); + + +open(MENUFILE, "/usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/biotools/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^arg:BLASTDBDNA/){ + print "FOUND\n"; + while (){ + print NEWFILE; + if (/^argchoice:/){ + print NEWFILE "argchoice:$menuName:/usr/local/biotools/db/$sourceFile\n"; + last READLOOP; + } + } + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); +print "new file: $newFileName\n"; +system("cp $newFileName /usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/installBLASTDBPROT.pl b/bin/installBLASTDBPROT.pl new file mode 100755 index 0000000..f1cd486 --- /dev/null +++ b/bin/installBLASTDBPROT.pl @@ -0,0 +1,52 @@ +#!/usr/bin/perl -w +use strict; + +my $newFileName; +my $line; + +my $sourceFile = shift; +my $menuName = shift; + +print("mv -f ./$sourceFile.* /usr/local/bio/db/\n"); +print("cp -f ./$sourceFile /usr/local/bio/db/\n"); 
+ + + +print system("mv -f ./$sourceFile.* /usr/local/bio/db/"); +# or die ("cannot copy files\n"); +print system("cp -f ./$sourceFile /usr/local/bio/db/") ; +#or die ("cannot copy file\n"); + + +open(MENUFILE, "/usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/bio/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^arg:BLASTDBPROT/){ + print "FOUND\n"; + while (){ + print NEWFILE; + if (/^argchoice:/){ + print NEWFILE "argchoice:$menuName:/usr/local/bio/db/$sourceFile\n"; + last READLOOP; + } + } + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); +print "new file: $newFileName\n"; +system("cp $newFileName /usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/installBLASTDBPROT.pl~ b/bin/installBLASTDBPROT.pl~ new file mode 100755 index 0000000..fcb8c41 --- /dev/null +++ b/bin/installBLASTDBPROT.pl~ @@ -0,0 +1,52 @@ +#!/usr/bin/perl -w +use strict; + +my $newFileName; +my $line; + +my $sourceFile = shift; +my $menuName = shift; + +print("mv -f ./$sourceFile.* /usr/local/biotools/db/\n"); +print("cp -f ./$sourceFile /usr/local/biotools/db/\n"); + + + +print system("mv -f ./$sourceFile.* /usr/local/biotools/db/"); +# or die ("cannot copy files\n"); +print system("cp -f ./$sourceFile /usr/local/biotools/db/") ; +#or die ("cannot copy file\n"); + + +open(MENUFILE, "/usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/biotools/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^arg:BLASTDBPROT/){ + print "FOUND\n"; + while (){ + print NEWFILE; + if (/^argchoice:/){ + print NEWFILE "argchoice:$menuName:/usr/local/biotools/db/$sourceFile\n"; + last READLOOP; + } + } + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); +print "new file: $newFileName\n"; 
+system("cp $newFileName /usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/lsadt b/bin/lsadt new file mode 100755 index 0000000..1b4d297 Binary files /dev/null and b/bin/lsadt differ diff --git a/bin/newDATASET.pl b/bin/newDATASET.pl new file mode 100755 index 0000000..d936ace --- /dev/null +++ b/bin/newDATASET.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl + + +my $name = shift; +my $file = shift; + +open(MENUFILE, "/usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/bio/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^menu:seq. datasets/){ + print "FOUND\n"; + print NEWFILE "item:$name\n"; + print NEWFILE "itemmethod:readseq /usr/local/bio/GDE/db/$file -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp\n"; + print NEWFILE "out:OUTPUTFILE\n"; + print NEWFILE "outformat:genbank\n\n";\ + last READLOOP; + + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); + +system("cp $newFileName /usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/newDATASET.pl~ b/bin/newDATASET.pl~ new file mode 100755 index 0000000..e014012 --- /dev/null +++ b/bin/newDATASET.pl~ @@ -0,0 +1,37 @@ +#!/usr/bin/perl + + +my $name = shift; +my $file = shift; + +open(MENUFILE, "/usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/biotools/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^menu:seq. 
datasets/){ + print "FOUND\n"; + print NEWFILE "item:$name\n"; + print NEWFILE "itemmethod:readseq /usr/local/biotools/GDE/db/$file -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp\n"; + print NEWFILE "out:OUTPUTFILE\n"; + print NEWFILE "outformat:genbank\n\n";\ + last READLOOP; + + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); + +system("cp $newFileName /usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/newURL.pl b/bin/newURL.pl new file mode 100755 index 0000000..468f112 --- /dev/null +++ b/bin/newURL.pl @@ -0,0 +1,35 @@ +#!/usr/bin/perl + + +my $urlname = shift; +my $url = shift; + +open(MENUFILE, "/usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/bio/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^menu:On-Line/){ + print "FOUND\n"; + print NEWFILE "item:$urlname\n"; + print NEWFILE "itemmethod:netscape $url &\n"; + last READLOOP; + + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); + +system("cp $newFileName /usr/local/bio/GDE/CORE/.GDEmenus") + or die "cannot replace old menu file\n"; + + + + + + diff --git a/bin/newURL.pl~ b/bin/newURL.pl~ new file mode 100755 index 0000000..ccfb52b --- /dev/null +++ b/bin/newURL.pl~ @@ -0,0 +1,35 @@ +#!/usr/bin/perl + + +my $urlname = shift; +my $url = shift; + +open(MENUFILE, "/usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot open menu file, sorry\n"; +$newFileName = "/usr/local/biotools/GDE/CORE/.GDEmenusNew"; +open(NEWFILE, ">$newFileName"); + READLOOP: + while (){ + print NEWFILE; + if (/^menu:On-Line/){ + print "FOUND\n"; + print NEWFILE "item:$urlname\n"; + print NEWFILE "itemmethod:netscape $url &\n"; + last READLOOP; + + } + } +while (){ + print NEWFILE; +} +close(NEWFILE); +close(MENUFILE); + +system("cp $newFileName /usr/local/biotools/GDE/CORE/.GDEmenus") + or die "cannot replace old menu 
file\n"; + + + + + + diff --git a/bin/readseq b/bin/readseq new file mode 100755 index 0000000..ba1b981 Binary files /dev/null and b/bin/readseq differ diff --git a/bin/sho_helix b/bin/sho_helix new file mode 100755 index 0000000..0c953a3 Binary files /dev/null and b/bin/sho_helix differ diff --git a/bin/varpos b/bin/varpos new file mode 100755 index 0000000..801dbf0 Binary files /dev/null and b/bin/varpos differ diff --git a/db/HCVCG.fasta b/db/HCVCG.fasta new file mode 100644 index 0000000..c0c9822 --- /dev/null +++ b/db/HCVCG.fasta @@ -0,0 +1,119 @@ +>gi|9626438|ref|NC_001433.1| Hepatitis C virus, complete genome +TTGGGGGCGACACTCCACCATAGATCACTCCCCTGTGAGGAACTACTGTCTTCACGCAGAAAGCGTCTAGCCATGGCGTT +AGTATGAGTGTTGTGCAGCCTCCAGGACCCCCCCTCCCGGGAGAGCCATAGTGGTCTGCGGAACCGGTGAGTACACCGGA +ATTGCCAGGACGACCGGGTCCTTTCTTGGATCAACCCGCTCAATGCCTGGAGATTTGGGCGTGCCCCCGCGAGACTGCTA +GCCGAGTAGTGTTGGGTCGCGAAAGGCCTTGTGGTACTGCCTGATAGGGTGCTTGCGAGTGCCCCGGGAGGTCTCGTAGA +CCGTGCATCATGAGCACAAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGCCGCCCACAGGACGTTAAGTT +CCCGGGCGGTGGTCAGATCGTTGGTGGAGTTTACCTGTTGCCGCGCAGGGGCCCCAGGTTGGGTGTGCGCGCGACTAGGA +AGACTTCCGAGCGGTCGCAACCTCGTGGAAGGCGACAACCTATCCCCAAGGCTCGCCGGCCCGAGGGTAGGACCTGGGCT +CAGCCCGGGTACCCTTGGCCCCTCTATGGCAACGAGGGTATGGGGTGGGCAGGATGGCTCCTGTCACCCCGTGGCTCTCG +GCCTAGTTGGGGCCCCACAGACCCCCGGCGTAGGTCGCGTAATTTGGGTAAGGTCATCGATACCCTTACATGCGGCTTCG +CCGACCTCATGGGGTACATTCCGCTTGTCGGCGCCCCCCTAGGGGGCGCTGCCAGGGCCCTGGCACATGGTGTCCGGGTT +CTGGAGGACGGCGTGAACTATGCAACAGGGAATCTGCCCGGTTGCTCTTTCTCTATCTTCCTCTTAGCTTTGCTGTCTTG +TTTGACCATCCCAGCTTCCGCTTACGAGGTGCGCAACGTGTCCGGGATATACCATGTCACGAACGACTGCTCCAACTCAA +GTATTGTGTATGAGGCAGCGGACATGATCATGCACACCCCCGGGTGCGTGCCCTGCGTCCGGGAGAGTAATTTCTCCCGT +TGCTGGGTAGCGCTCACTCCCACGCTCGCGGCCAGGAACAGCAGCATCCCCACCACGACAATACGACGCCACGTCGATTT +GCTCGTTGGGGCGGCTGCTCTCTGTTCCGCTATGTACGTTGGGGATCTCTGCGGATCCGTTTTTCTCGTCTCCCAGCTGT +TCACCTTCTCACCTCGCCGGTATGAGACGGTACAAGATTGCAATTGCTCAATCTATCCCGGCCACGTATCAGGTCACCGC 
+ATGGCTTGGGATATGATGATGAACTGGTCACCTACAACGGCCCTAGTGGTATCGCAGCTACTCCGGATCCCACAAGCCGT +CGTGGACATGGTGGCGGGGGCCCACTGGGGTGTCCTAGCGGGCCTTGCCTACTATTCCATGGTGGGGAACTGGGCTAAGG +TCTTGATTGTGATGCTACTCTTTGCTGGCGTTGACGGGCACACCCACGTGACAGGGGGAAGGGTAGCCTCCAGCACCCAG +AGCCTCGTGTCCTGGCTCTCACAAGGCCCATCTCAGAAAATCCAACTCGTGAACACCAACGGCAGCTGGCACATCAACAG +GACCGCTCTGAATTGCAATGACTCCCTCCAAACTGGGTTCATTGCTGCGCTGTTCTACGCACACAGGTTCAACGCGTCCG +GGTGCCCAGAGCGCATGGCTAGCTGCCGCCCCATCGATGAGTTCGCTCAGGGGTGGGGTCCCATCACTCATGATATGCCT +GAGAGCTCGGACCAGAGGCCATATTGCTGGCACTACGCGCCTCGACCGTGCGGGATCGTGCCTGCGTCGCAGGTGTGTGG +TCCAGTGTATTGCTTCACTCCGAGCCCTGTTGTAGTGGGGACGACCGATCGTTTCGGCGCTCCTACGTATAGCTGGGGGG +AGAATGAGACAGACGTGCTGCTACTTAGCAACACGCGGCCGCCTCAAGGCAACTGGTTTGGGTGCACGTGGATGAACAGC +ACTGGGTTCACCAAGACGTGCGGGGGCCCTCCGTGCAACATCGGGGGGGTCGGCAACAACACCTTGGTCTGCCCCACGGA +TTGCTTCCGGAAGCACCCCGAGGCCACTTACACAAAGTGTGGCTCGGGGCCCTGGTTGACACCCAGGTGCATGGTTGACT +ACCCATACAGGCTCTGGCACTACCCCTGCACTGTTAACTTTACCGTCTTTAAGGTCAGGATGTATGTGGGGGGCGTGGAG +CACAGGCTCAATGCTGCATGCAATTGGACTCGAGGAGAGCGCTGTGACTTGGAGGACAGGGATAGGTCAGAACTCAGCCC +GCTGCTGCTGTCTACAACAGAGTGGCAGATACTGCCCTGTTCCTTCACCACCCTACCGGCCCTGTCCACTGGCTTGATCC +ATCTTCACCGGAACATCGTGGACGTGCAATACCTGTACGGTATAGGGTCGGCAGTTGTCTCCTTTGCAATCAAATGGGAG +TATATCCTGTTGCTTTTCCTTCTTCTGGCGGACGCGCGCGTCTGTGCCTGCTTGTGGATGATGCTGCTGATAGCCCAGGC +TGAGGCCACCTTAGAGAACCTGGTGGTCCTCAATGCGGCGTCTGTGGCCGGAGCGCATGGCCTTCTCTCCTTCCTCGTGT +TCTTCTGCGCCGCCTGGTACATCAAAGGCAGGCTGGTCCCTGGGGCGGCATATGCTCTCTATGGCGTATGGCCGTTGCTC +CTGCTCTTGCTGGCCTTACCACCACGAGCTTATGCCATGGACCGAGAGATGGCTGCATCGTGCGGAGGCGCGGTTTTTGT +AGGTCTGGTACTCTTGACCTTGTCACCATACTATAAGGTGTTCCTCGCTAGGCTCATATGGTGGTTACAATATTTTATCA +CCAGAGCCGAGGCGCACTTGCAAGTGTGGGTCCCCCCTCTCAATGTTCGGGGAGGCCGCGATGCCATCATCCTCCTTACA +TGCGCGGTCCATCCAGAGCTAATCTTTGACATCACCAAACTCCTGCTCGCCATACTCGGTCCGCTCATGGTGCTCCAGGC +TGGCATAACTAGAGTGCCGTACTTTGTACGCGCTCAGGGGCTCATCCGTGCATGCATGTTAGTGCGGAAGGTCGCTGGAG +GCCACTATGTCCAAATGGCCTTCATGAAGCTGGCCGCGCTGACAGGTACGTACGTATATGACCATCTTACTCCACTGCGG 
+GATTGGGCCCACGCGGGCCTACGAGACCTTGCGGTGGCAGTAGAGCCCGTCGTCTTCTCTGACATGGAGACTAAACTCAT +CACCTGGGGGGCAGACACCGCGGCGTGTGGGGACATCATCTCGGGTCTACCAGTCTCCGCCCGAAGGGGGAAGGAGATAC +TTCTAGGACCGGCCGATAGTTTTGGAGAGCAGGGGTGGCGGCTCCTTGCGCCTATCACGGCCTATTCCCAACAAACGCGG +GGCCTGCTTGGCTGTATCATCACTAGCCTCACAGGTCGGGACAAGAACCAGGTCGATGGGGAGGTTCAGGTGCTCTCCAC +CGCAACGCAATCTTTCCTGGCGACCTGCGTCAATGGCGTGTGTTGGACCGTCTACCATGGTGCCGGCTCGAAGACCCTGG +CCGGCCCGAAGGGTCCAATCACCCAAATGTACACCAATGTAGACCAGGACCTCGTCGGCTGGCCGGCGCCCCCCGGGGCG +CGCTCCATGACACCGTGCACCTGCGGCAGCTCGGACCTTTACTTGGTCACGAGGCATGCTGATGTCGTTCCGGTGCGCCG +GCGGGGCGACAGCAGGGGGAGCCTGCTTTCCCCCAGGCCCATCTCCTACCTGAAGGGCTCCTCGGGTGGACCACTGCTTT +GCCCTTCGGGGCACGTTGTAGGCATCTTCCGGGCTGCTGTGTGCACCCGGGGGGTTGCGAAGGCGGTGGACTTCATACCC +GTTGAGTCTATGGAAACTACCATGCGGTCTCCGGTCTTCACAGACAACTCATCCCCTCCGGCCGTACCGCAAACATTCCA +AGTGGCACATTTACACGCTCCCACTGGCAGCGGCAAGAGCACCAAAGTGCCGGCTGCATATGCAGCCCAAGGGTACAAGG +TGCTCGTCCTAAACCCGTCCGTTGCCGCCACATTGGGCTTTGGAGCGTATATGTCCAAGGCACATGGCATCGAGCCTAAC +ATCAGAACTGGGGTAAGGACCATCACCACGGGCGGCCCCATCACGTACTCCACCTATTGCAAGTTCCTTGCCGACGGTGG +ATGCTCCGGGGGCGCCTATGACATCATAATATGTGATGAATGCCACTCAACTGACTCGACTACCATCTTGGGCATCGGCA +CAGTCCTGGATCAGGCAGAGACGGCTGGAGCGCGGCTCGTCGTGCTCGCCACCGCCACGCCTCCGGGATCGATCACCGTG +CCACACCCCAACATCGAGGAAGTGGCCCTGTCCAACACTGGAGAGATTCCCTTCTATGGCAAAGCCATCCCCATTGAGGC +CATCAAGGGGGGAAGGCATCTCATCTTCTGCCATTCCAAGAAGAAGTGTGACGAGCTCGCCGCAAAGCTGACAGGCCTCG +GACTCAATGCTGTAGCGTATTACCGGGGTCTCGATGTGTCCGTCATACCGACTAGCGGAGACGTCGTTGTCGTGGCAACA +GACGCTCTAATGACGGGTTTTACCGGCGACTTTGACTCAGTGATCGACTGCAACACATGTGTCACCCAGACAGTCGATTT +CAGCTTGGATCCCACCTTCACCATTGAGACGACAACGCTGCCCCAAGACGCGGTGTCGCGTGCGCAGCGGCGAGGTAGGA +CTGGCAGGGGCAGGAGTGGCATCTACAGGTTTGTGACTCCAGGAGAACGGCCCTCAGGCATGTTCGACTCCTCGGTCCTG +TGTGAGTGCTATGACGCAGGCTGCGCTTGGTATGAGCTCACGCCCGCTGAGACCTCGGTTAGGTTGCGGGCTTACCTAAA +TACACCAGGGTTGCCCGTCTGCCAGGACCACCTAGAGTTCTGGGAGAGCGTCTTCACAGGCCTCACCCACATAGATGCCC +ACTTCTTGTCCCAGACCAAACAGGCAGGAGACAACCTCCCCTACCTGGTAGCATACCAAGCCACAGTGTGCGCCAGGGCT 
+CAGGCTCCACCTCCATCGTGGGACCAAATGTGGAAGTGTCTCATACGGCTAAAGCCCACACTGCATGGGCCAACGCCCCT +GCTGTACAGGCTAGGAGCCGTTCAAAATGAGGTCACTCTCACACACCCCATAACCAAATACATCATGGCATGCATGTCGG +CTGACCTGGAGGTCGTCACTAGCACCTGGGTGCTAGTAGGCGGAGTCCTTGCGGCTCTGGCCGCGTACTGCCTGACGACA +GGCAGCGTGGTCATTGTGGGCAGGATCATCTTGTCCGGGAGGCCAGCTGTTATTCCCGACAGGGAAGTCCTCTACCAGGA +GTTCGATGAGATGGAAGAGTGTGCTTCACACCTCCCTTACATCGAGCAAGGAATGCAGCTCGCCGAGCAATTCAAACAGA +AGGCGCTCGGATTGCTGCAAACAGCCACCAAGCAAGCGGAGGCTGCTGCTCCCGTGGTGGAGTCCAAGTGGCGAGCCCTT +GAGGTCTTCTGGGCGAAACACATGTGGAACTTCATCAGCGGGATACAGTACTTGGCAGGCCTATCCACTCTGCCTGGAAA +CCCCGCGATAGCATCATTGATGGCTTTTACAGCCTCTATCACCAGCCCGCTCACCACCCAAAATACCCTCCTGTTTAACA +TCTTGGGGGGATGGGTGGCTGCCCAACTCGCTCCCCCCAGCGCTGCTTCGGCTTTCGTGGGCGCCGGCATTGCCGGTGCG +GCCGTTGGCAGCATAGGTCTCGGGAAGGTACTTGTGGACATTCTGGCGGGCTATGGGGCGGGGGTGGCTGGCGCACTCGT +GGCCTTTAAGGTCATGAGCGGCGAGATGCCCTCCACTGAGGATCTGGTTAATTTACTCCCTGCCATCCTTTCTCCTGGCG +CCCTGGTTGTCGGGGTCGTGTGCGCAGCAATACTGCGTCGGCACGTGGGCCCGGGAGAGGGGGCTGTGCAGTGGATGAAC +CGGCTGATAGCGTTCGCTTCGCGGGGTAACCACGTCTCCCCCACGCACTATGTGCCCGAGAGCGACGCCGCGGCGCGTGT +TACTCAGATCCTCTCCAGCCTTACCATCACTCAGTTGCTGAAGAGGCTTCATCAGTGGATTAATGAGGACTGCTCCACGC +CTTGTTCCGGCTCGTGGCTAAAGGATGTTTGGGACTGGATATGCACGGTGTTGAGTGACTTCAAGACTTGGCTCCAGTCC +AAGCTCCTGCCGCGGTTACCGGGACTCCCTTTCCTGTCATGCCAACGCGGGTACAAGGGAGTCTGGCGGGGGGATGGCAT +CATGCAAACCACCTGCCCATGTGGAGCACAGATCACCGGACATGTCAAAAATGGCTCCATGAGGATTGTTGGGCCAAAAA +CCTGCAGCAACACGTGGCATGGAACATTCCCCATCAACGCATACACCACGGGCCCCTGCACGCCCTCCCCAGCGCCGAAC +TATTCCAGGGCGCTGTGGCGGGTGGCTGCTGAGGAGTACGTGGAGGTTACGCGGGTGGGGGATTTCCACTACGTGACGGG +CATGACCACTGACAACGTGAAATGCCCATGCCAGGTTCCAGCCCCTGAATTTTTCACGGAGGTGGATGGAGTACGGTTGC +ACAGGTATGCTCCAGTGTGCAAACCTCTCCTACGAGAGGAGGTCGTATTCCAGGTCGGGCTCAACCAGTACCTGGTCGGG +TCACAGCTCCCATGTGAGCCCGAACCGGATGTGGCAGTGCTCACTTCCATGCTCACCGACCCCTCTCATATTACAGCAGA +GACGGCCAAGCGTAGGCTGGCCAGGGGGTCTCCCCCCTCCTTGGCCAGCTCTTCAGCTAGCCAGTTGTCTGCGCCTTCTT +TGAAGGCGACATGTACTACCCATCATGACTCCCCGGACGCTGACCTCATCGAGGCCAACCTCCTGTGGCGGCAGGAGATG 
+GGCGGGAACATCACCCGTGTGGAGTCAGAAAATAAGGTGGTAATCCTGGACTCTTTCGATCCGATTCGGGCGGTGGAGGA +TGAGAGGGAAATATCCGTCCCGGCGGAGATCCTGCGAAAACCCAGGAAGTTCCCCCCAGCGTTGCCCATATGGGCACGCC +CGGATTACAACCCTCCACTGCTAGAGTCCTGGAAGGACCCGGACTACGTCCCCCCGGTGGTACACGGGTGCCCTTTGCCA +TCTACCAAGGCCCCCCCAATACCACCTCCACGGAGGAAGAGGACGGTTGTCCTGACAGAGTCCACCGTGTCTTCTGCCTT +GGCGGAGCTCGCTACTAAGACCTTTGGCAGCTCCGGGTCGTCGGCCGTTGACAGCGGCACGGCGACTGGCCCTCCCGATC +AGGCCTCCGACGACGGCGACAAAGGATCCGACGTTGAGTCGTACTCCTCCATGCCCCCCCTCGAGGGAGAGCCAGGGGAC +CCCGACCTCAGCGACGGGTCTTGGTCTACCGTGAGCGGGGAAGCTGGTGAGGACGTCGTCTGCTGCTCAATGTCCTATAC +ATGGACAGGTGCCTTGATCACGCCATGCGCTGCGGAGGAGAGCAAGTTGCCCATCAATCCGTTGAGCAACTCTTTGCTGC +GTCACCACAGTATGGTCTACTCCACAACATCTCGCAGCGCAAGTCTGCGGCAGAAGAAGGTCACCTTTGACAGACTGCAA +GTCCTGGACGACCACTACCGGGACGTGCTCAAGGAGATGAAGGCGAAGGCGTCCACAGTTAAGGCTAGGCTTCTATCTAT +AGAGGAGGCCTGCAAACTGACGCCCCCACATTCGGCCAAATCCAAATTTGGCTACGGGGCGAAGGACGTCCGGAGCCTAT +CCAGCAGGGCCGTCAACCACATCCGCTCCGTGTGGGAGGACTTGCTGGAAGACACTGAAACACCAATTGATACCACCATC +ATGGCAAAAAATGAGGTTTTCTGCGTCCAACCAGAGAAAGGAGGCCGCAAGCCAGCTCGCCTTATCGTATTCCCAGACCT +GGGGGTACGTGTATGCGAGAAGATGGCCCTTTACGACGTGGTCTCCACCCTTCCTCAGGCCGTGATGGGCCCCTCATACG +GATTCCAGTACTCTCCTGGGCAGCGGGTCGAGTTCCTGGTGAATACCTGGAAATCAAAGAAATGCCCTATGGGCTTCTCA +TATGACACCCGCTGCTTTGACTCAACGGTCACTGAGAATGACATCCGTACTGAGGAATCAATTTACCAATGTTGTGACTT +GGCCCCCGAAGCCAGGCAGGCCATAAGGTCGCTCACAGAGCGGCTTTATGTCGGGGGTCCCCTGACTAATTCGAAGGGGC +AGAACTGCGGTTATCGCCGGTGCCGCGCAAGTGGCGTGCTGACGACTAGCTGCGGCAACACCCTCACATGTTACTTGAAG +GCCACTGCGGCCTGTCGAGCTGCAAAGCTCCAGGACTGCACGATGCTCGTGAACGGAGACGACCTTGTCGTTATCTGTGA +GAGTGCGGGAACCCAGGAGGATGCGGCGGCCCTACGAGCCTTCACGGAGGCTATGACTAGGTATTCCGCCCCCCCCGGGG +ACCCGCCCCAACCAGAATACGACTTGGAGCTGATAACGTCATGCTCCTCCAATGTGTCGGTCGCGCACGATGCATCCGGC +AAAAGGGTGTACTACCTCACCCGTGACCCCACCACCCCCCTCGCACGGGCTGCGTGGGAGACAGTTAGACACACTCCAGT +CAACTCCTGGCTAGGCAATATCATCATGTATGCGCCCACCCTATGGGCGAGGATGATTCTGATGACTCATTTCTTCTCTA +TCCTTCTAGCTCAGGAGCAACTTGAAAAAGCCCTGGATTGTCAGATCTACGGGGCCTGTTACTCCATTGAGCCACTTGAC 
+CTACCTCAGATCATTGAACGACTCCATGGTCTTAGCGCATTTTCACTCCACAGTTACTCTCCAGGTGAGATCAATAGGGT +GGCTTCATGCCTCAGGAAACTTGGGGTACCGCCTTTGCGAGTCTGGAGACATCGGGCCAGAAGTGTCCGCGCTAAGCTAC +TGTCCCAGGGGGGGAGGGCTGCCACTTGCGGCAAGTACCTCTTCAACTGGGCAGTAAAGACCAAGCTTAAACTCACTCCA +ATCCCGGCTGCGTCCCAGCTAGACTTGTCCGGCTGGTTCGTTGCTGGTTACAACGGGGGAGACATATATCACAGCCTGTC +TCGTGCCCGACCCCGTTGGTTCATGTTGTGCCTACTCCTACTTTCTGTAGGGGTAGGCATCTACCTGCTCCCCAACCGGT +GAACGGGGAGCTAACCACTCCAGGCCAATAGGCCATTCCCTTTTTTTTTTTTC diff --git a/db/HIV1POLDNA.fasta b/db/HIV1POLDNA.fasta new file mode 100755 index 0000000..48c435a --- /dev/null +++ b/db/HIV1POLDNA.fasta @@ -0,0 +1,5704 @@ +>CONSENSUS_A +TTTTTTAGGGAaAATtTGGCCTTCCagcAa------------gGGGAG---GCCaGGaAATTTTCCTCAG +AgCAGACc------gGAgCCAaCAGCcCC---------------------------------ACC???AG +CAGAgatctttgggATggGgGAAgaGAt---------------agcCTCCCCTccGAagCAGGAGcagAa +agacaggga------???acaggacccaCCtTtAgTTTCCCTCAAATCACTcTTTGGCAACGACCtcTTG +TCACAGTAAaaaTAGggGGacaGCtaAaAgAAGCTCTATTAGATACAGGAGCAGATGATACAGTatTAGA +AGAcATAAATTTgCCAGGAAAATGGAaACCAAAAATgATAGGGGGAATTGGAGGtTTcATCAAgGTAAaa +CAgTATGATCAGaTActtATAGAAATTTGTGGaAAAAAGgCTATAGGTACAGTATTaGTAGGaCCTACAC +CTGTCAACATAATTGGAAGaAAtATGTTGACcCaGATTGGTTGTACTtTAAATTTcCCAATTAGTCCTAT +TgagACTGTACCAGTAAaAtTAAAgCCAGgAATG---GATGGcCCAAaggTTAAACAATGGCCATTGACA +GAAGAaAAAATAAAAGCATTAACAGAAATTTGT------acaGA?ATGGAAAagGAAGGAAaAATtTCAA +AAATTGGGCCtGAaAATCCaTAcAATACtCCAaTATTTGCtATAAAgAAaAAagAcAGcacTAaaTGGAG +aAAAtTAGTAGATTTcAGAGAgCTCAATAAAAGAACtCAaGAcTTtTGGgaAgttcaattaggaATaCCg +CATcCaGCgGGcctAAAAAAGAAAAAaTCAGTAACAGTaCTaGATgTgGGGGACGCATATTTtTCAGTTC +CtTTagATgAaagcTTTAGaAAgTATACtGCaTTCACCATACCtAGTAcAAAcAAtGagACACCAGGAaT +CAGgTATCAGTAcAATGTGCTTCCaCAGGGATGGAAaGGATCACCggCaATATTCCAGagtAGCATGAcA +AAAATCTTAGAGCCcTTtAGAtcAaaaaATCCAGAaaTAaTTATCTAtCAATACATGGATGAcTTGTATG +TaGGaTCTGATtTAGAAATAGggCAgCATAGaaCAAAaaTAGAaGAgTTaAGagctCATCTATTgAgcTG +GGGatTtacTACaCCAGAC---aAAAAGCATCAGAAaGAACCtCCATTtCTTTGGATGGGaTATGAacTc 
+CATCCTGAcAAgTGGACAGTcCAgCCtATAgagCTGCCAgAaAAaGAaAgcTGGACTGTCAATGATATAC +AGAAATTAGTgGGaAAACTaAATTGGGCAAGTCAaATTTATgcAGGaATTAAAGTAAagCAatTgTGtAa +aCTcCTCAGgGGAgCCAAAGCAcTAACAGAtaTAGTAacatTGACTGAgGAAGCAGAAtTAGAAtTgGCA +GAgAACAGgGAgATT---------cTAAAaGacCCTGTgCAtGGaGtaTAtTATGAcCCATCAAAAGACt +TAaTAGcAGAaATACAGAAACAAGGGCaAGACCAATGGAcATAtCAaATTTaTCAaGAGcCATTTAAaAA +TcTaAAAACAGGaAAaTATGCAAGAAAaAgGTCTGcTCACACTAATGATGTAAaaCAATTagcAGAAgTg +GTgCAAAAgGTggtCAtgGAAAGCATAGTAATaTGGGGAAAGAct---CCTAaATtTAaAcTACCCATAC +AaAAaGAaACATGGGAaaCATGGTGGATgGAcTATTGGCAgGCTACCTGgATTCcTGAATGGGAaTTTGT +CAATACCCCTCCtCTAGTAaaATtaTGGTACcagTTAGAgaAaGACCCCATAGcAGGAGcAGAgaCtTTC +TaTGTaGAtGGGGCAGCcAAtagggAaACTaagCtAGGAAaAGcAgGgTatGTcaCTgAcagaGGAaGaC +AaaagGTtGTttCcCTaaCTgAGACAACaaATCAaAagACTGAAcTaCaTGCaaTccaTctagCctTGCA +GGATTCAGGaTCAGaaGTaAacATAGTAAcaGACTCACAgTatGCAttaGGaATcATTCAgGCaCAaCCA +GACAggAGTGAaTCAGAgtTAGTcAATCAAATAATAGAGAAgcTAaTAGgAAAGGAcAAAGTcTACCTGt +CATGGGTACCAGCaCACAAaGGaATTGGAGGaAATGAaCAAGTaGATAAATTAGTCAGttCtGGAaTCAG +gAaGgTgcTaTTTTTaGATGGgATAGAtAAaGCTCAAGAa---GAaCATGAaAgATATCAcaGcAAtTGG +AGAaCAATGGCTAGTGATTTTAATCTGCCaCCtaTagTA---GCaAAgGAAATAGTAGCCAGCTGTgATA +AATGtCAgCTAAAAGGgGAAGCCATgCATGGACAAGTAGACTGtAGtCCAGGgATaTGGCAAtTAGATTG +cACaCATcTAGAAGGaAAAGTAATtcTgGTAGCAGTcCATGTaGCcAGTGGCTAtaTAGAAGCAGAAGTt +ATcCCAGCAGAAACAGGACAgGAgaCAGCATAcTTTcTaCTAAAatTAGCAGGAAGATGGCCAGTAAAAg +TAgTACACACAGAcAATGGCAGCAATTTCACCAGCGCTGCagTTAAAGCAGcCTGTTGGTGGGCaAatgT +ccAACAgGAATtTGGgATtCCCTACAATCCcCAAAGTCAAGGAGTAGTgGAaTCtATGAAtAAgGAATTA +AAGAAAATCATAGGgCAgGTAAGaGAgCAAGCTGAACAcCTTAagACAGCAGTACAAATGGCAGTaTTCA +TtCACAATTTTAAAAGAAaAGGGGGG---ATTGGGGGgTACAGTGCAGGGGAAAGaATAATAGACATAAT +AGCAaCAGACaTACAAACtAaAGAAtTACAAAAACAaATTacA---aAAATTCAAAAtTTTCGGGTTTaT +TACAGGGACAGCAGAGAtCCaaTTTGGAAAGGACCAGCAAAACTacTcTGGAAAGGTGAAGGGGCA---G +TagTAATACAgGACaAtAgTGAtATAAAgGTAGTaCCaAGAAGAAAAGcAAAgATCaTtAgGGATTATGG +AAAACAGATGGCAGGtGATGaTTGTgTGGCAgGTAGACAGGATGAgGAT--- +>A.KE.Q23-CXC-CG 
+TTTTTTAGGGAAAATTTGGCCTTCCAGAAA------------GGGGAG---GCCAGGGAATTTTCCTCAG +AACAGACT------GGAACCAACAGCTCC---------------------------------ACC---AG +CAGAGACCTGTGGGATGGGGGAAGAGAC---------------AGTCTCCCCTCTGAAGCAGGAGCAGAA +AGACAGGGA---------ACAGGCCCAACCCTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCTCTTG +TCACAGTAAGAATAGGGGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAA +CAATATGATCAGATACTTATAGAAATTTGTGGGAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAACATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATATAATACTCCAATATTTGCAATAAAGAAAAAAGATAGCACTAAGTGGAG +AAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGAAGTTCAATTAGGGATACCG +CATCCAGCGGGTCTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCAGTTC +CTTTACATGAAGAGTTTAGGAAATATACTGCATTCACCATACCTAGTACAAACAACGAGACACCAGGAGT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATCAAAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAGCAAAGATAGAAGAATTAAGAGCTCATCTATTGAGCTG +GGGACTTATTACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTC +CATCCTGACAAGTGGACAGTCCAGCCTATAGAGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGGAAACTAAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAA +ACTCCTCAGGGGAGCCAAAGCACTAACAGATGTAGTAACATTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAGAACCTGTACATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTTTCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAGACAATTAGCAGAAGTG +GTGCAAAAGGTGGTCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGACACATGGTGGATGGACTATTGGCAGGCTACCTGGATTCTTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTGTGGTACCAATTAGAGAAAGACCCCATAGCAGGAGCAGAGACTTTC 
+TATGTAGATGGGGCAGCTAATAGGGAAACTAAGCTAGGAAAAGCAGGATATGTTACTGATAAGGGAAGAC +AAAAGGTAGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAACTACATGCAATCCATTTAGCCTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCA +GACAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAGAAGCTAATAGAAAAGGACAAAGTTTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAG +AAAGGTGCTGTTTTTAGATGGGATAGACAAAGCTCAAGAA---GAACATGAAAGATATCATAGCAATTGG +AGAACAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAATTAGATTG +CACACATTTAGAAGGGAAAGTAATTGTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAAGAGACAGCATACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAA +TAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAAATAT +CCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAAGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACACATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTATTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGACATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGCGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.SE.SE6594 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGATCCTTGGGATAGGAGAAGAGAT---------------AGCCTCCCCTCCGAGACAGGAGCAGAC +------------------------CCCACCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAGTAGGGGGACAGCTAAGAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAA +CAATATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCGTATAATACTCCAATATTTGCTATAAAAAAGAAAAACAGCACTAGATGGAG +GAAACTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGAAGTTCAATTGGGAATACCG +CATCCAGCAGGCCTAAAAAAGAAAAATTCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCAGTTC +CTTTACATGAAGATTTTAGAAAATATACTGCATTCACCATACCTAGTACAAATAATGCGACACCAGGAAT +CAGGTATCAGTAYAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATCAAAAAATCCAGACATAATTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAACATAGAGCAAAAATAGAAGAGTTAAGATCTCATCTATTGAGCTG +GGGATTTACTACACCAGAC---CAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTC +CATCCTGACAAGTGGACAGTCCAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATCCAGGAATTAAAGTAAAGCAGTTGTGTAG +ACTCCTCAGGGGAGCCAAAGCACTAACAGACATAGTAGAATTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAGACCCTGTGCATGGAGTATATTATGATCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATACCAAATTTATCAGGAGCCATTTAAAAA +TCTAAAAACAGGGAAATATGCAAGAAAAAGGTCTGTTCACACTAATGATGTAAAGCAATTAGCAGAAATG +GTGCAAAAAGTGGTCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAGACATGGGAAACATGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTATTATGGTGGTACTTGTTAGAAGAGGACCCCATAGTAGGAGCAGACTCCTTC +TTTGTTGAGGGGGCAGCCAAGGAGAAGACTCTACKAGGAACAGGATGGTTCGTCGCTGAGGGGGGACGAC +AGGTGGTTGTCCCCCTTGCTAAGACAACTCATCAGACTACTGAACTCCCTGCCCTCCATCCCTCCTTGCA +GGATTCAGGATCAGTAGTCATCATAGTAAACGACTCACAGTCTGCAGGAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAGTAGGAAAGGACAAAGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTGGATAAATTAGTCAGTTCCGGAATCAG +GAAGGTACTGTTTTTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAACAATGGCTAGTGATTTTAATCTGCCACCTGTAATA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATTTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT 
+ATCCCAGCAGAAACAGGACAAGAGACAGCATACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATAT +CCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAGTCTATGAATAAGGAATTA +AAGAAAATCATAGGACAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACCAGAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.SE.SE7253 +TTTTTTAGGGAAAATGTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------GGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGCTTTTTGGGATGGGGGAAGAGAT---------------AGCCTCCCCTCCGAAGCAGGAGCAGAA +AGACAGGGA---------ACAGAACTCACCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGACCTCTTG +TCACAGTAAAAATAGGGGGTCAGCTAAGAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGGTTCATCAAGGTAAAA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGATTGGTTGTACTCTAAATTTCCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAGAATCCATATAATACTCCAATATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAG +GAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGCTTAAAAAAGAAAAAATCAGTAACAGTGCTAGATGTGGGGGACGCATATTTTTCAGTTC +CCTTAGATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTACAAACAATGAGACACCAGGAGT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTCAGATTAAAGAATCCAGAAATAATTATCTATCAATACATGGATGACTTGTATG +TGGGGTCTGATTTAGAAATAGGGCAGCATAGAACAAAAATAGAAGAGTTAAGAGCTCATCTATTGAGCTG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTT 
+CATCCTGACAAATGGACAGTCCAGCCTATAGAGCTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATGCAGGAATTAAAGTAAAGCAATTGTGCAG +ACTCCTCAGGGGAGCCAAAGCACTAACAGATATAGTAACACTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAGGACCCTGTGCATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCTAGACCAATGGACATATCAGATTTATCAGGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAGAAAGAGGTCTGCTCACACTAATGATGTAAGACAATTAGCAGAAGTG +GTGCAAAAAGTGGTCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGCAGGAGCAGAGACCTTC +TATGTAGATGGGGCAGCCAATAGGGAAACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGGC +AAAAGGTTGTTTCTCTAACTGAGACAACAAATCAAAAGACTGAACTACATGCAATCCATCTAGCCTTGCA +GGATTCAGGGTCAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GACAGTAGTGAATCAGAGATAGTTAATCAAATAATAGAGAAGCTAATAGGAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCTCTGGAATCAG +GAAGGTGTTATTTTTAGATGGGATAGATAAAGCTCAAGAA---GAGCATGAAAGATATCACAGCAACTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAATA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGCCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCAACTAGATTG +CACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTGGCCAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAA +TAGTACACACAGATAATGGCAGCAATTTCACCAGCGCTGCATTTAAAGCAGCCTGTTGGTGGGCAAGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAAATTTCGGGTTTAT +TACAGGGACAGCAGAGATCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TGGTAATACAGGACAACAATGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.SE.SE7535 
+TTTTTTAGGGAGAATTTGGCCTTCCAGCAA------------GGGGAG---GCCGGGAAATTTTCCTCAG +AGCAGACT------GGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGATCTTTGGAATGAGGGAAGAGAT---------------AGCCTCCCCTCCGAAGCAGGAGCAGAA +GGG---------------ACAAGACCCACCTTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCTCTTG +TCACAGTAAAAATAGAGGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTTTTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAA +CAGTATGATCAGGTACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGGCCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACCCCAATATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAAAGAACACAAGATTTTTGGGAAGTTCAGTTAGGAATACCG +CATCCAGCGGGCCTAAAAAAGAAAAAATCAGTAACAGTACTAGATTTGGGGGACGCATATTTCTCAGTTC +CTTTACATGAAGGCTTTAGAAAGTATACTGCGTTCACCATACCTAGTACAAACAATGCGACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGAAAGAAATCCAGAAGTAATTATCTATCAATACATGGATGATTTGTATG +TGGGATCTGATTTAGAAATAGGGCAGCATAGGACAAAAATAGAAGAGTTGAGAGAACATCTATTGAGCTG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAGGAACCCCCATTCCTTTGGATGGGATATGAGGTC +CATCCTGACAAGTGGACAGTCCAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGGAAACTAAATTGGGCAAGTCAAATTTATGCAGGGATTAAAGTAAGGCAATTGTGTAA +ACTCCTCAGGGGAGCCAAAGCACTAACAGATATAGTAACCTTGACTGAGGAAGCAGAATTAGAACTGGCA +GAGAACAGGGAGATT---------CTAAAAGACCCTGTGCACGGAGCATATTATGACCCATCAAAAGACT +TAATAGTAGAAATACAGAAACAAGGGCAAGACCAATGGACATACCAAATTTATCAAGAGACATTTAAAAA +TCTAAAAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAAACAATTAGCAGAAGTG +GTACAAAAGGTGGCCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAGATCTAAACTACCCATAC +AAAAAGAGACATGGGAGACATGGTGGATTGACTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAGAAAGACCCCATAGTAGGAGCAGAGACTTTC 
+TATGTAGATGGGGCAGCCAATAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTAACAGAGGAAGAC +AAAAAGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAACTACATGCAATTCTTCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTACGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAGTCAGAGTTAGTCAATCAAATAATAGAGAAGTTAATAGGAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAG +GAAGGTGCTGTTTTTAGATGGAATAGATAAAGCTCAAGAA---GAACATGAAAGATATCACAGTAATTGG +AGAACAATGGCTAGTGATTTTAATCTGCCACCCATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCAATTAGATTG +CACGCATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTC +ATTCCAGCAGAAACAGGACAAGAGACAGCATATTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAAACGT +CAAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGACAAGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAGAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGTAAAAATCATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.SE.SE8131 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCGGGAAATTTTCCTCAG +AGCAGACC------GGAGCCATCAGCCCC---------------------------------ACC---AG +CAGAGAT------GATGGGGGAAGAGAT---------------AGCCTCCCCTCCGAAGCAGGAGCAAAA +---------------------CAACCCACCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGACCTATTG +TCACAGTAAGAATAGGGGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTACTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATATTAATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTGGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAGAACAGTGATAGATGGAG +AAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGGATACCA +CATCCAGCGGGCTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGGGACGCATATTTTTCAGTTC +CCTTAGATGAAAGCTTTAGGAAGTATACTGCATTCACCATACCTAGTACAAATAATGAAACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGATCAAAGAATCCAGAAATAATTATCTACCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGAACAGCATAGAACAAAAATAGAAGAGTTGAGGGCTCATCTATTGAGCTG +GGGGTTTTTTACACCAGAC---CAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAGTGGACAGTCCAACCTATAAAGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGGAAACTAAATTGGGCAAGTCAAATTTATGCAGGGATTAAAGTAAAACAATTGTGTAA +ACTCCTCAGGGGAGCCAAAGCATTAACAGATATAGTAACATTGACTGAGGAAGCAGAACTAGAATTAGCA +GAGAACAGGGAGATT---------CTAAAAGACCCTGTGCATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAGGAGCCATTTAAAAA +TCTGAAAACAGGGAAATATGCAAGAAAAAAGTCTGCTCACACTAATGATGTAAGACAATTAGCAGAAGTA +GTGCAAAAAGTGGTCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGACACATGGGATACATGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGCAGGAGTAGAGACTTTC +TATGTAGATGGGGCAGCCAATAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGATAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCAATCCATCTAGCCCTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGAAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAG +GAAGGTGCTATTTTTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAACAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGCCCAGGAATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT 
+ATCCCAGCAGAAACAGGACAGGAGGCAGCATACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCATTTAAAGCAGCCTGTTGGTGGGCAAATGT +CCAACAAGAATTTGGAATTCCCTACAATCCTCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAGGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA----AAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTGCTTTGGAAAGGTGAAGGGGCA---G +TGGTAATACAGGACCAAAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>A.SE.SE8538 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGATCTTTGGGATGGGGGAAGTGAT---------------AACCTCCCCTCCGAAGCAGGAGCAGAA +AGACAAGGA---------ACAGGTCCCACCCTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAGTAGGAGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAGACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAA +CAGTATGATCAGATATCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAACTAAAACCAGGAATG---GATGGTCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCCGAAAATCCATACAATACCCCAATATTTGCTATAAAGAAAAAAGACAGCAATAGATGGAG +GAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGACTTTTGGGAAGTCCAATTAGGAATACCG +CATCCAGCGGGCCTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGACGCATATTTTTCAGTTC +CTTTAGATGAGAGCTTTAGGAAGTATACCGCGTTCACCATACCCAGTACAAACAATGAGACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAGGGATCACCGTCGATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAACAAAAATAGAAGAGTTAAGAGCTCATCTATTGAGCTG +GGGACTTACTACCCCAGAC---AAAAAGCATCAGAAAGAACCACCATTTCTTTGGATGGGATATGAACTC 
+CATCCTGACAAGTGGACAGTCCAGCCTATAAACCTGCCAGAAAAAGAAAGTTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTGAATTGGGCAAGTCAAATTTATGCAGGGATTAAAGTAAAGCAATTGTGTAA +GCTCCTCAGAGGAGCCAAAGCACTAACAGATATAGTAACATTGACTGAGGAAGCAGAACTAGAATTGGCA +GAGAACAGAGAGATT---------CTAAAAGCCCCTGTGCATGGAGTGTACTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAA +TTTAAAAACAGGAAAGTATGCAAGAAAGAGGTCTGCTCACACTAATGATGTAAAACAATTAGCAGAAGTG +GTGCAAAAGGTGGTCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAAGTACCCATAC +AGAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGCAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCCAATAGAGAAACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAAACTGAAGTACATGCAATCCATCTAGCTTTGCA +GGATTCAGGATCAGAGGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGCAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAATTAATAGGAAAGGACAAAGTATACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGGAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAG +GAAGGTGCTGTTTTTGGATGGGATAGATAAAGCTCAAGAA---GAACATGAGAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAATA---GCAAAGGAAATAGTAGCCAGCTGTAATA +AATGTCAACTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATGTGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCATTTAAAGCAGCCTGTTGGTGGGCAAATGT +CCAACAGGAATATGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGACAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGGATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.SE.SE8891 
+TTTTTTAGGGAAAATTTGGCCTTCCAACAG------------GGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------GGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGATCTTTGGGATGGGGGAAGAGAT---------------AACCTCCCCTCCGAAGCAGGAGAAGAA +AGACAGGGA------GTCGGAGGCACCACCCTTAATTTCCCTCAAATCACTTTTTGGCAACGACCCCTTG +TCACAGTAAGAATAGGGGGAATGCAAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAG +CAGTATGATCAGATACTTATAGAAATTTGTGGGAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAACATTAAAACCAGGAATG---GATGGCCCAAGAATTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AGAGAGATGGAAAAGGAAGGAAAAATCTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGCACTAAGTGGAG +AAAATTAGTAGATTTTAGAGAGCTCAATAAAAGAACTCAAGACTTCTGG-GA------------ATACCA +CATCCTGCAGGTTGAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGGGACGCATATTTTTCAGTTC +CTTTGGATAAAAACTTTAGAAAGTATACTGCGTTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +CAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGATA +AAAATCTTAGAGCCCTTTAGAGTACAAAATCCAGAAATAATTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATCTAGAAATAGGGCAGCATAGAGCAAAAGTAGAGGAGTTGAGAGCTCATCTATTGAGTTG +GGGGTTTACTACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAAACAAAGAAAACTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATGTAGGAATTAAAGTAAAGCAACTGTGTAA +ACTCCTCAGAGGAGCCAAAGCACTAACAGATATAGTAACACTGACTGAGGAAGCAGAATTAGAATTGGCA +GAAAACAGGGAGATT---------CTAAAAGACCCTGTGCATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAGAA +TCTAAAAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAGACAATTAGTAGAAGTG +GTGCAAAAGGTAGTCATGGAAAGCATAGTAATTTGGGGAAAGACT---CCTAAATTTAAATTACCCATAC +AAAAAGAAACATGGGAGACATGGTGGATGGACTATTGGCAAGCTACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGGGAAACTAAGCTAGGAAAAGCAGGATATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTACCCTAGCTGAGACAACAAATCAAAAGACTGAACTGCATGCAATCTATCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTGGGAATTATTCAGGCACAACCA +GACAGAAGTGAATCAGAGTTAGTTAATCAAATAATAGAGAAGCTAATAGGAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAGTCAG +GAGGATACTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GAACATGAAAGATATCACAGTAATTGG +AGAACAATGGCTAGTGATTTTAATCTGCCCCCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAACTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATGTGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTG +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCATTTAAAGCAGCCTGTTGGTGGGCAAATGT +CCAACAGGAATTTGGGATCCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAACAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TACTAATACAGGACAATAGTGATATAAAGGTAGTGCCCAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGGTTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.UG.92UG037 +TTTTTTAGGGAAAATCTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------AGAACCAACAGCCCC---------------------------------ACCAGCAG +CAGAGATCTTTGGGATGAGGGAAGAGAT---------------AGTCTCCCCTCCGAAGCAGGAGCAGAA +CGACAGGGA---------CCAGAACCCACCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGACCTCTTG +TCACAGTAAAGATAGGGGGACAGCTAAAAAAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAG +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTGGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACCCTGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TAGTACTGTACCAGTAAAATTAAAACCAGGAATG---GATGGCCCAAGGATTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------GCAGATATGGAAAGAGAAGGAAGAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAGGACAGCACTAAATGGAG +AAAATTAGTAGATTTTAGAGAGCTCAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCG +CATCCAGCGGGCTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCAGTTC +CCTTAGATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTACAAACAATGAGACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGGCCAGCATGACA +AAAATCTTAGAGCCCTTTAGATCAAAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAACAAAAATAGAAGAATTAAGAGAACATCTATTAAAATG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGATAAGTGGACAGTCCAACCGATAGAGCTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTAGGGAAACTAAATTGGGCAAGTCAAATTTATGCAGGAATTAAAGTAAAACAATTGTGTAA +ACTCCTCAGGGGAACCAAAGCATTAACAGATATAGTAACATTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------TTAAAAGACCCTGTGCATGGAGCATATTATGACCCATCAAAAGACT +TAATAGCAGAGATACAGAAACAAGGGCAAGACCAATGGATATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAAACAATTGGCAGAAGTG +GTGCAAAAGGTGGTCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGATGGACTATTGGCAGGCTACCTGAATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGCAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCCAATAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGGC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCAATCCATCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCCCAGCCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGAAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAGGTACTATTTTTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGGATATGGCAATTAGATTG +TACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTGGCTAGTGGCTACGTAGAAGCAGAAGTT 
+ATTCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTACTAAAGCTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCGGTTAAAGCAGCCTGTTGGTGGGCAAATGT +TAAACAGGAATTTGGTATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGGGAGCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTGTTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGACTTACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAAATTTCGGGTTTGT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TGGTAATACAGGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGTAAAGATCATTAAGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>A.UG.U455 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAAATCTTTGGGATGGGGGAAAAGAT---------------GACCTCCCCTGCGAAACAGGAGCTGAA +AGACAGGGA------------ACAGACTCCTTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGGAGGACAGCTGATAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTCTTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATAATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGACTATAGGTACAGTATTGGTAGGACCTACAC +CTGTCAACATAATTGGAAGGAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAACCAGAAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------AATGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGCACTAAATGGAG +GAAATTAGTAGATTTCAGAGAACTCAATAAAAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATCCCG +CATACAGCGGGTCTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTTAGAAAGTATACTGCGTTCACCATACCTAGTATAAACAATGAGACACCAGGAGT +CAGGTATCAGTACAATGTGCTTCCGCAGGGATGGAAAGGATCACCATCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATCACAACATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATG +TGGGATCTGATTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAATTAAGAGCTCATCTATTGAGCTG +GGGATTCATTACCCCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTT 
+CATCCTGACAAATGGACAGTTCAGCCTATACAGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTAGGAAAACTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAACTGTGTAA +ACTTCTCAGGGGAGCCAAAGCACTAACAGATATAGTAACCCTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAGACCCTGTGCATGGAGTATATTATGACCCATCAAAAGACC +TAGTAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAAACAATTAACAGAAGTG +GTGCAAAAAGTGTCCACAGAAAGCATAGTAATATGGGGAAAGATC---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAGCATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGCAGGAGCAGAGACATTC +TATGTAGATGGGGCAGCTAATAGGGAAACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCAATCCATCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAAATAGTCAATCAAATAATAGAGAAGCTAATAGAAAAGGAAAAAGTCTACCTGT +CATGGGTACCAGCGCACAAAGGGATTGGAGGAAATGAGCAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAGGTGCTATTTTTAGATGGGATAGATAAGGCTCAAGAG---GACCATGAAAAATATCACTGCAACTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTGGTA---GCGAAGGAAATAGTAGCCAGCTGTAATA +AATGTCAACTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTAATCCTAGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGTCTGTTGGTGGGCAAATAT +CCAACAGGAATTTGGGATCCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCCATGAACAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTATGGCAGGTAGACAGGATGAAGAT--- +>CONSENSUS_B 
+TTTTTTAGGgAAgATCtGGcCtTCCcacAa------------gGgaAG---GCcagggaAtTTtCttCAG +agCAGAcc------AGAgCCAaCAGCcCC???????????????---???????????????AcC---AG +aagAGAGCtTCaGGTttgGgGaagagAc------------aacaaCTCCctctCAGAAGCAGGAgccgat +agAcaagga????????????acTgTATCCtTTagCtTCCCTCagatCACTCTTTGgCAaCGACCCcTcG +TCacaaTAAagaTAGggGGgCAAcTaAAgGaaGCtCTATTAGATACAGGAGCAGATGAtACAGTAtTAGa +AgAaATgaaTtTGCCAgGaAgATGGAAaCCAAAaATGaTAGGgGGAATTGGAGGtTTTATCAaAGTaAgA +CAgTAtGAtcAgaTactcaTAGaaATcTGtGGACAtAAAGCTatAGGTACAGTaTTAGTAGGACCTACAC +CTGTCAACATAATTGGAAgAaATcTGTTGACTCAgaTTGGtTGcACTtTAaATTTtCCcATTAGtCCTAT +TgAaACTGTACCAGTaAaatTAAAgCCAGgAaTG???gATGGcCCAAaaGTtAAaCAATGGCCATTgaCA +GAaGAaAAAATAAAAGcAtTagtAGAaATtTGT??????AcaGAaATGgAAAAGGAAGGgAAaaTttCaA +aAaTtGGGCCTgAaAAtCCATAcAATACtCCAgTATTTGCcATAAaGAAAAAagAcaGTACTAaaTGGAG +AAAAtTAGTAGAtTTcAGaGAACTTAATAagAgaACtCAaGAcTTCTGGGAAGttCAatTAGGaATACCa +CAtCCcgCaGGgtTAAAAaAGAaaAaaTCaGtAACAGtacTgGAtGTGGGtGATgcAtAtTTtTCAgTtC +CctTAgATaAagactTcAGgAAgTAtACTGcaTTTACCATACCTAGTataaAcAATGagaCaCCagGgat +TAGaTATCAgTACAATGTGCTtCCaCAgGGaTGGAAaGGaTCaCCAgCaATATTCCaaagTAGcATGACa +aaAATcTTaGAgCCtTTTAgAAaaCaaAATCCAGAcaTagTtATCTATCAaTAcATGGATGAttTGTAtG +TAGGATCTgAcTTAGAaATAGggCAGCATAGAacAAAAaTAGAGGAacTGAGacaacATCTGTTgAgGTG +GGGatTTaccACACCaGAC--?aAaAAACATCAgAAAGAaCCtCCATTcCTTTGGATGGGtTATGAaCTc +CAtCcTgAtAAaTGGACaGTaCAgCCTATAgtgCTgCCaGAAAAaGAcAGcTGGACTGTCAATGAcATAC +AgAAgTTAGTGGGaAAaTTgaATTGGgCAAGTCAgATtTAtgCaGGgATtAaaGTaAaGcAatTATGTAA +aCTccTTAGgGGaaccAAAGCAcTaACAGAAgTAaTAccAcTAACAgAAGAaGCAGAgcTAGAacTgGCa +GaaAAcAGgGAgATt---------CTaAaAgaacCaGTaCAtGgAGtgTATTatGACCCAtCAaAaGAct +TaatAGcAGAaaTACAGAAgCAgGGgcaaGGcCAATGGACATAtCAaATtTaTCAaGAgccATtTAaaaa +tCTgaAAACAGgaAAaTATGCAAgaatGAggGGTgCcCACACtAATGATgTaAaACAaTTAaCAGAgGcA +GTgCAaAAaaTAgcCacaGAaaGCATAgTaATATGGggaAAgAct---CCtAAATtTAaAcTaCCCATaC +AaAaaGAAACATGGGAagCatGGTGGAcagAgTATtgGcAAGCcACCTGGATTCCTgAgTGGGAGttTgT +cAATACCcCTCCCtTAGTgAAaTTaTGGTAcCAgTTAGAgAaaGAaCCCATAgtAGgaGCAGAaACtTTc 
+TAtGTAgATGGgGCAGCtAatAggGAgACtAaAttAggaAAaGCAGGaTAtGTtACTgacAgaGGaAGAC +AaAaaGTtgTctcccTAactGACACaACAAATCAGAAgACtGAgtTacAaGCaATTcAtcTAGCttTgCA +gGATTCgGGattaGAAGTAAACATAGTaaCAGACTCaCAaTATGCAtTaGGaATcATTCAaGCACAaCCA +GATaaaAGTGAATCAGAgtTAGTcAgTcAaATAATAGAgcAgtTaATAaaAAAGGaaaagGTCTAcCTgg +CaTGGGTaCCAGCACAcAAAGGaATTGGaGGaAATGAAcAagTAGATAaaTTaGTCAGTgctGGaaTCAG +gAaAGTaCTATTTttagATGGaATAGAtAaGGCcCAAGAa---GAaCATGAgAaATAtCAcagtAAtTGG +AgAGCaATGGCTagTGAtTTTAAccTgCCAcCtgTaGTa---GCAAAAGAaATAGTAGCCaGcTGTgATA +AaTGtCAGcTaAAaGGaGAaGCCAtGCATGGaCAAGTAGAcTgtAGtCCaGGAATATGGCAacTAGATTG +tACACATtTaGAaGgAAAaaTTATCcTgGTaGCaGTtCATGTaGCcAGTGGaTAtATAGAaGCAGAaGTt +ATTCCAGcAGAgACAGggCAgGAaACAGCATAcTTtcTctTAAaAtTAGCAGGaAGATGGCCAGTaaaaa +cAaTaCAtACaGACAATGGcagcAAtTTCAccaGtactaCgGTTAagGCCGCCTgtTGGTGGgCaGGgaT +CAagCAggAATTTGGcAttCCcTACAATCCCCAAAGtCAAGGaGTagTaGaaTCTATgaATaaagaaTTA +AAGaAaATTATAGgaCAgGTaAGAGATCAgGCTGAACAtCTTAAgAcAGCaGTACAaATGGCAGTATTcA +TcCACAATTtTAaaAGAAAAGGGGGG--?ATTGGGGggTAcaGTGCAGGGGaAAGAATAgTaGacATaAT +AGCaaCAGAcaTaCAaactaaagAatTACAaaAaCAAaTTACA---AaaaTtCAAaATTtTCGGGTtTAT +TACAGgGaCAgCAgAgAtCCacTTTGGAAAGGACcaGCAAAgCTtCTcTGGAAAGGtGAaGGGGCA---G +TAGTAATACAaGATAAtAgTGAcATAAAAGTAGTGCCAAGAAGaAAaGcAAAgaTcATtAGgGAtTATGG +AAAaCAGAtGGcagGTGATGATTGTGTGGCAaGtAGACAGGATGAGGAT--- +>B.-.NL43E9 +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAGCGACCCCTCG +TCACAATAAAGATAGGGGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGCGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAGTG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAACAGAAAAAATCAGTAACAGTACTGGATGTGGGCGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGTGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTCATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTATCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATAC +AGAAATTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAGGCAATTATGTAA +ACTTCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAAGGGTGCTCACACTAATGATGTGAAACAATTAACAGAGGCA +GTACAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAATTACCCATAC +AAAAGGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCCAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTAACTGACAGAGGAAGAC +AAAAGGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCAGGATCAGAAGTAAACATAGTGACAGACTCACAATATGCATTGGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAGAAAAGGAAAAAGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTGGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTACCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGAATATGGCAGCTAGATTG +TACACATTTAGAAGGAAAAGTTATCTTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTA 
+ATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAGTACATACAGACAATGGCAGCAATTTCACCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCTTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACATATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAGTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.AU.MBC18 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAACTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCCTCTCCAGAGCAGGCC---GGAGCCATCAGCCCCACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAGGGA------CAGCAGCCTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGCATCTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAA +CAATATGATGAAATTCTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTCCCCATTAGTCCTAT +TGAAACTGTACCAGTAAGCTTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAGATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAGGAAAAAAGATGGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCTCAGGGTTAAAAAAGAAAAGATCTGTAACAGTACTGGATGTGGGTGATGCATACTTTTCAGTTC +CCTTAGATGAAAACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AGAATCTTAGAGCCCTTTAGAAGACAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAGCATCTGTTGAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAGCTC 
+CATCCTGACAAATGGACAGTGCAACCTATAGTACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGTTAGAACTGGCA +GAAAACAGGGAAATA---------CTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAGTACAGAAGCAGGGGGAAGGTCAATGGACATATCAAATTTATCAAGATCAATTTAGAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAAGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGACGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCCTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAACAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAGCTGACACAACAAATCAGAAGACAGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAAATAGTCAATAATATAATAGAGCAATTAATAAAAAAGGAAAAGGTCTACCTGG +CGTGGGTACCAGCACACAAAGGAATTGGAGGGAATGAAAAAGTAGATAAATTAGTCAGTACTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAGGGCACAAGAA---GACCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTGAAAGGAGAGGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATCTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAA +CAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCACTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCATCAGACATACAAACTAAGGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTCTGGAAAGGCGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.AU.MBC200 
+TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCGGGAAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTAGTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAGATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAATACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCTATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACACCATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTACAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTT 
+TATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGAC +AAAAGGTTGTCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTCTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGGATAGATAAGGCCCAAGAG---GAGCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAG +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCCGCCTGCTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGATTACTGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAGAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>B.AU.MBC925 +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGAGAAGGA------------ACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCG +TCACAGTAAAGGTAGGGGGACAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTAACA 
+GAAGAAAAAATAAAAGCATTAGTAGAAATTTGTATAGTTATGGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACGGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTATATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCATTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAGAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGATT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAACAGGGAGACGAAATTAGGAAAAGCAGGATATGTTACTAACAAAGGAAGAC +AAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCA +AGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATCAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAGTTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAATAATTGG +AGAGCAATGGCTAGTGATTTTAACATACCACCTGTAGTA---GCAAAAGAAATAGTAGCCTGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT 
+ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCCCCAATTTCACCAGTAATACAGTTAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.AU.MBCC54 +TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCAGGAAACTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AACAACTCCCTCCCAGAAGCAGGAGTCGAT +AGACAAGGA------------ACTGTATCCCTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAAATAGGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATCCTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAGATCTGTTGACTCAGATTGGTTGCACTTTATATTTTCCCATTAGTCCTAT +TGATACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATACTTTTCAGTTC +CCTTAGATGAAAACTTCAGGAAGTATACTGCGTTTACCATACCTAGTATAAATAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAGGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGACTTAGAGATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGAATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTAGCA +GAAAACAGGGAAATT---------CTAAAAGAGCCAGTACATGGAGTGTATTATGACCCATCAAAAGATT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAAGCACAGAAAGCATAATAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAGAGAAACATGGGAAGCATGGTGGACCGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCCTAGTGAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCTAACAGGGAGACTAAAATAGGGAAAGCAGGATATGTTACTAACAAAGGAAGAC +AAAAGGTTGTCCTCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCACGCACAACCA +GATAAAAGTGAATCAGAAATAGTCAGTCAAATAATAGAGCAATTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGCGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCACAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCCATGGCTAGTGATTTTAACCTGCCACCGGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATCTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAGGCAGAAGTT +ATTCCAGCAGAGACAGGGCAAGATACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGCCAAGGAGTTGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCCGTACAAATGGCAGTATTCA +TCCACAATTATAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATGCAAACTAAAGAATTACAAAAACAAATTACA---AGAATTCAACATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCGGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.AU.MBCC98 
+TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------CCTGTATCCTTTAGCTTCCCTCATATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGTAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGA +AGAGATGGCTTTGCCAGGTAGATGGAAACCAAACATGATAGGGGGAATTGGAGGTTTTATCAGAGTAACA +CAATATGATCAGATCCTCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGACACTGTACCAGTAAAACTAAAGCCAGGAATGGTGCATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATACTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATATAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATCTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGGCGACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGATAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAAGCAATTATGTAA +GCTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GATAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAGAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAGACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAGAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCCCTAGTGAAATTATGGTACCAGTTAGAGAGAGAGCCCATAATAGGAGCAGAGACTTTC 
+TATGTAGATGGGGCAGCTAATAAGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAGGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAAGTAGTCAGTCAAATAATAGAGCAATTAATAAAAAAGGAGAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGGAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGACAAGGCACAAGAA---GAACATGAGACATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCGATAGTA---GCAAAAGACATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTTTAGTCCAGGAATATGGCAACTAGATTG +TACACATCTAGATGGAAATATTATCCTGGTGGCAGTTCATGTAGCCAGTGGGTATATAGACGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGACACAGCATACTTTCTCTTAACATTAGCAGGAAGATGGCCAGTGAACA +CACTACATACAGACAATGGCAGCAATTTCACAAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAAT +CACACAGGAATTTGGCATTCCCTACAATCCCCAAAGCCAAGGAGTAGTCGCCTCTATGCATCGAGCATTA +AAGAAAATTATAGGACAGGTCAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTATAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.AU.MBCD36 +TTTTTTAGGAAAAATCTGGCCTTCCCACAA------------GGGGAG---GCAAGAGAACTTTCTTCAG +AGCAGACC------AGAGCCATCAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGAAAAC------------AACCACTCCCCCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AAAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAAAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGAAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAGATGAAAAAGGAAGGAAAAATTTCAA +GAATTGGGCCTAAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCCTCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATACTTTTCAGTTC +CCTTAGATGAAAGCTTCAGGAAGTATACTGCATTTACCATACCTAGTATCAATAATGAGACTCCGGGGAC +TAGATATCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTC +CATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTATATTGGGCAAGTCAGATTTACGCAGGAATCAGAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAGGCAGAGCTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAGAAAACCAGTACATGAAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAAAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGAAAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAGCATGGTGGACAAAGTATTGGCAAGCCACCTGGATTCCTAAGTGGGAGTTTGT +CAATACCCCTCCCCTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTT +TATGTAAATGGGGCAGCTAACAGGGAGACTAAAGTAAAAAAAGCAGGATATGTTACTAACAAAGGAAGAC +AAAAAGTTGTCCTCCTAAATGACACAACAAATCAGAAGACTGAGTTGCAAGCAATTCATCTAGCTTTGCA +GGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAAATAGTCAATCAAATAATAGAGCAATTAATAAAAAAGGAAAAAGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCACAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCAGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATCTAGAAGAAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAGGCAGAAGTC 
+ATTCCAGCAGACACAGCACAGGATACAGCATACTTTCTCTTAAGATTAGCAGGGAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCATCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAAGAATTTGGCATACCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAGAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATCATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.CN.RL42 +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAACTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AGGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCATCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTATATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGGTAGGAGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATCTGCGGACACAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TAAAACTGTACCAGTAAAATTAAAACCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATCGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGCTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCCGTAACAGTCCTGGATGTGGGTGATGCATATTTCTCAGTTC +CTTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTGTAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCTTGATAAATGGACAGTGCAGCCTATAATGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAGGTAAAGGAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCG +GAAAACAGGGAAATT---------CTGAAAGAATCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCTAGGCCAATGGACATACCAAATTTATCAAGAGCCATATAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGGAAGACT---CCTAAATTTAAACTACCCATAC +AAAAGGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTAT +CAATACCTCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCATAGAAGAAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATACGTTACTAACAAAGGAAGAC +AAAAAGTTGTCACCTTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATTTAGCTTTACA +GGATTCAGGAGTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTTAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGG +CATGGGTGCCAGCACACAAAGGAATTGGAGGGAATGAACAAATAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACTGTAACTGG +AGAGCAATGGCTAGTGATTTTAACCTACCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTAAAATTAGCAGGAAGATGGCCAGTGAAAA +CAATACATACAGACAATGGCAGAAATTTCACCAGTAATTCGGTTAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGCGTAGTAGAATCTATGGATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAGAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGGCAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTTTGGAAAGGTGAGGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAGGTAAAGATCATTAGGGACTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.DE.D31 
+TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +CGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AGCAACTCCCTTTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGCGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGACTTTAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGGT +TAGATATCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGGTTTACTACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAACGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAGTTATGGTACCAGTTAGAGACAGAACCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTCTGCA +GGATTCGGGATTAGAAGTAAACATAGTATCAGACTCACAATATGCAATAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGA +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAT---GAGCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGTTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAACTTCACCAGTACAACGGTTAAGGCCGCCTGTTGGTGGGCAGGGGT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>B.DE.HAN +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGAAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGCAAC---------------AGCTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCCTTAGCCTCCCTCAAATCACTCTTTGGCAGCGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTAGTAGA +AGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAATAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTCCTGGATGTGGGTGATGCATATTTTTCAGTTC +CCCTAGATAAAGACTTCAGAAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAATAGAGGAACTGAGACAGCATCTGTTGAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +GCTACTTAGGGGACCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAACTAGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTGTGACCCATCAAAAGACT +TAGTAGCAGAAATACAGAAGCAGGGGGAAGGCCAATGGACATATCAAATTTATCAAGAACCATTTAAGAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATATAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAGCATGGTGGACGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGGGGAAGAC +AAAAAGTTGTCTCCCTAAATGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGACTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +AAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GACCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAATGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTC 
+ATTCCAGTAGAGACAGGGCAGGAAACAGCATATTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAGTACATACAGACAATGGCCCTAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGGGTAGTAGAATCTATGAATAAAGAGTTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGAACCATTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGGCAGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.FR.HXB2 +TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTCTGGGGTAGAGAC------------AACAACTCCCCCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAGATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTG +GGGACTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGAGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTGCCCATAC +AAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTC +TATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGAC +AAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAT---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAAT +CAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.GA.OYI 
+TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGGGAAGAGAC------------AACAACTCCCCCTCAGAAGCAGGAGCCGAT +AGACAAGGG------------ACTGTATCCTTTAACCTCCCTCAGATCACTCTTTGGCAACGACCCATCG +TCACAATAAAGATAGGGGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGTATTAATAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAGTTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTCTGGGAAGTCCAATTAGGAATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATACTTTTCAGTTC +CCTTAGATAAAGACTTCAGAAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCGATATTCCAAAGTAGTATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAATGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGAACTTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAGTAGCAGAATTACAGAAACAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAACCCAAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAGCATGGTGGACGGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTCTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAG---GAACATGAGAAATATCACAGTAACTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGCCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTA +AAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTTA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAAT +AGCTACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTTCTTTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGGAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.GB.CAM1 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAA------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTTCCTATTAGTCCTAT +TGAAACTGTACCAGTGAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGACTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAACCTATAATGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATCTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAACTAGAACTGGCA +GAAAACAGAGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAACTACAGAAACAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAAAATGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAATTACCCATAC +AAAAGGAAACATGGGATGCATGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAAACTAGATTAGGAAAAGCAGGATATGTGACTGACAGAGGAAGAC +AAAAAGTTGTTCCCCTAACGGACACAACAAATCAGAAGACTGAATTACAAGCAATTTATCTAGCTTTGCA +GGATTCGGGATTGGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGGAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTGCATGTAGCCAGTGGATATATAGAAGCAGAAGTT 
+ATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACACACAGACAATGGTGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGATATACAAACTAAAGAATTACAAAAACAAATTACA---AAGATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.GB.MANC +TTTTTTAGGGAAGATCTGGCCCTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AGGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCTGCTCAGAAGCAGGAACCGAT +AGACAAGGA------------ACTGTATCCCTTAGCTTCCCTCAGACCACTCTTTGGCAACGACCCCTCG +TCACAATAAAAATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTGAGA +CAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTCAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACCCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTATATGAAGACTTCAGGAAGTATACTGTATTTACCATACCTAGTATAAACAATGAGGCACCAGGAGT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCGCCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACGTGGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAGTATCTGTTGAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTA 
+CATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAGCCAAAGCATTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAATTGGCA +GAAAACAGGGAGATT---------CTAAAAGTACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAACAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAGAAG +TCTGAAAACAGGAAAGTATGCAAAAATGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAGTAACCACAGAAAGCATAATAATATGGGGAAAGATC---CCCAAATTTAAACTACCCATAC +AAAAAGAAACATGGGACGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TACGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAGGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCAGGATTAGAAGTAAACATAGTATCAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAATTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCGGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCGCAAGAA---GAACATGAGAAATATCATAGTAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAGGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAGATGGCAGTATTCA +TCCACAATTTTAAGAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGGAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAGCAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATTATTAGGGATTATGG +AAAGCAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>B.NL.3202A21 
+TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAG +GGACAAGGA------------ACTGTATCCCTTAGCCTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGGTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAACTAGGAATACCA +CATCCCGCAGGGCTAAAAAAGAAAAAATCAGGAACAGCACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAGGACTTCAGAAAGTATACTGCATTTACCATACCTAGTGTAAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATATATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCACTTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACACGGAGTGTATTATGACCCATCAAAAGAAT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATCTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACCAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTGCCCATAC +AAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAGGAACCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAAATGACACGACAAATCAGAAGACTGAGTTACAAGCAATTAATCTAGCTTTGCA +GGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAAGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAGTGGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.TW.LM49 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGAAAG---GCCAGGAAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AGGAGAGCTTCAGGTTTGGGGAACAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------CCTGTATCCTTTAGCTTCCCTCGAATCACTCTTTGGCAACGACCCCTCG +TCACAATAAGGATAGGGGGGCAACTAAAGGAAGCCCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTAT +TGATACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGTACTAATAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGAAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTCCCTCAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AGAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAATAAAAGTAGAGGAACTGAGACAACATCTGTTAAGGTG +GGGATTTACCACACCAGAC--AAAAAAACATCAGAAAGAGCCTCCATTCCTTTGGATGGGCTATGAGCTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCTGAAAAAGACAGCTGGACTGTCAATGACATAC +AAAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAA +ACTTCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAACAACCAGTACATGGAGCATATTACGACCCATCAAAAGACT +TAGTAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGACATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGGACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAGGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGAC +AAAAAGTAGTCTCACTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGGAGTTAATAAAAAAGGAAAAAGTCTACCTGA +CATGGGTTCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAGGGCCCAAGAA---GAACATGAGAAATACCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTGGGTATATAGAAGCAGAAGTT 
+ATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATCCATACAGACAATGGCAGCAATTTCACTAGTGCTGCGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGAAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG--GATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>B.US.AD8 +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTTCTACAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTGACTTCCCTCAAATCACTCTTTGGCAACGACCCATCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAGATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAGGACTTCAGAAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAGAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAGCCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACC +TAGTAGCAGAAGTACAGAAACAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAAAATGAGGGGTGCCCACACCAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAGCATGGTGGATGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTTCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAATAGATAAATTAGTCAGTAATGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAA---GATCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAGTA---GCAAAAGAGATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAGGTT +ATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAA +CAATACATACAGACAATGGCACCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGGGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATGAT +AGCAACAGACCTACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.BC 
+TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCCTCAG +AGCAGACT------AGAGCCAACAGCTCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AACAACTCCCCCTCAGAAGCAGGAGCGGGA +AGACAAGGA------------AATGTATCCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAGGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGGAGATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTCTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAGATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGGACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCTGGATTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATCGATATTTCTCAGTTC +CCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTCCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAGTACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACTCAGGGATCAAAGTGAAGCAGTTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTAACACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGCAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAACCACAGAATGCATAATAATATGGGGAAAAACT---CCTAAATTTAGACTGCCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAGCCCATAGAAGGCGCAGAAACTTTC 
+TATGTAGATGGAGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAAAGGAAGAC +AAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTAGAAGCAATTCATCTAGCTTTGCA +GGATTCTGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAATTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGGGTCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCCATGGCTAGTGATTTTAACTTACCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +CACACATCTAGAAGGAAAAATTATCCTGGTGGCGGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACCACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTTA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAACAAAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.DH123 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGAAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCATCAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AGCAACTCCCTCTCAGAAGCAGGAGCCGAA +GGA------------------ACTATATCCCTTAGCCTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCAAGATAAAAATAGGGGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGGTACTCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAGAGTTAAACAATGGCCATTGTCA 
+GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAGAACAGTACTAGATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCG +CATCCCGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGACGTGGGTGATGCATATTTTTCAATTC +CCTTAGATGAAGACTTTAGGAAGTATACTGCATTTACCATACCTAGTGTAAACAATGCAGCACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACATAGTAATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGAACAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGACTTTTCACACCAGAC---CAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAGTGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGAGGAGCTAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGTTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACA +TAATAGCAGAGATACAGAAACAGGGGCAAGGCCAATGGACATATCAAATTTATCAGGAACCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGTA +GTGCAAAAAGTAACCACAGAGTGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGGGCAGAAACTTTC +TATGTAGATGGGGCAGCTAGCAGGGAAACTAGATTAGGAAAGGCAGGATATGTTACTAACAGAGGAAGAC +AAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTACTGGAATCAG +GAGAGTACTATTTCTAGATGGAATAGAGAAGGCCCAAGAA---GAACATGAGAAATATCATAGTAATTGG +AGAGCAATGGCTAGTGAATTTAACCTGCCAGCTGTAGTA---GCAAAAGAGATAGTAGCCTGCTGTGATA +AGTGCCAGGTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAGGTT 
+ATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTATTTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGTAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGAACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCATCAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTTTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAAGAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.JRCSF +TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AGCAACTCCCTCTCAGAAGCAGGAGCAGAA +GCAGGAGCCGATAGACAAGGAATTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGGATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGATATCTGTGGACATAAAGCTGTAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTCAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAGATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAGGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAAATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTTACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGTAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTTTCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGGGAAAGATT---CCTAAATTTAAATTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAGCAGAGGAAGAC +AAAAAGTTGTCTCCCTAACAGACACAACAAATCAGAAAACTGAGTTACAAGCAATTCACCTAGCTTTGCA +GGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGCTAATAAAAAAGGAAAAAGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAGGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTGCTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GATCATGAAAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGCAGATGGCCAGTAACAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCTGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAACAGAGATCCAATTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.JRFL 
+TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAAAGAGCTTCAGGTTTGGGGAAGAGAC------------AGCAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------AATGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGGATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTCAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGGTATCAGTACAATGTGCTTCCGCAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAGATAGGGCAGCATAGAGCAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTG +GGGGTTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAGCCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAT +TCTGAAAACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGGGAAAGATT---CCTAAATTTAAATTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAACAGGGAGACTAAACTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGAC +AAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAAACTGAGTTACAAGCAATTCATCTAGCGTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTGCTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GATCATGAGAAATATCACAGTAATTGG +AAAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCTGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGAGCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.MNCG +TTTTTTAGGGAAGATCTGGCCTTCCTGCAA------------GGGAAG---GCG---GAATTTTCCTCAG +AGCAGAAC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTATCAGAAGCAGGAGAAGAA +GCAGGAGACGATAGACAAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCATTG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGG +AGAAATGAATTTGCCAAGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATAACCATAGGAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGGTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAATAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACGACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTACCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAGTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGCAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATACGTAAGCCACCTGGATTCCTGAGTGGGAGGTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGTGCAGAAACTTTC +TATGTAGATGGGGCAGCTAACAGGGAGACTAAAAAAGGAAAAGCAGGATATGTTACTAACAGAGGAAGAC +AAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +AGATTCAGGGTTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GACCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGACTTTAACCTACCACCTATAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTT 
+ATTCCAGCAGAGACAGGGCAGGAGACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCCCCAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGACGGGAAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGAGAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGGCATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAATGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGGTCATTAGGGATTATGG +AAAACAGACGGCAGGTGATGATTGTGTGGCAAGCAGACAGGATGAGGAT--- +>B.US.NY5CG +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAGCGACCCCTCG +TCACAATAAAGATAGGGGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTCATAGAAATCTGCGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAACAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGTGTAGCATGACA +AAAATCTTGGAGCCTTTTAGAAAACAAAATCCAGACATAGTCATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATAC +AGAAATTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAA +ACTTCTTAGGGGAATCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAAGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCA +GTACAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAATTACCCATAC +AAAAGGAAACATGGGAAGCGTGGTGGACGGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAGTTATGGTACCAGTTAGAAAAGGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCCAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTAACTGACAGAGGTAGAC +AAAAAGTTGTCCCCTTAACGGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCATTGGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGG +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTACCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGCCCCGGAATATGGCAGCTAGATTG +TACACATTTAGAAGGAAAAGTTATCTTGGTAGCAGTTCATGTGGCCAGTGGATATATAGAAGCAGAAGTA +ATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAGTACATACAGACAATGGCAGCAATTTCACCAGTACTACAGTTAAAGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAATTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAGTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.P896 
+TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCCTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAAGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGAGCAGATAGACATAGAAATCTGTGGACATAAAGCTAAAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGTCCAAAAGTGAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAGAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACCCAAGACTTCTGGGAAGTTCAATTAGGCATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAATACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGATCTGAGACAACATCTGTTGAAGTG +GGGGTTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACT +TAATAGCAGAGCTACAGAAGCAGGGGCAGGGCCAATGGACATATCAAATTTATCAGGAGCCATATAAAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATTC +AAAAGGAAACATGGGAAGCATGGTGGACAGATTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGAGCAGCTAACAGGGACACCAAATCAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAGCTGACACAACAAATCAGAAGACTGAGCTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCATTAGGGATCATTCAAGCACAGCCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACACTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTAATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +CACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTAAAATTAGCAGGCAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACCACAGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCATCAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.RF +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAACTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC---------------AACTCCCTCTCAGAAGCAGGAGAAGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCATCG +TCACAGTAAAGATAGGGGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTGAGA +CAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAGGAAAAAATAAAAGCATTGGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCCA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAAACACCACGGAT +TAGATATCAGTACAATGTGCTTCCACAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACA +AAAATCTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGAGAACATCTGTTAAAGTG +GGGGTTTACCACACCGGAC---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAGCTAGAACTGGCA +GAAAATAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAA +CCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAGGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTGGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAGTCAGATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAGATTAGTCAGTACTGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAT---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATCTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT 
+ATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACATACAGACAATGGCAGCAATTTCACCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAACAATTA +AAGCAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACACGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.SF2 +TTTTTTAGGGAAGATCTGGCCTTCCTACAA------------GGGAAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAGGAGAA------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTACGATCAGATACCTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTCCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAGCAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAGATATGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAG +AAAACTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGTGATGCATACTTTTCAGTTC +CCTTAGATAAAGACTTTAGAAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAGAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAGCATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAATGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAGTTATGTAA +ACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGAAGTATATTATGACCCATCAAAAGACT +TAGTAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGGATGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAGTATCCACAGAAAGCATAGTAATATGGGGAAAGATT---CCTAAATTTAAACTACCCATAC +AAAAGGAAACATGGGAAGCATGGTGGATGGAGTATTGGCAAGCTACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCATAGCTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTATTTTTGAATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATCTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATATTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAGCAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAACAAAGATCCCCTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.WEAU160 
+TTTTTTAGGGAAGATCTGGTCTTCCCAAAA------------GGGAAG---GCCAGGGAATTTTCCTCAG +AGCAGACT------AGAACCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTCAGGGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCAAT +AGACAAGGA------------GCTGTATCCTTTAACTTCCCTCAAATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGAGGGGCAACTGAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATGAATTTGCCAGGGAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGGTACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCTTCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTAC +CCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAAAGTTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAA +ACTCCTTAGGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAGCTAGAGCTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAGCTACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAGAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAACAAGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGAC +AAAGAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCTATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTGACAGACTCCCAATATGCATTAGGAATCATTCAAGCACAACCA +GATCAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTAG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAG +GAAAGTACTATTTTTAGATGGGATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAGGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATCTTAAAACTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTACTACGGTTAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTGATAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAGACTCAACAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTCTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.WR27 +TTTTTTAGGGAAWATCCGGCCTTCCCACAT------------KGGAAG---GCCAGGGRATTTCCTTCAG +AACAGACC------AGAGCCATCAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTTGGGGRAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAACCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGTCAACGACCCCTCG +TCGCAATAAAGATAGGGGGGCAAATAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGGTAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAGTCACAA +AAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTCCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTGCTGGATGTGGGTGATGCAWATTTTTCAGTTC +CYTTAGATNAAGAGWTCAGGAAGTATACTGCATTTACCATACCTAGTACMCACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTCCCACAGGGATGGAAAGGATCACCAACAATATTCCCAAGTAGCATGACC +CAAATCTTAGAGCCTTTTAGAAAACCAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTAACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAGCATCTGTTAAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTT +CACCCTGATAAATGGACTGTACAGCCTATAGAGCTGCCAGAAAAGGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAATTGGSCAAGTCAGATTTATGCTGGGATTAAAGTAAWGCAATTATGTAA +ACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAATTAGAATTGGCA +GGAAACAGGGAGATT---------CTAAAAGAACCAGTWCATGGAGTGTATTATGACCCATCAWAWGACT +TAGTAGCAGAATTACAGAAGCAAGGGCAWGGCCAATGGACATATCAAATTTATCAAGAGCCATTTATAWA +TCTGWAAACAGGAAAGTATGCAAGAACGAGAGGTGCCCACACTAATGATGTTAWACAATTAWCAGAGGCA +GTGCAAAAAWTAGCCACAGAWAGCATAGTGATATGGGGAAAGACT---CCTAAATWTATACTACCCATAC +AGAAAGAAACATGGGAATCACGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTYTGT +CAATACCCCTCCCTTAGTGAAGTTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAGTAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCTCCCTAAATGACACAACAAATCAGAAAACTGAGTTACAAGCAATTCATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATAATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTGATAAWAAAGGWWWWGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAAGTACTATTTTKGGATGGAATAGATAWGGCCCAAGAA---GACCATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTGGTGAATTTAACCTGCCACCTGTGGTW---GCAAAAGAAATAGTAGCCTGCTGTGATA +AATGTCAGCTAAAGGGAGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGAATATGGCAWCTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT 
+ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGRAGATGGCCAGTAWAWA +CAATACATACAGACAATGGCAGCAATTTCATCAGCACTACGGTTAWGGCCGCCTKTTGGTGGGCGGGGAT +CANGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATWAATAAAGAATTA +AAGAAGATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTATAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAMCCAAAGAWTTACAAWAACAAWTTACA---ATTWTCCAAAATTYTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>B.US.YU2 +TTTTTTAGGGAAGATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGAAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ATC---AG +AAGAGAGCGTCAGGTTTGGAGAAGAGAC------------AACAACTCCCTCTCAGAAGCAGGAGCCGAT +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAGCGACCCCTCG +TCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCCATAGAAATATGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAAATTTGT------ACAGAAATGGAAAAGGAAGGGAAAATTTCAA +AAATTGGGCCTGAAAACCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTACATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAC +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGGTCACCAGCAATATTCCAAAGTAGCATGACA +ACAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACCTAGTTATCTATCAGTACATGGATGATTTGTACG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGATAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAACTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TGATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTACAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAAGGGAAGAC +AAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAGAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGGATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGGCAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAA +CAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAAGCCGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAAACTAAAGAACTACAGAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CONSENSUS_C 
+TTTTTTAGGGAaaaTtTGGCCTTCCcACAa---------???GGGgAG---GCCAGGgAATtTcCTtCAG +AgcAGacc------aGAGCCaaCAGCcCC?????????????????????????????????aCC---Ag +caGAgAGCTTCaggttcga???gGAgaC------------???AaCcCCcgcTCcgAAgCAgGAGccgAa +aGAcAgGGA------????????????acCcTTAacttCCcTCAaATCACTCTTTGGCAgCGaCCcCTTG +TctcaATAAaAgTAGGGGGcCAgatAAagGAggCtCTctTAGAcACaGGAGCaGATgATaCAGTaTTAGA +AGAaaTaAatTTgCCAGGAAAaTGGAaACCAAAAATGATaGGaGGAATTGGaGGtTTTATcAAAGTAAGa +CAgTATGAtcAaATActtATAGAAATtTGtGGAAAAAAGGCTATAGGTaCAGTatTagTAGGACCtACaC +CTGTCAACATAATTGGaAGaAAtaTgTTGACtCAgcTTGGatGcACacTAAAtTTtCCAATtAGTCCcAT +TgAaACTgTaCCaGTAAAATTAAAgCCAGGAATG---GAtGGcCCAAAgGTtAAaCAATGGccaTTgACA +gAAGAgAAaATAaAaGCATTAAcAgcAATtTGT------gAaGAAATGGAgaagGAaGGAAAAaTtaCAA +aAATTGGGCCTGAAAAtCCATAtAAcACTCCAgTATTTGCcATAAAAAAGAAgGACAGTACTAAGTGGAG +AAAatTAGTAGATTTcAGgGAaCTcAATAAAAGAACTcAAGAcTTtTGGGAAgTtCAAtTAGGaATACCa +CACCCaGCaGGgTtAAAaAAGAAaAAATCAGTgACAGTacTgGAtgTGGGgGATGCATAtTTtTCAGTtC +CTTTAgATGAaggcTTcAGgAAaTATACTGCaTTCACCATACCTAGTatAAACAATgaAACACCAGGgAT +TAGaTATCAATATAATGTgCTtCCACAgGGaTGGAAaGGATCaCCAgCAATATTcCAgagTAGcAtGaca +aaAATcTTAGAGCCCTTtAGggcacaaAAtCCAGaaaTAgtcATCTATCAATAtATGGATGActTGTATG +TAGGatCtGAcTTAGAAATAGGGCAACatAgaGCaaaAATAGAgGAgTtAAgAgaaCAtcTgTTaAagTG +GGGatTtACCACaCCagAC---AAgAAACATCAGAAaGAACCcCCATTtCTTTGGATGGGGTATGAACTc +CATCCTGAcAAATGGACAGTACAGcCTATAcAgcTgCCAGAaAAgGAtAGCTGGACtGTcaATGATATAC +AgAAgTTaGTgGGAAAaTTAAAcTGGGCaAGTCAgATTTACcCaGGgaTtAaAGTAAggCAacTtTGTAa +AcTcCTTAGGGGggcCAAAgcAcTaACAGAcaTAgTacCACtaACTGAAGAAGCAGAATTaGAAtTgGCa +GAgaAcAGgGAAATT---------cTAAaaGAACCAgTACATGGaGtaTaTTATGAcCCATCaAAAGAcT +TaaTAGCtGAaaTaCAgAAACAgGGGcatgacCAaTGGACATatCAAaTtTACCAaGAACCaTTCAAAAA +tcTgAAaACAGGgAAgTaTGCaAAAatgagGaCTgCCCACACtAATGATGTaAaaCAgTTAaCaGagGca +GTGCAaAAaATAgCcatgGAAaGcATAgTAaTATGGGGaAAgacT---CCTAAATTTAGAcTaCCCATCC +AaAAaGAaaCaTGGGAgaCATGGTGGACAGAcTAtTGGCAaGCCACcTGgATtCCTgAgTGGGAgTTTGt +tAAtACcCCTCCCcTAGTAAAaTTATGGTACCaGcTgGAgAaaGAaCCcATagcAGGAGcAGAaACtTTc 
+TATGTaGAtGGaGCAGCTAATAGgGAaActAAaataGGaAAAGCaGGgTaTGTtACTGAcAgaGGAaGgc +AgaaAaTtGTttCTcTAActGAAACaACaAATCAGAagaCtGAaTTaCAAGCaATtcagCTAGCtTTgCA +aGAtTCAGGatCAGAaGTAAAcATAGTaACAGAcTCACAgTATGCAtTAGGaATcATtCaaGCACAACCA +gATaAgAGTGAATCAGAgtTAGTcAacCAaATAATAGAaCAaTTaATAaaaAAGGAaagggTcTAcCTgT +CATGGGTACCaGCACATAAaGGaATTGGaGGAaATGAacAagTaGAtAAATTAGTAAGTagtGGaATcAG +gaaagTgCTgTTtcTAGATGGaATAgATAAgGCTCAAGAA---GAgCATGAaAagTATCACagcAATTGG +AGAGCaATGGCTagTGAcTTTAATcTgCCACCCaTAGTA---gCaAAaGAaATAGTAGCtaGCTGTGata +AaTGtCAgctAAAAGGgGAAGCCAtgCATGGACAaGTAgAcTGTaGtCCAGGgATaTGGCAATTAGATTG +tACACAttTAGAAGGaAAaaTcATCCTgGTAGCAGTCCaTGTAGCcAGTGGcTACATaGAaGCAGAgGTt +ATcCCAGCAGAAACAGGaCAaGAAACAGCAtAcTttaTAcTAAAaTTAGCAGGaAGATGGCCAGTcaaag +TAATACAtACAGAcAATGGtAgtAAtTTcAccAGtgcTgCAGTtAAgGCAGCCTGTTGGTGGGCAgGtAt +cCAaCAGGAATTTGGaATTCCCTACAATCCCCAaAGTCAgGGAGTAGTAGAAtCcATGAATAAaGAATTA +AAGAAaATtATAgGgCAgGTAAGAGAtCAAGCTGAGCACCTTAAGACAGCAGTACaAaTGGCAGTATTcA +TTCACAATTTTAAAAGAaaAGGGGGG---ATTGGGGGGTAcAgTGCAGGGGAaAGAATAaTAGAcATaAT +AgCAaCAGAcATACAaACTAaAGAAtTaCAAAAaCAAATTata---AAAaTTCAaAAtTTTCGGGTTTAT +TACAGAGACAGCaGAGAcCCtaTTTGGAAaGGACCAGCCAAacTacTcTGGAAAGGTGAAGGgGCa???G +TAGTAaTACAAGAtAAtAGTGACATAAAgGTaGTACCaAGGAGgAAaGcAAAaATCATTAaGGAcTATGG +AAAACAGATGGCAGGtGCTGATTGTgTGGCAgGTaGACAGGATGAagAT?-- +>C.BR.92BR025 +TTTTTTAGGGAAAATTTGGCCTTCCCACAG------------GGGGAG---GCCAGGAAATCTTCTTCAG +AACAGAAC------AGAGCCAACAGCCCC---------------------------------ACC---AG +AAGAGAGCTTCAGGTTTGGGGAAGAGAC------------AACAACTCCCTCTCGGAAGCAGGAGACGAT +AGACAAGGA---------------ACTGCCCTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCAACATAAAAGTAGGGGGACAGCTAAAGGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAAATTGCCAGGAAATTGGAAACCAAAAATGATAGGAGGAATTGGGGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAACATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTCAAACAATGGCTATTGACA 
+GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GATGAAATGGAGAGGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAAAGAACTTAAGACTTTTGGGAAGTTCAATTAGGGATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGAGATGCATATTTTTCAGTAC +CTTTAGATGAAGGCTTTAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCATCAATATTCCAGAGTAGTACGACA +AAAATCTTAGAGCCCTTTAGGGCACAAAATCCAGAAATAATTATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTATTGAAGTG +GGGATTCACCACCCCAGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACCGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGATAAAAGTAAGACAATTGTGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGACATAGTGCCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGATCCATCGAAAGACT +TAATAGCTGAGATACAGAAACAAGGGCAGAACCAATGGACATATCAAATCTACCAAGAACCATTCAAAAA +TCTGAAAACAGGAAAATATGCAAAAATGAGGACTGCCCACACTAATGATGTAAGACAGTTAACAGAGGCA +GTGCAAAAAATAGCCCTGGAAAGCATAATAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AGAAAGAAACATGGGAAGCATGGTGGACAGACTATTGGCAGGCCACCTGAATTCCTGAGTGGGAGTTTGT +TAATACTCCTCCCTTAGTAAAATTATGGTACCAGCTGGAGAAAGAACCCATAGCAGGAGCAGAAACTTTC +TATGTAGACGGAGCAGCTAATAGGGAAATTAAAATGGGAAAAGCAGGGTATGTTACTGACAGAGGAAGGC +AGAAAATTGTTTCTATAACTGAAACAACAAATCAGAAGACTGAGTTACAAGCAATTCAGCTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATTATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAAGAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTGCTGTTTCTAGATGGAATAAATAAGGCTCAAGAA---GAGCATGAAAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGAGTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCACACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGATACATAGAAGCAGAGGTT 
+ATCCCAGCAGAAACAGGGCAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGCAGTAATTTCATCAGTAATACAGTTAAAGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCAATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGA-AAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGATATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTATG---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGATCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTACTACAAGATAACAGTGACATAAAGGTAGTACCAAGGAGGAAAGTAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTATGGCAAGTAGACAGGATGAAGATT-- +>C.BW.96BW01B03 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AACAGACT------AGAGCCATCAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCCGCTCCGAAGCAGGAGCCGAA +GGACAGGGA------------------GCCCTTAACCTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TCACAATAAGAGTAGGGGGCCAGATAAAGGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGTGGTTTTATCAAAGTAAGG +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTCTTGGTAGGACCCACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GAAGAAATGGAGAAGGAAGGAAAAATCACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGACTTTTGGGAAGTCCAATTAGGAATACCC +CACCCAGCAGGGTTAAAGAAGAAAAAATCAGTGACAGTATTGGACGTGGGGGATGCATACTTTTCAGTCC +CTTTAGATGAAGGCTTCAGGAAATATACTGCGTTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAGGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGGGCACTAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGACCTGACTTAGAAATAGGGCAACATACAGCAAAAATAGAGGAGTCAAAAGAACATCTATTAAAGTG +GGGGTTTACCACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC 
+CATCCTGACAAATGGACAGTACAGCCTATAAATCTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAAATTTACCCAGGGATTAAAGTAAGGCAACTTTGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAACTAGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGACT +TGATAGCTGAAATACAGAAACAGGGGCATGACCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCCACGGAAAGTATAGTAATATGGGGAAAA-CT---CCTAAATTTAGACTACCCATCC +AAAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACTTGGATACCTGAGTGGGAGTTTGC +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTGGAGAAAGAACCTATAGTAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAAATAGGAAAAGCAGGATATGTTACTGACAGAGGAAGGC +AGAGAATTGTTTCTCTAACTGAAACAACGAATCAGAAGACTGAATTACAAGCAATTCAGCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAACCAGATAATAGAACAATTAATAAAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATTAG +GAAAGTGCTGTTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAGTATCACAACAATTGG +AGAGCAATGGCTAGTGAATTTAATCTACCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTGGTCCAGGGATATGGCAATTAGATTG +TACACATCTAGAAGGAAAAGTCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATGGAAGCAGAGGTT +ATCCCAGCAGAAACAGGGCAGGAAACAGCATACTATATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTACTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAGATTATAGGGCAGGTAAGAGAGCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAGAGGGGGG---ATTGGGGGGTATAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATG---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCGAGGAGGAAAGTAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>C.BW.96BW0402 
+TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCCGTTCCGAAACAAGAGCCGAA +GGACAGGGA------------------ACCCTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAAAGTAGGGGGCCAGATAAGGGAGGCTCTCCTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGAACAAATACTCATAGAAATTTGCGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACACAGCTTGGATGCACACTAAATTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GAAGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGGATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTATTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATTCAACACCAGGAAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGATA +AAAATCTTAGAGCCCTTCAGGACAAAAAACCCAGACATAGTTATCTATCAATATATGGATGACCTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAACATTTATTGAAATG +GGGACTTACCACACCATAC---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGACAAGGATAGCTGGACTGTCCATGATATAC +AGAAATTAGTAGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGATTAGAGTAAAACACCTGTGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGACATAGTGCCACTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCTGAAGTACAGAAACAGGGGCATGACCAATGGACATACCAAATTTACCAGGAACCATTCAAAAA +CCTGAAAACAGGAAAGTATGCCAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACGGAAGTA +GTGCAAAAAATAACCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATCC +AAAAAGACACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAACACCCCTCCCCTAGTAAAATTATGGTACCAGCTGGAGAAAGAACCCATAGCAGGAGCAGAAACCTTC 
+TATGTAGATGGAGCAGCTAATAGGGAAACTAAACTAGGAAAAGCAGGGTATGTTACTGACAAGGGAAGGC +AAAAAATCGTTCCTCTAACTGAAACAACAAATCAGAGGGCCGAATTACAAGCAATTCAGCTAGCTTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAATTAGTCACTCAGATAATAGAACAGTTAATAAAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTGGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTACTGTTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAGTATCACTGCAATTGG +AGAGCAATGGCTAGTGAGTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAACTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGGATCTGGCAATTAGATTG +TACACATTTAGAAGGAAAGATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTATATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACACACAGACAATGGCAGTAATTTCACCAGTACTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +ACAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAACAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAGAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCAAAACTTTCGGGTTTAT +TACAGAGACAGCCGAGACCCTGTTTGGAAGGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGTAAAAATCATTAGGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.BW.96BW0502 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTCCAG +AACAGATC------AGAGCCAGCAGCCCCAACAGTACC---------------AACAGCCCCACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCCGCTCCGAAGCAGGAGCCGAA +GGACAGGGA------ACCCTACAGGGAACCCTTAACTGCCCTCAGATCACTCTTTGGCAGCGGCCCCTTG +TCTCAATAAAAGTAGGGGGCCAGATAAAAGAGGCACTTTTAGACACAGGAGCAGATAATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATAGTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGACATTGACA 
+GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------GAAGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTTAGGGAGCTTAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAGAAATCAGTGACAGTACTGGATATGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAAGGCTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATTTTAGAGCCCTTTAGACTACAAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACGCAGAGCACAAATAGAAGAATTAAGAGAACACCTGTTAAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGACAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAGTTAAACTGGGCAAGTCAGATTTACCCAGGGATCAAAGTAAGGCAACTCTGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGATGTAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------TTAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCTGAAATACAGAAACAGGGGCATGACCAGTGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGAAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCCCAGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATCC +AAAAGGAAACGTGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +TAATACTCCTCCCCTAGTAAAATTATGGTACCAGCTGGAGAAAGAACCCATACCAGGAGTAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAACTAGGAAAAGCAGGATATGTCACTGACAGAGGAAGGC +AGAAAATTGTTTCTCTAACTGAAACAACAAATCAGAAGACTGAATTGCAAGCAATTCAGCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATAATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAACCAAATAATAGAACAATTAATACAAAAGGAATGGGTCTACCTGT +CATGGGTACCTGCACATAAAGGAATTGGAGGAAATGAGCAAGTAGATAAATTAGTAAGTCAGGGAATCAG +GAAGATGCTGTTTCTAGATGGGATAGATAAGGCTCAAGAA---GAGCATGAAAAATATCACAACAATTGG +AGAGCAATGGCTGATGAATTTAATCTGCCACCCATAGTA---GCAAAGGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACACTTAGAAGGAAAAGTCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATGGAAGCAGAGGTT 
+ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTCATACTAAAATTAGCAGGAAGATGGCCAGTCAGAG +TAATACATACAGACAATGGTACTAATTTCACTAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATTAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCG---G +TAGTAATACAAGATAACAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAAGGATTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTGGACAGGATGAAAAT--- +>C.BW.96BW1104 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------GGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAAAGCTTCGA---------GGAAAC---------------AACCCCTGCTCCGAAGCAGGAGACGAA +AGACAGGGA------------------ACCCTTAATTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAAAGTAGGGGGCCAGATAAAGGAGGCTCTATTAGATACAGGAGCAGATGATACAGTCTTAGA +AGAAATAAATTTGCCAGGAAAATGGAGACCAAAAATGATAGGAGGAATTGGAGGCTTTATCAAAGTAAGA +CAGTATGATCAAATACCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGATGCACACTAAATTTCCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GACGGCCCAAAGGTTAAGCAATGGCCGTTGACA +GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GAAGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAACTAGTAGATTTCAGGGAGCTTAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTCAGGAAGTATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTTCTTCCACAGGGATGGAAAGGATCACCATCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGCAAAAAATCCAGAACTAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAGAATAGAGGAGTTAAGAAAACATTTGTTAAGGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTC 
+CATCCTGATAAATGGACAGTACAGTCTATAAAGCTGCCAGAAAAGGAAAGCTGGACTGTTAATGATATAC +AGAAATTAGTGGGAAAATTAAATTGGGCCAGTCAGATTTACCCGGGGGTTAAAGTAAGGCAACTGTGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGACATAGTACCACCGACTGAAGAAGCAGAATTAGAATTGGCT +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGACCCATCGAAAGATT +TAATAGCTGAAATTCAAAAACAGGGGGGTGACCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGAAAGTATGCAAAAATGAGGACTGCCCACACGAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAGATATCCATGGAAAGCATAGTAATATGGGGAAAGATT---CCTAAATTTAGACTACCCATCC +AAAAAGAAGCATGGGAAGCATGGTGGACAGACTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCCTAGTAAAGTTATGGTACCAGCTGGAAACAGAACCCATGGCAGGAGCAGAAACTTTT +TATGTAGATGGAGCAGCTAATAGAGAAACTAAAATAGGAAAAGCAGGGTATGTTACTGACAAAGGAAGGC +AGGAAGTTGTGACTCTAACTGAAACAACAAATCAGAAGGCTGAATTACAAGCAATTCAACTAGCTTTGCA +GGATTCAGGACCAGAAGTAAACATAGTCACAGATTCACAGTATGCACTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAATTAGTCAATCAAATAATAGAACAGTTAATAAAAAAGGAAAAAGTCTACCTAT +CATGGGTACCAGCACATAAGGGAATTGGAGGAAATGAAAAGGTAGATAAATTAGTAAGTAGTGGAATCAG +GGAAGTACTATTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAATATCACAGCAATTGG +AGAGCTATGGCTAGTGAGTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGTCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATATTACATACTAAAATTAGCAGGAAGATGGCCAGTCAAAA +TAATACATACAGATAATGGCAGTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAGAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATGAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAGCTACTCTGGAAAGGTGAAGGAGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGTAAAAATCATTAGGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>C.BW.96BW1210 
+TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCATCAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCTGCTCAGAAGCAGGAGCCGAA +GGACAGGGA---------------ACCACCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCTCAATAAAAGTAGGGGGCCAGATAAAGGAAGCTCTCTTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATAGTTATAGAAATTTGTGGAAAAAAGGCTATAGGTTCAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAACTTGGATGCACACTAAATTTTCCAATTAGTCCGAT +TAAAACTGTACCAGTAAAATTAAAACCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTAACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------GAAGAAATGGAGAAGGAAGGAAAAGTTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGATTTTTGGGAAGTTCAACTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAGAAATCAGTGACAGTGCTAGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTCAGGAAATATACTGCATTCACCATACCTAGTAGAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTACCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGCACAAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATACAGCAAAAATAGAGGAGTTAAGAGAACATCTATTAAAGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAGGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGACAACTTTGTAA +AATCCTTAGGGGAGTCAAAGCACTCACAGACATAGTAACACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGATCCATCAAAAGACT +TAATAGCTGAAATACAGAAACAGGGGCATGACCAATGGACATACCAAATTTACCAAGAACCTTTCAAAAA +TTTGAAGACAGGGAAGTATGCAAAATTAAGGACTGCCCACACTAATGATGTAAGACAGTTAACAGAGGCA +GTGCAAAAAATAGCCCAGGAATGTATAGTAATATGGGGGAAGACT---CCTAAATTTAGACTGCCCATCC +AAAAAGAAACATGGGAGGCATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTAGAGAAAGAACCCATAGCAGGAGTAGAAACTTTC 
+TATGTAGATGGAGCAGCTAATAGGGAAACTAAAATGGGAAAAGCGGGGTATGTTACTGACAGAGGAAGAC +AGAAAATTGTCTCTCTAAATGAAACAACAAATCAGAAGACTGAATTACAAGCAATTCAGCTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAATTAATATGCAAGGAAAGAGTCTACCTGT +CATGGGTACCAGCACATAAAGGGATTGGAGGAAATGAGCAAGTAGACAAATTAGTAAGTAGTGGGATCAG +AAAAGTACTGTTTCTAGATGGAATAGATAAAGCTCAAGAA---GAGCATGAAAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGGAAAATCATCCTGGTAGCAGTCCGTGTAGCCAGTGGCTACATAGAAGCAGAGGTT +ATTCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACACACAGACAATGGCAGTAATTTTACCAGCAATGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAC +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGGCAAGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGATATACAAACTACAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGGAGAAAGGTAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGGGCTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>C.BW.96BW15B03 +TTTTTTAGGGAAGATTTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AACAGAAC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAAGTTCGA---GGAGAC---------------AACCCCCGCTCCGAAGCAGGAGCCGAA +AGACAGGGA------------------ACCCTTAATTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAAAGTAGGGGGTCAAATAAAGGAGGCTCTCTTAGACACAGGAGCTGATGATACAGTATTAGA +AGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATGGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAATAGGACCTACAC +CTGTCAACATAATTGGAAGGAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATTAGTCCCAT +TGAAACTGTGCCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA 
+GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GAAGAAATGGAGAAAGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAGTTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAGGACTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCATCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGCAAGAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAAAACATCTGTTAAGGTG +GGGATTTACCACACCGGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATAGAGTTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCCAGTCAGATTTACCCAGGAATTAAAGTAAGGCAACTTTGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGATATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGGACAGGGAAATT---------CTAAGAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACT +TGGTAGCTGAAATACAGAAACAGGGGCATGACCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +CCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCTATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATCC +AAAAAGAAACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCTTAGTAAAATTATGGTACCAGCTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC +TATGTGGATGGAGCAGCTAATAGGGAAACTAAAATAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGGC +AGAAAATTGTTTCTCTAACAGAAACAACAAATCAGAAGACTGAATTACAAGCAATTCAGCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAACCAAATAATAGAACAATTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAATAGATAAATTAGTAAGTAAGGGAATCAG +GAAAGTGCTGTTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGAGTTTAATCTACCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AGTGTCAGCTAAAAGGAGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAGGTT 
+ATCCCAGCAGAAACAGGACAAGAAACAGCATACTATATACTAAAGTTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGCAGTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTATAGTGCAGGGGAGAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTAATCTGGAAAGGTGAAGGAGCAGTAG +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGTAAAAATCATTAGGGACTATGG +AAAACAGATGGCAGGCGCTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>C.BW.96BW1626 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CGGAGAGCTTCAGGTTTGG---GGAGAC---------------AACCCCCGCTCCGAAACAGGAGCCAAA +GGACAGGGA------------------ACCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTTG +TCTCAATAAAAGTAGGGGGCCAGGTAAAGGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATAACTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTATTGACTCAGATTGGATGCACACTAAATTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GAAGAAATGGAAAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATACAACACTCCAGTATTTGCTATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAGCTTAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGCAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGCACAAAATCCAGGAATAGTCATCTATCAATATATGGATGATTTGTATG +TAGGATCTGATTTAGAAATAGGGCAACATAGAGCAAAAATAGAAGAGTTAAGAACACATCTCTTAAAGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC 
+CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAACTTTGTAA +ACTCCTTAGGGGGGCCAAAGCATTAACAGACATAATACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAATAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCTGAAATACAGAAACAGGGGCATGACCAATGGACATATCAAATCTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAGCAGAGGCA +GTGCAAAAAATAACCATGGAAAGCATAGTACTATGGGGAAAGACT---CCTAAATTTAGACTACCCATCC +AAAAAGAAACGTGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTAAGTGGGAGTTTGT +AAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTGGAGAAAGAACCCATAGTAGGAGCAGAGACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAACTAGGAAAAGCAGGGTGTGTTACTGACAGAGGAAGGC +AGAAAATTGTTTCTCTAACTGAAACAACAAATCAGAAGGCTGAATTACAAGCAATTCAGCTAGCTTTGCA +AGATTCAGGGGCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAACCAAATAATAGAACAGTTAATAAACAAGGAAAGGATTTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGACATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTGCTGTTTCTAGATGGAATAGATAAGGCTCAAGAA---GATCATGAAAAGTATCACAGCAATTGG +AGAGCGATGGCTAGTGACTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGACA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACCAGTGCTACAGTTAAGGCAGCCTGTTGGTGGGCAGGCAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTTTGGAAAGGTGAAGGGGCA---G +TAGTACTACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGTAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTGGACAGGATGAAAAT--- +>C.BW.96BW17A09 
+TTTTTTAGGGAAAATTTGGCCTTCCCACAA---------GGGGGGGAG---GCCAGGGAATTTCCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCCGCTCCGAAGCAGGAGCCGAA +AGACAGGGA------------------ACCCTTAACTTCCTTCAAATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAAAGTAGGGGGCCAGATAAGGGAAGCTCTATTAGATACAGGAGCAGATGATGCAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATAGTCATAGAAATCTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAACATGTTGACTCAGCTTGGCTGTACTCTAAATTTTCCAATTAGTCCTAT +TGAAACTATACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAGGCATTAAAAGCAATATGT------GAAGAAATGGAAAAGGAGGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAATACTCCAATATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTTAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTCCAATTAGGAATACCA +CACCCAGCGGGATCAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAGGACTTTAGGAAGTATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTCAGGGCACAAAACCCAGAAATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCMGATTTAGAAATAGGGCAACATAGGGCAAAAATAGAAGAGTTAAGAGAGCATCTATTGAAGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTTTGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACT +TGATAGCTGAAATACAGAAACAGGGGAATGGACAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGAAAGTATGCGAAAAAGAAGTCTACCCACACTAATGATGTTAAACAATTAACAGACGCA +GTGCAAAAAATAACTATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTGCCCATCC +AAAAGGAAACATGGGATACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTAGAAAAGGATCCCATAGCAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGAGAAACTAAGCTAGGGAAAGCAGGGTATGTCACTGACAAAGGAAGAC +AGAAAGTTGTTTCTCTAACTGAAACAACAAATCAGAAGACTGAATTACAAGCAATTAAACTAGCTTTGCA +GGACTCAGGATCAGAAGTAAACATAGTAACAGATTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATGAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTGATACAAAAGGACAAGGTATACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTTCTGGAATCAG +GAAAGTGCTATTTTTAGATGGAATAGATAAAGCTCAAGAA---GACCATGAGAAATATCACGGCAATTGG +AGAGCAATGGCTAATGAGTTTAATTTGCCACCCATAGTA---GCAAAAGAGATAGTAGCTAGCTGTGATA +AATGTCAGTTAAAAGGAGAAGCCATGCATGGACAGGTAGACTGTAGTCCAGGAATGTGGCAATTAGATTG +TACACACTTAGAAGGGAAAGTTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAAGTA +ATCCCAGCAGAAACAGGACAGGAAACAGCAGACTTCATATTAAAATTAGCAGGAAGATGGCCAGTACAAA +TAATACATACAGACAATGGCAGCAATTTCACCAGCACTGCAGTCAAGGCAGCCTGTTGGTGGGCAGGGAT +CCAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATAATAGGACAAGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACTAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGAACCCGTTTGGAAAGGACCAGCCAAATTGCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.ET.ETH2220 +TTTTTTAGGGAGACTTTGGCCTTCCAACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCCACCAGAGAGTCTCAGACCAGAGCCAACAGCCCCACC---AC +CAGAGAGCTTCAGGTTCGA---GGAAGC---------------AACACCTTCTCCGAAGCAGGAGCTGAA +AGACAGGGA------------------AGCCTTAACTTCCCTCAAATCACTCTTTGGCAACGACCACTTG +TTACAATAAAAATAGGGGGACAGCTAAAGGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATTAAAGTAAGA +CAGTATGATCAAATAATCATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACAC +CTGTCAACATAATTGGCAGAAACATGTTGACTCAGCTTGGACGCACATTAAACTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTCAAACAATGGCCATTGACA 
+GAAGAAAAGATAAAAGCATTAACAGCAATTTGT------GAAGAAATGGAGCAGGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAACCCATATAATACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGGATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTAGATGTGGGGGATGCATATTTCTCAGTTC +CTTTAGATGAAGGTTTCAGAAAATATACTGCATTCACCATACCTAGTACAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTCCTCCCACAGGGATGGAAAGGATCACCACCAATATTCCAGAGTAGCATGCCC +CAAATCTTAGAGCCCTTTAGGGCCCCCAACCCAGAAATAGTTATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCCCCAATAGAAGAGTTAAGAGAACATCTATTAAAGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTT +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAACTGTGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGACATAGTAACACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAGGAACCAGTACATGGAGTATTTTATGACCCATCAAAAGACT +TAATAGCTGAAATACAGAAACAGGGGAATGACCAATGGACATTTCAATTTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTTTGCAAAAAGAGGGACTGCCCACACTAATGATGTAAAGCAGTTAACAGCGGTA +GTGCAAAAGATAGCCCTGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AGAAAGAAACATGGGAAGCATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTAGAGAAAGAACCCATAGCAGGAGTAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAAATAGGAAAAGCAGGGTATGTTACTGATAGAGGAAGGC +AGAAAATTGTTTCTCTAACTGAAACAACAAATCAGAAGACTGAATTACAAGCGATCCAGCTAGCGTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATCCTGGCACAACCA +GATAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAACAGTTAATAAGCAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTGCTGTTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAATATCACAGCAATTGG +AGAGCAATGGCTAATGAATTTAATATCCCACCCGTAGTA---CCCAAAGAAATAGTAGCTTGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAAATTGTAGTCCAGGGATATGGCAATTAGATTG +TACACACTTAGAAGGGAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAGGCAGAGGTT 
+ATTCCAGCAGAAACAGGACAAGAAACAGCATACTTTCTACTAAAATTAGCAGGGAGATGGCCAGTCAGGG +TAATACATACAGATAATGGCAGTAACTTCACCAGTAATGCAGTTAAAGCAGCCTGTTGGTGGGCAGGTAT +TCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAACAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAGAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGATATAAT +AGCATCAGACATACAGACTAAAGAACTCCAAAACCAAATTTTA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.IN.21068 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTCCAG +AGTAGACC------AGAGCCAACAGCTCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCAGCTCCGAAGCAGGAGCCGAA +AGACAGGGA------------------ACCTTTAACTTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAGAGTAGGGGGCCAGATAAAAGAGGCTCTCTTAGACACGGGAGCAGATGATACAGTATTAGA +AGAAGTAAGTTTGCCAGGAAAATGGAGACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGAGGAAATACCCATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCCACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATCAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGCAATTTGT------GATGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTATATGAAGACTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGGTATCAATATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAGAATAGCATGACA +AGAATCTTAGAGCCCTTTAGGGCACAAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGCTCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAAGAGTTAAGAAAACATCTGTTAAGGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC 
+CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAGGCAACTTTGTAA +ACTTCTTAGGGGGACCAAAGCACTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGAGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TGATAGCTGAAATACAGAAACAGGGGCAGGACCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCT +GTGCAGAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AAAAAGAAACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTGGAAAAGGAACCCATAGCAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAAATAGGAAAAGCAGGGTATGTTACTGACAAAGGAAGGA +ATAAAATTGTTTCTCTAACTGAAACCACAAATCAGAAGACTGAGTTACAAGCAATTTGTCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGATTCACAGTATGCATTAGGGATCATTCAAGCACAACCA +AATAAGAGTGAATCAGAGTTAGTTAACCAAATAATAGAACAATTAATAAAAAAGGAAAGGGTCTATCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAAGCTCAAGAA---GAGCATGAAAGGTATCACAGCAATTGG +AGAGCGATGGCTAGTGACTTTAATCTGCCACCCGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATC +AATGTCAGTTAAAAGGGGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATGGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACAAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATTATAGGGCAAGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAGTTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCCATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTGGTACCGAGGAGGAAAGCAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.IN.301904 
+TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTCCAG +AGCAGACC------AGAGCCGACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------ACCCCCAGCTCCAAAGCAGGAGCCGAA +AGACAGGGA------------------ACCCTTAACTTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAGAGTAGGGGGCCAGATAAAAGAGACTCTCTTAGACACGGGAGCAGATGATACAGTATTAGA +AGAAGTAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATACCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCCACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATCAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +AAAGAGAAAATAGAAGCATTAACAGCAATTTGT------GATGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTTGCTATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAGCTCAATAAAAGAACTCAAGATTTTTGGGAAATTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTATATGAAGACTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGGTATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AGAATCTTAGAGCCCTTTAGGGCACGAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGGACATCTGTTAAAGTG +GGGCTTTACCACACCAGAC---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTGGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAGGCAACTTTGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACT +TGATAGCTGAAATACAGAAACAGGGGCAGGACCAATGGACATATCAAGTTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCT +GTGCAGAAAATAGCCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AAAAAGAAACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGTTGGAGAAAGATCCCATAGCAGGAGTAGAAACTTTC 
+TATGTAGATGGAGCAGCTAATAGGGAAACTAAAAAAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGGC +AAAAAATTGTTTCTCTAACTGAAACCACAAATCAGAAGACTGAGTTGCAAGCAATTTGTCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGATTCACAGTATGCATTAGGGATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTTAACCAAATAATAGAACAATTAATAAAAAAGGAAAGGGTCTATCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAAGCTCAAGAA---GAGCATGAAAAGTATCACAGTAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCCGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATC +AATGTCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCTAGTGGCTACATAGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAAGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAAGCCATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCCATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTACTACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.IN.301905 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTCCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCAGCTCCAAAGCAGGAGCCGAA +AGACAGGGA------------------ACCCTTAACTTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAGAGTAGGGGGCCAGATAAAAGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAGTAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATACCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCCACAC +CTGTCAACATAATTGGAAGGAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATCAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGGCCAAAGGTTAAACAATGGCCATTGACA 
+GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GATGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTATATGAAGACTTCAGGAAATATACTGCATTCACCATACCTAGTGTAAACAATGAAACACCAGGAAT +TAGGTATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGTGTAGCATGACA +AGAATCTTAGAGCCCTTTAGGGCACAAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTGTTAAGGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTACCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAGGCAACTTTGTAA +ACTCCTTAGGGGGGCCAAAACACTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACT +TGATAGCGGAAATACAGAAACAGGGGCAGGACCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCT +GTGCAGAAAATAGCCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AAAAAGAAACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTGGAGAAAGATCCCATAGCAGGAGTAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGACACCAAAATAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGGC +AGAAAATTGTTTCTCTAACTGAAACCACAAATCAGAAAACTGAGTTGCAAGCAATTTGTCTAGCTTTGCA +AGATTCAGGATCAGAGGTAAACATAGTAACAGATTCACAGTATGCATTAGGGATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAATTAATAAAAAAGGAAAGGGTCTATCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAATGGGATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAAGCTCAAGAA---GAGCATGAAAAGTATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCCGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATC +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAGGTT 
+ATCCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATTATAAGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTTA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCCATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.IN.301999 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AACAGACC------AGAGCCAACAGCCCCTCCAGCCAGACC------AGAGCCAACAGCCCCTCC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------AACCCCCGCTCTGAAGCAGGAGCCAAA +AGACAGGGA------------------ACCCTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAAAGTAGGGGGCCAGATAAGAGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAGTAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGGTTTATCAAAGTAAGG +CAATATGATCAAATACCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCCACGC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GATGAAATGGAGAGGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTTGCCATAAAAAAGAAAGACAGTACTAAGTGGAG +AAAACTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGATTTTTGGGAAGTCCAATTAGGAATACCA +CACCCGGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTAGATGTGGGGGATGCATATTTTTCAGTTC +CTTTATATGAAGACTTCAGGAAATATACTGCGTTCACCATACCTAGTAGAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGGCTAGCATGACA +AAAATCTTAGAGCCCTTTAGGGCACAAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAATTAAGACAACATCTGTTAAGGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC 
+CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAACTTTGTAG +ACTCCTTAGGGGGGCCAAAGTACTAACAGACATAGTACCACTGACTGAAGAAGCAGAATTGGAATTGGCA +GAGAACAGAGAAATT---------CTAAAAGAACCAGTACATGGAGTATATTATGATCCATCAAAAGATT +TGATAGCTGAAATACAGAAACAGGGGCAGGGCCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTAAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAGCAGAGGCA +GTGCAAAAAATAGCCATGGAAAGCATAGTAATATGGGGAAAAACT---CCTAAATTTAGATTACCCATCC +AAAAAGAAACATGGGAGGCATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCGGCTGGAGAAAGAACCCATAGCAGGAGTAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAAATAGGAAAAGCAGGGTATGTTACTGACAGAGGACGAC +AGAAAATTGTTCCTCTAACTGAAACAACAAATCAGAAGACTGAATTGCAAGCAATTTATCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAACCAAATAATAGAACAATTAATAAAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAGTATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGGTC +AATGTCAGCAAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATCTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGTTACATAGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTATATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACCAGTGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAGGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAGTGGCAGTATTCA +TTCACAATTTTAAAAGAAGAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCACAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAGGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>C.IN.94IN11246 
+TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTCCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCAGGTTCGA---GGAGAC---------------ACCCCCAGCTCCAAAGCAGGAGCCGAA +AGAGAGGGA------------------ACCCTTAACTTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAGAGTAGGGGGCCAGACAAGAGAGGCTCTCTTAGACACGGGAGCAGATGATACAGTATTAGA +AGAAGTAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATACCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCCACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATCAGTCCTAT +TGAAACTGTACCCGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +AAAGAGAAAATAGAAGCATTAACAATAATTTGT------AATGAAATGGAGAAGGAAGGAAAAATTACAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTATATGAAGACTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGGAT +TAGGTATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCCCCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGGGGACGAAATCCAGAGATAGACATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTGTTAAAGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGTTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTGGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAGGCAACTTTGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAATACATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCTGAAATACAGAAACAGGGGCAGGACCAATGGACATATCAAGTTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCT +GTGCAGAAAATAGCCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AAAAAGAAACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTGGAGAAAGATCCCATAGCAGGAGCAGAAACTTTC 
+TATGTAGATGGAGCAGCTAATAGGGATACTAAAATAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGGC +AAAAAATTGTTTCTCTAACTGAAACCACAAATCAGAAGACTGAGTTGCAAGCAATTTGTCTAGCTTTGCA +AGATTCAGGATCAGAAGTAAACATAGTAACAGATTCACAGTATGCATTAGGGATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTTAACCAAATAATAGAACAATTAATAAACAAGGAAAGGGTCTATCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAGAGTGCTATTCCTAGATGGAATAGATAAAGCTCAAGAA---GAGCATGAAAAGTATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCCGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATC +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGATTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTAGTAGCAGTCCATGTAGCCAGTGGCTACATGGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAGAGTCAGGGAGTAGTAGAAGCCATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +ATCAACAGACATACAAACTAGAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCCATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGTGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>CONSENSUS_D +TTTTTTAGGGAaaATTTGGCCTTCCCACAA------------gGGAAG---GCCgGGGAAcTTtCTtCAg +AGCAGACC??????AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAgAgCTTCGGgTTtGGGGAgGAGAT---------??????aaCcCcctctCAgaAACAGgAGcAgAA +AGACAAGGA------------ACTGTATCCtTtAaCttCCCTCAAATCACTCTTTGGCAACGACCCcTtG +TcaCAaTAAAgATAGGGGGACAGCTAAAgGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAaATaAATTTGCCAGGAAAaTGGAAaCCAAAAATGATAGGGGGAATTGGaGGtTTTATCAAAGTAAGA +CAGTATGATCAaATACtCaTAGAAATCTGTGGacAtAAaGCTATaGGTACAGTATTAGTAGGACCtACaC +CTGTCAACATAATtGGAAGAAATTTGTTGACcCAGATTGGcTGCACTTTAAAtTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGaATG---GATGGCCCAAAAGTTAAACAATGGCCaTTGACA 
+GAAGAAAAAATAAAAGCAtTAAcAGAAATTTGT------aCAGAaaTgGAAAAGGAAGGAAAAATTTCAA +gAaTTGGGCCTGAAAAtCCATAcAATACTCCaATATTTGCCATAAAGAAAAAAGACAGTACcAAGTGGAG +AAAATTAGTAGATTTCAGaGAACTTAATAAGAGAACTCAAGAtTTcTGGGAaGTTCAAtTAGGaATACCg +CATCCtGCAGGgcTaAAaAAGAAAAAaTCAgTAACAGTACTGGATGTGGGtGAtGCATATTTtTCAgTTC +CCTTAgaTgAAGAcTTTAGgAAaTAtACcGCaTTtACCATAcCTAGTAtAAAcAATGAGACACCAGGgAT +TAGATATCAGTACAATGTgCTtCCACAgGGATGGAAAGGATCACCgGCAATATTCCAAAGTAGCATGAcA +AAAATCTTAGAgCCcTTTAGAAAACAAAATCCAGAAaTagTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGAcTTAGAAATAGGgCAGCATAGaacAAAAATAGAGgAATTAAGaGaACAtCTaTTgAgGTG +GGGatTTACCAcACCAGAt---AAAAAacATCAGAAAGAACCcCCATTTCTTTGGATGGGtTATGAACTC +CATCCTGATAAaTGGACAGTACAGtCTATAaaacTGCCAGAaAAgGAaAGCTGGACTGTCAATGATATAC +AGAAgTTAGTGGggAaATTAAAcTGGGCAAGCCAGATTTATcCAGGAATTAAAGTAAggCAATTaTGtAA +ActCCTTAGGGGAaCCAAaGCACTaACAGAaGTAaTACCACTaACAGaAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAaATt---------cTAAAaGAACCAgTaCATGGAGtGTATTATGACCCATCAAAAGACT +TAATAGCAGAAaTACAGAAACAAGGGcAcGgcCAATGGACATAcCAAATtTATCAAGAACcATtTAAAAA +TCTgAAAACAGGAAAGTATGCaAgAAtGAGGGGTgCCCACACTAATGATGTAAAaCAaTTAaCAGAGGcA +GTGCAaAaAATAgCCacAGAAaGcATAGTgATATGGGGAAaGACT---CCTAAATTTAgAcTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGatAGAgTATTGGCAaGCCACcTGGATTCCTGAGTGGGAaTtTGT +CAAtACCCCTCCTTTAGTaAAATTATGGTAcCAGTTAGAGAAGGAACCCATAaTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAgAGAGACTAAAtTAGGAAAaGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCcCttTtACTGAcACaACAAATCAGAAGACTGAgTTACAAGCaATTAATcTAGCtTTgCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGAtTCACAATATGCAtTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAgtTAGTCAgTCAAATAATAGAgCAGtTAATAAAAAAGGAAAAGGTtTACCTGg +CATGGGTACCAGCaCAcAAaGGaATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTca?GGAATCAG +gAAAgTACTATTTTTGGATGGAATAGATAAgGCTCAaGAa---GAACATGAGAAATAtCACAaCAATTGG +AGaGCAATGGCTAGTGAtTTTAACCTaCCACCtGTgGTA---GCaAAAGAAATAGTAGCtAGCTGTGATA +AATGTCAGcTaAAAGGAGAAGCcaTGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGAtTG +TACACAttTaGAAGGAAAAGtTATCCTGGTAGCAGTtCATGTAGCCAGTGGcTATATAGAAGCAGAAGTT 
+ATTCCAGCAGAAACaGGGCAgGAAaCAGCaTAcTTTcTtTTaAAATTAGCAGGAAGATGGCCAGTAAAAg +TAGTaCATACAGAcAATGGCAGCAATTTCACCAGtGCTgCAGTtAAGGCcGCCTGtTGGTGGGCAGGtAT +cAAaCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTa +AAGAAAATTATaGGaCAGGTAAGAGAtCAAGCTGAaCATCTTAAGACAGCaGTACAAATGGCAGTATTcA +TcCACAATTTTAAaAGAAaAgGGGGG---ATTGGGGGaTACAGTGCAGGGGAaAGAATAATAGAcATaAT +AGCAaCAGAcATACAAACTAaaGAATTACAAAAACAAATcAtA---AAAATTCAAAATTTTCGGGTTTAT +TACAGgGACAGCAGAGATCCAaTTTGGAAAGGACCAGCAAAGCTtCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAAtAGTGAcATAAAGGTAGTACCAAGAAGAAAAGtAAAGATtATtAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT?-- +>D.CD.84ZR085 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGAAG---GCCGGGGAACTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGA-AGCTTCGGGTTTGGGGAGGAGAT---------AACCCCCTCCCAGAAACAGGAACAGAAGGACAA +AGACAAGGA------------ACTGTATCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCGTTG +TCACAATAAAGATAGGGGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAGTGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCACATACTCATAGAAATCTGTGGACATAAGGCTATAGGTACAGTATTAGTAGGACCCACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACCCAGATTGGCTGCACTTTAAACTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCGTTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGATATGGAAAAGGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGGATACCA +CATCCTGCAGGATTAAAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTCAATTC +CCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGATTTAGAAATAGGACAGCATAGAGCAAAAATAGAGAAATTAAGAGAACATCTGTTGAGGTG +GGGGCTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTC 
+CATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAGGAACCAATGCATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAATTACAGAAACAAGGGCAAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAAAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCCCCTTTACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTAATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGATTCACAATATGCACTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAATTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCTCACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCTCAAGAG---GAACATGAGAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCAATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGACTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAGCAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTGCATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGCTGGTGGGCAGGTAT +CAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAGAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAGAGAATTACAAAAACAAATCACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGACATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATT-- +>D.CD.ELI 
+TTTTTTAGGGAGAATTTGGCCTTCCCACAA------------GGGAAG---GCCGGGGAACTTTCTCCAA +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGGGAAGAGAT---------------AACCCCCTCTCAAAAACAGGAGCAGAA +AGACAAGGA------------ACTGTATCCTTTAACTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCGCAATAAAAATAGGGGGACAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACCCATAGAAATCTGTGGACAGAAAGCTATAGGTACAGTATTAGTAGGACCTACGC +CTGTCAACATAATCGGAAGAAATTTGTTGACCCAGATTGGCTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGATATGGAAAAGGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACCAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCG +CATCCTGCAGGGCTGAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATGAAGATTTTAGGAAATATACCGCCTTTACCATATCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGGACAAAAATAGAGAAATTAAGAGAACATCTATTGAGGTG +GGGATTTACCAGACCAGAT---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGTCTATAAAACTGCCAGAAAAGGAGAGCTGGACTGTCAATGATATAC +AGAACTTAGTGGAGAGATTAAACTGGGCAAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAAATT---------TTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAGCAATTAGCAGAGGCA +GTGCAAAGAATATCCACAGAAAGCATAGTGATATGGGGAAGGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGGCAGAGTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCATAATAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCCCTTTGACTGACACGACAAATCAGAAGACTGAGTTACAAGCAATTAATCTAGCCTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGATTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTTTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTCAAGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCTCAAGAA---GAACATGAGAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAACCTACCACCCGTGGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACATACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTTAAGGCCGCCTGTTGGTGGGCAGGTAT +CAAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAGAAGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAAGAGTGACATAAAGGTAGTACCAAGAAGAAAAGTAAAGATTATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>D.CD.NDK +TTTTTTAGGGAAGATTTGGCCTTCCCACAA------------GGGAAG---GCCGGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGGGAGGAGAT---------------AACCCCCTCTCAGAAACAGGAGCAGAA +AGACAAGGA------------ACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCG +TCACAATAAAGATAGGGGGACAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTCATAGAAATCTGTGGATATAAAGCTATGGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACCCAGATTGGCTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATATAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACCAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAGGTTCAATTAGGAATACCG +CATCCTGCAGGGCTGAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTCAGTTC +CCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTCCCACAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGAGAACATCTATTGAGGTG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGGAAATTAAACTGGGCAAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAACTACAGAAACAAGGGGACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAA +TCTAAAAACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCCCTTTCACTGACACGACAAATCAGAAGACTGAGTTACAAGCAATTAATCTAGCTTTACA +GGATTCGGGATTAGAAGTAAACATAGTAACAGATTCACAATATGCACTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGGTTTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTCAGGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAGGCTCAGGAA---GAACATGAGAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAACCTACCACCTGTGGTA---GCGAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATCTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT 
+ATTCCAGCAGAAACGGGGCAAGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACATACAGATAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGTTGGTGGGCAGGGAT +CAAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTTA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAGAGAATTACAAAAACAAATCATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGACATAAAGGTAGTACCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>D.CD.Z2Z6 +TTTTTTAGGGAAGATTTGGCCTTCCCACAA------------GGGAAG---GCCGGGGAACTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGGGAAGAGAT---------------AACCCCCTCTCAGAAACAGGAGCAGAA +AGACAAGGA------------ACTGTATCCTTCAACTGCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TTACAATAAAAATAGGGGGACAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTCATAGAAATCTGTGGGCATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACCCAGATTGGCTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +GAGTTGGGCCTGAAAATCCATACAATACTCCCATATTTGCCATAAAGAAAAAAGACAGTACCAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCG +CATCCGGCAGGGCTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTTAGGAAATATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGAGAACATCTATTAAGGTG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC 
+CATCCTGATAAATGGACAGTACAGTCTATAAAATTGCCAGAAAAGGAGAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGGAAATTAAACTGGGCAAGCCAGATTTATCCAGGAATTAAAGTAAGGCAATTGTGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAGCAGAGGTA +GTGCAAAAAATATCCACAGAAAGCATAGTGATATGGGGAAAGACT---CCTAAATTTAGATTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGGTAGAGTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAGGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCCCTTTTACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTAATTTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGATTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTTTACCTGG +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTCAGGGAATCAG +GAAAGTACTATTTTTGGATGGAATAGATAAAGCTCAAGAA---GAACATGAGAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAACCTACCACCTGTGGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTATTTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +TAGTACATACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTTAAGGCTGCCTGTTGGTGGGCAGGTAT +TAAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTG +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAGCATCTTAAGACAGCTGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAGAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATCACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGACATAAAGGTAGTACCAAGAAGAAAAGTAAAGATTATCAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>D.UG.94UG1141 
+TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------TGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACCCCCAGCAGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGATCTTCGGATTAGGGGAGGAGAT---------------AACACCTCCTCAGAAACAGGAGCAGAA +AGACAAGGA------------ACTGTATCCTTTAACCTCCCTCAAATCACTCTTTGGCAACGACCCGTTG +TCACAGTAAAGATAGGGGGACAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGGGGCTTTATCAAAGTAAGA +CAGTATGATCAAATACCCTTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGGATG---GATGGCCCAAAAGTTAAACAATGGCCGTTGACA +GAAGAAAAAATAAAAGCACTAATAGAAATTTGT------TCAGAACTAGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTTTGGGAAGTTCAACTAGGAATACCA +CATCCTGCAGGGCTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTCAGTTC +CCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTAGTACAAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAATTAAGGGGACACCTCTTGAAGTG +GGGATTTACCACACCAGAC---AAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAACTC +CATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAA +ATGCCTTAGGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAAATA---------CTAAAAGAACCAGTACATGGAGCGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAA +TCTGAAAACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT +CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCATAGTAGGAGCAGAAACTTTC 
+TATGTAGATGGGGCAGCTAATAGAGAGACTAAAATAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AGAAAGTTGTCTCTCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCCATTAATCTAGCTTTGCA +GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGGTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAG +AAAAATACTATTTTTGGATGGAATAGATAAGGCTCAAGAA---GAACATGAGAAATACCACAACAATTGG +AGGGCAATGGCTAGTGAGTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGGTGAAAGGAGAAGCCTTGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGGTATCCTGGTAGCAGTCCATGTAGCCAGTGGTTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCCTACTTTCTTTTGAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACATACAGATAATGGCAGCAATTTCACCAGCGCTGCAGTAAAGGCCGCCTGTTGGTGGGCAGGTAT +CAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATTATCGGGCAGGTAAGAGAACAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGATATCAT +AGCAACAGACATACAAACTAAGGAATTACAAAAACAAATCATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAGTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATTATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>CONSENSUS_F1 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCCGGAAAtTTccTtCAG +A?cAG?CC------AGAGCCaaCAGCCCC---------------------------------gCC---AG +CaGAGAGCtTCgGGtTCagaGAgGAgat---------------AACcCCcTCTCCGAaGCAGGAGCAGAA +aGAcGaGGg---??????ACtGTAcCCTCCCTTAGCtTCCCTCAAATCACTcTTTGGCAaCGACCC?TAG +TCACaATAA?A?TaGGGGGACAGcTAAaGGAaGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACaTAaATTTGCCAGGAAAATGGAAACCAAAAATgATAGGGGGAATTGGAGGTTTTATcAAAGTAAAA +CAGTATGAT?acATActCATAGAaATTTGTGGACAcAa?GCtATAGGTACAGTGTTAGTAGGACCTACgC +CTGTCAACATAaTTGGAAGaAATATGTTGACtCAgATTGGTTGtACTTTAaATTTTCCAaTTAGTCCTAT +TGAaACTGTACCAGTAAAAtTGAAgCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA 
+GAaGAAAAaATAAAAGCAtTAACAGAaATATGT------a?aGA?ATGGAAAAaGAAGGAAAaATTTCAA +aAATTGGGCCTGAAAATCCATACAATACTCCAgTATTTGCCATAAAGAAAAAAGACAGTAcTAAATGGAG +gAAaTTAGTAGATTTcAgAGAACTtAATAAAAGAACTCAAGATTTtTGGGAgGTTCAATTAGGAATACCa +CATCCtGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTgGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAgGAtTTCAgGAAGTACACTGCATtCACCATACCTAGTgtCAACAATGAGACACCaGGAaT +tAGgTACCAGTACAATGTGCTTCCACAAGGaTGGAAAGGATCACCAGCAATATTCCAAtgTAGcATGACA +AAAATcTTAGAgCCCTTTAGA?caAaAAAtCCAGACATAGTtATCTACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAaTAGGgCAGCATAGaAcaAAAATAGAgGAGTTAAGaGAACATCTACTGAaATG +GGGaTTtACTACACCAGAC---AAaAAACATCAgAAAGAACC?CCATTCCTtTGGATGGGGtATGAACTC +CATCCTGATAAATGGACAGTgCAGCCTATACAAtTGCCAgAcAAGGACAGCTGGACTGTCaATGAtATAC +AGAAgTTAGTAGGAAAAcTAAAtTGGGCAAGTCAgATTTATCCAGGGATTAAAgTAA?gCaATTATGTAA +ACTCCTTAGgGGAGCCAAGGCACTAACAGACATaGTGCCACTGACTgcAGAaGCAgAgTTAGAATTGGCA +gA?AAtAGGGAGATT---------CTAAaAGAACCaGTACATGGGGtATATTATGACCCaTCAAAAGACT +TAATAgCAgAAaTaCAGAAACAAGGgcAaGGgCAATGGACATATCAAATTTAtCaagAgCCATTTAAAAA +tCTAAAAACAGGAAAGTATGCAAAAatgAGGTCtGCCCACACTAATGATgTAAAAcAaTTAACAGAaGCA +GTgCAAAAGaTAgCTctaGAAaGCATAGTAATATGGGGaAAGA?t???CCTAAGTTTA?aCTACCcATac +taAAaGAgACATGGGAtaCATGGTGGACAGA?TAtTGGCAAGCCACCTGGATTCCTGA?TGGGAGTTTGT +CAATACCCCCCCTCTAGTaAAACTATGGTAtCAgTTAGAAACAGAGCCCATag?AGGAGCAGAaACcTTC +TATGTaGATGGGGCATCTAATAGAGAGACCAAAAAAGGAAAAGCAGGATATGTTACTGA?AgAGGAAgAC +AAAA?G?tGTCtcCCTAACtGAgACcACAAATCAGAAGGCTGAGTTACAAGCAATTcAtTTAGCTTTaCA +GGATTCAGGATCAGAAGTgAAcATAGTAACAGAcTCACA?TATGcATTAGGAATcATTCAaGCaCAACCA +GATAAGAGTGAATCAGAg?TAGTCAATcAAATAATAGAGcAaTTAATAcAAAAGGAAA?GgTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAaATTAGTCAGTGCTGGAaTCAG +gAAAaTACTGTTTtTAGATGGGATAGATAAGGCACAAGA?---GAACATGAAAAATATCACAACAAtTGG +AGAGCaATGGCTAGTGATTTTAATcTgCCAcCtgTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +A?TGTCAGCTAAAAGGgGAaGCCATGCAtGGACAAGTAGAcTGTAGtCCAGGgATATGGCAATTAGATTG +?ACACATTTAGAAGGaAAaaTTATCCTgGTAGCAGTCCATGTAGCTAGTGg?TAcaTAGAAGCAGAAGTt 
+AT?CCAGCAGAAACAGGACAgGAaACAGCCTACTTCaTAcTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +taATACATACAGACAATGGcAgCAATTTCACCAGTgccgCgGTTAAGGCAgCcTGTTGGTGGGCAGGTAT +cCAgCAGGAATTTGGAATtCCcTAcAA?CCCCAAAGTCAAGGAGTAGTAGAATCtATgAATAAAGAg?TA +AAgAA?ATCATAGGACAggTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTaCAAATGGCaGTATTCA +T?CACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGgTACAGTGCAGGGGAAAGAAtAATAGACATAAT +A?CAACAGACATACAAACTAgAGAATTACAAAAACAAaTTA?a---AAAATTCAAAATTTCCGGGTTTAT +TACAGGGACAGCAGAgACCCAGTTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAGTGAAATAAA?gTAGTaCCAAGaAGaAAAGCAAAGATCATTAGgGATTATGG +AAAACAGATGGCAGgTGAtGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>F1.BE.VI850 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCCGGAAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTCAGAGAGGAGAT---------------AACCCCCTCTCCGAAGCAGGAGCAGAA +AGACGGGGA---------ACTGTACCCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAG +TCACAATAAAAATAGGGGGACAGATAAAGGAGGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAA +CAGTATGATAACATACTCATAGAAATTTGTGGACACAAGGCTATAGGTACAGTGTTAGTAGGACCTACGC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAGTTAGTCCTAT +TGAAACTGTACCAGTAAAATTGAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAGATAAAAGCATTAACAGAAATATGT------CTAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTAGTAAATGGAG +GAAATTAGTAGATTTCAAAGAACTTAATAAAAGAACTCAAGATTTCTGGGAGGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAGGATTTCAAGAAGTACACTGCATTCACCATACCTAGTGTCAACAATGAGACACCTGGAAT +TAGGTACCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAATGAAAAACCCAGACATAGTCATCTACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGTTAAGAGAACATCTACTGAGATG +GGGATTTACTACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGCATGAACTC 
+CATCCTGATAAATGGACAGTGCAGCCTATACAATTGCCAAACAAGGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTAGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCCATTATGTAA +ACTCCTTAGGGGAGCCAAGGCACTAACAGACATAGTGCCACTGACTGCAGAGGCAGAGTTAGAATTGGCA +AAAAATAGGGAGATT---------CTAAGAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGAGACGGGCAATGGACATATCAAATTTATCAGAACCCATTTAAAAA +TCTAAAAACAGGAAAGTATGCAAAAGTGAGGTCGGCCCACACTAATGATGTAAAACAATTAACAGAAGCA +GTACAAAAGATAGCTTTAGAAAGCATAGTAATATGGGGGAAGAGATCTCCTAAGTTTAAACTACCCATAC +TGAAAGAGACATGGGATACATGGTGGACAGATTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCCCCTCTAGTGAAACTATGGTACCAGTTAGAAACAGAGCCCATAGCAGGAGCAGACACCTTC +TATGTAGATGGGGCATCTAATAGAGAGACCAAAAAAGGAAAAGCAGGATATGTTACTGATAAAGGAAAAC +AAAAAGTTGTCTCCCTAACGGAGACCACAAATCAGAAGGCTGAGTTACAAGCAATTTATTTAGCTTTGCA +GGATTCAGGATCAGAAGTGAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCGCAACCA +GATAAGAGTGAATCAGAAATAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAGTCAG +GAAAATACTGTTTTTAGATGGGATAGATAAGGCACAAGAA---GAACATGAAAAATATCACAACAATTGG +AGAGCGATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AGTGTCAGCTAAAAGGGGAAGCCATGCACGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATTTAGAAGGCAAGGTTATCCTGGTAGCAGTCCATGTAGCTAGTGAGTACATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGACAGGAAACAGCCTACTTCATATTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGCAGCAATTTCACCAGTGCCGCGGTTAAGGCATCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATAAATAAAGAGTTA +AAAAAGATCATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +ATCAACAGACATACAAACTAGAGAATTACAAAAACAAATTACG---AAAATTCAAAATTTCCGGGTTTAT +TACAGGGACAGCAGAAACCCAGTTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAGTGAAATAAAGATAGTACCAAGGAGGAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGTTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>F1.BR.93BR020.1 
+TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCCGGAAACTTCATCCAG +AACAGGCC------AGAGCCGTCAGCCCC---------------------------------GCC---AG +CAGAGAGCTTCAGGTTCGGGGAGGAGAC---------------AACCCCATCTCCGAAGCAGGAGCAGAA +AGACGAGGG---------ACTGTACCCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCACAATAAGAGTAGGGGGACAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACGTAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAA +CAGTATGATAGCATACTCATAGAAATTTGTGGACACAGAGCTATAGGTACAGTGTTAGTAGGACCTACGC +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTACATTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTGAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATATGT------ATGGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +GAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCG +CATCCAGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAGGATTTCAGGAAGTACACTGCATCCACCATACCTAGTACCAACAATGAGACACCAGGAGT +TAGGTACCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAATATAGCATGACA +AAAATCTTAGATCCCTTTAGAGCAAAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGACAGCATAGAACAAAAATAGAAGAGTTAAGAGAACATCTACTGAAATG +GGGATTAACTACACCAGAC---AAAAAACATCAAAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTC +CATCCTGATAAATGGACAGTGCAGCCTATACAATTGCCAGACAAGGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTAGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAACAATTATGTAA +ACTCCTTAGGGGAGCCAAGGCACTAACAGACATAGTGCCACTGACTACAGAAGCAGAGTTAGAATTGGCA +GAGAATAGGGAGATT---------CTAAAAGAACCAGTACATGGGGCATATTATGACCCGTCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGGGCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAGTATGCAAAAATGAGGTCTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCA +GTGCAAAAGATATCTCTAGAAAGCATAGTAATATGGGGCAAGACT---CCTAAGTTTAGACTACCCATAT +TAAAAGAGACATGGGATACATGGTGGACAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCCCCTCTAGTAAAACTATGGTATCAGTTAGAAACAGAGCCCATAGTAGGAGCAGAAACCTTC 
+TATGTAGATGGGGCATCTAATAGAGAGACCAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGCGGTCTCCCTAACTGAGACTACAAATCAGAAGGCTGAGTTACAAGCAATTCAGTTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTACTGTTTCTAGATGGGATAGATAAGGCACAAGAG---GAACATGAAAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAATATACCAGCTGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGATTGTAGCCCAGGGATATGGCAATTAGATTG +CACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCTAGTGGGTACCTAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAAGAGACAGCCTACTTCCTACTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +CAATACATACAGACAATGGCACCAATTTCACCAGTGCCACGGTTAAGGCAGCTTGTTGGTGGGCAGGTAT +CCAGCAGGAATTTGGAATTCCTTACAACCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAGCTA +AAGAAAATCATAGGACAGATAAGAGATCAAGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAACAATAGACATAAT +AGCAACAGACATACAAACTAGAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTCCGGGTTTAT +TACAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAGTGAAATAAAGGTAGTTCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>F1.FI.FIN9363 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCCGGAAATTTCCTTCAG +AGTAGACC------AGAGCCAACAGCCCC---------------------------------GCC---AG +CAGAGAGCCTCGGGATCAGAGAAGAGGT---------------AACTCCCTCTCCGAGGCAGGAGCAGAA +AGAAGAGGG---------ACAGTACCCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCACAATAAAAATAGGGGGACAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAAA +CAGTATGATCACATACTCATAGAAATTTGTGGACATAAAGCCATAGGTACAGTGTTAGTAGGACCTACGC +CTGTCAACATAGTTGGAAGAAATATGTTGACTCAAATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAACTGAAACCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAACAGAGATATGT------ACAGATATGGAAAAAGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +GAAGTTAGTAGATTTCAGAGAACTCAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAAGATTTCAGGAAGTACACTGCATTCACCATACCTAGTGTCAACAATGAGACACCAGGAAT +TAGGTACCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAACAAGAAATCCAGACATAGTTATCTACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGTTAAGAGAACATCTACTGAAATG +GGGATTTACTACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTCTGGATGGGGTATGAACTC +CATCCTGATAAATGGACAGTGCAGCCTATACAATTGCCAGACAAGGACAGCTGGACTGTCGATGATATAC +AGAAATTAGTAGGAAAACTAAACTGGGCAAGTCANATTTATCCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGAGGAGCCAAGGCACTAACAGACATGGTGCCACTGACTGCAGAAGCAAATTTAGAATTGGCA +GAAAATAGGGAGATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGACT +TAATACCAAAATTACAGAAACAAGGGCAAGGACAATGGACATATCAAATTTACCGAGAGCCATTTAAAAA +TCTAAAAACAGGAAAGTATGCAAAAATGAGGTCTGCCCACACTAATGATGTAAAACAATTAACAGAAGCA +GTGCAAAAGATAGCTCTAGAAAGCATAGTAATATGGGGAAAGACT---CCTAAGTTTAAACTACCTATTC +TAAAAGAGACATGGGATACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCCCCTCTAGTAAAACTATGGTATCAATTAGAAACAGAGCCCATAGCAGGAGCAGAAACATTC +TATGTGGATGGGGCATCTAATAGAGAGACCAAAAAAGGAAAAGCAGGATATGTTACTGATAGAGGAAGAC +AAAAGGTTGTCTCCCTAACTGAGACCACAAATCAGAAGGCTGAGTTACAAGCAATTCATTTAGCTTTACA +GGATTCAGGATCAGAAGTGAACATAGTAACAGATTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATACAAAAGGAAAAGATCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +AAAAATACTGTTTTTAGATGGGATAGATAAGGCACAAGAG---GAACATGAAAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AGTGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCTAGTGGATACATAGAAGCAGAAGTT 
+ATCCCAGCAGAAACAGGACAGGACACAGCCTACTTCATACTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +TGATACATACAGACAATGGCAGCAATTTCACCAGTGCCGCGGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAGCAGGAATTTGGAATTCCCTATAACCCCCAAAGTCAAGGAGTAGTAGAATCAATGAATAAAGAGCTA +AAGAAGATCATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAGTTACA---AAAATTCAAAATTTCCGGGTTTAT +TACAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGACGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>F1.FR.MP411 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCCGGAAATTTTCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------GCC---AG +CGGAGAGCTTCGGGTTCAAAGAGGAAAT---------------AACCCCCTCTCCGAAGCAGGAGCAGAA +GGACGAGGG---ACAGGGACTGTATCCTCCCTTAGCCTCCCTCAAATCACTTTTTGGCAGCGACCCTTAG +TCACCATAAGAGTGGGGGGACAGCTAAGGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAGATTTGCCAGGAAAATGGAAACCAAAAATAATAGGGGGAATTGGAGGTTTTATCAAAGTAAAA +CAGTATGATCAAATAACCATAGATATTTGTGGACACAAGGCTATAGGTACAGTGTTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGGAATATGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTGAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAGGAAAAAATAAAAGCACTAACAGAAATATGT------ACAGATATGGAAAAAGAAGGAAAGATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTTAGAGAACTTAATAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTAGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAGGAGTTCAGGAAGTACACTGCATTCACCATACCTAGTCTCAACAATGAGACACCAGGAAT +CAGATACCAGTACAATGTGCTTCCACAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACA +AAAATTTTAGAGCCCTTTAGAGCAAAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAACTAGGGCAGCATAGGATGAAAATAGAGGAGTTAAGGGAACATCTACTGAAATG +GGGCTTTACTACACCAGAC---AAGAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTC 
+CATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAAAAGGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTAGGAAAATTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAATAAAGCAATTATGTAA +ACTCCTTAGGGGAGCCAAGGCACTAACAGACATAGTGCCACTGACTGAAGAAGCAGAGTTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAGAACCGGTACATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCAGAACTGCAGAAACAAGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +CCTAAAAACAGGAAAGTATGCAAAAACAAGGTCTGCCCACACTAATGATATAAAAGAATTAACAGATGCA +GTGCAAAAGGTAGCTAGGGAATGCATAGTAATATGGGGAAAGAGT---CCTAAGTTTAGGCTACCCATAC +AAAAGGAAACATGGGAGGCATGGTGGACAGATTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCCCCTCTAGTAAAACTATGGTATCAGTTAGAAACAGAGCCCATCATAGGAGCAGAAACCTTC +TATGTAGATGGGGCATCTAATAGAGAGACCAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAGGCTGTCATCCTAACTGAAACCACAAATCAGAAGGCTGAGTTACAAGCAATTCATTTAGCTTTACA +GGATTCAGGATCAGAAGTGAATATAGTAACAGACTCACAGTATGTATTAGGAATTATTCAAGCACAACCA +GATAAGAGTGAATCAGAGATAGTCAATAAAATAATAGAGAAATTAATACAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAGATTAGTCAGTGCTGGAATCAG +GAAAATACTGTTTTTAGATGGGATAGATAAGGCACAAGAA---GAACATGAAAAATATCACAACAACTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCAGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAGGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTAGTAGCAGTCCATGTAGCTAGTGGATATATAGAAGCAGAAGTC +ATTCCAGCAGAAACAGGACAGGAAACAGCCTACTTCATACTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGTAGCAATTTCACCAGTAGTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +ACAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGACAAGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCGGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +ATCAACAGACATACAAACTAGAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTCCGGGTTTAT +TACAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CONSENSUS_F2 
+TTTTTTAGGGAAAAT?TGGCCTTCCAACA?------------GGGGAG---GCC?GGAAATTTTCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------GCC---AG +CAGAGA?CTTCGGGTTCGG?GAGG?GAT---------------A?C?CCCTCCCCGAAGCAGGAGCAGAA +AG?C?AGGA---------ACAGG?TCCTCCCTT??TTTCCCTCAAATCACTCTTTGGCAGCGACC??TAG +TC?CAATAA?AGTAGG?GGGCAACTAAGGGAGGCT?TATTAGATACAGGGGCAGATGATACAGTATTAGA +AGA?ATAAATTT?CCAGG?AAATGGAAACCAAAAATGATAGGGGGAAT?GG?GGTTTTAT?AAAGTAAGA +CAGTATGATCAA?TA?CCATAGAAATTTGTGGACAAAA?GCTATAGGTACAGTATTAGT?GG?CCTACGC +CTGTCAACATAATTGGAAGAAAT?TGTTGACTCAG?TTGGTTGCACT?TAAATTTTCCAAT?AGTCCTAT +TGAAAC?GTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAA?GGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCA?TAACAGAAATCTGT------ACAGAGATGGA?AAAGAAGGAAAAATTTCAA +AAATTGGGCCAGAAAATCCATACAATACTCCAGTATTTGCCATAAA?AA?AAGGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGA?CTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCA +CACCCTGCAGGGTTAAAAAAGAAAA?ATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAA?GAGTTCAGGAA?TACACTGCGTTCACCATACCTAGTATCAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGGTCACCAGCAATATTCCAA?GTAGCATGA?A +AAAATCTTAGAGCCCTTTAGA??A?AAAATCCAGAAATAGTTATCTACCAATA?ATGGATGAT?TGTATG +TAGGGTCTGACTTAGAAATAGGGCAGCATAGG?CAAAAATAGAGGAGTTAAG?GAACATCTATTGA?ATG +GGGATTTACTACACCAGAT---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGA?AAATGGACAGTACAG?CTATACAATTGCCAGA?AAGAGCAGCTGGACTGTCAATGATATAC +AGAAGTTAGT?GGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAGA?TAAA?CACTTATGTA? 
+ACTCCTTAGGGGA?CCAAAGCACTAACAGATGTAGTGCC?CT?ACTGCAGAAGC?GAGTTAGAA?TGGCA +GAGAACAGGGA?ATT---------?TAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAA?GATT +TAATAGCAGAAATACAGAAACAAGG?CA?GA?CAATGGACATATCAAAT?TATCAAGA?CCA?ATAAAAA +T?TGAAAACAGGAAA?TATGCAA?AAGGAAGTCTGCCCACACTAATGATGTAAA?CAATTAACAGAAGTA +GT?CAAAAA?TAGCCACAGAA?GCATAGT?ATATGGGGAAA??TT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAATATGGTGGAC?GAGTATTGGCA?GC?ACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAAACAGAACC?ATA??AGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAACTAGGAAAAGCAGGATAT?TTACTGACAGAGGAAGAC +AAAA?GTTGTC?CCCTAAC?GAGACAACAAATCAGAAGACTGAATTACAAGCAATTCA?TTAGC?TTGCA +GGA?TCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACA?CC? +GATAA?AGTGA?TCAGAG?TAGTCAACCAAATAATAGAGCAATTAATACAAAAGGAAA?GGTCTACCTGT +CATGGGTACCAGCACATAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTA?TGGAATCAG +GAAAGTACTGTTTTT?GATGGGATAGATAA?GCTCAAGAA---GA?CATGA?AAATATCACA?CAATTGG +AGAGCAATGGCTAGTGATTTTAA?CTGCCACCTGTAGTA---GCAAAAGA?ATAGTAGCCA?CTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGA?TGCAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCT?GTAGCAGTCCATGTAGCTAGTGGCTATATAGAAGCAGAAGTT +AT?CCAGCAGAAACAGGACAGGAA?CAGCCTACTTCAT?CTAAAGTTAGCAGGAAGATGGCCAGT?AAAA +TAATACATACAGACAATGGCAGCAATTT?ACCAGT?CTGTGGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAGCAGGAATTTGGA?TTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACA??TAAGAGATCAAGCTGAACATCTTAAGACAGCAGTGCAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGA?ATACAAACTAAAGAATTACAAAAACAAATT?CA---AAAATTCAAAATTTTCGGGTTTAT +TTCAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAA?CTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAATGAAATAAAAGTA?TACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGA?GAT--- +>F2.CM.MP255 +TTTTTTAGGGAAAATTTGGCCTTCCAACAG------------GGGGAG---GCCAGGAAATTTTCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------GCC---AG 
+CAGAGAACTTCGGGTTCGGAGAGGGGAT---------------AACCCCCTCCCCGAAGCAGGAGCAGAA +AGGCGAGGA---------ACAGGCTCCTCCCTTAGTTTCCCTCAAATCACTCTTTGGCAGCGACCCTTAG +TCGCAATAAGAGTAGGGGGGCAACTAAGGGAGGCTTTATTAGATACAGGGGCAGATGATACAGTATTAGA +AGACATAAATTTACCAGGGAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGA +CAGTATGATCAAATACCCATAGAAATTTGTGGACAAAAGGCTATAGGTACAGTATTAGTGGGACCTACGC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACAGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATCTGT------ACAGAGATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCAGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCA +CACCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAGGAGTTCAGGAAATACACTGCGTTCACCATACCTAGTATCAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGGTCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGCAAAAAATCCAGAAATAGTTATCTACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAGCATAGGACAAAAATAGAGGAGTTAAGAGAACATCTATTGAAATG +GGGATTTACTACACCAGAT---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAATTGCCAGAAAAGAGCAGCTGGACTGTCAATGATATAC +AGAAGTTAGTAGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAGAATAAAACACTTATGTAG +ACTCCTTAGGGGAGCCAAAGCACTAACAGATGTAGTGCCACTGACTGCAGAAGCGGAGTTAGAATTGGCA +GAGAACAGGGAGATT---------ATAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAGGATT +TAATAGCAGAAATACAGAAACAAGGGCATGATCAATGGACATATCAAATTTATCAAGAACCATATAAAAA +TTTGAAAACAGGAAAATATGCAAAAAGGAAGTCTGCCCACACTAATGATGTAAAACAATTAACAGAAGTA +GTACAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGATT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAATATGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAAACAGAACCCATAGCAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAACTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAAGTTGTCCCCCTAACCGAGACAACAAATCAGAAGACTGAATTACAAGCAATTCACTTAGCTTTGCA 
+GGACTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAGCCT +GATAAAAGTGAGTCAGAGTTAGTCAACCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACATAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAG +GAAAGTACTGTTTTTAGATGGGATAGATAAGGCTCAAGAA---GAACATGAAAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTA---GCAAAAGAGATAGTAGCCAACTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGATTGCAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCTAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCCTACTTCATCCTAAAGTTAGCAGGAAGATGGCCAGTGAAAA +TAATACATACAGACAATGGCAGCAATTTCACCAGTACTGTGGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAGCAGGAATTTGGAGTTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGATAAGAGATCAAGCTGAACATCTTAAGACAGCAGTGCAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGATATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TTCAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAATGAAATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>F2.CM.MP257 +TTTTTTAGGGAAAATGTGGCCTTCCAACAA------------GGGGAG---GCCCGGAAATTTTCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------GCC---AG +CAGAGAGCTTCGGGTTCGGGGAGGAGAT---------------AGCTCCCTCCCCGAAGCAGGAGCAGAA +AGACAAGGA---------ACAGGTTCCTCCCTTGATTTCCCTCAAATCACTCTTTGGCAGCGACCAGTAG +TCACAATAAAAGTAGGAGGGCAACTAAGGGAGGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATAGGGGGTTTTATCAAAGTAAGA +CAGTATGATCAAGTATCCATAGAAATTTGTGGACAAAAAGCTATAGGTACAGTATTAGTAGGGCCTACGC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGATTGGTTGCACTCTAAATTTTCCAATAAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCACTAACAGAAATCTGT------ACAGAGATGGAGAAAGAAGGAAAAATTTCAA +AAATTGGGCCAGAAAATCCATACAATACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAATGGAG 
+AAAATTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGATTTTTGGGAGGTTCAATTAGGAATACCA +CACCCTGCAGGGTTAAAAAAGAAAAGATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAAGAGTTCAGGAAGTACACTGCGTTCACCATACCTAGTATCAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGGTCACCAGCAATATTCCAAAGTAGCATGATA +AAAATCTTAGAGCCCTTTAGAAAAGAAAATCCAGAAATAGTTATCTACCAATATATGGATGATCTGTATG +TAGGGTCTGACTTAGAAATAGGGCAGCATAGGGCAAAAATAGAGGAGTTAAGGGAACATCTATTGAGATG +GGGATTTACTACACCAGAT---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGATAAATGGACAGTACAGGCTATACAATTGCCAGACAAGAGCAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAGAGTAAAGCACTTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGATGTAGTGCCTCTAACTGCAGAAGCAGAGTTAGAACTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGATT +TAATAGCAGAAATACAGAAACAAGGACACGACCAATGGACATATCAAATCTATCAAGAGCCACATAAAAA +TCTGAAAACAGGAAAGTATGCAAGAAGGAAGTCTGCCCACACTAATGATGTAAAGCAATTAACAGAAGTA +GTGCAAAAAGTAGCCACAGAAGGCATAGTAATATGGGGAAAAGTT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAATATGGTGGACGGAGTATTGGCAAGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAAACAGAACCTATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAACTAGGAAAAGCAGGATATATTACTGACAGAGGAAGAC +AAAAGGTTGTCTCCCTAACTGAGACAACAAATCAGAAGACTGAATTACAAGCAATTCAGTTAGCCTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACACCCA +GATAAGAGTGAATCAGAGATAGTCAACCAAATAATAGAGCAATTAATACAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACATAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTACTGGAATCAG +GAAAGTACTGTTTTTGGATGGGATAGATAAAGCTCAAGAA---GAGCATGAGAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTAGTAGCAGTCCATGTAGCTAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGACAGGAAGCAGCCTACTTCATACTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGCAGCAATTTTACCAGTGCTGTGGTTAAGGCAGCCTGTTGGTGGGCAGGTAT 
+CCAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAAGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTGCAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCA---AAAATTCAAAATTTTCGGGTTTAT +TTCAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAATGAAATAAAAGTAATACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>CONSENSUS_G +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGgAAtTTtCTtCAG +AACAGG?C------AGAgCCAACAGCCCC---------------------------------ACC---cG +CAGAgAgCtTCGGgTTCGGaGAGGAgAT---------------AGCCCCCTCCCCGAAGCagGAGccgAA +GGAaAAGGA------------gctatatCcctta?CtTCCCTCAAATCACTCTTTGGCAgCGACCccTAG +TCAcAGTAAaAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGaGCAGATGA?ACAGTATTaGA +AGaaATAaAtTTACCAGGAAaATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAgTATGATCAAaTACtTATAGAAATTaGTGGAAAAAaGGCTATAGGGACagTATTAGTAGGACCTACAC +CTATcAAcATAATTGGgAGAAATATGTTGACTCAGATTGGTTG?aCTTTAAAtTTTCCaATtAGTCCTAT +TGAAACTGTaCCAGtAAAATTAAAGCCAGGAATG---GATGGcCCAaggGTTAAACAATGGCCAtTGACA +GAAGAgAAAATAAaAGCaTtAACAGAAATTTGT------AaaGAaATGGAAaaGGAAGGAAAAATTTCAA +AaATTGGGCCTGAAAAtCCATA?AAcaCTCCAATATTtGCCATaAaGAAAAA?GACAGTACTAaATGGAG +AAAATTgGTAGATTTcAGAGAgCTcAATAAAAGaACTCAAGACTTCTGGGAgGTCCaATTAGGAATACCT +CAtCC?GcgGGGTTAAAAaagaAAA?atcagtAaCaGTACTAGATGTGGGgGATGCaTAtTTTTCAGTTC +CcTTAGATgAA?acTTTAGAAAgTATACtGCATTcACTATACCTAGTA?AAATAATGAGACACCAGGgaT +TAGATAtCAgTACaATgtgcttCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AaAATcTTAGAGCCcTtTAGAacAaAaAATCCAGAAATgGTGATCTACCAATAcATGGATGATTTATATG +TAGGATCtGACTTAGAAATAGGGCAGCATAGAGCAAAAATAgAgGAgTTAAGAGAACATCTA?TGA?ATG +GGGaTTtACCACACCAGAt---AAaAAACATCAGAAAGAACCTCCATTcCTTTGGATGGGATATGAGCTc +CATCCTGACAAATGGACgGTACAaCCTATACAGCTGCCAgA?AAGGAAa?cTGGACTGTcAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGgATTAAAGTAAaGCA??TATGtAa 
+ACTCcTTAGGGGGGCCAAAGCAcTAACAGACATAGTA?CacTgACaGCaGAAGCAGAAaTGGAgtTGGCA +GAGAACAGGGAaATT---------CTAAaAGAACCTGTACATGGaGTCTATtATGACCCATCAAAAGAat +TAATAGCAGAAGTACA?AAaCAAGggCtAG?cCAATGGACATATCAAaTTTAtCAAGAGCCATAcAAAAA +TCTgAAAACAGGAAAaTATGCAAaAaGGGGGtCTGCCCACACTAATGATGTAAAaCAaTTAACAGAAGTA +GTGCAAAAAATAGCCACAGAG?GCATAgTAATaTGGGGAAAGA?T---CCtAAATTTAAAcTACCtATAc +gAAAAGAAACATGGGAAgTaTGGTGGACAGAgTAtTGGCAGGCCaCcTGGATTCCTGAgTGGGAGTTTGT +CAAtACCCCTCCTcTAGTaAAatTaTGGTATC?gTTAGAAACAGAACCCATACCAGGAGcAGAAACTTAc +TATGTAGATGGGGCAGCTAATAGGGAgACAAAATTAGGAAAGGCAGGAtATGTtACTGAcAAAGGAAaaC +AAAaaATTATTACCCTAACTGAAACAACAAACCAAAAGgCTGAATTACAtGCAATTCAgCTAGCttTGCA +gGACTCAagAtCAGAAGTAAACATAGTAACaGACTCACAGTATGCAtTAGGAATCATTCAAGCACAaCCA +GATAGgAGTGaAtCAGAAtTAGTCAATCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTAcCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTaGAtAAatTAGTCAGtAGTGGAATCAG +GAAAGTA?TATTTtTaGATGGCATAGATAAAGCCCAAGAA---GAgCATGAaAgATATCACAgCAAtTGG +AgAGCAATGGCtAGTGATTTTAATcTGCCACcTaTAGTA---GCAAAAGAAATAGTGGCCAGCTGTgATA +AATGtCAgcTAAAAGGGGAAGCCATGCATGGaCAAGTAGACTGTAGTCCAGGAATATGGCAATTaGATTG +TACACATTTAGAAGGAAAAATTATC?T?GTAGCAGTtCATGTAGCCAGTGGcTATATAGAAGCAGAAGTT +ATcCCAGCAGAAACAGGaCAGGAAACAGCATACTTtATAtTAAAATTAGCAGGAAGGTGGCCAGTaAaAg +TaATACATACAGAcAATGGcagCAATTTcAcCAGTGCTGCAGTAAAGGCAGCaTGTTGGTGGGCAaatAT +cACACAgGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGT?GAATCTATGAATAAgGAATTA +AAGAAAATCATcGG?CAGGTcaGgGATCAAGCTGAACAtCTTAAGACAGCAGTACAgATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGgGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAaaCAAATTAcA---AAAaTTCAAAATTTTCGGGTTTAT +TaCAGGGACAGCAGAGAcCCA?TTTGGAAAGGACCAGCaAAaCTaCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAaGACAATAACGAAATAAAgGTAGTACCAAGAAGAAAAGCAAAgATcaTTAgGGATTATGG +AAAACAGATGGCAGGTGaTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>G.BE.DRCBL +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---CG 
+CAGAGAACTTCGGGTTCGGGGAGGAGAT---------------AGCCCCCTCCCCGAAGCAGGAGCAGAA +GGAAAAGGA------------ACTATATCCTCTATCTTCCCTCAAATCACTCTTTGGCAACGACCAATAG +TCAAAGTAAGAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAGATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTTATAGAAATTAGTGGAAAAAGGGCTATAGGGACAGTATTAGTAGGACCTACAC +CTATCAATATAATTGGGAGAAATATGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTGCCAGTAAAATTAAAGCCAGGAATG---GATGGGCCACGGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------AATGAGATGGAAAAGGAAGGAAAAATTTCAA +AGATTGGGCCTGAAAACCCATATAACACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAGATGGAG +AAAATTGGTAGATTTCAGAGAGCTCAATAAAAGGACTCAAGACTTCTGGGAGGTCCCATTAGGAATACCT +CATCCCGGGGGGTTAAAACAGAAAAGATCAGTAACAGTACTAGATGTGGGGGATGCATATTTTTCAGTTC +CTTTAGATGAAAACTTTAGAAAATATACTGCATTCACTATACCTAGTACAAATAATGAGACACCAGGAAT +TAGATATCAGTAC-AT------CCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAACACAAAATCCAGAAATAGTGATCTACCAATACATGGATGATTTATATG +TAGGATCAGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAAGAGTTAAGAGAACATCTACTGAGATG +GGGATTTACCACACCAGAT---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAGCTC +CATCCTGACAAATGGACAGTACAACCTATACAGCTGCCAAACAAGGAAAACTGGACTGTTAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGCAA +ACTCATTAGGGGGGCCAAAGCATTAACAGACATAGTATCAATGACAGCAGAAGCAGAAATGGAGTTGGCA +GAGAACAGGGAGATT---------CTAAAAGAACCTGTACATGGAGTCTATTATGACCCATCAAAAGACC +TAATAGCAGAAGTACAGAAACAAGGGCTAGGCCAATGGACATATCAAGTTTATCAAGAGCCATATAAAAA +TCTGAAAACAGGAAAATATGCAAAAGGGGGGTCTGCCCACACTAATGATGTAAAGCAATTAACAGAAGTA +GTGCAAAAAATAGCCACAGAGGGCATAATAATATGGGGAAAGATT---CCTAAATTTAAATTACCTATAA +AAAAAGAAACATGGGAAGTGTGGTGGACAGAGTATTGGCAGGCCACTTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCTTTAGTCAAATTATGGTATCAGTTAGAAACAGAACCCATACCAGGAGTAGAAACTTAC +TATGTAGATGGGGCAGCTAATAGGGAGACAAAATTAGGAAAGGCAGGATATGTGACTGACAAAGGAAGAC +AAATTATTATTACCCTAACTGAAACAACAAACCAAAAGGCTGAATTACATGCAATTCAGCTAGCTCTGCA 
+GGACTCACAATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAGCCA +GATAGGAGTGAATCAGAAATAGTCAATCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTGGATAAATTAGTCAGTAGTGGAATCAG +GAAAGTACTATTTTTAGATGGCATAGATAAAGCCCAAGAA---GAGCATGAAAGATATCACAGCAATTGG +AAAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTGGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTTATATTAAAATTAGCAGGAAGGTGGCCAGTAAAAA +TAATACATACAGACAATGGCAGCAATTTTACCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAGCAT +CACACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGACAGGTCAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAACGAAATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>G.FI.HH8793 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---CG +CAGAGAGCTTCGGGTTCGGAGAGGAAAT---------------AGCCCCCTCCCCGAAGCCAGAGCCGAA +GGAAAAGGA------------GACACATCCCTTAGCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTAG +TCACAGTAAAAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGGGCAGATGATACAGTATTGGA +AGACATAAAGTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTTATAGAAATTAGTGGAAAAAAGGCTATAGGGACAGTATTAGTAGGACCTACAC +CTATCAACATAATTGGAAGAAATATGTTGACTCAGATTGGTTGTWCTTTAAATTTTCCGATTAGTCCTAT +TGAAACTGTACCAGYAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCACTGACA +GAAGAGAAAATAAMAGCCTYAACAGAAATTTGT------ACAGAAATGGAAAGGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATMCTCCAATATTTGCCATCAGGAAAAAGGACAGTACTAAATGGAG 
+AAAATTAGTAGATTTTAGAGAGCTCAATAAAAGAACTCAAGACTTCTGGGAAGTCCAATTAGGAATACCT +CATCCTGCGGGGTTAAAAAGAGAAAATCAGTCAGCAGTACTAGATGTGGGGGATGCCTATTTTTCAGTTC +CCTTAGATGAAAGTTTTAGAAAGTATACAGCATTCACTATACCTAGTACAAATAATGAGACACCAGGGAT +TAGATACCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAATAAAAAATCCAGAAATGGTGATCTACCAATATATGGATGATTTATATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAAAGGAATTAAGAGAACATCTATTGAGATG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTT +CATCCTGACAAATGGACGGTACAGCCTATACAGCTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAACTATGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTACCACTGACTGCGGAAGCAGAATTGGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGGGTCTATTATGACCCATCAAAAGAGT +TAATAGCAGAAGTACAAAAACAAGAACAAGGACAATGGACATATCAAATTTACCAAGAGCCATACAAAAA +TCTGAAAACAGGAAAATATGCAACAAGGGGGACTGCCCACACTAATGATGTAAAACAATTAACAGAAGTA +GTGCAAAAAATAGCCACAGAGAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCTATAC +GAAAAGAAACATGGGAAGTATGGTGGACAGAATATTGGCAGGCCACCTGGATTCCTGATTGGGAGTTTGT +CAACACCCCTCCTCTAGTAAAATTGTGGTATCGGTTAGAAACAGAACCCATACCAGGAGCAGAAACTTAT +TATGTAGATGGGGCAGCTAATAGGGAAACAAAATTAGGAAAGGCAGGATATGTTACTGATAAAGGAAAAC +AAAAAATTATTACCCTAACTGAAACAACAAACCAAAAGACTGAATTACAAGCAATTCAGCTAGCGTTGCA +GGACTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAGAAGTGAATCAGAACTAGTCAATCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTATCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCAGTGGAATCAG +GAAAGTATTATTTTTAGATGGCATAGATAAAGCCCAAGAA---GATCATGAAAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACATATAGTA---GCAAAAGAAATAGTGGCCAGCTGTCATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGTCAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTACATGTAGCCAGTGGGTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTCATACTAAAATTAGCAGGAAGGTGGCCAGTGAAAG +TAATACATACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTAAAGGCAGCGTGTTGGTGGGCAGATAT 
+TACACAAGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATCGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAACCCAAATTACA---AAACTTCAAAATTTTCGGGTTTAT +TTCAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCAAAACTGCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAACGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>G.NG.92NG083 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGAAACTTTCTCCAG +AACAGGAC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGATTCGGAGAGGAGAT---------------AGCCCCCTCCCCGAAGCAGGAGCCAAA +GGAGAAGGA------------GCTATATCCCTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTAG +TCACAGTAAAAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGACACAGTATTAGA +AGGAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTTATAGAAATTGGTGGAAAAAAGGCTATAGGGACAGTATTAGTAGGACCTACAC +CTATTAACATAATTGGGAGAAATATGTTGACTCAGATTGGTTGTACTTTAAACTTTCCAATAAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------AAAGACATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTCGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTGGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAGGTCCAATTAGGAATACCT +CACCCCGCGGGGTTAAAAAAGAAAAGATCAGTAACGGTACTAGATGTGGGAGATGCATACTTTTCAGTTC +CCTTAGATAAAGACTTTAGAAAGTATACTGCATTTACTATACCTAGTATAAATAATGAGACACCAGGGAT +TAGATATCAATACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AAAATTTTAGAGCCTTCTAGAACAAAAAATCCAGAAATGGTGATCTACCAATACATGGATGATTTATATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTACTGAAATG +GGGATTGACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTC +CATCCTGACAAATGGACGGTACAACCTATACAGCTGCCAGAAAAGGAAGATTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCACCTATGTAG 
+ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTACCCCTAACGGCAGAAGCAGAAATGGAGCTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGAGTCTATCATGACCCATCAAAAGAAT +TAATAGCAGAAGTACAGAAGCAAGGGCCAGACCAATGGACATATCAAATTTATCAAGAGCCATACAAAAA +TCTAAAAACAGGAAAATATGCAAAAAGGGGGTCTGCCCACACTAATGATGTAAAACAATTAACAGAAGTA +GTGCAAAAAATAGCCACAGAGGGCATAGTAATCTGGGGAAAGATT---CCTAAATTTAAACTACCTATAC +GAAAAGAAACATGGGAAGTATGGTGGACAGAGTACTGGCAGGCCGCCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAACTATGGTATCAATTAGAAACAGAACCCATACCAGGAGCAGAAACTTAC +TATGTAGATGGGGCAGCTAATAGGGAGACAAAATTAGGAAAGGCAGGACATGTTACTGACAAAGGAAAAC +AAAAAATTATTACCCTAACTGAAACAACAAACCAAAAGGCTGAATTACATGCAATTCAACTAGCTTTGCA +GGACTCAAGACCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAGGAGTGGATCAGAATTAGTCAATCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAGCTAGTCAGTAGTGGAATCAG +GAAAGTATTATTTTTGGATGGCATAGATAAAGCCCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAACTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCATAGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGGCAGGAAACAGCATACTTTATATTAAAATTAGCAGGAAGGTGGCCAGTAAAAG +TGATACATACAGACAATGGTCCCAATTTCATCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAATAT +CACACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATCGGACAGGTTGGAGATCAAGCTGAACATCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAACGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCCTTAAGGATTATGG +AAAACAGATGGCAGGTGGTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>G.SE.SE6165 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AACAGGAC------AGAACCAACAGCCCC---------------------------------ACC---TG 
+CAGAAAGCCTCGGGTTCGGAGAGGAGAT---------------AGCCCCCTCCCCGAAGCAGGAGATGAA +GGAAAAGGA------------------GCTATATCCCTCCCTCAAATCACTCTTTGGCAGCGACCCCTAG +TCACAGTAAAAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGACACAGTATTAGA +AGAAATAAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAGTACCTATAGAAATTAGTGGAAAAAAGGCTATAGGGACGATATTAGTAGGACCTACAC +CTATCAACATAATTGGGAGAAATATGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGAAATGGAAGAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAACACTCCAATATTTGCCATAAAGAAAAAGGACAGTACTAAATGGAG +AAAATTGGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGACTTCTGGGAGGTCCAATTAGGAATACCT +CATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATGAAGACTTTAGAAAGTATACTGCATTCACTATACCTAGTATAAATAATGAGACACCAGGGGT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AGAATCTTAGAGCCCTTTAGAGCAAATAATCCAGAAATGGTGATCTACCAATACATGGATGATTTATATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTATTGAAATG +GGGGTTTACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTC +CATCCTGACAAATGGACGGTACAACCTATACAGCTGCCAGACAAGGAAAGCTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAACGCACTTATGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTATCACTGACAGCAGAAGCAGAAATGGAGTTGGCA +GAGAACAGGGAAATT---------CTAAGAGAACCTGTACATGGAGTCTATTATGACCCATCAAAAGAAT +TAATAGCAGAAGTACAAAAACAAGGGCTAGACCAATGGACATATCAAATTTATCAAGAGCCATACAAAAA +TCTGAAAACAGGAAAGTATGCAAAAAGGGGGTCTGCCCACACTAATGATGTAAAACAGTTAACAGAAGTA +GTGCAAAAAATAGCCACAGAGAGCATAGTAATATGGGGAAAGACT---CCCAAATTTAAACTACCCATAC +GAAAAGAAACATGGGAAATATGGTGGACAGACTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAGTTATGGTATCGGTTAGAAACAGAACCCATACCAGGAGCAGAAACTTAC +TATGTAGATGGGGCAGCTAATAGGGAGACAAAATTAGGAAAGGCAGGATATGTTACTGACAAAGGAAAGC +AAAAAATTATTACCCTAACTGAAACAACAAACCAAAAGGCTGAATTACAGGCAATTCAGCTAGCTTTGCA 
+AGACTCAAGATCAGAAGTAAACATAGTAACGGACTCACAGTATGCACTAGGAATCATTCAAGCACAACCA +GATAGGAGTGAAGCAGAATTAGTCAATCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGACAAATTAGTCAGTAGTGGAATCAG +GAAAGTACTATTTCTAGATGGCATAGATAAAGCCCAAGAA---GAGCATGAGAGATATCACAACAACTGG +AGAGCAATGGCCAGTGATTTTAATTTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCATAGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTATATTAAAATTAGCAGGAAGGTGGCCAGTAACAG +TAATACATACAGATAATGGCAGCAATTTCACCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAATAT +CACACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAGGAATTA +AAGAAAATCATCGGGCAGGTCAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGAGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAGTTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAACGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CONSENSUS_H +TTTTTTAGGGAAAATtTGGCCTTCCAGCAA------------aGGgAG---GCCAGGaAATTTtCtCCAG +AgcAGGCc------AGAgCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTCGGaGAGGAGAT---------------gACCcCCTCtCCGAaGCAGGAGCtGaa +ggA??AggA------------???ACCTCCcTTagCTTCCCTCAgATCACTCTTTGGCAaCGACCccTtG +TcACAGTAAAAATAGAGGGACAGTTaAgGGAaGCTCTATTAGAtACAGGAGCAGATGATACAGTATTAGA +AGA?ATAAATTTGCcaGGAAaATGGAAACCAAAAATGATAGGgGGAATTGGAGGTTTTATCAAAGTAAGA +CAgTATGAgCAAGTAGCCATAGAAATcTgTGGAAAAAAGGCTATAGGtACAGTATTAGTAGGACCTACAC +CTGTCAATATAATTGGAAGgAATATATTGACTCAAATtGGTTGCACCTTAAATTTtCCAATTAGTCCTAT +TGAAACTGTACCAGTAAaATTAAAgCCAGGAATG---GATGGCCCAAaGGTTAAACAATGGCCAtTGACA +GAAGAaAAAATAAAAGCATTAACaGAAATTTGT------ataGAaATGGAAAAgGAAGGaAAAATtTCAA +aAATAGGGCCtGAGAATCCATACAaCACTCCAATATTTGCCATAAAAAAGAAGgAcAGTACTAaATGGAG 
+AAAATTAGTGGATTTCAGAGAACTcAATAAAAGAACTCAAGACTTCTGGGAAGTtCAGTTAGGAATACCA +CAtCCAGCAGGGTTAAAAAAGAAAAAaTCAGTATCAGTACTGGATGTGGGGGaTGCATATTTTTCAGTCC +CTTTAgATaAAGAcTTCAGgAAGTATACTGCATTcACCATACCTAGTAtAAACAATGAGACACCAGGGAT +TAGATATCAGTATAATGTGCTTCCACAGGGATGGAAAGGATCaCCAGCAATATTcCAGAGTAGCATGACA +AAaATCTTAGaGCCCTTTAGAaAACAAAATCCTGAAaTgaTTATTTACCAATAcATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGgCAACATAGAGcaAAAATAGAgGAGTTAAGAGCTCATTTGTTGAggTG +GGGATTtACCACACCAGAC---aAAAAaCATCAGAAAGAACCCCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTaCAGcCTGTAAAAcTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAAtTGGGCAAGTCAGATTTAtCCAgggATTAAAGTAAaGCAACTATGTAA +aCTCCTTAGGGGGGCCAAAGCAcTAACAGAcaTAgTACCACTGACaAAAGAGGCAGAATTGGAATTGGCA +GAAAACAGGGAGATT---------CTaAgAGAACCAgTACATGGAGtATATTATGATCCATCAAAAGAcT +TAATAGCAGAAATACaGAAGCAAGGGCcAGaCCAaTGGACATATCAAATTTATCAaGAGCCATTcAAAAA +TCTgAAGACAGGAAAATATGCAAAAATGAGaacTGCCCACACTAaTGATgTAAAaCAATTAACAGAAGcA +GTgCAAAAaATAgCTACAGAAAGCATAGTAATATGGGGAAAAATT---CCTAAATTTAgAtTACCTATAC +AAAAAGAAACATGGGAGACaTGGTGGACAGAGcATTGGCAAGCCACATGGATTCCTGAgTGgGAGTTTGT +TAACACCCCTCATCTAGTaAAATTATGGTATCAGTTAGAaaCAGAGCCCATAGcAGGAGCAGAAACTTAC +TATgTAGATGGGGCAGCTAAtAGGGAAACTAAAaTaGGaAAAGCAGGATATGTCACTGAcAgAGGAAAaC +AAAAagTTGTttCCCTAACgGAAACaACAAATCAGAAgACTGAATTACAAGCAATTTATCTAGCTTTGCA +AGAtTCAGGGttAGAAGTaAACATAGTGACAGAtTCACAgTATGCAtTAGGAATCATTCAaGCACAACCt +GATAAGAGTGAaTCAGAgtTAGTTAATCAAATAATAGAgGAATTAATAAAgAAGGAAAAGgTCTACcTGT +CATGGGTACCAGCACACAAAGGaATTGGAGGaAATGAACAAGTAGATAAATTAGTTAGTTCTGGAaTcAG +AAAAGTaCTATTTCTAGATGGGATAGAtAAAGCTCAAGaA---gaaCATGAAAggTATCAcAacAATTGG +AGAGCAaTGGCtAGTGAtTTTAATCTgCCACCTATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGcTAAAAGGgGAAGCCATGCATGGACAAGTaGACTGTAGCCCAGGaATATGGCAATTAGATTG +tACACAtTTaGAAGGAaAAgTTATTCTGGTAGCAGTCCAtGTAGCCAGTGGCTATATAgAAgCAGAAGTc +ATCCCAGCAGAAACAGGAcAGGAAACAGCATAcTTtaTaTTGAAACTAGCAgGCAGATGGCCAGTAAAAa +TgATACATACAGACAATGGCAgCAAtTTCACaAGTgCTGCGGTTAAGGCAGCCTGTTGGTGGGCAGATAT 
+CCAaCAGGAaTTTGGaATTCCCTACAATCCCCAAAGtCAgGGAGTAGTAGAATCTATGAATAAaGAAtTA +AAgAAGATCATAGGGCAGGTAAGAGACCAAGCtGAACACCTTAgGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAgGGGAAAGAATAATAGACATAAT +AGCAACAGACATACaAACTAAAGAAtTACAAAAACAAATTTCA---aA?ATTCAAAAATTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTcCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAaAGGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAgGTAGACAGGATGAGGAT--- +>H.BE.VI991 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------GGGAAG---GCCAGGGAATTTCCCCCAG +AAGAGGCT------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTCGGAGAGGAGAT---------------CACCCCCTCTCCGAGGCAGGAGCTGAA +AGAACAGGA---------------ACCTCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCAATAG +TCACAGTAAAAATAGAGGGACAGTTGAAGGAGGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGAGCAAGTAGCCATAGAAATTTTTGGAAAAAAGGCTATAGGAACAGTATTAGTAGGACCTACAC +CTGTCAATATAATTGGAAGGAATATATTGACTCAAATGGGTTGCACCTTAAATTTGCCAATTAGTCCTAT +TGAAACTGTACCAGTAACATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCACTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------TTAGAAATGGAAAAGGAAGGTAAAATTTCAA +AAATAGGGCCCGAGAATCCATACAACACTCCAATATTTGCCATAAAAAAGAAGAACAGTACTAGATGGAG +AAAATTAGTGGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTACAGTTAGGAATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAGTCAGTATCAGTACTGGATGTGGGGGGTGCATATTTTTCAGTCC +CTTTACATGAAGACTTCAGGAAGTATACTGCATTCACCATACCTAGTACAAACAATGAGACACCAGGGAT +TAGATATCAGTATAATGTGCTTCCACAGGGATGGAAAGGATCCCCAGCAATATTCCAGAGTAGCATGACA +AAGATCTTAGAGCCCTTTAGAAAACAAAATCCTGAAGTGATTATTTACCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGACAACATAGAGAAAAAATAGAAGAGTTAAGAGCTCATTTGTTGAGGTG +GGGATTCACCACACCAGAC---CAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTACAGCCTGTAAAATTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAACTATGTAA 
+WCTCCTTAGGGGGGCCAAAGCACTAACAGAAATAGTACCACTGACTAAAGAGGCAGAATTGGAATTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGCATATTATGATCCATCAAAAGAAT +TAATAGCAGAAATACAGAAGCAAGGGCCAGACCAGTGGACATATCAAATTTATCAAGAGCCATTCAAAAA +TCTGAAGACAGGAAAATATGCAAAAATGAGGTCTGCCCACACTAATGATGTAAAGCAATTAACAGAAGTA +GTACAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAAATT---CCTAAATTTAGATTACCTATAC +AAAAAGAAACATGGGAGACATGGTGGACAGAGCATTGGCAAGCCACATGGATTCCTGAGTGGGAGTTTGT +TAACACCCCTCATCTAGTGAAATTATGGTATCAGTTAGAGACAGAGCCCATAGAAGGAGCAGAAACTTAC +TATGTAGATGGGGCAGCTAATAGGGAAACTAAAATGGGGAAAGCAGGATATGTCACTGACAGAGGAAAAC +AAAAAATTGTTTCCCTAACGGAAACAACAAATCAGAAAACTGAATTACAAGCAATTTATCTAGCTTTGCA +AGAGTCAGGGCCAGAAGTAAACATAGTGACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCT +GATAAGAGTGAATCAGAACTAGTTAATCAAATAATAGAGGAATTAATAAAAAAGGAAAAGTTCTACTTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTTCTGGAATTAG +AAAAGTACTATTTCTAGATGGGATAGATAAAGCTCAAGTA---CAGCATGAAAAATATCACAGTAATTGG +AGAGCAATGGCCAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGCATATGGCAATTAGATTG +TACACACTTAGAAGGAAAAATTATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTTATATTGAAACTAGCAGGCAGATGGCCAGTAAAAA +TGATACATACAGACAATGGCAGCAACTTCACAAGTGCTGCGGTTAAGGCAGCCTGTTGGTGGGCAGATAT +CCACCAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAGGGAGTAGTAGAATCTATGAATAAAGAACTA +AAGAAGATCATAGGGCAGGTAAGAGACCAAGCTGAACACCTTAGGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCACGGGAAAGAATAATAGACATAAT +AGCAACAGACATACCAACTAAAGAACTACAAAAACAAATTTCA---CAAATTCAAAAATTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAAAGGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>H.BE.VI997 +TTTTTTAGGGAAAATCTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCTCCAG +AGCAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---AG 
+CAGAGAGCTTCGGGTTCGGGGAGGAGAT---------------GACCTCCTCCCCGAAGCAGGAGCTGAA +GGACAAGGA---------------ACCTCCCTTTGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGAGGGACAGTTAAGGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCTAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAGTAGCCATAGAAATCTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAATATAATTGGAAGAAATATATTGACTCAAATTGGTTGCACCTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAACCAGGAATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------ATGGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATAGGGCCTGAGAATCCATACAACACTCCAATATTTGCCATAAAAAAGAAGGACAGTACTAAATGGAG +AAAATTAGTGGATTTCAGAGAACTCAATAAAAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAATCAGTATCAGTACTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCTGAAATAATTATTTACCAATATATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCGAAAATAGAGGAGTTAAGAGCTCATTTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTGTAAAACTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAACTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAACTATGTAA +ACTCCTTAGGGGGGCCAAAGCACTAACAGACGTAGTACCACTGACAAAAGAGGCAGAATTGGAATTGGCA +GAAAACAGGGAGATT---------CTAAGAGAACCAGTACATGGAGTATATTATGATCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAAGGGCCAGACCAATGGACATATCAAATTTATCAAGAGCCATTCAAAAA +TCTAAAGACAGGAAAATATGCAAAAATGAGAAATGCCCACACTAGTGATGTAAAACAATTAACAGAAGCA +GTGCAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAAATT---CCTAAATTTAAATTACCTATAC +AAAAAGAAACATGGGAGACATGGTGGACAGAGCATTGGCAAGCCACATGGATTCCTGAGTGAGAGTTTGT +TAACACCCCTCATCTAGTAAAATTATGGTATCAGTTAGAAGCAGAGCCCATAGCAGGAGCAGAAACTTAC +TATGTAGATGGGGCAGCTAACAGGGAAACTAAAATAGGAAAAGCAGGATATGTCACTGACAAAGGAAAAC +AAAAGGTTGTTGCCCTAACAGAAACTACAAATCAGAAGACTGAATTACAAGCAATTTATCTAGCTTTGCA 
+AGATTCAGGGTTAGAAGTAAACATAGTGACAGATTCACAGTATGCATTAGGAATCATTCAAGCACAACCT +GATAAGAGTGAGTCAGAGTTAGTTAATCAAATAATAGAAGAATTAATAAAGAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGGAATGAACAAGTAGATAAATTAGTTAGTTCTGGAATCAG +AAAAGTACTATTTCTAGATGGGATAGACAAAGCTCAAGAA---GCACATGAAAGGTATCACAACAATTGG +AGAGCAATGGCTAGTGAGTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTGGACTGTAGCCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGTTATTCTGGTAGCAGTCCACGTAGCCAGTGGCTATATASAACCAGAAGTC +ATCCCAGCAGAAACAGGACAGGAAACAGCATATTTTATATTGAAACTAGCAGGCAGATGGCCAGTAAAAA +TGATACATACAGACAATGGCACCAATTTCACAAGTACTGCGGTTAAGGCAGCCTGTTGGTGGGCAGATAT +CCAACAGGACTTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAATTA +AAAAAGATCATAGGGCAGGTAAGAGACCAAGCTGAACACCTTAGGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCA---AATATTCAAAAATTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAAAGGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>H.CF.90CF056 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCTCCAG +AGCAGGCC------AGAACCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTCGGAGAGGAGAT---------------GACCCCCTCTCCGAAGCAGGAGCAGCT +GAAGGACAA------------GGAACCTCCCTTAGCTTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TTACAGTAAAAATAGAGGGACAGTTAAGGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAGATAAATTTGCCGGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGAGCAAGTAGCCATAGAAATCTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAATATAATTGGAAGGAATATATTGACTCAAATTGGTTGCACCTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACGGAAATTTGT------ACAGAGATGGAAAAAGAAGGAAAAATCTCAA +GAATAGGGCCTGAGAATCCATACAGCACTCCAATATTTGCCATAAAAAAGAAGGATAGTACTAAATGGAG 
+AAAATTAGTGGATTTCAGAGAACTCAATAAAAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTATCAGTACTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTAGATAAAGAATTCAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGCGCCCTTTAGAGAACAAAATCCTGAAATGGTTATTTACCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGCTCATTTGTTGAAATG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTACAGACTGTAAAACTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAAATATTAAAGTAAAGCAACTATGTAA +ACTCCTTAGGGGGGCCAAAGCATTAACAGACATAATACCACTGACAAAAGAGGCAGAATTGGAATTGGCA +GAAAACAGGGAGATT---------CTGAGAGAACCAATACATGGAGTATATTATGATCCATCAAAAGACT +TAATAGCAGAAATACGGAAGCAAGGGCAAGGCCAATGGACATATCAAATTTATCAGGAGCCATTTAAAAA +TCTGAAGACAGGAAAATATGCAAAAATGAGAACTGCCCACACTAATGATATAAAACAATTAACAGAAGCA +GTGCAAAAGATATCTACAGAAAGCATAGTAATATGGGGAAAAATT---CCTAAATTTAGACTACCTATAC +AAAAAGAAACATGGGAGACCTGGTGGACAGAGTATTGGCAAGCCACATGGATTCCTGAATGGGAGTTTGT +TAACACCCCTCATCTAGTAAAATTATGGTATCAGTTAGAAACAGAGCCCATAGCAGGAGCAGAAACTTAC +TATATAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTCACTGATAGAGGAAAGC +AAAAAGTTGTCTCCCTAACGGAAACAACAAATCAGAAGACTGAATTACAAGCAATTTATCTAGCTTTGCA +AGATTCAGGGTTAGAAGTGAACATAGTGACAGATTCACAGTATGCACTAGGAATCATTCAAGCACAACCC +GATAAGAGTGAATCAGAGTTAGTTAATCAAATAATAGAGGAATTAATAAAGAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTTCTGGAGTCAG +AAAAGTGCTATTTCTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGGTATCATAACAATTGG +AGAGCAGTGGCTAGTGATTTTAATCTACCACCTATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGAATATGGCAATTAGATTG +CACACATTTGGAAGGACAAGTTATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTC +ATCCCAGCAGAAACAGGAAAGGAAACAGCATACTTCCTGTTGAAACTAGCAAGCAGATGGCCAGTAAAAG +TAATACATACAGACAATGGCAGCAATTTCACGAGTGCTGCGGTTAAGGCAGCCTGTTGGTGGGCAGATAT 
+CCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAGATCATAGGGCAGGTAAGAGACCAAGCAGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCA---AACATTCAAAAATTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAGAGGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>CONSENSUS_J +TTTTTTAGGGAAGATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGGAA?TTTCTCCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCCTCGGG?TCGGAGAGGAGAT------------------CCCCTCCCCGAAACAGGAGCCGAA +GGACAAGGA------------ACTGTATCCTCTAACTTCCCTCA?ATCACTCTTTGGCAGCGACCCCTTG +TCACAATAAGAATAGGGGGGCAGCT?AGGGAAGCTCTATTAGATACAGGAGCAGA?GATACAGTATTAGA +AGA?ATAGA?TTGCCA?GAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGT?AGA +CAGTATAA?GAGGTACC?ATAGAAATTGAGGGAAAAAAGGCTATAGGTACAGT?TTAATAGGACCTACAC +CTGTCAACATAATTGGAAG?AACATGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAATTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACACAAATTTGT------GCAGAA?TGGAAGAGGA?GGAAAAATTTCAA +GA?TTGGGCCTGAAAATCCATATAACAC?CCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTTAGAGAACTCAATAAAAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTATATGAAGATTTCAGGAA?TATACTGCATTCACTATACCTAGTATAAACAATGAGACACCAGGGAT +CAGATATCAGTACAACGTGCTACCACAGGGATGGAAAGGATC?CCAGCAATATTTCAGTGTAGCATGACA +AAAATCTTAAAACCTTTTAGAGAAAGAAACCCAGAAATAGTTATCTACCAGTACATGGATGACTTGTATG +TGGGATCTGACTTGGAAATAGAACAGCATAGAAGAAAAATAAAGGAGCTGAGGGAACATCTATTGAAGTG +GGGATTT??CACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAGCT? 
+CATCCTGACAAATGGACAGT?CAACCTATACA?CTGCCAGAAAAAGAAGA?TGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCA?ATTTATCCAGGAATTAAA?TAAAG?AACTATGTAA +ACTC?TTA??GGGGCTAAAGCACTAACAGACATAGTACCATTGACTAGAGAAGCAGAATTGGAACTGGCA +GAAAACAA?GAGATT---------CTAAAAGAACCAGTACATGGGGTATATTATGAC?CAGCAA?AGAAT +TAATAGCAGAAGTGCAGAAACAAGG?CTGGACCAATGGACATATCAAATTTATCAGGAGCCATTTAAAAA +CCTGAAAACAGGGAAATATGCAAAAAGGAGGAGTGCCCACACTAATGATGTAAA?CAATTA?CA?AAGTG +GTGCAAAAAATAGCCTTGGAAGCCATAGT?ATATGGGGAAAAACT---CCTAAATTTAGACTACCCATAC +AAA??GAAACATGGGAGACATGGTGGACAGACTA?TGGCAGGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCCCCTCTAGTAAA?TTATGGTACCAATTAGAAAAGGAACCCATAATGGGAGCAGAAACTTTC +TATGTAGATGGGGCATCTAACAGGGA?ACTAAA??AGGAAAGGCAGGGTATGTTACTGACAAAGGAAGAC +AGAAAGTA?TTACCCTAACTGACACAACAAATCAGAAGACTGAACTACA?GCCATTTATTTAGCTTTAC? +GGATTCAGGG?TAGAAGTAAACATAGTAACAGATTCACAATATGCATTGGGGATTAT?CAAGCACAACCA +GATAAGAGTGAATCAGAATTAGTCAATCAAATAATAGAGGAGTTAATAAAGAAGGAAAAGGTCTACCTGT +CGTGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACA?GTAGATAAATTAGTCAGTTCTGGAATCAG +GAAAGTGCTGTTTCTAGATGGGATAGATAAAGCTCAAGAA---GA?CATGAAAAATATCATAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCT?CCACCTGTAGTA---GCAAAAGA?ATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAGGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACAT?TAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAGCAGCATTTTTTATATTAAAATTAGCAGGC?GATGGCCAGTAAAAG +?AATACATACAGA?AATGGCAGCAACTTCACCAGTGGTGCTGTGAAGGCAGCCTGTTGGTGGGCAGATAT +CAA?CAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGAGAACAAGCTGAACACCTTAAGACAGCAGTACA?ATGGCAGTATTCA +TACACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTA?AGAATTACAAAAACAAAT?ACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACT?C?CTGGAAAGGTGAAGGGGCA---G +TAGTAATACA?GACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>J.SE.SE9173 
+TTTTTTAGGGAAGATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGGAATTTTCTCCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCCTCGGGTTCGGAGAGGAGAT------------------CCCCTCCCCGAAACAGGAGCCGAA +GGACAAGGA------------ACTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAGCGACCCCTTG +TCACAATAAGAATAGGGGGGCAGCTAAGGGAAGCTCTATTAGATACAGGAGCAGACGATACAGTATTAGA +AGAAATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTGAGA +CAGTATAATGAGGTACCCATAGAAATTGAGGGAAAAAAGGCTATAGGTACAGTATTAATAGGACCTACAC +CTGTCAACATAATTGGAAGAAACATGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAATTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACACAAATTTGT------GCAGAACTGGAAGAGGAGGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATATAACACCCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTTAGAGAACTCAATAAAAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTATATGAAGATTTCAGGAAGTATACTGCATTCACTATACCTAGTATAAACAATGAGACACCAGGGAT +CAGATATCAGTACAACGTGCTACCACAGGGATGGAAAGGATCACCAGCAATATTTCAGTGTAGCATGACA +AAAATCTTAAAACCTTTTAGAGAAAGAAACCCAGAAATAGTTATCTACCAGTACATGGATGACTTGTATG +TGGGATCTGACTTGGAAATAGAACAGCATAGAAGAAAAATAAAGGAGCTGAGGGAACATCTATTGAAGTG +GGGATTTTACACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAGCTC +CATCCTGACAAATGGACAGTACAACCTATACAGCTGCCAGAAAAAGAAGATTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATCCAGGAATTAAAATAAAGGAACTATGTAA +ACTCATTAGGGGGGCTAAAGCACTAACAGACATAGTACCATTGACTAGAGAAGCAGAATTGGAACTGGCA +GAAAACAAGGAGATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCAGCAAGAGAAT +TAATAGCAGAAGTGCAGAAACAAGGACTGGACCAATGGACATATCAAATTTATCAGGAGCCATTTAAAAA +CCTGAAAACAGGGAAATATGCAAAAAGGAGGAGTGCCCACACTAATGATGTAAAGCAATTATCACAAGTG +GTGCAAAAAATAGCCTTGGAAGCCATAGTGATATGGGGAAAAACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAGACATGGTGGACAGACTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCCCCTCTAGTAAAATTATGGTACCAATTAGAAAAGGAACCCATAATGGGAGCAGAAACTTTC 
+TATGTAGATGGGGCATCTAACAGGGAAACTAAAGTAGGAAAGGCAGGGTATGTTACTGACAAAGGAAGAC +AGAAAGTAATTACCCTAACTGACACAACAAATCAGAAGACTGAACTACAAGCCATTTATTTAGCTTTACA +GGATTCAGGGATAGAAGTAAACATAGTAACAGATTCACAATATGCATTGGGGATTATTCAAGCACAACCA +GATAAGAGTGAATCAGAATTAGTCAATCAAATAATAGAGGAGTTAATAAAGAAGGAAAAGGTCTACCTGT +CGTGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAAGTGCTGTTTCTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAAATATCATAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTACCACCTGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAGGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAGCAGCATTTTTTATATTAAAATTAGCAGGCGGATGGCCAGTAAAAG +CAATACATACAGATAATGGCAGCAACTTCACCAGTGGTGCTGTGAAGGCAGCCTGTTGGTGGGCAGATAT +CAAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGAGAACAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TACACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATCACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTGCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>J.SE.SE9280 +TTTTTTAGGGAAGATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGGAACTTTCTCCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCCTCGGGCTCGGAGAGGAGAT------------------CCCCTCCCCGAAACAGGAGCCGAA +GGACAAGGA------------ACTGTATCCTCTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCACAATAAGAATAGGGGGGCAGCTGAGGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAGACTTGCCACGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATAACGAGGTACCGATAGAAATTGAGGGAAAAAAGGCTATAGGTACAGTGTTAATAGGACCTACAC +CTGTCAACATAATTGGAAGGAACATGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAATTAAACAATGGCCATTGACA 
+GAAGAAAAAATAAAAGCATTAACACAAATTTGT------GCAGAAATGGAAGAGGAAGGAAAAATTTCAA +GAGTTGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTTAGAGAACTCAATAAAAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTATATGAAGATTTCAGGAAATATACTGCATTCACTATACCTAGTATAAACAATGAGACACCAGGGAT +CAGATATCAGTACAACGTGCTACCACAGGGATGGAAAGGATCCCCAGCAATATTTCAGTGTAGCATGACA +AAAATCTTAAAACCTTTTAGAGAAAGAAACCCAGAAATAGTTATCTACCAGTACATGGATGACTTGTATG +TGGGATCTGACTTGGAAATAGAACAGCATAGAAGAAAAATAAAGGAGCTGAGGGAACATCTATTGAAGTG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAGCTT +CATCCTGACAAATGGACAGTCCAACCTATACAACTGCCAGAAAAAGAAGACTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGAATTAAAGTAAAGCAACTATGTAA +ACTCCTTAAAGGGGCTAAAGCACTAACAGACATAGTACCATTGACTAGAGAAGCAGAATTGGAACTGGCA +GAAAACAAAGAGATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACTCAGCAAAAGAAT +TAATAGCAGAAGTGCAGAAACAAGGGCTGGACCAATGGACATATCAAATTTATCAGGAGCCATTTAAAAA +CCTGAAAACAGGGAAATATGCAAAAAGGAGGAGTGCCCACACTAATGATGTAAAACAATTAGCAGAAGTG +GTGCAAAAAATAGCCTTGGAAGCCATAGTAATATGGGGAAAAACT---CCTAAATTTAGACTACCCATAC +AAAGAGAAACATGGGAGACATGGTGGACAGACTACTGGCAGGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCCCCTCTAGTAAAGTTATGGTACCAATTAGAAAAGGAACCCATAATGGGAGCAGAAACTTTC +TATGTAGATGGGGCATCTAACAGGGAGACTAAAACAGGAAAGGCAGGGTATGTTACTGACAAAGGAAGAC +AGAAAGTAGTTACCCTAACTGACACAACAAATCAGAAGACTGAACTACACGCCATTTATTTAGCTTTACG +GGATTCAGGGCTAGAAGTAAACATAGTAACAGATTCACAATATGCATTGGGGATTATACAAGCACAACCA +GATAAGAGTGAATCAGAATTAGTCAATCAAATAATAGAGGAGTTAATAAAGAAGGAAAAGGTCTACCTGT +CGTGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAGGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAAGTGCTGTTTCTAGATGGGATAGATAAAGCTCAAGAA---GATCATGAAAAATATCATAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTA---GCAAAAGAGATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAGGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATCTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT 
+ATCCCAGCAGAAACAGGACAGGAAGCAGCATTTTTTATATTAAAATTAGCAGGCAGATGGCCAGTAAAAG +TAATACATACAGACAATGGCAGCAACTTCACCAGTGGTGCTGTGAAGGCAGCCTGTTGGTGGGCAGATAT +CAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGAGAACAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TACACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAGAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACCCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CONSENSUS_K +TTTTTTAGGGAA??TCTGGCCT??C?ACAA------------?G?GAG---GCCAGG?AATTTTCTTCAG +A?CAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTT?GGGTTCGGGGAG?AGAT---------------AACCCCCTCTC?GAG?CAGGA?A??AA +AGA??AGGA---------ACAG?G?CCTCCTTTAACTTCCCTCAAATCACTCTTTGGCA?CGACCC?TTG +TCACA?TAAAAGTAGGGGGACAGTTAAGAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTT?CC?GGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAGTA??TAT?GAAATTTGTGG?CAAAAGGCTATAGGTACAGT?TTAGTAGGACCTACAC +CTGTCAACATAATTGGA?GAAAC?TGTTGACTCAGATTGG?TGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATT?ACA +GAAGAAAAAATAAAAGCATTA??AGA?ATTTGT------ACAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGA?AATCCATA?AACAC?CCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGA? 
+AAAATTAGTAGATTTTAGAGAACTCAATAAAAGAACTC?AGACTTCTGGGAAGTTCAATTAGG?ATACCA +CATCCAGCAGG?TTAAAAAAGAAAAAATCAGTAACAGTACTGGATGT?GGGGATGCATATTTTTCAGTCC +CTTTAGATAAAGA?TTCAGGAAGTATACTGCATTCACTATACCTAGTATTAACAATGA?ACACCAGG??T +TAGATATCAGTACAATGT?CTACCACA?GGATGGAAAGGATCACCAGCAATATT?CAA??TAGCATGACA +AAAATCTTAGAGCCCTTTAGAA??AAAAATCCAGA?ATGGTT?TATACCAATACATGGATGATTT?TA?G +TAGGGTCTGACTTAGAAATAGGGCAAC?TAGA?CAAAAATAGAGGAACTAAGAGAACATCTATTGA?ATG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGG?TATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGATAGCTGGACTGTCAATGATATAC +A?AAGTTAGTGGGAAAATTAAATTGGGCAAGTCAAATAT??CCAGG?ATTAAAGTAAAACAATT?TGTAA +ACTCCTTAGGGGAGTCAAAGCA?TAACAGACATAGTACCA?TAACTGCAGAAGCAGAGTTAGAATTAGCA +GAGAACAGAGA?ATT---------CTAAAAGA?CCAGTGCATGGGGTATA?TATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAA?CAAGGG?A?G?CCAATGGACATATCAAATATATCAAGAGCCA?ATAAAAA +T?TGAAAACAGGGAAGTATGCAAGAAT?AG?TCTGCCCACACTAATGATGTAAA?CAATTAACAGAAG?A +GTGCAAAAA?TAGCCA??GAA?GCATAGTAATATGGGGAAAAACT---CCTAAATTTAGA?TACCCATAC +AAAAAGAAACATGGG?GACATGGTGGACAGAATATTGGCA?GCCACCTGGATCCCTGAATGGGAGTTTGT +CAATAC?CCTCCCCTAGTAAAACTATGGTACCAGTTAGAAACAGAACCCATAGTAGG?GCAGAAACTTTC +TA?GTAGATGGGGCAGC??ATAGGGAAACTAAA?AGGGAA?AGCAGGATATGTTACTGACA?AGGAAGAC +AAAA?GTT?TCTC?ATAACTGAAACAACAAATCAGAAA?CTGAATTACAAGCAATC??TTTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACA?CCA +GATAAAAGTGAATCAGA?TTAGTTAATCAAATAATAGAGCAATTAATAAAAAAGGA?AGG?TCTACCTAT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAA?GAACAAGTAGATAAATTAGTCAGT?CTGGAATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAGGCTCAAGAA--?GAACATGAAAAATATCA?AACAA?TGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCT?TAGTA---GC?AAAGAAATAGTAGCTAGCTGTGA?A +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGG?ATATGGCA?TTAGATTG +TACACATTTAGAAGGAAAAATTATCCT?GTAGCAGT?CATGTAGCTAG?GGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACGGGACA?GAAACAGCCTACTT?ATACTAAAATTAGCAGGAAGATGGCCAGTAA?AG +TAATACATACAGACAATGGCA?CAATTTCACCAG??CTGT?GTTAAGGCAGCCTGTTGGTGGGCAG?T?T 
+CAAGCAGGAATTTGG?ATTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGGGA?CAGGCTGAACATCT?AA?ACAGCAGTACAAATGGCAGTATTCA +T?CACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGG?GA?AGAATA?TAGATATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATT??A---AA?ATTCAAAA?TTTCGGGTTTAT +TACAGGGACAGCAGAGAACCAATTTGGAAAGGACCAGCAAAGCT?CTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAA??????AGTGA?ATAAAGGTAGTACCAAGAAGAAAAGCAAAGATTATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>K.CD.EQTB11C +TTTTTTAGGGAAGTTCTGGCCTCTCAACAA------------AGAGAG---GCCAGGAAATTTTCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTTGGGTTCGGGGAGAAGAT---------------AACCCCCTCTCTGAGACAGGAAATGAA +AGATCAGGA---------ACAGGGTCCTCCTTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCGTTG +TCACAGTAAAAGTAGGGGGACAGTTAAGAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCGGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAGTATGTATGGAAATTTGTGGGCAAAAGGCTATAGGTACAGTGTTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAACATGTTGACTCAGATTGGGTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAGTAGAGATTTGT------ACAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAACACCCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAT +AAAATTAGTAGATTTTAGAGAACTCAATAAAAGAACTCCAGACTTCTGGGAAGTTCAATTAGGGATACCA +CATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTAGGGGATGCATATTTTTCAGTCC +CTTTAGATAAAGATTTCAGGAAGTATACTGCATTCACTATACCTAGTATTAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTACTACCACAGGGATGGAAAGGATCACCAGCAATATTTCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAGGAAAAATCCAGATATGGTTTTATACCAATACATGGATGATTTGTACG +TAGGGTCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAACTAAGAGAACATCTATTGAGATG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGATAGCTGGACTGTCAATGATATAC +AAAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAAATATTCCCAGGAATTAAAGTAAAACAATTATGTAA 
+ACTCCTTAGGGGAGTCAAAGCATTAACAGACATAGTACCACTAACTGCAGAAGCAGAGTTAGAATTAGCA +GAGAACAGAGAAATT---------CTAAAAGAACCAGTGCATGGGGTATACTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAAGGGCACGGCCAATGGACATATCAAATATATCAAGAGCCATATAAAAA +TTTGAAAACAGGGAAGTATGCAAGAATAAGGTCTGCCCACACTAATGATGTAAAACAATTAACAGAAGTA +GTGCAAAAAGTAGCCATGGAAAGCATAGTAATATGGGGAAAAACT---CCTAAATTTAGATTACCCATAC +AAAAAGAAACATGGGGGACATGGTGGACAGAATATTGGCAGGCCACCTGGATCCCTGAATGGGAGTTTGT +CAATACCCCTCCCCTAGTAAAACTATGGTACCAGTTAGAAACAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCCAATAGGGAAACTAAACAGGGAAAAGCAGGATATGTTACTGACAAAGGAAGAC +AAAAAGTTATCTCAATAACTGAAACAACAAATCAGAAAACTGAATTACAAGCAATCCATTTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAATTAGTTAATCAAATAATAGAGCAATTAATAAAAAAGGACAGGGTCTACCTAT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAACGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAGGCTCAAGAA--AGAACATGAAAAATATCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGGATATGGCAGTTAGATTG +TACACATTTAGAAGGAAAAATTATCCTAGTAGCAGTTCATGTAGCTAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACGGGACAGGAAACAGCCTACTTCATACTAAAATTAGCAGGAAGATGGCCAGTAAGAG +TAATACATACAGACAATGGCAGCAATTTCACCAGTGCTGTAGTTAAGGCAGCCTGTTGGTGGGCAGATAT +CAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGGGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGAGAGAGAATAATAGATATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGAACCAATTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAA-----TAGTGAGATAAAGGTAGTACCAAGAAGAAAAGCAAAGATTATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>K.CM.MP535 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG 
+CAGAGAGCTTCGGGTTCGGGGAGGAGAT---------------AACCCCCTCTCCGAGGCAGGAGACCAA +AGACAAGGA---------ACAGAGCCCTCCTTTAACTTCCCTCAAATCACTCTTTGGCAACGACCCATTG +TCACAATAAAAGTAGGGGGACAGTTAAGAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAGTACTTATAGAAATTTGTGGACAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAACCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTAACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGAGAATCCATATAACACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGATTTTAGAGAACTCAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CATCCAGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTAGATAAAGACTTCAGGAAGTATACTGCATTCACTATACCTAGTATTAACAATGAAACACCAGGGGT +TAGATATCAGTACAATGTGCTACCACAAGGATGGAAAGGATCACCAGCAATATTCCAACATAGCATGACA +AAAATCTTAGAGCCCTTTAGAATAAAAAATCCAGAAATGGTTATATACCAATACATGGATGATTTATATG +TAGGGTCTGACTTAGAAATAGGGCAACCTAGAACAAAAATAGAGGAACTAAGAGAACATCTATTGAAATG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAAATATATCCAGGGATTAAAGTAAAACAATTGTGTAA +ACTCCTTAGGGGAGTCAAAGCACTAACAGACATAGTACCATTAACTGCAGAAGCAGAGTTAGAATTAGCA +GAGAACAGAGAGATT---------CTAAAAGAGCCAGTGCATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGAATGACCAATGGACATATCAAATATATCAAGAGCCACATAAAAA +TCTGAAAACAGGGAAGTATGCAAGAATGAGATCTGCCCACACTAATGATGTAAAGCAATTAACAGAAGCA +GTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAGACATGGTGGACAGAATATTGGCAAGCCACCTGGATCCCTGAATGGGAGTTTGT +CAATACTCCTCCCCTAGTAAAACTATGGTACCAGTTAGAAACAGAACCCATAGTAGGGGCAGAAACTTTC +TACGTAGATGGGGCAGCTCATAGGGAAACTAAAAAGGGAAGAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAGGTTGTCTCCATAACTGAAACAACAAATCAGAAAGCTGAATTACAAGCAATCTGTTTAGCTTTACA 
+GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAGCCA +GATAAAAGTGAATCAGATTTAGTTAATCAAATAATAGAGCAATTAATAAAAAAGGAAAGGATCTACCTAT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAG +GAAAGTGCTATTTCTAGATGGAATAGATAAGGCTCAAGAA---GAACATGAAAAATATCATAACAACTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCCAAAGAAATAGTAGCTAGCTGTGACA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCTAGCGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACGGGACAAGAAACAGCCTACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACATACAGACAATGGCACCAATTTCACCAGCACTGTGGTTAAGGCAGCCTGTTGGTGGGCAGGTGT +CAAGCAGGAATTTGGGATTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGGGATCAGGCTGAACATCTCAAAACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTTA---AACATTCAAAAATTTCGGGTTTAT +TACAGGGACAGCAGAGAACCAATTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAACAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATTATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>N.CM.YBF30 +TTTTTTAGGGAAGAGCTGGTCTCCCTTCAA------------AGGGAG---ACCAGGAAACTTCCCCCAG +ACAACAACAAGGAAAGAGCCCACAGCCCC---------------------------------GCC---AC +TAGAGAGTTATGGGTTTCAGGAGGAGAAGAGCACACAGGGGAAGGAGATGCAGGAGAACCAGGAGAGGAC +AGAGAACTC---------TCTGTACCCACCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCGTCA +TCACAGTAAAAATAGGGAAAGAAGTAAGAGAAGCTCTTTTAGATACAGGAGCTGATGATACAGTAATAGA +AGAGCTACAATTAGAGGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGATTTATCAAAGTGAGA +CAATATGATAATATAACAGTAGACATACAGGGAAGAAAAGCAGTTGGTACAGTATTAGTAGGACCAACAC +CTGTTAATATTATAGGAAGAAATCTTTTAACCCAGATTGGCTGTACTTTAAATTTTCCAATAAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAACCAGGAATG---GATGGCCCAAAGGTAAAACAATGGCCTTTGACA +ACAGAAAAAATAGAGGCATTAAGAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCTA +GAATAGGGCCTGAGAATCCATATAACACTCCAATTTTTGCTATAAAAAAGAAAGATAGCACTAAATGGAG 
+AAAATTAGTAGATTTCAGGGAATTAAATAAAAGGACCCAAGATTTTTGGGAAGTGCAGCTAGGAATTCCA +CATCCAGCAGGATTAAAGCAGAAAAAATCAGTGACAGTTTTGGATGTAGGAGATGCTTATTTTTCATGTC +CCTTGGACAAAGATTTTAGAAAGTATACAGCTTTTACCATACCTAGTATAAACAATGAGACACCTGGTAT +TAGATACCAGTATAATGTGCTGCCACAAGGCTGGAAAGGGTCACCAGCAATTTTTCAGAGTACAATGACA +AAAATTCTAGAACCATTCAGAGAGAAACATCCAGAGATAATCATTTACCAGTACATGGATGACCTCTATG +TGGGATCTGACTTAGAACTAGCACAACATAGAGAGGCAGTAGAAGACCTTAGAGATCATCTTTTGAAGTG +GGGCTTTACGACCCCTGAC---AAAAAACATCAGAAGGAACCCCCGTTCCTCTGGATGGGATATGAACTC +CATCCAGACAAATGGACAGTCCAGCCAATAAAGTTACCAGAAAAGGATGTATGGACTGTCAATGATATAC +AGAAATTAGTAGGAAAGTTAAATTGGGCAAGTCAGATCTATCCAGGAATCAGAGTAAAACAGCTCTGTAA +ATTAATCAGAGGAACCAAAGCTTTGACAGAAGTAGTCAACTTTACAGAAGAAGCAGAATTAGAACTAGCA +GAAAACAGGGAGATA---------TTAAAAGAACCCCTGCATGGAGTCTATTATGACCCAGGAAAAGAAT +TAGTAGCAGAAATTCAAAAGCAAGGACAAGGTCAGTGGACATATCAGATTTATCAGGAGTTACATAAAAA +TTTAAAAACAGGAAAGTATGCAAAAATGAGATCTGCCCATACTAATGATATAAAACAGTTAGTTGAAGTG +GTAAGGAAAGTGGCAACAGAAAGTATAGTAATTTGGGGAAAGACT---CCTAAATTTAGATTACCAGTAC +AAAAGGAAGTGTGGGAGGCATGGTGGACCGATCATTGGCAAGCAACTTGGATTCCTGAGTGGGAATTTGT +CAACACTCCTCCCCTTGTAAAATTATGGTATCAGTTAGAAACAGAGCCAATCAGTGGGGCAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACAAAATTGGGAAAAGCAGGTTTTGTGACAGATAGGGGAAGAC +AGAAAGTGGTCTCTATTGCAGACACCACCAATCAAAAGGCTGAGTTACAAGCTATCCTTATGGCCTTACA +AGAGTCAGGACGGGATGTAAACATAGTCACTGACTCTCAGTATGCTATGGGAATAATTCATTCACAGCCA +GATAAAAGTGAATCAGAATTGGTGAGCCAAATAATAGAAGAGCTCATAAAAAAGGAAAGAGTTTATCTCT +CTTGGGTACCTGCACATAAAGGTATTGGAGGAAATGAGCAGGTAGACAAATTAGTTAGCTCAGGAATTAG +AAAAATATTATTCCTAGATGGTATAGAAAAAGCCCAAGAA---GATCATGACAGATATCACAGCAATTGG +AAAGCAATGGCCAGTGATTTTAACTTACCCCCCATAGTG---GCAAAAGAAATAGTAGCCAGCTGTGACA +AATGCCAGCTAAAAGGGGAAGCCATGCATGGACAGGTCAATTGTAGTCCAGGAGTGTGGCAATTAGATTG +TACACACTTAGAGGGAAAAATCATCCTTGTGGCGGTCCATGTGGCCAGTGGCTACTTAGAAGCAGAAGTT +ATTCCTGCAGAGACAGGACAGGAAACAGCATATTTTATTTTAAAGTTAGCTGGAAGATGGCCAGTAAAAG +TTATACACACTGATAATGGATCCAATTTCACTAGTGCCACTGTAAAAGCAGCCTGTTGGTGGGCAAATAT 
+CAAACAGGAATTTGGGATACCCTACAATCCTCAAAGTCAGGGAGCAGTAGAGTCCATGAATAAAGAATTA +AAGAAAATTATAGGACAAATCAGAGATCAAGCAGAACATCTAAAGACAGCAGTGCAAATGGCGGTTTTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAGACAACAAATTTACAAACACAAATTTTA---AAAGTTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGATCCCATTTGGAAAGGACCAGCCAAACTTCTGTGGAAAGGAGAAGGGGCA---G +TGGTAATTCAAGATAACGGGGATATAAAAGTAGTCCCACGTAGGAAAGCAAAAATAATTAGGGATTATGG +AAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTGGACAGGATGAAAAT--- +>CONSENSUS_O +TTTTTTAGG?AA?TACTGGCCTCCGGGGGG------------CACGAG---GCCAGGCAATTATGTGCAG +A?AC?AG?------???CCCATCAGCCCC---------------------------------ACC---?? +?????????????GA?GGA??A?GA???---------------???????????AGAA???GGA?CAG?A +AG???????------??????GCT?TA?CC?TTTGCCTCCCTCAAATCCCTCTTTGGGACAGACCAATAG +T?ACAGCAA??GTTGGGGG?CA?CTATGTGA?G?TTT?CTGGATACAGG?GCAGATGA?ACAGTA?TAAA +?AACATACAATT?GAAGGAA?ATGGA?ACCAAAAATGATAGGGGGTATAGGAGG?TTTATAAAAGTAAAA +GA?TAT?A?AATGTGACAGTAGAA?TA?AAGGAA?GGA?GTACAGGGAACAGTATTGGTGGGACCTACTC +CTGTTAATATT?TTGG?AGAAA?ATATTGACAGGATTAGG?TGTACACTAAA?TTCCCTATAAG?CCCAT +AGCCCCAGTGCCAGTAAA?CTAAAACCAGGAATG---GATGGACCAAAAGTAAAACAATGGCCCCTATCT +A?AGA?AAAATAGAAGC??T?AC?GCAATATGT------CA?GAAATGGAACA?GAAGGAAAAAT?TCAA +GAATAGGACCTGAAAATCCTTATAATACACCTAT?TTTGCTATAAAAAAGAAAGAT?G?ACTAA?TGGAG +AAAATTGGTAGA?TT?AG?GAATTAAATAA?AGAACACAAGA?TTCTGGGAGGT?CA??TAGGTAT?CCA +CATCC?GGGGGTTT?AAGCAAA?GCAATCTGTTACAGTCTTAGATGTAGGAGATGCTTATTTCTCATG?C +C?TTAGA?CCAGA?TTTAGAAAATA?ACTGC?TTCACTATTCCTAGTGTGAA?AATGAGACCCCAGGA?T +AAGATACCAGTACAATGTCCTCCCGCAAGG?TGGAAAGGTTCACCAGC?ATATT?CA?AGTTCAATGACA +AA?ATTCTAGATCCATT?AG?A?A??CAACCCAGAA?TAGAAATTT?TCAGTACAT?GATGAC?TATATG +TAGGATCAGATTTACC??TG?CAGAACATAGAAA?AGG?T?GAATTGCTTAG?GAACA??TATATCAGTG +GGGATT?ACTACCCCTGA?---AAAAAGCATCA?AAGGAACCTCCCTTT?TATGGATGGG?TATGAGCTC +CA?CCAGACAA?TGGACAGTACAG?CCATCCAATTGCCT?ACAA?GA?GTGTGGACAGTAAATGATATAC +AAAAA?TA?TAGGAAA??TAAATTGGGCAAGTCAAATCTATCAAGGAATTAGAGT?A?AGAATTGTG?AA 
+GTTAAT?AGAGG?ACCAA?TCATT?ACAGA?GTAGTACCTTTAAGTA?AGAGGCAGA?CT?GAATTAGA? +GAAAACAGAGAAA?G---------?TAAAA?A?CCAGT?CATGG?GTATA?TA?CA?CCTGA?AA?GA?? +T?TGGGTTA?TATTCAGAAGCA?GGAG??G?GCAATGGACTTACCAG?TATATCAGGA?GAACATAAGAA +CCT?AAAACAGG?AAATAT?CTAGGCAAAAGGCCTCCCACACAAATGATATAAGACAATT?GCAGAAGTA +?TCCAGAAGGTGTCTCAAGAA?CTATA?TTAT?TGGGG?AAATT?---CCTAAATT?A?GCTGCCAGT?A +CTAGAGAAACTTGGGAAAC?TGGTGGGC?GA?TATTGGCA?GCCACCTGGATTCC?GAATGGGAATTTGT +CAGCACACCCCCATTGATCAAATTATGGTAC??G?TAGAAA??GAACCTATT?T?GGGGCAGAAACCT?T +TATGTAGATGGAGCAGCTAATAG??A?ACAAAACTAGGAAAGGC?GGATATGTTACAGAACAAGG?AAAC +AGAA?ATAATAAA?TTAGA?GAGAC?ACCAATCAAAA?GCTGAATTAATGGC??TATTA?TAGCCTT?CA +GGATTCCAA?GA???AGTAAA?ATAGTAACAGA?TCACAATATG?ATTGGGC?TCAT?TCCTCCCAACC? +ACACAGAGTGA?TCCCCTATAGTTCAGCA?ATAATAGAGGAACTAACAAAAAAGGAAC??GTGTATCTTA +CATGGGTTCCTGC?CA?AAAGGCATAGGAGGAAATGAAAAAATAGATAAATTAGTAAGCAA?GA?ATTAG +AAGAGTCCTGTTCCT?GAAGGAATAGA?CAGGCACAAGAA---GATCATGAAAAATATCATAGTAATTGG +A?AGCA?TAGCTAGTGA?TTTGGA?TACCACCA?T?GT?---GCCAAGGAAATCATTGCTAG?TGTCCTA +AATG?CATATAAAAGGGGAAGCAA??CATGGTCA?GTAGACT?CAG?CCAGA??TATGGCAAAT?GATTG +?ACACAT?T?GAAGGCAAAATCATAATAGTTGCTGTCCATGT?GCAAGTG??TT?ATAGAAGCAGA?GTG +ATACCAGCAGAAACAGGACA?GAAACTGCCTA?TTCCTGTTAAAA?T?GC?GCAAGATGGCCTGT?AAAG +TAATACATACAGACAA?GG?CCTAATTTTACAAGT?CA?C?ATGAA?GCTGCATGTTGGTGG?C???CAT +ACAACATGAGTTTGG?ATACCATATAATCCACAAAGTCAAGGAGTAGTAGAAGCCATGAATAA?GAATTA +AAATC?ATTATACAGCAGGTGAGGGACCAAGCAGA?CA?TTAA?AACAGCAGTACAAATGGCAGT?TTTG +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACTGCAGG?GA?AG??TAATAGACATA?T +AGCATCACAAATACAAACAACAGAA?TACAAAAACAAATTTTA---AAA?T??ACAA?TTTCGGGTCTAT +TACAGAGA?AGCAGAGACCCTAT?TGGAAAGGACCGGCACA?CTCCTGTGGAAAGGTGAGGG?GCA---G +TAGTCATACAAGATAA?GGAGACATTAA?GT?GTACCAAGAAG?AAGGCAAAAATAATCAGAGA?TATGG +AAAACAGATGGCAGGTACTGATAGTATGGCAA?T?GACAGACAGAAAGT--- +>O.CM.ANT70C +TTTTTTAGGCAAATACTGGCCTCCGGGGGG------------CACGAG---GCCAGGCAATTATGTGCAG +AGACCAGC------ACACCCATCAGCCCC---------------------------------ACC----- 
+-------------GATGGAGGAGGAAGT---------------GAAGGGACAGGAGAATCAGGAACAGAA +AGGGGGCCC------GAACGAGCTCTATCCGTTTGCCTCCCTCAAATCCCTCTTTGGGACAGACCAATAG +TTACAGCAAGAGTTGGGGGCCACCTATGTGAAGTTTTGCTGGATACAGGAGCAGATGACACAGTACTAAA +CAACATACAATTGGAAGGAAAATGGAAACCAAAAATGATAGGGGGTATAGGAGGTTTTATAAAAGTAAAA +GAATATGATAATGTGACAGTAGAAATAGAAGGAAGGGAGGTACAGGGAACAGTATTGGTGGGACCTACTC +CTGTTAATATTATTGGAAGAAATATATTGACAGGATTAGGTTGTACACTAAACTTCCCTATAAGCCCCAT +AGCCCCAGTGCCAGTAAAACTAAAACCAGGAATG---GATGGACCAAAAGTAAAACAATGGCCCCTATCT +AAAGAAAAAATAGAAGCCTTGACAGCAATATGT------CAGGAAATGGAACAAGAAGGAAAAATTTCAA +GAATAGGACCTGAAAATCCTTATAATACACCTATCTTTGCTATAAAAAAGAAAGATGGTACTAAATGGAG +AAAATTGGTAGATTTTAGGGAATTAAATAAGAGAACACAAGAGTTCTGGGAGGTACAGCTAGGTATCCCA +CATCCGGGGGGTTTGAAGCAAAAGCAATCTGTTACAGTCTTAGATGTAGGAGATGCTTATTTCTCATGTC +CCTTAGACCCAGATTTTAGAAAATATACTGCTTTCACTATTCCTAGTGTGAATAATGAGACCCCAGGAAT +AAGATACCAGTACAATGTCCTCCCGCAAGGATGGAAAGGTTCACCAGCTATATTCCAAAGTTCAATGACA +AAAATTCTAGATCCATTCAGGAGAGACAACCCAGAATTAGAAATTTGTCAGTACATGGATGACCTATATG +TAGGATCAGATTTACCCCTGACAGAACATAGAAAAAGGATTGAATTGCTTAGAGAACACCTATATCAGTG +GGGATTCACTACCCCTGAC---AAAAAGCATCAAAAGGAACCTCCCTTTCTATGGATGGGGTATGAGCTC +CATCCAGACAAATGGACAGTACAGTCCATCCAATTGCCTAACAAGGATGTGTGGACAGTAAATGATATAC +AAAAACTAATAGGAAAGCTAAATTGGGCAAGTCAAATCTATCAAGGAATTAGAGTGAGAGAATTGTGTAA +GTTAATTAGAGGCACCAAGTCATTAACAGAAGTAGTACCTTTAAGTAGAGAGGCAGAGCTGGAATTAGAG +GAAAACAGAGAAAGG---------TTAAAACAACCAGTGCATGGGGTATACTATCAACCTGATAAGGATC +TATGGGTTAATATTCAGAAGCAAGGAGGGGAGCAATGGACTTACCAGATATATCAGGAAGAACATAAGAA +CCTCAAAACAGGGAAATATACTAGGCAAAAGGCCTCCCACACAAATGATATAAGACAATTAGCAGAAGTA +ATCCAGAAGGTGTCTCAAGAATCTATAATTATCTGGGGAAAATTG---CCTAAATTTAAGCTGCCAGTCA +CTAGAGAAACTTGGGAAACATGGTGGGCGGACTATTGGCAAGCCACCTGGATTCCAGAATGGGAATTTGT +CAGCACACCCCCATTGATCAAATTATGGTACAGGCTAGAAAGTGAACCTATTATGGGGGCAGAAACCTAT +TATGTAGATGGAGCAGCTAATAGAGAGACAAAACTAGGAAAGGCAGGATATGTTACAGAACAAGGGAAAC +AGAAGATAATAAAATTAGATGAGACCACCAATCAAAAAGCTGAATTAATGGCGATATTACTAGCCTTACA 
+GGATTCCAAAGAAACAGTAAATATAGTAACAGATTCACAATATGCATTGGGCGTCATCTCCTCCCAACCT +ACACAGAGTGAATCCCCTATAGTTCAGCAAATAATAGAGGAACTAACAAAAAAGGAACAGGTGTATCTTA +CATGGGTTCCTGCCCATAAAGGCATAGGAGGAAATGAAAAAATAGATAAATTAGTAAGCAAGGATATTAG +AAGAGTCCTGTTCCTAGAAGGAATAGACCAGGCACAAGAA---GATCATGAAAAATATCATAGTAATTGG +AAAGCACTAGCTAGTGAATTTGGACTACCACCAGTGGTG---GCCAAGGAAATCATTGCTAGCTGTCCTA +AATGTCATATAAAAGGGGAAGCAATTCATGGTCAGGTAGACTGCAGTCCAGAAGTATGGCAAATAGATTG +CACACATATGGAAGGCAAAATCATAATAGTTGCTGTCCATGTGGCAAGTGGGTTCATAGAAGCAGAAGTG +ATACCAGCAGAAACAGGACAAGAAACTGCCTACTTCCTGTTAAAACTGGCTGCAAGATGGCCTGTTAAAG +TAATACATACAGACAACGGGCCTAATTTTACAAGTACAACTATGAAGGCTGCATGTTGGTGGGCCAACAT +ACAACATGAGTTTGGAATACCATATAATCCACAAAGTCAAGGAGTAGTAGAAGCCATGAATAAGGAATTA +AAATCAATTATACAGCAGGTGAGGGACCAAGCAGAACACTTAAGAACAGCAGTACAAATGGCAGTATTTG +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACTGCAGGAGAAAGGATAATAGACATATT +AGCATCACAAATACAAACAACAGAATTACAAAAACAAATTTTA---AAANTTCACAAATTTCGGGTCTAT +TACAGAGACAGCAGAGACCCTATCTGGAAAGGACCGGCACAGCTCCTGTGGAAAGGTGAGGGAGCA---G +TAGTCATACAAGATAAGGGAGACATTAAGGTAGTACCAAGAAGGAAGGCAAAAATAATCAGAGAGTATGG +AAAACAGATGGCAGGTACTGATAGTATGGCAAGTGGACAGACAGAAAGT--- +>O.CM.MVP5180 +TTTTTTAGGGAAGTACTGGCCTCCGGGGGG------------CACGAG---GCCAGGCAATTATGTGCAG +AAACAAGT------GTCCCCATCAGCCCC---------------------------------ACC---AA +TGGAGGAGGCAGTGAAGGAACAAGAGAA------------------TCAGAGTCAGAAGGGGGATCAGGA +AGA------------------GCTGTACCCATTTGCCTCCCTCAAATCCCTCTTTGGGACAGACCAATAG +TCACAGCAAAGGTTGGGGGTCATCTATGTGAGGCTTTACTGGATACAGGGGCAGATGATACAGTATTAAA +TAACATACAATTAGAAGGAAGATGGACACCAAAAATGATAGGGGGTATAGGAGGCTTTATAAAAGTAAAA +GAGTATAACAATGTGACAGTAGAAGTACAAGGAAAGGAAGTACAGGGAACAGTATTGGTGGGACCTACTC +CTGTTAATATTCTTGGGAGAAACATATTGACAGGATTAGGATGTACACTAAATTTCCCTATAAGTCCCAT +AGCCCCAGTGCCAGTAAAGCTAAAACCAGGAATG---GATGGACCAAAAGTAAAACAATGGCCCCTATCT +AGAGAGAAAATAGAAGCACTAACTGCAATATGT------CAAGAAATGGAACAGGAAGGAAAAATCTCAA +GAATAGGACCTGAAAATCCTTATAATACACCTATTTTTGCTATAAAAAAGAAAGATAGCACTAAGTGGAG 
+AAAATTGGTAGACTTCAGAGAATTAAATAAAAGAACACAAGATTTCTGGGAGGTGCAATTAGGTATTCCA +CATCCAGGGGGTTTAAAGCAAAGGCAATCTGTTACAGTCTTAGATGTAGGAGATGCTTATTTCTCATGCC +CTTTAGATCCAGACTTTAGAAAATACACTGCCTTCACTATTCCTAGTGTGAACAATGAGACCCCAGGAGT +AAGATACCAGTACAATGTCCTCCCGCAAGGGTGGAAAGGTTCACCAGCCATATTTCAGAGTTCAATGACA +AAGATTCTAGATCCATTTAGAAAAAGCAACCCAGAAGTAGAAATTTATCAGTACATAGATGACTTATATG +TAGGATCAGATTTACCATTGGCAGAACATAGAAAGAGGGTCGAATTGCTTAGGGAACATTTATATCAGTG +GGGATTTACTACCCCTGAT---AAAAAGCATCAGAAGGAACCTCCCTTTTTATGGATGGGATATGAGCTC +CACCCAGACAAGTGGACAGTACAGCCCATCCAATTGCCTGACAAAGAAGTGTGGACAGTAAATGATATAC +AAAAATTAGTAGGAAAATTAAATTGGGCAAGTCAAATCTATCAAGGAATTAGAGTAAAAGAATTGTGCAA +GTTAATCAGAGGAACCAAATCATTGACAGAGGTAGTACCTTTAAGTAAAGAGGCAGAACTAGAATTAGAA +GAAAACAGAGAAAAG---------CTAAAAGAGCCAGTACATGGAGTATATTACCAGCCTGACAAAGACT +TGTGGGTTAGTATTCAGAAGCATGGAGAAGGGCAATGGACTTACCAGGTATATCAGGATGAACATAAGAA +CCTTAAAACAGGAAAATATGCTAGGCAAAAGGCCTCCCACACAAATGATATAAGACAATTGGCAGAAGTA +GTCCAGAAGGTGTCTCAAGAAGCTATAGTTATATGGGGGAAATTA---CCTAAATTCAGGCTGCCAGTTA +CTAGAGAAACTTGGGAAACTTGGTGGGCAGAATATTGGCAGGCCACCTGGATTCCTGAATGGGAATTTGT +CAGCACACCCCCATTGATCAAATTATGGTACCAGTTAGAAACAGAACCTATTGTAGGGGCAGAAACCTTT +TATGTAGATGGAGCAGCTAATAGGAATACAAAACTAGGAAAGGCGGGATATGTTACAGAACAAGGAAAAC +AGAACATAATAAAGTTAGAAGAGACAACCAATCAAAAGGCTGAATTAATGGCTGTATTAATAGCCTTGCA +GGATTCCAAGGAGCAAGTAAACATAGTAACAGACTCACAATATGTATTGGGCATCATATCCTCCCAACCA +ACACAGAGTGACTCCCCTATAGTTCAGCAGATAATAGAGGAACTAACAAAAAAGGAACGAGTGTATCTTA +CATGGGTTCCTGCTCACAAAGGCATAGGAGGAAATGAAAAAATAGATAAATTAGTAAGCAAAGACATTAG +AAGAGTCCTGTTCCTGGAAGGAATAGATCAGGCACAAGAA---GATCATGAAAAATATCATAGTAATTGG +AGAGCATTAGCTAGTGACTTTGGATTACCACCAATAGTA---GCCAAGGAAATCATTGCTAGTTGTCCTA +AATGCCATATAAAAGGGGAAGCAACGCATGGTCAAGTAGACTACAGCCCAGAGATATGGCAAATGGATTG +TACACATTTAGAAGGCAAAATCATAATAGTTGCTGTCCATGTAGCAAGTGACTTTATAGAAGCAGAGGTG +ATACCAGCAGAAACAGGACAGGAAACTGCCTATTTCCTGTTAAAATTAGCAGCAAGATGGCCTGTCAAAG +TAATACATACAGACAATGGACCTAATTTTACAAGTGCAGCCATGAAAGCTGCATGTTGGTGGACAGGCAT 
+ACAACATGAGTTTGGGATACCATATAATCCACAAAGTCAAGGAGTAGTAGAAGCCATGAATAAAGAATTA +AAATCTATTATACAGCAGGTGAGGGACCAAGCAGAGCATTTAAAAACAGCAGTACAAATGGCAGTCTTTG +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACTGCAGGGGAGAGACTAATAGACATACT +AGCATCACAAATACAAACAACAGAACTACAAAAACAAATTTTA---AAAATCAACAATTTTCGGGTCTAT +TACAGAGATAGCAGAGACCCTATTTGGAAAGGACCGGCACAACTCCTGTGGAAAGGTGAGGGGGCA---G +TAGTCATACAAGATAAAGGAGACATTAAAGTGGTACCAAGAAGAAAGGCAAAAATAATCAGAGATTATGG +AAAACAGATGGCAGGTACTGATAGTATGGCAAATAGACAGACAGAAAGT--- +>AC.ET.E3099G +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGAAACTTTCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTTGGGATGGGGGAAGAGAT---------------NNCCCCCTCCCCGAAGCAGGANCNGAN +NGACAAGGN---------ACTATATCCTCCTTTAGNTTCCCTCAAATCACTCTTTGGCAACGACCCCTNG +TCACAGTAAGAATAGGGGGACAGCCAATAGAAGCCCTACTGGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACCTATAGAAATTTGTGTGAAAAAGGCTATAGGTACAGTGTTAGTAGGACCTACAC +CTGTCAACATAATTGGGAGAAATATATTGACTCAGATTGGTTGTACTTTAAATTTTCCAATCAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGGATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCAATACAATACTCCAATATTTGCCATAAAGATAAAGGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTTTGGGAGGTCCAATTAGGAATACCT +CATCCTGCGGGGTTAAAAAAGAAAAGATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTTC +CCTTAGATGAAAGTTTTAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTTCAGAGTAGCATGACA +GAAATCTTAGAGCCCTTTAGAACAAAAAATCCAGAAATGGTGATCTACCAATACATGGATGATTTATATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAGTTGAGAGCACATATATTGAAANN +NNNNNNNNNNACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTCCAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAAGCAACTGTGTAG 
+ACTCCTCAGGGGAGCCAAAGCACTAACAGATATAGTAACACTGACTGAGGAAGCAGAATTAGAACTGGCA +GAGAACAGGGAGATT---------TTAAAAGACCCTGTGCATTGGGTATATTATGACCCATCAAAAGACT +TAGTAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TATAAAAACAGGAGAATATGCAAAAAAGAGTTCTGCTCACACTAATGANGTAAAACAATTAACAGCAGTA +GTACAAAAAGTGGCAACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTGCCCATAC +AAAAAGAAACGTGGGAGACATGGTGGACAGAATATTGGCAAGCCACCTGGGATCCTGAATGGGAGTTTGT +CAATACCCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACTTTC +TATGTAGANGACACGGCTAATAGAGAAACAAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +ATAAGGTTGTTACCCTAACTGAGACAACAAATCAAAAGACTGAACTACATGCAATGCATTTAGCCTTACA +GGATTCAGGGTCAGAAGTAAACATAGTGACAGACTCACAGTATGCACTAGGAATCATTCAGGCACAACCA +GACAAGAGTGAATCAGAAATAGTCAATCAAATAATAGAGAAGCTAATTAGAAGGGACAAAGTCTACCTGT +CCTGGGTACCGGCACACAAGGGGATTGGAGGAAATGAGACAGTAGATAAATTAGTCATAGTTGGAATCAG +GAAAGTACTATTTTTAGATGGCATAGATAAAGCCCAAGAA---GAGCATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATTTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAAATAAATGGGGAGGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTCCATGTAGCCAGTGGTTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTCTATATTAAAATTAGCAGGAAGATGGCCAGTGAAAA +TAATACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTAAAAGCAGCATGTTGGTGGGCAAATGT +CACACAAGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATAAATAAAGAATTA +AAGAAAATTATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACATATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCCTTGGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AC.IN.21301 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------GCC---AG 
+CAGAGAGCTTCAGATTCGA---GGAGAC---------------AACCCCCGCACTGAAGCAGGAGCAAAA +AGACAGGGA------------------ACCCTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCTCAATAAGAGTAGGGGGCCAGACAAAAGAGGCTCTCTTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATATCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCCACAC +CTGTCAACATAATTGGAAGGAATATGTTGACTCAGCTTGGATGCACACTAAATTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGCAATTTGT------GATGAAATGGAGAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATATAACACTCCAATATTTGCCATAAAAAAGAAAGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGGGAACTCAATAAAAGAACTCAAGATTTTTGGGAAGTCCAATTAGGAATACCA +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTC +CTTTATATGAAGAATTCAGGAAATATACTGCATTCACCATACCTAGTACAAACAATGAAACACCAGGGAT +TAGATATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGGCTAGCATGACA +AAAATCTTAGAGCCCTTTAGGGCACAAAATCCAGAAATAGTCATCTATCAATATATGGATGACTTGTATG +TAGGATCTGACTTAAAAATAGGGCAACATAGAGCAAAAATAGAGGAATTAAGAGAACATCTGTTAAGGTG +GGGATTTACCACACCAGAC---AAGAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAAATTTACCCAGGGATCAAAGTAAGGCAACTTTGTAG +ACTTCTTAGGGGAGCCAAAGCACTAACAGACATAGTGCCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGAGAAATT---------CTAAAAGAGCCAGTACATGGAGTATATTATGATCCATCAAAAGACT +TGATAGCTGAAATACAGAAACAGGGGCAGGACCAGTGGACATATCAAATTTATCAAGAACCATTCAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAGTTAACAGAGACA +GTGCAAAAAATAGCCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AAAAAGAAACATGGGAGACATGGTGGACAGACTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTGGAGAAAGAACCCATGGCAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAAACTAAAATAGGAAAAGCAGGGTATGCTACTGACAGAGGACGGC +AAAAAATTGTTACTCTAACTGAAACAACAAATCAGAAGACTGAATTGCAAGCAATTTATCTAGCTTTGCA 
+AGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCCCTAGGAATTATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTCAACCAAATAATAGAACAATTGATAAAAAAGGAAAGGGTCTACCTGT +CATGGATACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAATGGAATCAG +GAGAGTGCTATTTCTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAGGTATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCCATAATA---GCAAAAGAAATAGTAGCTAGCTGTAATC +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATCATCCTGGTAGCAGTCCATGTAGCCAGTGGCTACATAGAAGCAGAGGTT +ATCCCAGCAGAAACAGGACAAGAAACAGCATACTATATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGTAGTAATTTCACCAGTGCTGCAGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAGGAATTA +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCGGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGACCCTATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAGGTAGTACCAAGGAGGAAAGCAAAAATCATTAAGGACTATGG +AAAACAGATGGCAGGCGCTGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>AC.RW.92RW009 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGAAATTTTCCCCAG +AGCAGACT------GGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAACTTTGGAATGGGGGAAGAGAT---------------AGCCTCTCCTCTGAAACAGGAGCAGAA +AGACAGGGA------------------ACCTTTAATTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGGAGGTCAGCTAAGAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAAA +CAGTATGATCAAATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAT +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTAAACTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAGCATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAAGAGAAATTTGT------ACAGAAATGGAAAAAGAGGGAAAAATTTCAA +AAATCGGGCCTGAAAATCCATATAACACTCCAGTATTTGCCATAAAAAAGAAGGACAGTACTAAGTGGAG 
+AAAATTAGTAGATTTCAGGGAACTCAACAAAAGAACTCAAGACTTTTGGGAAGTCCAATTAGGGATACCA +CACCCAGCAGGGTTAAAGAAGAAAAAATCAGTGACAGTACTGGATGTGGGGGATGCATACTTCTCAGTTC +CTTTAGATGAGAGCTTCAGGAAATATACTGCATTCACCATACCTAGTATAAACAATGAAACACCAGGAAT +TAGGTATCAATATAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAATAGTATGACA +AAAATCTTAGAGCCCTTTAGGGCACAAAACCAAGAAATAGTGATCTATCAATATATGGATGACTTGTACG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTATTAAAGTG +GGGATTTACCACACCAGAC---AAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTT +CATCCTGACAAATGGACAGTACAACCTATACAGCTGCCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAATTAAACTGGGCAAGTCAGATTTACCCAGGGGTTAAAGTAAGGCAATTGTGTAA +ACTCCTTAGGGGAACCAAAGCATTAACAGACATAGTACCACTAACTGAAGAAGCAGAATTAGAATTGGCA +GAAAACAGGGAAATT---------TTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCTGAAATACAGAAACAGGGGCATGACCAATGGACATATCAAATTTACCAAGAACCATTCAAAAA +TCTGAAAACAGGAAAGTATGCAAAAAGGAGGACTGCCCACACTAATGACGTAAAACAGTTAACAGAGGCA +GTGCAAAAGATAGCCATGGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAGATTACCCATCC +AGAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCCTAGTAAAATTATGGTACCAGCTAGAGAAAGAACCCATATTAGGAGCAGAGACTTTC +TATGTAGATGGAGCAGCTAATCGGGAAACTAAAATAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGGC +AGAAAATTGTTTCTCTAACTGAAACAACAAATCAGAAGACTGAATTACAAGCAATTCAGCTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAGCAGCGAATCGGAGGCAGTCAATCAAATAATAGAACAGTTAATAAAAAAGGAAAGAGTCTACCTGT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTAAGTAGTGGAATCAG +GAGAGTGCTGTTTCTAGATGGAATAGATAAGGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AGTGTCAGCTAAAAGGGGAAGCCATGCATGGGCAAGTAGACTGTAGTCCAGGGATATGGCAATTAGACTG +TACACATCTGGAAGGAAAAATAATCCTGGTAGCAGTCCATGCAGCCAGTGGTTATATAGAAGCAGAGGTT +ATTCCAGCAGAAACAGGACAAGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTCAAAG +TAATACATACAGACAATGGCAGTAATTTCACCAGTAATACAGTTAAAGCAGCCTGTTGGTGGGCAGGTAT 
+CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAATAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAGGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAGGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>AC.SE.SE9488 +TTTTTTAGGGAAAATCTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACT------GGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGATCTTCGGGATGGGGGAAGAGAT---------------------AACTCAGAAGCAGGAACAGAC +AGACAGGGA---------ACAGGGCCCGCCTTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAGAATAGGAGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAG +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACCCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAACATTAAAACCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATCTGT------ACAGATATGGAAAAGGAAGGAAAACTCTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAG +GAAATTAGTAGACTTCAGAGAGCTCAATAAAAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGTTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCAGTTC +CTTTAGATAAAGACTTTAGAAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGCGCCCTTTAGATCACAAAATCCAGAAATAATTATCTATCAATACATGGATGATTTATATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGTTGAGAGCTCATCTATTGAGCTG +GGGGTTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACGGTCCAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAAACAACTGTGTAA 
+ACTCCTTAGAGGAACCAAATCATTAACAGATGTAGTGACACTGACTGAGGAAGCAGAATTAGAACTGGCA +GAGAACAGGGAGATT---------CTAAGAGACCCTGTGCATGGAGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATCCAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGGAAATATGCAAAAAAGAGGTCTGCTCACACCAATGATGTAAGACAATTAGCAGAAGTG +GTGCAAAAAGTAGTCATGGAAAGCATAATAATATGGGGAAAGACT---CCTAAATTTAAACTCCCCATAC +AAAAAGAAACATGGGAAACATGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCTATAATAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAATAGGGAAACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACGGAATTACATGCTATCCAGCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGATAGTCAACCAAATAATAGAAAAGCTAATACAAAAGGACAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAG +GAAGGTACTATTTTTAGATGGAATAGATAAGGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAACTAAAAGGGGAAGCCATGCATGGACAAGTAGATTGTAGCCCAGGGATATGGCAACTAGATTG +CACACATCTAGAAGGAAAAGTAATACTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAGAA +GAGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAAATAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAGGGAGTAGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGACAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTCCAGGGGAAAGAATAATAGACATAAT +AGCAACAGATATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AC.ZM.ZAM184 +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGGAG---GCCAGGGAATTTCCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---AG 
+CAGAAAGCTTCAGGTT-CG--AGGAGAC---------------AACCTCCGCTCCGAAGCAGGAGCTGAA +AGACAGGGA------------------GCCCTTAATTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGAAGGACAGCTGAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGA +AGACATAAATCTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAGATAGTTATAGAAATTTGTGGAAAAAGGGCCATAGGTACAGTATTAGTAGGACCCACGC +CTGTCAACATAATTGGAAGAAATATGTTGGTTCAGCTTGGTTGTACTTTAAATTTTCCAATAAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGC---CCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AGAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAACACTCCAGTGTTTGCTATAAAGAAAAAAGACAGCACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCA +CATCCAGCAGGATTGAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCCGTTC +CCTTACATGAAGACTTCAGAAAATATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +TAGGTATCAGTACAATGTACTTCCACAGGGATGGAAGGGATCACCATCAATATTCCAGAGTAGCATGACA +AAAATCTTAGATCCCTTTAGATCAAAAAATCCAGAAATAGTCATCTACCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGTCAGCATAGAGCAAAAATAGAGGAATTAAGAGCTCATTTATTGAAATG +GGGATTTACTACACCAGACAAAAAA---CATCAAAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTACAGCCTATACAGCTACCAGAAAAGGATAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAGTTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAACTGTGTAA +ACTCCTTAGGGGGGCTAAAGCACTAACAGACATAGTACCACTGACTAAAGGAGCAGAATTAGAATTGGAA +GAGAACAGGGAGATT---------CTAAAGGACCCTGTACATGGGGTATACTATGACTCATCAAAAGACT +TAATAGCAGAAATACAGAAACAGGGGCATGACCAATGGACATATCAAATTTATCAAGAACCATTTAAAAA +TCTGAAAACAGGGAAGTATGCAAAAATGAGGACTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATAGCCATGGAAAGCATAGTAATATGGGGAAAGACTCCTAAA---TTTAGACTGCCCATCC +AAAAGGAAACATGGGAGACATGGTGGACAGATTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAACTATGGTACCAGTTAGAAAAGGAACCCATAGCAAGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAACTAGGAAAGGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAAATTGTCTCCCTGACGGAGACAACAAATCAAAGGGCTGAATTACATGCAATCTATTTGGCTTTACA 
+GGATTCAGGATTAGAGGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAACCAAATAATAGAAAAGTTAATAGGAAAGGAAAGGATCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGGAGAAATGAACAGGTAGACAAATTAGTCAGTTCTGGAATCAG +GAAAGTGTTATTTTTAGATGAGATAGATAAGGCTCAAGAAGAACAT---GAAAGATATCATAGTAATTGG +AGAGCAATGGCTCATGACTTTAATCTGCCACCTATAGTAGCAAAA---GAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +CACACATCTAGAAGGAAAAGTTATCCTGGTAGCAGTCCATGTAACCAGTGGCTATATGGAAGCAGAAGTC +ATCCCAGCAGAAACAGGGCAGGAGACAGCATACTTTATATTAAAACTAGCAGGAAGATGGCCAGTAAAAG +TAATACATACAGACAATGGGCCCAATTTCACTAGTGCAACAGTTAAGGCGGCCTGTTGGTGGGCAGGTGT +CCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGATCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGGATTGGG---GGATACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGATATACAAACTAAAGAATTACAAAAACAAATTACAAAAATT---CACAAATTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAG +TA---ATACAAGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGCGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAG +>ACD.SE.SE8603 +TTTTTTAGGGAAGATATGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAACCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGAGAAGAGAT---------------AACCCCCTCCCAGAAGCAGGAGCAGAA +AGACAAGGA------------ACTGTATCCTTTAGCCTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCAAAGTAAAGATAGGGGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGA +CAGTATGATCAAATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGGATG---GATGGCCCAAGAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAATAGAAATTTGT------ACAGAGATGGAAAAGGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGTACTAAGTGGAG 
+AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCG +CATCCAGCGGGCTTGAAAAAGAAAAAATCAGTAACAATACTAGATGTGGGGGACGCATATTTTTCAGTCC +CCTTAGATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTACAAACAATGAGACACCAGGAAT +CAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCGGCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATCAAAAAATCCAGACATGATTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGACAGCATAGAACAAAAATAGAGGAGTTAAGAGCTCATCTATTGAGCTG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTGTGGATGGGATATGAACTC +CATCCTGACAAGTGGACAGTCCAATCTATAAAACTGCCAGAAAAAGAAAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGGAAATTAAATTGGGCAAGCCAAATTTATCCAGGAATTAAAGTAAAACAGTTGTGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGATGTAGTAACATTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAGACCCTGTGCATGGGGTATATTATGACCCATCAAAGGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGAACAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGGAAGTATGCAAAAAAGAGGTCTGCTCACACTAATGATGTAAAACAATTAGCAGAAGTG +GTGCAAAAAGTGGTCATGGAAAGCATAGTAATATGGGGAAAGGCT---CCTAAATTTAAATTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGATGGACTATTGGCAGGCCACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAATAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCCAATAGGGAAACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAACTACATGCAATCTATCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGGAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAAGTGCTATTTTTGGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGGTATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCTGTAATA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGATAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCAATTAGATTG +CACGCATTTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGTGCTGCATTTAAAGCAGCCTGTTGGTGGGCAAGTGT 
+CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGTAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAAATTCCGGGTTTAT +TACAGGGACAGCAGAAATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TGGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAGGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AD.SE.SE6954 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------TCC---AG +CAGAGAGCTTCGGGTTTGGAGAGGAGAT---------------AGCACCTTCTCAGAAACAGGAGCAGAA +AGACAAGGA------------GCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCACAGTAAAGATAGGGGGACAGTTAAGGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGA +CAGTATGATCAAATACTCATAGAAATCTGTGGATATAAAGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAACTTGTTGACTCAGATTGGTTGCGCTTTGAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTACAATTAAAGCCAGGGATG---GATGGTCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCGCTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTGCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAACTAGGAATACCA +CATCCAGCAGGGCTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTAGGTGATGCATATTTTTCAGTTC +CCTTATATGAAGACTTTAGAAAGTATACAGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +TAGATATCAATACAATGTGCTCCCACAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAACCTTTTAGAAAACAAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAGATAGGGCAGCATAGAATAAAAATAGAAGAATTAAGGGGACACCTATTGAAGTG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTATGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGGAAATTAAATTGGGCAAGCCAGATTTACCCAGGAATTAAAGTAAGACAATTATGCAA 
+ATGCATTAGGGGAGCCAAAGCACTGACAGAAGTAGTACCACTGACAGAAGAAGCAGAATTAGAGCTGGCA +GAAAATAGGGAAATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAACAATATAAAAA +TCTGAAGACAGGAAAGTATGCAAAAGTGAGGGGTACCCACACTAATGATATAAAACAATTAACAGCGGCA +GTGCAAAAAATAGCCCAGGAATGTATAGTGATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAACGTGGTGGACAGAGTATTGGCAGGCCACCTGGATCCCTGAGTGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATGGTAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAATAGGGAAACTAAGCTAGGAAAAGCAGGATATGTCACTGATAGAGGAAGAC +AAAAGGTTGTTCCCCTAACTGAGACAACCAATCAAAAAACTGAACTACATGCAATTCATCTAGCCTTACA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAGTCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTAT +CATGGGTGCCAGCACACAGGGGGATTGGAGGAAATGAACAGGTAGATAAATTAGTCAGTAATGGAATCAG +GAAAATACTATTCTTGGATGGGATAGATAAGGCTCAAGAA---GAACATGAAAAATACCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCGCCTGTGGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCTTGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGGCAGGAAACAGCCTACTTTCTCTTGAAATTAGCAGGAAGATGGCCAGTAAGAG +TAGTACATACAGACAATGGCAGCAATTTCACCAGCACTGCAGTTAAGGCCGCCTGTTGGTGGGCAGGCAT +CAAGCAGGAGTTTGGGATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAATTG +AAGAAAATTATAGGGCAGGTAAGAGATCAAGCGGAACATCTTAGGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAGACAAATCACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>AD.SE.SE7108 +TTTTCTAGGGAAAATTTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAG +AACAGACT------GGAACCAACAGCTCC---------------------------------ACC---AG 
+CAGAAACCTTTGGGATGGGGGAAGAGAC---------------AGCCTCCCCTCTGAAGCAGGAGCAGAG +AAACAGGGA---------ACAGGCTCCACCCTTAATTTCCCTCAAATCACTCTTTGGCAACGACCTCTTG +TCACAGTAAAAATAGGGGGACAGCTAAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAATATAATTGGAAGAAACATGTTGACCCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGGATG---GATGGCCCAAAAGTTAAGCAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAGAATAGCACTAGATGGAG +GAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGAAGTTCAGTTAGGCATACCG +CATCCAGCGGGTCTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGGGACGCATATTTTTCAGTTC +CTTTACATGAAGAATTTAGAAAGTATACTGCGTTCACCATACCTAGTACAAACAATGAGACACCAGGAAT +CAGGTATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATCAAAAAATCCAGAACTAATTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAGATAGGGCAGCATAGAATAAAAATAGAGGAATTAAGGGAACACCTATTGAAGTG +GGGATTTTACACACCAGAC---CAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGATAGATGGACAGTACAGCCTATAAAACTGCCAGAAAAGGAAAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGGAAATTAAATTGGGCAAGCCAGATTTATGCAGGAATTAAAGTAAAACAATTATGCAA +ATGCCTTAGGGGAGCCAAAGCACTGACAGAAATAGTACCACTGACAGAAGAAGCAGAATTAGAACTGGCA +GAAAATAGGGAAATT---------TTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAGTGGTCATATCAAATTTATCAAGAACAATATAAAAA +TCTGAAAACAGGAAAATATGCAAAATTGAGGGGTACCCACACCAATGATATAAAACAATTAACAGCGGCA +GTGCAAAAAATAGCCCAAGAATGTATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAACGTGGTGGACAGAATATTGGCAGGCCACCTGGATTCCTGAATGGGAATTTGT +CAATACCCCTCCTTTAGTTAAATTATGGTACCAGTTAGAGAAGGACCCCATAGCAGGAGTAGAAACTTTC +TATGTGGATGGGGCAGCTAATAGAGAGACTAAATTAGGGAAAGCAGGATATATTACTGACAGAGGAAGAC +AGAAAGTTGTCTCTCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCCATTAATTTAGCTTTGCA 
+GGATTCAGGACCCGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGGAAAGGACAAAGTTTACCTGT +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAG +GAAGGTGCTGTTTTTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTTCATGTAGCTAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAAGAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAGGTAT +CCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGACAAGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACACATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>ADU.CD.MAL +TTTTTTAGGGAAAATTTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGGGAGGAGAT---------------AAAACCCTCTCAGAAACAGGAGCAGAA +AGACAAGGA------------ATTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTTG +TCACAGTAAGAGTAGGAGGACAGCTAAAAGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAATATTGGTAGGACCTACAC +CTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAGCCAGGGATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGATATGGAAAAGGAAGGAAAAATTTTAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGCACTAAATGGAG 
+AAAATTAGTGAATTTCAGAGAGCTTAATAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CATCCTGCTGGGTTGAAAAAGAAAAAATCAGTCACAGTATTGGATGTGGGGGATGCATATTTTTCAGTCC +CTTTAGATGAAGATTTCAGGAAGTATACTGCATTCACTATACCCAGTATTAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTACCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAACCCTTTAGAACAAAAAATCCAGAAATAGTCATATACCAATACATGGATGATTTGTATG +TAGGGTCTGATTTAGAAATAGGACAACATAGAACAAAAATAGAGGAACTAAGAGAACATCTATTGAAATG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CACCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGAAAGCTGGACTGTCAATGATATAC +AGAAATTGGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAA +ACTCCTTAGGGGAGCAAAAGCACTAACAGACATAGTACCATTAACTGCAGAGGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCAGTGCATGGGGTATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATATACCAAGAGCAATATAAAAA +TCTGAAAACAGGGAAGTATGCAAGAATAAAGTCTGCCCACACTAATGATGTAAAACAATTAACAGAAGCA +GTGCAAAAGATAGCCCAAGAAAGCATAGTAATATGGGGAAAAACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAGGCATGGTGGACAGAATATTGGCAAGCCACCTGGATCCCTGAATGGGAGTTTGT +CAATACTCCTCCCCTAGTAAAACTATGGTACCAGTTAGAAACAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAAACTAAAAAGGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAGGTTGTCTCCTTAACTGAAACAACAAATCAGAAGACTGAATTACAAGCAATCCACTTAGCTTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATTATTCAAGCACAACCA +GATAAAAGTGAATCAGAGATTGTTAATCAAATAATAGAGCAATTAATACAGAAGGACAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCAGTGGAATCAG +AAAGGTACTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GAACATGAAAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTACCACCTATAGTA---GCGAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAACTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAATAATCATAGTAGCAGTCCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAAATAT 
+CAAACAGGAATTTGGAATTCCCTACAACCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTGTTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATGAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAACAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTGGACAGGATGAGGAT--- +>AG.NG.G3 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---CG +CAGAGAGCTTCGGGTTCGGAGAGGAGAT---------------AGCCCCTTCCCTGAAGCAGGAGCCGAG +GGAAAAGGA------------ATCACCTCCATTAACCTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCACAGTAAGAATAGGGGGACAGCTAATAGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +ACAAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGATTTATCAAAGTAAAA +CAGTATGATCAAATACTTATAGAAATTGAAGGGAAAAAGGCTATAGGGACAGTACTAGTAGGACCTACAC +CTATCAACATAATTGGGAGAAATATGTTGACTCAAATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAGACTGTACCAGTAAAATTAAAACCAGGAATA---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------ACAGATATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCAGAAAATCCATACAACACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAGTTGGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGACTTCTGGGAGGTCCAATTAGGCATACCT +CATCCCGCGGGGTTAAAAAAGAAAAGATCAGTAACAGTACTAGATGTGGGGGATGCATATTTTTCAATTC +CCCTAGATGAAAACTTTAGAAAGTATACAGCATTCACTATACCTAGTATAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCGCAAGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAACCCTTTAGAACAGAAAATCCAGAAATAGTGATCTATCAGTACATGGATGATTTATATG +TAGGATCTGACTTAGAAACAGGGCAGCATAGAGCAAAAATAGAGGAATTAAGAAATCATCTACTGAGATG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTCTGGATGGGATATGAGCTC +CATCCTGACAAATGGACGGTACAACCTATACAGCTGCCAAACAAAGAAAGCTGGACTGTCAATGATATAC +AAAAATTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCAACTATGTAA 
+ACTCCTTAGGGGGGCCAAAGCACTAACAGACATAGTACCACTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGAGTCTACTATGACCCATCAAAAGAAT +TAATAGCAGAATTACAGAAACAAGGGTGCGACCAATGGACATATCAAATTTATCAAGAGCCATACAAAAA +TCTGAAAACAGGAAAGTATGCAAAAAGGGGGTCTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCA +GTGCAAAAAATAGCCACAGAGAGCATAGTAATATGGGGAAAAGTT---CCTAAATTTAAACTACCTATAA +GGAAAGAAACATGGGAAGTATGGTGGACAGAATATTGGCAGGCCACCTGGATTCCTGATTGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAGTTATGGTATCGATTAGAAACAGAACCCATACCAGGAGCAGAAACTTAC +TATGTAGATGGGGCAGCTAATAAAGAAACAAAATTAGGAAAGGCAGGATATGTTACTGACAGAGGAAAAC +AAAAAATTATCACCATACAGGAAACAACAAATCAAAAAACTGAATTACACGCAATTCAGCTAGCTTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAGGAGTGAATCAGAATTAGTCAATCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTACTTAA +CATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAG +AAAAGTACTGTTTTTAGATGGCATAGACAAAGCTCAAGAG---GACCATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTCATTATAGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTCCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAATGAAAGCAGCCTGTTGGTGGGCAAATAT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTCAGGGATCAAGCTGAACACCTCAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAGATTATA---AAAATTCAAAATTTTCGGGTCTAT +TACAGGGACAGCAGAGACCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGAGATAAAGGTAGTACCAAGAAGAAAAGTAAAAATCATTAAGGATTATGG +AAAACAGATGGCAGGTGGTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AG.SE.SE7812 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------GGAACCAACAGCCCC---------------------------------GCC---AG 
+CAGAGAGCCTTGGGATAGGGGAAGAGAT---------------AACCTCCTCTCAGAAGCAGGAACCGGG +GGACAAGGG---------ACTATATCCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAG +TCACAGTAAAAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCCATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGGAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACCGTACCAGTAAAATTAAAGCCAGGCATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGACATTTGT------ACAGAAATGGAAAAGGAAGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAAATAGTACTAGATGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTCTGGGAGATCCAATTAGGAATACCT +CATCCCGCGGGATTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGGGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTTAGAAAGTATACTGCATTCACTATACCTAGTATAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGGCGAGCATGACA +AAAATCTTAGAGCCCTTTAGAACAAAAAATCCGGAGATAGTGATCTACCAATATATGGATGATTTATATG +TAGGATCAGACTTAGAGATAGGACAGCATAGAGCAAAAATAGAGGAGTTGAGAGAACATCTACTGAGATG +GGGATTTACCACACCAGAT---GAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTCCAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAAGTAAATTGGCAAGTCAGGATTTATGCAGGAATTAAAGTAAAGCAATTGTGTAA +ACTCCTCAGAGGAGCCAAAACACTAACAGATATAGTAACACTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGAGTATATTATGACCCAACAAAAGACT +TAGTAGCAGAAATACAGAAACAAGGGCAGGACCAATGGACATATCAAATTTACCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAAAAAGAGGTCTGCCCACACTAATGATGTAAAACAATTAACAGAGGTA +GTGCAAAAGGTGGCTACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAGAGAAACATGGGAAGCATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGCTAGAAAAAGACCCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAGACTAAGATAGGAAAAGCAGGGTATGTTACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAGCTGAGACAACAAATCAAAAGACTGAATTACATGCAATTCATTTAGCCTTGCA 
+GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGCTAGTCACTCAAATAATAGAGGAGCTAATAAAAAAGGATAGAGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAGGTGGATAAATTAGTCAGTAGTGGAATCAG +GAAGGTACTATTTTTAGATGGCATAGATAAGGCCCAAGAA---GAGCATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATCTGCCACCTGTAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGCCAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAAGAGACAGCATACTTTATATTAAAATTAGCAGGAAGATGGCCAGTGAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAATGT +CACACAAGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTGCTCA +TTCACAATTTTAAAAGAAGAGGGGGG---ATTGGGGGGTACAGTGCAGGAGAGAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAGATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAGGTAAAAATCGTTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AGHU.GA.VI354 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCTCCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTCGGAGAGGAGAT---------------AGCCCCCTCCCCGAGGCCGGAGCCAAG +GGAAAAGGA------------GCGGTATCCTTTAACCTCCCTCAAATCACTCTTTGGCAGCGACCCCTAG +TCACAGTAAAAATAGGGGGACAACTAATAGAAGCACTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACTAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAGTACCCATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACCC +CCATCAACATAATTGGAAGAAACATGTTGACTCAGATTGGGTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAACCAGGAATA---GATGGGCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAGATAAAAGCATTAACAGAGATTTGC------AATGAAATGGAACAAGAAGGAAAAATTTCAA +GAATAGGGCCTGAAAATCCATACAACACTCCAATATTTGCCATAAAGAAAAAGGACAGCACTAAATGGAG 
+AAAATTAGTAGATTTCAGAGAGCTCAATAAGAGAACTCAAGACTTTTGTGAAGTTCAGTTAGGAATACCT +CACCCAGCAGGGTTAAAAAAGAAGAAATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTCC +CTTTATATGAAGGTTTCAGAAGGTATACTGCGTTCACTATACCTAGTATAAATAATGAGACGCCAGGGAT +TAGATATCAGTATAATGTGCTCCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATTTTAGAGCCTTTCAGAAAACAAAATCCTGAAATGGTTATTTATCAATACATGGATGATTTGTATG +TAGGCTCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGCTCATTTGTTGAAGTG +GGGATTCACCACGCCAGAC---AAAAAACATCAAAAAGAACCCCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTACAGACTGTAAAATTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATTCAGGGATTAAAGTAAAACAACTATGTAA +ACTCCTTAGAGGGGCCAAAGCACTAACAGATATAGTACCACTAACTGAAGAAGCAGAATTGGAATTGGCA +GAGAACAGGGAGATT---------CTGAAAGAACCAGTACATGGAGTATATTATGATCCATCAAAAGACT +TAGTAGCAGAAGTACAAAAGCAAGGGCCAGACCAATGGACATATCAAATTTATCAAGAGCCATTCAAAAT +TCTGAAAACAGGAAAATATGCAAAAAAGAGGTCTGCCCACACGAATGATGTAAAACAATTAACAGAAGCA +GTGCAGAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAAATT---CCTAAATTTAAACTACCCATAC +AGAAAGAAACATGGGAGACATGGTGGACAGATCATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCATCTAGTAAAATTATGGTATCAGTTAGAGACAGAGCCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAAACTAAACAAGGGAAAGCAGGATATGTCACTGACAGAGGAAGAC +AAAAAATTGTTTCCCTAACAGAAACAACAAATCAGAGGACTGAATTACAAGCAATTCACTTAGCTTTGCA +GGATTCAGGACCAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAATTAGTTAATCAAATAATAGAGAAATTAATCCAGAAGAACAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAAAAGGTAGATAAATTAGTTAGTGCTGGAATCAG +AAAGGTACTATTTTTAGATGGAATAGATAAGGCTCAAGAA---GACCATGAAAGATATCACAGCAATTGG +AAAGCAATGGCTAGTGATTTTAATCTGCCACCCATAGTA---GCAAAAGAAATAGTAGCTAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCAATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACACTTAGAAGGAAAAATTATTCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTTATATTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAATGGCACCAATTTCACCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAATGT 
+TACACAAGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCATAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAGGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AGHU.NO.NOGIL3 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------AGGAAG---GCCAGGGAACTTTCCTCAG +AGCAGACC------GGAGCCATCAGCCCC---------------------------------ACC---AG +CAGAGAGCTTTGGGATAGGGGAAGAGAT---------------AACCTCCTATCAGAAGCAGGAACAGAA +GGACAGGGA---------ACCGCCCCCTCCCTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCCGTTG +TCACAGTAAAAGTAGGAGGACAGCTAAAAGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGACATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCARATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTGGCAGGACCTACAC +CTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------TTAGAAATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGTACTAAATGGRG +AAAMTTRGTRGATTTCAGAGAACTTAACAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CATCCTGCTGGGTTGAAGAAGAAAAAATCAGTAACAGTATTGGATGTGGGGGATGCATATTTTTCAGTCC +CCTTAGCTGAAGATTTCAGAAAGTATACTGCATTCACTATACCTAGTATTAATAATGAGACACCAGGGAT +TAGATATCAGTATAATGTGTTACCACAGGGATGGAAAGGATCACCAGCAATATTCCAGTGTAGCATGACA +AAAATCTTAGAACCCTTTAGAGCAAAAAATCCAGAAATAGTTATATACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGACAACATAGAACAAAAATAGAGGAACTAAGAGAACATCTATTGAGATG +GGGATTAACTACACCAGAC---AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAGCTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGAAAGCTGGACTGTCAATGATATAC +AGAAATTGGTGGGAAAACTAAACTGGGCAAGTCAGATTTATCCAGGGATAAAAGTAAGGCAATTATGTAG 
+ACTCCTTAGGGGAACAAAAGCACTAACAGACATAGTACCACTAACTGCAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGATT +TAATAGCAGAAATACAGAAGCAGGGGCGAGGCCAATGGACATATCAAATATATCAAGAGCCATATAAAAA +TTTGAAAACAGGGAAGTATGCAAGGATGAAGTCTGCCCACACTAATGATGTAAAACAATTAACAGAGGCA +GTGCAAAAAATATCCATAGAAAGTATAGTAATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATCCCTGAATGGGAGTTTGT +CAATACCCCTCCCCTAGTAAAACTATGGTACCAGTTAGAAACAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAAACTAAAAAGGGGAAAGCAGGATATGTTACTGATAGAGGAAGAC +AAAAAGTTGTCTTCCTAACTGAAACAACAAATCAGAAGACTGAACTACAAGCAATTCACTTAGCCTTACA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCA +GACAAAAGTGAGTCAGATTTAGTCAATCAAATAATAGAGCAATTAATACAGAAGGACAAGGTCTACCTGA +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGACAAATTAGTCAGCAGTGGAATCAG +AAAGGTGCTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GCACATGAAAAATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTACCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAACTAAAAGGAGAAGCCATACATGGACAAGTAGACTGCAGTCCAGGAATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAATAATCCTAGTAGCAGTCCATGTAGCTAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAATGGTAGCAATTTCATCAGTGCTGCAGTCAAAGCAGCCTGTTGGTGGGCAGATAT +TAAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAACAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAGACAGCARTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCGGGGGAAAGAATAATAGACATRAT +AGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGATAATGGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AGJ.AU.BFP90 +TTTTTTAGGGAAAATCTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---CA 
+TAGAGAGCTTCGGTTTCGGAGAGGAGAT---------------AGCCCCCTCCCCGAAACAGGAGTCGAA +GGAGAAGGA---GGAAAAGGGGCTATATCCCTTAGCCTCCCTCAAATCACTCTTTGGCAGCGACCCCTAG +TCACAGTAAGAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAA +CAGTATGATCAAATACTTATAGAGATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTATCAACATAATTGGACGAAACATGTTGACCCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAACTAAAGCCAGGGATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAGAAGATAAAAGCATTAACAGAAATTTGT------AAAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAAAGGACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCA +CATCCTGCTGGGTTGAAGAAGAAAAAATCAGTAACAGTATTGGATGTGGGGGATGCATATTTTTCAATTC +CCCTAGATGAAAAATTTAGAAAGTATACTGCATTCACTATACCTAGTACAAATAATGAAACACCAGGGAT +TAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGATA +AAAATCTTAGAGCCCTTTAGAATAAAAAACCCAGAAATAGTGATCTACCAATACATGGATGATTTATATG +TAGGGTCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAGCATCTATTGAAATG +GGGACTTACCACACCAGAT---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTCCCAGACAAGGAGAGCTGGACTGTCAATGATATAC +AGAAATTGGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTCAGGGGAGCAAAAGCACTAACAGACATAGTACCACTAACTGCAGAAGCAGAACTAGAATTGGCA +GAGAACAGGGAAATT---------TTAAAAGAACCAGTACATGGGGTATATTATGACCCATCGAAAGACT +TAATAGCAGAAGTACAGAAGCAGGGGTACGGCCAATGGACATACCAAATATATCAGGAGCCACATAAAAA +CCTGAAAACAGGGAAGTATGCAAGAATAAAGTCTGCCCACACTAATGATGTAAAACAATTAACAGAAGCA +GTGCAAAAAATAGCCCTAGAAAGCATAGTAATATGGGGTAAGACT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAGACATGGTGGACAGAATATTGGCAAGCCACCTGGATCCCTGAATGGGAATTTGT +CAATACCCCTCCCCTAGTGAAACTATGGTACCAGTTAGAAACAGAGCCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCGGCTAATAGAGAAACTAAGCAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAGGTTGTTACCCTAACTGAAACAACAAATCAGAAGACGGAATTACAAGCAATTAATCTAGCATTACA 
+GGATTCAGGACCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAACTAGTCAACCAAATAATAGAACAGCTAATAAAAAAGGAAAAAATCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAG +GAAGGTACTATTTTTAGATGGCATAGATAAAGCCCAGGAA---GATCATGAAAGATATCACAGCAATTGG +AGAGCCATGGCTAATGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGACA +AATGTCAGCTAAAAGGGGAAGCCATACATGGACAAGTAGATTGTAGTCCAGGGATATGGCAATTAGATTG +CACACACCTAGAAGGAAAAATAATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATATTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TGATACACACAGACAATGGTAGCAATTTCACCAGTGCTGCAGTTAAAGCAGCCTGTTGGTGGGCAAATAT +CACACAAGAATTTGGAATTCCTTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAACAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AGJ.ML.95ML8 +TTTTTTAGGGAGAATCTGGCCTTCCAGCAA------------GGGGAG---GCCGGGGAACTTTCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---CG +CAGAGAGCTTCGGGTTCGGAGAGGAGAC---------------AGCCCCCTCCCTGAAGCAGGAACCGAA +GGAAAAGGA------AAAGGAGCTATATCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTAG +TCACAGTAGGAATAGAGGGACAGCTGATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTACCAGGAAAATGGAAACCAAAGATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGA +CAGTATGATCAAATACTTATAGAGATTTGTGGAAAAAAGGCTATGGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAACATGTTGACCCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCCAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGATTAAACAATGGCCATTAACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGT------ACAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAG 
+AAAATTAGTAGACTTCAGAGAACTTAATAAAAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCA +CACCCTGCTGGGTTGAAGAAGAGAAAATCAGTAACAGTATTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGGCGAAAACTTTAGAAAGTATACTGCATTCACTATACCTAGTTTAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGATA +AAAATTTTAGAGCCCTTTAGAACAAAGAATCCAGAAATAGTAATCTACCAATACATGGATGATTTATATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTAAGAGAACATCTATTGAAATG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAGCCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGATAGCTGGACTGTCAATGATATAC +AGAAATTGGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCATTTATGTAA +ACTCCTCAGGGGAGCAAAAGCACTAACAGACATAGTACCACTAACTGCAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAGCCAGTGCATGGGGTATATTATGACCCATTGAAAGACT +TAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATATATCAGGAACCACATAAAAA +CCTGAAAACAGGGAAGTATGCAAGAATGAGGTCTGCCCACACTAATGATATAAAACAATTAACAGAAGCA +GTGCAAAAGATAGCCCTAGAAGCCATAGTAATATGGGGAAAGATT---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAGACATGGTGGACAGAATATTGGCAAGCCACCTGGATCCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAACTATGGTACCAGTTAGAAACAGAGCCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCGGCTAATAAAGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAAAGGAAGAC +AAAGGGTTATCTCCCTAACTGAAACAACAAATCAGAAGACGGAATTACAAGCAATTAATCTAGCCTTACA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAATTAGTCAGCCAAATAATAGAACAGCTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGGATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAAGCCCAAGAA---GATCATGAAAGATATCACAGCAATTGG +AGAGCCATGGCTAGTGATTTTAATCTGCCACCTATACTA---GCAAAAGAAATAGTAGCCTGCTGTGACA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +CACACACCTAGAGGGAAAAATAATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TGATACACACAGATAATGGTAGCAATTTCACCAGTACTGCAGTTAAAGCAGCCTGTTGGTGGGCAAATGT 
+CACACAAGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGACCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAGGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>AGU.CD.Z321 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGGAG---GCCAGGGAATTTTCTTCAG +AACAGACC------AGAGCCAACAGCCCC---------------------------------ACC---CG +CAGAGAGCTTCGAGACGAAAGAGGAGAT---------------AACCTCCTCTCCGAAGCAGGAACCGAG +GGACAAGGA---------ACTATACCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAGCGACCCATAG +TCACAGTAAAAATAGGGGGACAGCCAATAGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAAATTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATACTTATAGAAATTGGTGAAAAAAGAGCTATAGGGACAGTATTAGTAGGACCTACAC +CTATCAACATAATTGGGAGAAATATATTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGGATG---GATGGCCCAAAGGTTAAGCAATGGCCATTGACA +GAAGAGAAAATAAAAGCATTAACAGAAATTTGC------ACAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAACACTCCAATATTTGCAATAAAGAAAAAGGACAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGATTTCTGGGAGGTACGATTAGGAATACCA +CACCCCGCGAGGTTAAAAAAGAAAAGATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTTC +CCTTACATGAAGACTTTAGAAAGTATACCGCATTCACTATACCTAGTATAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAACAAAAAATCCAGAAATTGTGATCTATCAATACATGGATGATTTATATG +TAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAGTTGAGAGAACATCTACTGAGATG +GGGATTTACAACACCAGAT---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAGCTC +CATCCTGACAAGTGGACGGTACAACCTATACAGCTGCCAGATAAGGAAAGCTGGACTGTCAATGATATAC +AAAAGTTGGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGAATTAAAGTAAAGCAACTATGTAA 
+ACTCCTTAGGGGGACCAAGGCACTAACAGACATAGTACCACTGACTGCAGAAGCAGAATTAGAACTGGCA +GAGAACAGGGAAATT---------TTAAAAGAGCCAGTACATGGAGTATATTATGACTCATCAAAAGAAT +TAATAGCAGAAGTGCAGAAACAAGGGCTAAACCAATGGACATATCAAATATATCAAGAGCCATTTAAAAA +CCTGAAAACAGGGAAATATGCTAGAAGGAGGACTGCCCACACTAATGATGTAAGGCAATTAGCAGAAGTG +GTACAAAAAATAGCCTCAGAAAGCATAGTGATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAGACATGGTGGGCAGACTATTGGCAGGCCACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCTCTTAGTAAAATTATGGTACCAGTTAGAAAAGGAGCCCATAATAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAACAGGGAAACTAAACAGGGAAAAGCAGGGTATATTACTGACAAAGGAAGAC +AGAAAGTGATTACCCTAACTGAAACAACAAATCAGAAGACTGAACTAGAAGCAATTCATCTAGCTTTGCA +GGATTCAGGATTAGAAGTAAATATAGTAACAGATTCACAATATGCATTGGGAATTATTCAAGCACAGCCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAG +AAAGGTACTATTTTTAGATGGCATAGATAAAGCCCAAGAA---GAGCATGAAAGATATCACTGCAATTGG +AGAGCTATGGCTAGTGACTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCATACTTTATATTAAAATTAGCAGGGAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTAAAGGCCGCCTGTTGGTGGGCAAATAT +CACACAAGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAAACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAATCAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>BF.BR.93BR029.4 +TTTTTTAGGGAAAATCTGGCCTTCCCACAA------------GGGAAG---GCCAGGGAATTTCCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG 
+CAGAGAGCTTCAGGTTTGGGGAAGAGGT------------AACAACTCCCTCTCAGAAACAGGAGCCGAT +AGACAAGGA------------GATGTATCCTTTGGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCG +TCACAGTAAAGATAGGGGGGCAACTAAAGGAAGCCCTATTAGATACCGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAAATACCCATAGAAATTTGTGGACGTAAAGCTACAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTGAAGCCAGGAATG---GATGGCCCAAGGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATATGT------ACAGAAATGGAAAAAGAAGGAAAAATTTCAA +AAATTGGGCCCGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGATAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAGTTAGGGATACCA +CATCCCGCAGGGTTAAAGAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CATTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAAACACCAGGGCT +TAGATATCAGTACAATGTGCTTCCACAGGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAAATAGGGCAGCATAGAACTAAGATAGAGGAATTGAGACAGCATTTGTTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGCAGAGGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCCTCAAAAGACT +TAATAGCAGAAATACAGAAACAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATATAAAAA +TTTGAAAACAGGAAAGTATGCAAGGATGAGGGGTGCCCACACTAATGATGTAAAACAACTAACAGAGGCA +GTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGATT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACGTGGGAAGCATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTGACTGACAGAGGAAGAC +AAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAAACTGAGTTACAAGCAATTCATCTAGCTTTGCA 
+GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATTAGAAATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGATCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAATTAGTCAGTTCTGGAATCAG +GAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAA---GAACATGAGAAATATCACAATAATTGG +AGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTG +TACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTGCATGTAGCCGGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +CAATACACACAGACAATGGCAGCAATTTCACCAGTACTACAGTCAAGGCCGCCTGTTGGTGGGCGGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTAAGGGATCAGGCTGAACATCTTAAGACAGCAGTACAAACGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCAACAGACATACAGACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGGTGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF01_AE.CF.90CF40 +TTTTTTAGGGAAAATCTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTCCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AA +TGGAGAGCTTGGGGATGGGGGAAGAGAT---------------AACCTCCTTCCCGAAGCAGGAGCAGAA +AGACAAGAA---------ACAGCCTCCTCCTTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAGTAGGAGGACAGCTAAAAGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAGATACTTATAGAAATTTGTGGGAAAAAGGCTATAGGTACAGTGTTAGTAGGACCTACCC +CCGTCAACATAATTGGACGGAACATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGACACTGTACCAGTAACATTAAAGCCAGGAATG---GATGGACCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGAAATGGAAGAGGAAGGAAAAATCTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGCACTAAATGGAG 
+AAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAGGACTTTTGGGAAGTTCAATTAGGAATACCA +CATCCAGCAGGTCTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGAGACGCATATTTTTCAGTTC +CTTTAGATGAAGGCTTTAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAGT +CAGATATCAGTACAATGTGCTGCCGCAGGGATGGAAAGGATCACCAGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAGCAAGAAATCCAGAAATAGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGATCTAAGAGCTCATCTATTGAGCTG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAAGAACCGCCATTCCTTTGGATGGGATATGAACTC +CATCCTGACAGATGGACAGTCCAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATGCAGGGATTAAAGTGAGGCAACTGTGTAA +ACTCCTCAGGGGAGCTAAAGCACTAACAGACATAGTAACACTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAACCCCTGTGCATGGGGTATATTATGACCCATCAAAAGACT +TAGTAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAGACAATTAGCAGAAGTG +GTGCAAAAAGTGGCCACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAGAGAAACATGGGAGACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAAAAAGACCCCATAATGGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAGTAGGGAGACTAAGCAAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAAGTAGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCGATCCATTTAGCCTTGCA +GGATTCAGGACCAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATTATTCAGGCACAACCA +GACAGGAGTGAGTCAGATATAGTCAATCAAATAATAGAGAAACTAATAGAAAAGGAAAAAGTCTACCTGT +CATGGGTCCCAGCACATAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAG +GAAGGTGCTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GACCATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGACTTTAATTTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAGCTGTGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAACTAGATTG +CACGCATCTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAGGG +TAATACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATGT 
+CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGAGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGCATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGAATGAGGAT--- +>CRF01_AE.TH.93TH25 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGAAG---GCCAGGGAATTTTCCTCAG +AGCAAACC------GGAGCCAACAGCTCC---------------------------------GCC---AG +CAGAAAACTGGGGGATGGGGGAGGAGCA---------------------------------------GAA +AGACAAGGA---------ACATCCTCCTCCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAATAAAAATAGGAGGACAACTGAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGGAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAGG +CAATATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTCTAAATTTCCCAATTAGTCCTAT +TGACACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGACCAAAGGTTAAACAGTGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGAGATGGAAGAGGAAGGAAAAATCTCAA +AAATTGGACCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGCACCAAATGGAG +GAAATTAGTAGATTTCAGAGAACTCAATAAAAGAACTCAGGACTTTTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGTTTAAAAAAGAAAAAATCAGTAACAGTGCTAGATGTGGGAGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +CAGATATCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAATAAAAAATCCAGAAATGGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCACAGAACAAAAATAGAGGAGCTAAGAGCTCATCTATTGAGCTG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAGGAACCTCCATTCCTTTGGATGGGATATGAACTC +CATCCTGACAGATGGACAGTCCAGCCTATAGAACTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTGAATTGGGCAAGTCAAATTTATGCAGGGATTAAGGTAAAGCAACTGTGTAA 
+ACTCCTCAGGGGAACTAAAGCACTAACAGATATAGTACCACTGACTGAAGAAGCAGAATTAGAGTTGGAA +GAGAACAGGGAGATT---------CTAAGAATCCCTGTGCATGGAGTATATTATGACCCATCAAAAGACT +TAGTAGCAGAAGTACAGAAACAAGGGCAGGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATTCCAGAAAAAGGTCTGCTCACACTAATGATGTAAGACAATTAACAGAAGTG +GTACAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAGAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +TAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAAAAAGACCCCATAGTGGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAGTAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGGGGAAGAC +AAAAGGTAATTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCGATCCATTTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAAGTAGTCAGCCAAATAATAGAGGAGCTAATAAAAAAGGAAAAAGTCTACCTGT +CATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCATTTCAGGAATCAG +GAAGGTACTATTTTTAGATGGGATAAATAAGGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAACAATGGCTAGTGACTTTAATTTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAACTGTGATA +AATGTCAACTAAAAGGGGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGGTGGCCAGTAAAAG +TAATACACACAGACAACGGTAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATGT +CCGACAGGAATTTGGGATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCAATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGGGAGCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF01_AE.TH.CM240 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGAAG---GCCGGGGAATTTTCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG 
+CAGAAAACTGGGGGATGGGGGAAGAGATAACGGGGGAAGAGATAACCTCCTTACCGAAGCAGGAGCAGAA +AGACAAGGA---------ACATCCTCCTCCTTTAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGGAGGACAGCTGAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAAG +CAATATGATCAGATACTTATAGAAATCTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTAT +TGACACTGTACCAGTAACATTAAAGCCAGGAATG---GATGGACCAAAGGTTAAACAGTGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGAGATGGAAGAGGAAGGAAAAATCTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGCACCAAATGGAG +GAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAGGACTTTTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGTTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +CAGATATCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAATAAAAAATCCAGAAATGGTTATCTATCAATACAAGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCACAGAACAAAAATAGAGGAGCTAAGAGCTCATCTATTGAGCTG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAGGAACCTCCATTCCTTTGGATGGGATATGAACTC +CATCCTGACAGATGGACAGTCCAGCCTATAGAACTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATGCAGGGATTAAGGTAAAGCAACTGTGTAA +ACTCCTCAGGGGAGCTAAAGCACTAACAGACATAGTACCACTGACTGAAGAAGCAGAATTAGAGTTGGCA +GAGAACAGGGAGATT---------CTAAAAACCCCTGTGCATGGAGTATATTATGACCCATCAAAAGACT +TAGTAGCAGAAGTACAGAAACAAGGGCAGGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCCAGAAGAGGGTCTGCTCACACTAATGATGTAAGACAATTAACAGAAGTG +GTGCAAAAAGTAGCCACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAGAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +TAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAAAAAGACCCCATAGTAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAGTAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTAGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCGATCCATTTAGCCTTGCA 
+GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAAAAGGAGAAAGTCTACCTGT +CATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAG +GAAGGTGCTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAACAATGGCTAGTGATTTTAATTTGCCACCTATAGTA---GCAAAGGAAATAGTAACCAACTGTGATA +AATGTCAACTAAAAGGGGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAACTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAACGGTAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATGT +CCAACAGGAATTTGGGATCCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF01_AE.TH.TH022 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGAAG---GCCGGGAAATTTTCCTCAG +AGCAGACC------AGAACCAACAGCCCC---------------------------------ACC---AG +CAGAAAACTGGGGGATGGGGGAAGAGAT---------------AACCTCCTTCCTGAAGCAGGAGCAGAA +AGACAAGGA---------ACACCCTCCTCCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGGAGGACAGCTGAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAGAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAGG +CAATATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTCTAAATTTCCCAATTAGTCCTAT +TGACACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGACCAAAAGTTAAACAGTGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAGAAGATGGAAGAGGAAGGAAAAATCTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGCACCAGATGGAG 
+GAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAGGACTTTTGGGAAGTTCAATTAGGAATACCG +CGTCCAGCAGGTTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTTC +CTTTAGATGAGAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +CAGATATCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGTGTAGCACGACA +AAAATCTTAGAGCCCTTTAGAACAAAAAATCCAGAAATAGTTATCTATCAATACATGGACGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCACAGAACAAAAATAGAGGAGCTAAGAGCTCATCTATTGAGCTG +GGGATTTACTACACCAGGC---AAAAAGCATCAGAAGGAACCTCCATTCCTTTGGATGGGATATGAACTC +CATCCTGACAGATGGACAGTCCAGCCTATAGAACTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATCCAGGGATTAAGGTAAAGCAACTGTGTAA +ACTCCTCAGGGGAGCTAAAGCACTAACAGACACAGTACCACTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAGATT---------CTAAAAACCCCTGTGCATGGAGTTTATTATGACCCATCAAAAGACT +TAGTAGCAGAAGTACAGAAACAAGGGCAGGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCCAGAAAAAGGTCCGCTCACACCAATGATGTAAGACAGTTAACAGAAGTG +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAAAAAGACCCCATAGTAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAGTAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTAACAGAGGAAGAC +AAAAGGTAGTTTCCCTAACTGAGACAACAAATCAAAAGAGTGAATTACATGCGATCCATTTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACACAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAG +GAAGGTGCTATTTTTAGATGGGATAGATAAAGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAACAATGGCTAGTGATTTTAACTTGCCACCTATAGTA---GCAAAGGAAATAGTAGCCAACTGTGATA +AATGTCAGCTAAAAGGGGAAGCTATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCAATTAGATTG +TACACATCTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAGTACACACAGACAATGGTAGTAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATGT 
+CCGGCAGGAATTTGGAATCCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCGGGTAGACAGGATGAGGAT--- +>CRF01_AE.TH.TH047 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------GGGAAG---GCCGGGAAATTTTCCTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAACTGGGGGATGGGGGAAGAGAC---------------AACCTCCTCGCTGAAACAGGAGCAGAA +AGACAAGGA---------ACCCCCTCCTCCTTTAATTTCCCTCAAATCACTCTTTGGCAACGACCCCTTG +TCACAGTAAAAATAGGAGGAGAGCTGAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCGAAAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAGG +CAATATGACCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAACATGTTGACTCAGATTGGTTGTACTCTAAATTTCCCAATTAGCCCTAT +TGACACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGACCAAAGGTTAAACAGTGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------AAAGAGATGGAAGAGGAAGGAAAAATCTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAGAACAGCACCAGATGGAG +GAAGTTAGTAGACTTCAGAGAGCTCAATAAAAGAACTCAGGACTTTTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGTTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGAGATGCATATTTTTCAGTTC +CTTTAGATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTATAAACAATGAGACACCAGGAAT +CAGATATCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCGGCAATATTCCAGAGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAATAAAAAATCCAGAAATGGTTATCTATCAATACATGGATGACTTGTATG +TAGGATCTGATTTAGAAATAGGGCAGCACAGAACAAAAATAGAGGAGCTAAGAGCTCATCTGTTGAGCTG +GGGATTTACTACACCAGAC---CAAAAGCATCAGAAGGAACCTCCATTCCTTTGGATGGGATATGAACTC +CATCCTGACAGATGGACAGTCCAGCCTATAGAACTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAAATTTATGCAGGGATTAAGGTAAAGCAACTGTGTAA 
+ACTCCTCAGGGGAGCTAAAGCACTAACGGACATAGTACCACTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAACCCCTGTGCATGGAGTATATTATGACCCATCAAAAGACT +TAGCAGCAGAAGTACAGAAACAAGGGCAGGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCCAGAAACAGGTCTGCTCACACTAATGATGTAAGACAATTAACAGAAGTG +GTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +TAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAAAAAGAACCCATAATAGGAGCAGAGACTTTC +TATGTCGATGGGGCAGCTAGTAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTAGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCGATCCATTTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAAGTAGTCAACCAATTAATAGAGGAGCTAATAAAAAAGGAAAAGGTCTACCTGT +CATGGGTACCAGCACATAAGGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCAGGAATCAG +GAAGGTACTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GAACATGAAAGATATCACAGCAATTGG +AGAACAATGGCTAGTGATTTTAATTTGCCACCTGTAGTA---GCAAAGGAAATAGTAGCCAACTGTGACA +AATGTCAACTAAAAGGGGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAATTAGATTG +CACACATCTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAAGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAG +TAATACACACAGACAACGGTAGCAATTTCACCAGCGCTGCAGTTAAAGCAGCCTGTTGGTGGGCCAATGT +CCGACAGGAATTTGGGATCCCCTATAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTAAGAGATCAAGCTGAGCACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF02_AG.FR.DJ263 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAA +AGCAGACC------GGAACCAACAGCCCC---------------------------------ACC---AG 
+CAGAGAGCTTGGGGATGGGGGAAGAGAT---------------AACCTCCCCTCCGAAGCAGGAAGCGAG +GGACCAGGG---------ACTATATCCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAG +TTACAGTAAAAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCCATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGGCGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACCGTGCCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAGCAATGGCCATTGACA +GAGGAAAAAATAAAAGCATTAACAGAAATCTGT------ACAGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGGCATAAAGAAGAGAGATAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTCCTGGGAGGTCCAATTAGGAATACCT +CATCCCGCGGGATTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTCC +CCTTAGATAAAGACTTTAGAAAGTATACTGCATTCACTATACCTAGTACAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGGCAAGCATGACA +AACATCTTAGAGCACTATAGAATAAAAAATCCAGAGATAATGATCTACCAATATATGGATGATTTATATG +TAGGATCTGACTTAGAGATAGAGCAGCATAGAGCAAAAATAGAGGAGTTGAGAGAACATCTACTAAAATG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTCCAGCCTATACAGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAAGCAACTGTGTAA +ACTCCTCAGGGGAGCCAAAGCATTAACAGATATAGTACCACTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGAGTATATTATGACCCAGCAAAAGACC +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAAAAGGAGGTCTGCCCACACTAATGATGTAAAACAATTAGCAGAGGTA +GTGCAAAAAGTGGTTACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAGAGAAACATGGGAAGCATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGACTGGGAGTTCGT +CAATACCCCTCCTCTAGTAAAATTGTGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACCTTC +TATGTAGATGGGGCAGCTAATAGGGAGACTAAGCTAGGAAAAGCGGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAGTTACATGCAATTTATCTAGCCTTGCA 
+GGACTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGAAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAG +GAAGGTACTATTTTTAGATGGCATAGATAAAGCCCAAGAA---GAGCATGGAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAATA---GCAAAAGAAATAGTGGCCTGCTGTGATC +AATGTCAGCTGAAAGGGGAAGCCATGCATGGACAAGTAGACTGTGGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGTTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTGAAAG +TAATACACACAGACAATGGCAGCAATTTTACCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAATGT +CACACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAAGCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAGATTACA---AAAATTCAAAATTTTCGGGTCTAT +TACAGGGACAGCAGAGACCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAAGAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATTAAAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF02_AG.FR.DJ264 +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAA +AGCAGACC------GGAACCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTTGGGATGGGGGAAGAGAT---------------AACCTCCCCTCCGAAGCAGGAACCGAG +GGACCAGGG---------ACTATATCCCCCTCTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAG +TCACAGTAAGAATAGGGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAGAAGGCCATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAATATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACCGTGCCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGACATCTGT------GCAGAGATGGAAAAGGAGGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGATAGTACTAAATGGAG 
+AAAACTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGGCTTCTGGGAAGTCCAATTACGAATACCT +CATCCCGCGGGATTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTCC +CCTTAGATAAAGACTTTAGAAAGTATACTGCATTCACTATACCTAGTGTAAATAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAGGCAAGCATGACA +AAAATCTTAGAGCCTTTTAGAATAAAAAATCCAGAGATAGTGATCTACCAATATATGGATGATTTATATG +TAGGATCTGACTTAGAGATAGGGCAGCATAGAGCAAAAATAGAGGAGTTGAGAGAACATCTACTGAAATG +GGGATTTACCACACCAGAT---AAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTC +CATCCTGACAAATGGACAGTCCAGCCTATACAGCTGCCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTTTGCAGGAATTAAAGTAAAGCAACTGTGTAG +ACTCCTCAGGGGAGCCAAAGCATTAACAGATATAGTACCACTGACTGAAGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGAGTATATTATGACCCAGCAAAAGACC +TAATAGCAGAAATACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTAAAAACAGGAAAATATGCAAAAAGGAGGTCTGCCCACACTAATGATGTAAAACAATTAACAGAGGTA +GTGCAAAAAGTGGCTACAGAAAGCATAGTAATATGGGGAAAGACC---CCTAAGTTTAGCCTACCCATAC +AAAGAGAAACATGGGAAGCATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGACTGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGAGAAAGACCCCATAGTAGGAGCAGAAACCTTC +TATGTAGATGGGGCAGCTAATAAGGAGACTAAGCTAGGAAAAGCGGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCAATTTATCTAGCCTTGCA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGAAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAG +GAAGGTACTATTTTTAGATGGCATAGATAAAGCTCAAGAA---GAGCATGAAAGATATCACAGCAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCTGCTGCGATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGGCAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTGAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTAAAGGCAGCATGTTGGTGGGCAAATGT 
+CACACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTAGAATCTATGAATAAGGAATTA +AAGAAAATCATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAGGAACTACAAAAACAGATTATA---AAAATTCAAAATTTTCGGGTCTAT +TACAGGGACAGCAGAGACCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAAGAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATTAAAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF02_AG.NG.IBNG +TTTTTTAGGGAAAATTTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------GGAACCAACAGCTCC---------------------------------ACC---AG +CAGAGAGCTTTGGGATGGGGGAAGAGAT---------------ACCTCCCTCTCCACAGCAGGAACCGAG +GGACAAGGG---------GCTATATCCTCCTTTAACTTCCCTCAAATCACTCTTTGGCAACGACCCTTAG +TTACAGTAAGAATAGAGGGACAGCTAATAGAAGCCCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTACCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAAATACTTATAGAAATTTGTGGAAAAAAGGCCATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTGCCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGACATTTGT------ACAGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGACCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGATAGTACTAAATGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGATTTCTGGGAGGTCCAGTTAGGAATACCT +CATCCCGCGGGATTAAAAAAGAAAAAATCAGTAACAGTGCTAGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATAAAGACTTTAGAAAGTATACTGCATTTACTATACCTAGTGTAAATAATGAGACACCAGGGAT +TAGATACCAGTACAATGTGCTTCCACAGGGATGGAAAGGGTCACCGGCAATATTTCAGGCAAGCATGACA +AAAATCTTAGAGCCCTTTAGAACAAAAAATCCAGAGATAGTGATCTACCAATACATGGATGATTTATATG +TAGGATCTGATTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAGTTGAGAGGACATCTACTGAAATG +GGGATTTACCACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACAGTCCAGCCTGTAGAACTACCAGAAAAAGACAGCTGGACTGTCAATGATATAC +AGAAATTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATGCAGGAATTAAAATAAAGCAACTGTGTAG 
+ACTCCTCAGGGGAGCCAAAGCACTAACAGATATAGTAGCACTGACTGAGGAAGCAGAATTAGAATTGGCA +GAGAACAGGGAAATT---------CTAAAAGAACCTGTACATGGGGTATATTATGACCCAACAAAAGACT +TAGTAGCAGAATTACAGAAACAAGGGCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TTTAAAAACAGGAAAATATGCAAAAAAGAGGTCTGCCCACACTAATGATGTAAAGCAATTAACAGAGGTA +GTGCAAAAAGTAGCTATGGAAAGCATAGTAATATGGGGAAAGACC---CCTAAATTTAGACTACCCATAC +AAAGAGAGACATGGGAAACATGGTGGATGGAGTATTGGCAGGCTACCTGGATTCCTGAATGGGAGTTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAAAAAGACCCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGAGAGACTAAGATAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGAC +AAAAGGTTGTTTCCCTAACTGAGACAACAAATCAAAAGACTGAATTACATGCAATTCATTTAGCCTTACA +GGATTCAGGATCAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAGGCACAACCA +GACAGGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAAAAAAGGACAAAGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAG +GAAGGTACTATTTTTAGATGGCATAGATAAAGCCCAAGAA---GAGCATGAAAGATATCACAGCAATTGG +AAGGCAATGGCTAGTGATTTTAATCTGCCACCTATAGTA---GCAAAAGAAATAGTGGCCAGCTGTGATA +AATGTCAGATGAAAGGGGAAGCCATGCATGGACAAGTAGACTGTGGTCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGGAAAATTATCTTAGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTATATTAAAATTAGCAGGAAGATGGCCAGTGAAAG +TAATACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTAAAGGCGGCATGTTGGTGGGCAAATGT +CACACAAGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATTATAGGACAGGTCAGAGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGATATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAGATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAATAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CRF03_AB.RU.KAL153 +TTTTTTAGGGAGAATTTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCCTCAG +AGCAGACC------AGAGCCATCAGCCCC---------------------------------ACC---AG 
+CAGAAAACTTTGGGATGGGGGAAGAGAT---------------AACCCCCTCCCTGAAACAGGAACAGAA +GGACAGGGA---------ACAGCATCCTCCTTCAATTTCCCTCAAATCACTCTTTGGCGACGACCCCTTG +TCACAGTAAGAATAGGAGGACAGCTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGA +AGACATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGGATTGGAGGTTTTATCAAGGTAAGA +CAGTATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACGGTATTAGTAGGACCTACCC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCAATAAGTCCTAT +TGAAACTGTACCAGTAACATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTAACA +GAAGAGAAAATAAAAGCATTAACAGACATTTGT------AAGGAGATGGAAAAGGAAGGAAAAATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAG +AAAATTAGTAGGTTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCA +CACCCTGCAGGGTTAAAAAAGAAAAAATCTGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTC +CCTTAGATCAAGACTTCAGAAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGAT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATTTTCCAAAGTAGCATGACA +AAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGAGATAGTTATCTATCAATACATGGATGATTTGTATG +TAGGATCTGACTTAGAGATAGGGCAGCATAGAACAGAAATAGAGGAACTGAGAGAACATCTGCTGAGGTG +GGGATTTACCACACCAGAC---AAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTC +CATCCTGATAAATGGACTGTACAGCCTATAGTGTTGCCAGAAAAAGACAGCTGGACTGTCAATGACATAC +AGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAA +ACTCCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGCAGAAGCAGAGCTAGAACTGGCA +GAAAACAGGGAGATT---------CTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACT +TAGTAGCAGAAATACAGAAGCAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGACTGAGGGGTGCCCACACTAATGACGTAAAACAGTTAACAGAGGCA +GTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACT---CCTAAATTTAAACTACCCATAC +AAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTC +TATGTAGATGGAGCAGCTAATAGGGAGACTAAATCAGGAAAAGCAGGATATGTTACTGACAGAGGAAGAC +AAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCA 
+GGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTGATAAAAAAGGAAAAGGTCTACCTGG +CATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTTGATAAATTAGTCAGTGCTGGAATCAG +GGAAGTACTATTTTTAGATGGAATAGATAAGGCACAAGAA---GAACATGAGAAATATCACGGTAATTGG +AGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTA---GCAAAAGAAATAGTAGCCAGCTGTGATA +AATGTCAATTAAAAGGAGAAGCCATGCACGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTT +ATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTGTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGCAGCAATTTCACCAGTACTGCGGTTAAGGCTGCCTGTTGGTGGGCAGGGAT +CAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAACAATTA +AAGCAAACTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGAGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TGGTAATACAGGACAATAACGATATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT--- +>CRF04_CPX.CY.94CY0 +TTTTTTAGGGAGAATGTGGCCTTCCAGCAA------------AGGGAG---GCCAGGAAATTTTCTTCAG +AACAGGCC------AGAGCCAACAGCCCC---------------------------------GCC---CG +CGGAATGCTTAGAGAGGAAAGAGGAGAC---------------AACCTCCTCTCTGAAGCAGGAACCGAG +GGACAAGGA------------ACTATATCCTTTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCACAATAAAACTAGGGGGACAGATAAGGGAGGCTCTTTTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATCGGAGGTTTTATCAAAGTAAGA +CAATATGATCAGATACCTATAGAAATTTGTGGAAAAAAGGCCATAGGCACAGTGTTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAACATGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCCTTAACAGAGATATGT------ACAGACATGGAAAAGGAAGGCAAGATTTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAG 
+AAAATTAGTAGATTTCAGAGAACTCAATAAAAGAACTCAGGACTTCTGGGAAGTTCAGTTAGGAATACCG +CACCCAGCAGGGTTAAAGAAGAAAAAATCAGTAACAGTATTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATCCAGAGTTCAGGAAGTACACTGCATTCACCATACCTAGTACCAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCACAGGGCTGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGATTCAAAAACCCAGAAATAGTCATATACCAATATATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAAGAGCTAAGAGAGCATCTATTGAGATG +GGGATTCACCACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACCGGCAGAAAAGGATAGCTGGACTGTCAACGATATCC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCAATTATGTAA +ACTTCTTAGGGGAGCTAAAGCCCTAACAGACATAGTACCACTAACTACAGAGGCAGAGTTAGAATTAGCA +GAGAACAGGGAGATT---------CTAAAAGAACCAGTACATGGGGCATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAAGGGCAAGGTCAATGGACATATCAAATATATCAAGAGCCACATAAAAA +TCTGAAAACAGGGAAGTATGCAAGAACCAGATCTGCCCACACTAATGATGTTAGACAATTAACAGAAGCA +GTGCAAAAGATAGCCATGGAATGCATAGTAATATGGGGAAAGACT---CCTAAGTTTAGATTACCCATAC +AAAAGGAAACATGGGACACATGGTGGACAGAATATTGGCAGGCCACCTGGATCCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAAACAGACCCCATAGCAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAAACAAAACAGGGAAAAGCAGGATATGTTACTGATAGAGGCAGAC +AAAAAGTTGTCTCCCTATCTGAAACAACAAATCAGAAGACTGAATTACAAGCAATTTACTTAGCTTTGCA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCAATAGGAATCATTCAAGCACAACCA +GATAGAAGTGAATCAGATTTAGTTAATCAAATAATAGAGCAGTTAATACGGAAGGACAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCAATGGAATCAG +AAAGGTGCTATTTTTAGATGGAATAGATAAGGCTCAAGAA---GAACATGAGAAATATCACAATAACTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCATCAGTGGTA---GCAAAAGAGATAGTAGCTAGCTGTAATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTGGACTGTAGTCCAGGGATATGGCAGTTAGATTG +TACACATTTAGAAGGTAAAGTTATCATGGTAGCAGTTCATGTGGCTAGTGGATACATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCCTACTTCATACTAAAATTAGCAGGAAGATGGCCAGTGAAAA +TGATACATGCAGACAACGGCCCCAATTTCACCAGTGCTGCGGTTAAGGCAGCCTGTTGGTGGGCAGATAT 
+CAACCAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATCATAGGGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGAACCAATTTGGAAGGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAACAGTGATATCAAAGTAGTACCAAGAAGAAAAGCAAAGATTATTAGGGACTATGG +CAAACAGATGGCAGGTAATGATTGTGTGGCAGGTAGACAGGATGAAGAT--- +>CRF04_CPX.GR.97PVC +TTTTTTAGGGAGAATGTGGCCTTCCAGCAA------------AGGAAG---GCCGGGGAATTTTCTTCAG +AGCAGGCC------AGAGCCAACAGCCCC---------------------------------ACC---CG +CAGAGAGCTTAGAGATGAAAGAGGAGAC---------------AACCTCCTCTCCGAAGCAGGAACCGAG +GGACAAGGA------------ACTATATCCTTTAACTTCCCTCAAATCACTCTTTGGCAACCACCCCTTG +TCACAATAAAAATAGGGGGACAAATAAGGGAGGCTCTTTTAGATACAGGAGCAGATGATACAGTATTAGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAAA +CAATATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCCATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGACGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAGGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCTTTAACAGAAATATGT------ACAGAAATGGAAAAGGAAGGAAAGATTTCAA +AAATTGGGCCTGAAAATCCATATAATACTCCAATATTTGCTATAAAGAAAAAGAACAGCACTAGATGGAG +AAAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGATTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATCCAGCGTTCAGGAAGTACACTGCATTCACCATACCTAGTACCAACAATGAGACACCAGGAGT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATATAGTATGACA +AAAATCTTAGAGCCCTTTAGAACCAGAAACCCAGAAATAGTCATATACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAACATAGAACAAAAATAGAAGAACTAAGAGAACATCTTTTGAGATG +GGGCTTTTACACACCAGAC---AAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGCCAGAAAAGGATAGCTGGACTGTCAATGATATCC +AGAAGTTAGTGGGAAAGCTAAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCAATTATGTAA 
+ACTTCTTAGGGGAACTAAAGCCCTAACAGACATAGTACCACTAACTACAGAAGCAGAGCTAGAATTAGCA +GAGAACAGGGAGATT---------CTAAAAGAACCAGTACATGGGGCATACTATGACCCATCAAAAGACT +TAATAGCAAAAATACAGAAGCAAGGGCAAGGCCAATGGACATATCAAATATATCAAGAGCCATATAAAAA +TCTGAAAACAGGGAAGTATGCAAAAACCAGATCTGCCCACACTAATGATGTAAGACAATTAACAGAAGTA +GTACAAAAGATAGCCATGGAATGCATAGTAATATGGGGAAAGACT---CCTAAGTTTAGATTACCCATAC +AAAAGGAAACATGGGACACTTGGTGGACAGAATATTGGCAGGCCACCTGGATCCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATAGTACCAGTTAGAACCAGACCCCATAGCAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAGTAGAGAAACAAGGCGGGGAAAAGCAGGATATGTTACTGATAGAGGAAGAC +AAAAGGTTGTCTCCCTATCTGAAACAACCAATCAGAGGACTGAATTACAAGCAATTTACTTAGCTTTGAA +GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCAATAGGAATCATTCAAGCACAACCA +GATAGAAGTGAATCAGATTTAGTTAATCAAATAATAGAGCTGTTAATACAGAAGGACAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCAATGGAATCAG +AAAGGTGCTATTTTTAGATGGGATAGATAAGGCTCAAGAA---GACCATGAGAAATATCACAGTAACTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCAGTAGTA---GCAAAAGAGATAGTAGCTAGCTGTAATA +AATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGWGGACTGTAGTCCAGGGATATGGCAATTAGACTG +TACACATTTAGAAGGTAAAATTATCCTGGTACCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCCTACTTCATACTAAAATTAGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGCCCCAATTTCACCAGTGCTGCGGTTAAGGCAGCCTGTTGGTGGGCAGATGT +CCAACAGGAATTTGGAATTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTA +AAGAAAATCATAAAGCAGGTCAGGGATCAAGCTGAACACCTTAAGACAGCAGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAGGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAACAGTGATATAAAGGTAGTACCAAGAAGAAAAGCAAAGATTATCAGGGACTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGAT--------- +>CRF04_CPX.GR.97PVM +TTCTTTAGGGAGAATGTGGCCTTCCAGCAA------------GGGGAG---GCCAGGAAATTTTCTTCAG +AGCAGGAC------AGAGCCAACAGCCCC---------------------------------GCC---CG 
+CAGAGAGCTTCGAGATGAAAGAGGAGAC---------------AACCTCCTCTCCGAAGCAGGAACAGAG +GGACAAGGA------------ACTATATCCCATAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTTG +TCACGATAAAAATAGGGGGACAGCTAAGAGAGGCTCTTTTAGATACAGGAGCAGATGATACAGTATTAGA +AGAAATAAATCTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAATATGATCAGATAACTATAGAAATTTGTGGAAAAAAGGCTACAGGTACAGTATTAGTGGGACCCACAC +CTGCCAACATAATTGGACGAAATATGTTGACTCAGCTTGGTTGTACTTTAAATTTTCCGATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAGTGGCCTTTGACA +GAAGAAAAAATAAAGGCCTTAAGAGAAATATGT------ACAGAAATGGAACAGGAAGGAAAGATCTCAA +AGGTTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAGAACAGCAATAGATGGAG +AAAATTAGTAGATTTCAGAGAACTCAATAAAAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCG +CATCCAGCAGGATTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATCCAGAGTTCAGGAAGTACACTGCATTTACCATACCTAGTATCAACAATGAGACACCAGGAAT +TAGATATCAGTACAATGTGCTTCCGCAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAACCAAAAACCCAGAGATGGTCATATACCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAACATAAAGCAAAAATAGAAGAACTAAGAGAACATCTATTGAGGTG +GGGATTCTTTACACCAGAC---CAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGACAAATGGACAGTGCAGCCTATACAACTGGTAGAAAAGGAGAGCTGGACTGTCAATGATATCC +AGAAGTTAGTGGGAAAACTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAA +ACTTCTTAGGGGAGCTAAAGCCCTAACAGACATAGTGCCACTAACTACAGAAGCAGAGCTAGAATTAGCA +GAGAACAGGGAGATT---------CTAAAAGAACCAGTACATGGGGCATATTATGACCCATCAAAAGACT +TAATAGCAGAAATACAGAAGCAAGGGCTAGGCCAATGRACATATCAAATATATCAAGAGCCATATAAAAA +TCTGAAAACAGGGAAGTATGCAAAAACCAGGTCTGCCCACACTAATGATGTAAGACAATTAACAGAAGCA +GTACAAAAGATAGCCATGGAATGCATAGTAATATGGGGAAAGACT---CCTAAGTTTAGATTACCCATAC +AAAAGGAAACATGGGACACATGGTGGATGGAATATTGGCAGGCCACCTGGATCCCTGAATGGGAATTTGT +CAATACCCCTCCTCTAGTAAAATTATGGTACCAGTTAGAAACAGAACCCATAGCAGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAGTAGAGAAACAAACCAGGGAAAAGCAGGATATGTTACTGATAGAGGAAGAC +AAAAAGTTATCACCCTACCTGAAACAACAAATCAGAAGACTGAATTACAAGCAATTTACTTAGCTTTGCA 
+GGATTCAGGATCAGAAGTAAACATAGTAACAGACTCACAGTATGCAATGGGAATCATTCAAGCAAAACCA +GATAAAAGTGAATCAGATTTAGTTAATCAAATAATAGAGCAATTAATACAGAAGGACAAGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCAATGGAATCAG +AAAGGTGCTATTTTTAGATGGAATAGATAAGGCTCAAGAA---GAACATGAGAAATATCACAATAATTGG +AAAGCAATGGCTAGTGATTTTAATCTGCCACCAGTAGTA---GCAAAAGAGATAGTAGCTAGCTGTAATA +AATGTCAGCTAAAGGGGGAAGCCATGCATGGACAAGTGGACTGTAGTCCAGGGATATGGCAATTAGACTG +TACACATTTAGAAGGTAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAAACAGCTTACTTCATACTAAAATTAGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGCTCCAATTTCACCAGTGCTGCGGTTAAGGCAGCCTGTTGGTGGGCAAATAT +CCAACAGGAATTTGGAGTTCCCTACAATCCCCAAAGCCAAGGAGTAGTGGAATCTATGAATAAAGAATTG +AAGAAAATCATAGGGCAGGTCAGAGATCAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGAGGGGTACAGTGCAGGGGAAAGAATAATAGACATAAT +AGCATCAGATATACAAACTAAAGAACTACAGAAACAAATTATA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAAAGACCCAATTTGGAAGGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAGGACAACAGTGATATAAAGGTAGTACCAAGAAAAAAAGCAAAAATCATTAGGGACTATGG +AAAACAGATGGCAGGTGATGACTGTGTGGCAGGTAGACAGGATGAAGAT--- +>DF.CD.VI961 +TTTTTTAGGGAGAGTTTGGCTTTCCCACAA------------GGGAAG---GCCAGGGAATTTCCTCCAG +AGCAGGCC------AGAACCCTCAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGAGAGGAGAT---------------AACCCCCTCGCCGAAGCAGGAGCAGAA +GGACGAGGG---------GAAGTACCCTCCCTTAGCTTCCCTCAAATCACTTTTTGGCAACGACCCGTTG +TCACAATAAAGATAGAGGGACAGCTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGA +AGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGA +CAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTGTAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATTTGTTGACCCAGATTGGCTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTAAAGCCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGT------ACAGACATGGAAAAGGAGGGAAAAATTTCAA +GAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAGGTGGAG 
+AAAATTAGTAAATTTCAGGGAACTTAATAAAAAAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CATCCTGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTATTGGATGTGGGAGATGCATATTTTTCAGTTC +CCTTACATGAGGACTTCAGGAAGTACACTGCATTCACCATACCTAGTATCAACAATGAAACACCAGGATT +CAGATACCAGTACAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACA +AAAATCTTAGAGCCCTTTAGAAAACAAAATCCAGAAATGGTTATTTATCAATACATGGATGATTTGTATG +TAGGGTCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGTTAAGGGAACATCTACTGAAATG +GGGATTTACTACACCAGAC---AAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTC +CATCCTGATAAATGGACAGTGCAGCCTATACAATTGCCAAACAAGGACAGCTGGACTGTCAATGATATAC +AGAAGTTAGTAGGAAAGCTAAATTGGGCAAGTCAGATTTATCCAGGAATTAAAGTAAAGCAGTTATGTAA +ACTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCATTAACAGAAGAAGCAGAATTAGAACTGGCA +GAAAACAGGGAAATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCAGCAAAAGACT +TAATAGCAGAAATACAGAAACAAGGGCAAGAGCAATGGACATATCAAATTTATCAAGAACCATTTAAAAA +TCTGAAAACAGGAAAGTATGCAAGGACGAGGAATGCCCACACTAATGATGTAAAACAATTAGCAGAGGCA +GTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACT---CCTAAATTTAGACTACCCATAC +AAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +TAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAAAAGGAACCCATAATGGGAGCAGAAACTTTC +TATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGAC +AAAAGGCTGTCTCCCTAACTGAGACCACAAATCAGAAGACTGAGTTACATGCAATTTATTTAGCTTTACA +GGATTCAGGATCAGAGGTGAACATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCA +GATAAGAGTGAATCAGAGTTAGTCAATAAAATAATAGAGCAATTAGTACAAAAGGAAAGGGTCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAG +AAAAATACTATTTTTGGATGGGATAGATAAGGCACAAGAA---GAACATGAAAAATACCACAACAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCTGTAGTC--AGCAAAAGAAATAGTAGCTAGCTGTGATA +AGTGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATATGGCAACTAGATTG +TACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTCCATGTAGCTAGTGGCTATATAGAAGCAGAAGTC +ATCCCAGCAGAGACAGGACAGGAAACAGCCTACTTCATACTAAAGTTAGCAGGAAGATGGCCAGTAAAAA +TGGTACATACAGATAATGGCAGCAATTTCACCAGTGCTGCAGTTAAGGCTGCCTGTTGGTGGGCAGGTAT 
+CAAACAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAGTCGATGAATAAAGAGTTA +AAGAAAATTATAGGACAGGTAAGAGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAGGAATAATAGACATAAT +ATCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACA--AAAAATTCAAAATTTCCGGGTTTAT +TACAGGGACAGCAGAGACCCAGTGTGGAAAGGACCAGCAAAACTACTCTGGAAAGGTGAAGGGGCA---G +TAGTCATACAAGACAATAGTGAAATAAAGGTAGTACCAAGAAGAAAAGCAAAAATCATTAGGGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>U.CD.VI1126 +TTTTTTAGGGAAAATTTGGCCTTCCAACAA------------AGGGAG---GCCGGGAAATTTTCTTCAG +AGCAGACC------AGAGCCAACAGCCCC---------------------------------ACC---AG +CAGAGAGCTTCGGGTTTGGGGAGGAGAT---------------AAACCCCTCTCCGAGGCAGGAGACGAA +AGACAAGGG---------ACAGGAGCCTCCTTTAACCTCCCTCAAATCACTCTTTGGCAGCGACCCATTG +TTACAATAAAGATAGGGGGACAGTTAAGGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTGGA +AGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGA +CAGTATGATCAGGTAGTTATGGAAATTTGTGGACAAAAGGCTATAGGTACAGTATTAGTAGGACCTACAC +CTGTCAACATAATTGGAAGAAATATGTTGACTCAGATTGGTTGCACTTTAAATTTTCCAATTAGTCCTAT +TGAAACTGTACCAGTAAAATTGAAACCAGGAATG---GATGGCCCAAAAGTTAAACAATGGCCATTGACA +GAAGAAAAAATAAAAGCATTAACAGAAATATGT------CTAGAAATGGAAAAGGAGGGAAAAATCTCAA +AAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAAATAGTAATAGATGGAG +AAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGATTTTTGGGAAGTTCAATTAGGAATACCA +CATCCTGCTGGATTAAAGAAGAAAAAATCAGTAACAGTGTTAGATGTGGGGGATGCATATTTTTCAGTTC +CCTTAGATCCAGAATTCAGGAAATACACTGCATTCACCATACCTAGTGTCAACAATGAGACACCAGGAGT +TAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATACAGCATGACA +AAAATCTTAGAGCCATTTAGAACAAAAAATCCAGAAATGGTTATATACCAATACATGGATGATTTGTATG +TAGGATCTGATTTAGAAATAGGGCAACATAGAACAAAAATAGAGGAACTAAGAGAACATCTGTTGAGATG +GGGATTTTTCACACCCGAC---GAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGGTATGAACTC +CATCCTGATAAATGGACAGTGCAGCCTATACAACTGCCAGACAAGGAAGACTGGACTGTCAATGACATAC +AGAAGTTAGTGGGAAAACTAAATCGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAAGCAATTATGTAA 
+ACTCATTAGGGGAGCCAAAACACTAACAGACATAGTACCACTAACTGCAGAAGCAGAGTTAGAATTGGCA +GAGAACAGAGAGATT---------CTAAAAGAACCAGTACATGGGGTATATTATGACCCATCAAAAGATC +TAATAGCAGAAATACAAAAACAAGGGCAAGGCCAATGGACATATCAAATATATCAAGAGCCATATAAAAA +TCTAAAAACAGGAAAGTATGCAAGAATGAGGTCGGCTCACACTAATGATGTAAAACAGTTAACAGAAGCA +GTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAACT---CCTAAGTTTAGACTGCCCATAC +AAAAGGAAACATGGGAGACATGGTGGACAGAGTATTGGCAAGCCACCTGGATCCCTGAATGGGAGTTTGT +CAATACCCCTCCCCTAGTAAAACTATGGTACCAGTTAGAAACAGACCCCATAGCAGGAGCAGAGACTTTC +TATGTAGATGGGGCAGCTAATAGAGAAACTAAAAAGGGAAGAGCAGGATATGTTACTGACAAAGGAAGAC +AGAAAGTTGTCTCCCTAACTGAAACAACAAATCAGAAGACTGAATTACAAACAATCTATTTAGCTTTGCA +GGATTCAGGTTCAGAAGTAAACATAGTAACAGACTCACAGTATGCAATAGGAATCATTCAAGCACAACCA +GATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAGCAATTAATACAGAAGGACCAGATCTACCTGT +CATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGCAGTGGAATCAG +AAAGGTGCTATTTTTAGATGGAATAGATAAGGCTCAAGAA---GAGCATGAAAAATATCACAATAATTGG +AGAGCAATGGCTAGTGATTTTAATCTGCCACCAATAGTA---GCAAAAGAGATAGTAGCTAGCTGTGATA +AATGTCAACTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGAATATGGCAATTAGATTG +TACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCAGAAACAGGACAGGAGACAGCCTTCTTCATATTAAAGTTGGCAGGAAGATGGCCAGTAAAAA +TAATACATACAGACAATGGCAGCAATTTCATCAGTGCTACGGTTAAGGCAGCCTGTTGGTGGGCAGGTAT +CCAGCAGGAATTTGGAATTTCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAACAAAGAATTA +AAGAAAATTATAGGACAAATAAGAGATCAGGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCA +TCCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAAT +AGCATCAGACATACAAACTAGAGCATTACAAAAACAAATTACA---AAAATTCAAAATTTTCGGGTTTAT +TACAGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCA---G +TAGTAATACAAGACAATAGTGAAATAAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGACTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGAT--- +>CONSENSUS_CPZ +TTTTTTAGGGAAAc?ttggcCc?C?tggtG?????????????ggGaG???aCCAGGgAACTTtgTGcAg +AAcaggaa??????gGAgCCaACaGCtCC---------------------------------ACC---?A 
+TAGAGa?tTatggGtA?cAggA?gAga?---------------ga?ga??cAGGagaa?aAgGgggagGA +gaa---------------????Ctat?TCc?ccaactTCCCTcAAATCaCTCTTTGGCAgcGACCccTca +Tc??AgTaaaagTAgaaGGgCAaatAtgtgAaGCTcT?tTAGAtACAGGAGCTGATGAtACAGTA?TAGA +gagaATaCAatT?cAAGGaa??TGGAaaCCAAAAAtgAT?GG?GGAATTGGaGGTTTTAT?aaaGTaaAA +CAaTaTgAtaA?GTacacATAgAaATaGaaGg?AGaAaAGtActaGg?aCAGTatTagTgGGACCAAcaC +CaGTAAAtATtATtGG?AGaAATaTTTTaacaCaatT?GGaTGTACTtTAaatTTtCCaATtAGttcaaT +TgAAACtGT?CCaGT?AaATTaAAAccAGGAATG---GATGG?CCaAgaGTAAAgCAATGGCC?tT?tCA +gcAGAaAAAATtaAgGCctTAAcAGAAATTTGt------cAagAaaTgGAAaaaGAAgg?AAgATttC?A +gAaTAGGgCCaGAaAAtCCaTACAATACaCCaATTTTTGCaATcAAAAAGAAAGAcAgtaCtAAATGGAG +AAAatTAGTaGAcTTcAGAGAAtTAAATAAaAGaACACAAGAcTTtTGGGAagT?CAgTTAGGcATaCCt +CAcCCAGCaGGaTTAAAgaAgAAaAaATCAGTgACAGTatTaGATGTaGGaGATGCCTAtTTcTCttgtC +CccTgGATaAgGA?TTtAGaAAATAtACaGCaTTtACaATtCC?AGtgTaAAcAATGAGACCCCAGGAaT +tAGATAtca?TATAATGTttTaCCACAAGGaTGGAAAGGgTCaCCAgCtATtTTcCAaagcAGtATGACa +AAaATtcTAGaaCC?TTtAGagAaaAg?ATCCaGa?gTta?aATtTAtCAaTAcATGGATGAtCTcTATG +TaGGgTCtGATcT?gAaaTtgat?aaCATAGagAAAaGgTaGAA?agCT?AGACAAcATTTgCtta??TG +GGGatTcac?ACcCCtGAc---AAaAAgCATCAgaAgGAaCCaCCaTTtttATGGATGGGaTATGAgcTc +CAtCCAGAcAAaTGGACaGT?CAGcc?ATacAgtTACCAgAaaaAGA?gttTGGACaGTcAATGAtAT?C +AgAAAcTagTAGGaAAatTaAAtTGGGCAAGTCAGAT?TAtCCAGGAATaAAAAtaAAGCAgTTaTGtAa +acT?ATaAgAGGAgc?AAga?tCTaACaGAtgtaGTacat?T?ACac??GAaGCAGAAtTaGAaTTAGaA +GAAAAtAgagAaATt?????????cTAaag?aaccAgTAcAtGGggtcTAcTATgA?CCaGa?aaAgaac +TaatAGCAgaaaTACAgAAACAaGG?a???G?CAgTGGAC?TAtCAaATaTtTCAagAac?acatAAgaa +ttTaAAgaCAGGaAAATATGCcAGaCaaAgatcAgCaCAcACtAATGA?aTcAGgCAacTaGctGaAGta +GTgCAAAAAATaGctactGAaAGcATaGT?ATtTGGGGAaAggtA---CCaAAATTTaaaTTaCcagTac +agAagGAAa?tTGGGAagCaTGGTGGtCAGAaTAtTGGCAGGCcACCTGGAT?CCaGAaTGGGAaTTTgT +cAATACcCC?CCctTagT?AaAcT?TGGTAtAattTAgaGaCAGA?cCtATaccag?ggCaGAaAC?TtT +TATGT?GATGGaGCAGC?aAtaGagAaaC?aAaa??GGgAAgGCAGGaTATGT?ACaGAcAgaGG?AgAc +aaAaaaT?Ataa?ctTAgAAAAtACcACtAATCAacAaGCAGAatTaaagGCg?T?ctt?TgGC?tTa?A 
+gGATTCAga??a?ccaGTtAAtaTAGTcACtGacTCaCAATATGt?TTaGGaaTcaT?CA?tctcagCCA +GAtCAaAGTGAATCAgaatTaGTcaAtcAgATAATagAagAaTTAATaAAaAaAGAAaAaAttTAccTCT +CcTGGGTACCAGCACAtAAAGG?ATaGGAGGAAATGAaCAagTaGAtAAAtTAGTCAGT?cAGGgATCAG +aaAAGT?CT?TTcCTaGAtGGcATAGAtAaaGC?CAaGAa---GAcCATGA?AaaTATCATAGtAATTGG +A?AgCtaTgGC?agTGAtTttAAtcT?CCaCC?aT?GT?---GCtAAaGAaATagTgGCcca?TGTGATA +AaTGtCAggtAAAaGGAGAaGCCAtgCATGGgCAgGTAGAcTGcAGtCCAGG?AT?TGGCAagTAGATTG +tACcCAttTAGAAGG?AAAgT?ATcATAGTgGCAGT?CAtGTagCcAGTGGcTacaTAGAAGCaGAaGT? +AT?cCaGctGAgACAGGacAAga?ACAGC?TAtTTccTgTTAAAATTAGCagG?AGATGGCCaGTAAAAa +c?ATtCAcACtGAtAATGGa?CtAAtTTtACAAGTgctgCAGT?AaaGC?GC?TGtTGGTGGGCagacAT +ccAaCAgGAaTTTGGAATaCCaTAcAATCCACAaAGTCAAGGAGTgGTaGAATCCaTgAATAAAgAatTa +AAGaAAAT?ATAGGACAaaTtAGgGAtCAAGCAGAaCAtTTAAAgACAGCaGTAcaaATGGCAGTgttCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTAcACtgCaGGagAaAGAATaaTAGACATaaT +AGCAaCAGAcaTACAaACAa?c?aATTACAAAAaCAAATTTTA---AAAgTTCAAaAaTTTCGGGTttAT +TACAGgGAcAGCAGAGAcCCaaTtTGGAAAGGACCaGCaaaacTtCTGTGGAAAGGTGAAGGGGCA---G +TAGTaATc?agga??a?g?gga??t?aa?gtagtacc?agaag?aaagcaaa?at?at?a?aga?tatgg +aaaacagatggcaggtg?tgat?gt?tggca?gtagacag?atgag??t--- +>CPZ.CD.CPZANT +TTTTTTAGGGAAACCGACCCCCACGTGGTG------------GGGGTGCAGACCAGGGAACTTTGTGCAG +AAGGAGGAAGTAGTGGAGCCAACAGCTCC---------------------------------ACC---CA +TAGAGATCTATCAGGAGGAGCACAAGAG---------------GACTCAGAAGGGTCTCAAGGGGGAGGA +GGA------------------ACTACCTCCCTCGTATTCCCTGAAATCCCTCTTTGGCAAAGACCAATGA +TGGAAGTTCTCATACAGGGACAAAAATGTCAGGCTCTATTAGATACAGGAGCTGATGACACAGTAGTAGA +GGGAATTCATTTGCAAGGAAACTGGAAGCCAAAAACAATTGGTGGAATTGGGGGTTTTATTTCCGTACAA +CAGTATAACAAAGTACCCATACAAATTGGAGACAGGACAGTACTAGCAACAGTACTGTTGGGACCAAACC +CAGTAAACATTATAGGTAGAAATGTTTTATGTCTTTTAGGATGTACTTTAAATTTTCCAATTAGTAAAGT +TGAAACAGTGCCCGTTAAATTAAAAGAAGGAATG---GATGGACCAAGAGTAAAACAATGGCCGCTCTCA +AAAGAGAAAATAGAGGCCCTAAAAGAAATTTGT------GATAAGTTAGAAGCAGAAAATAAGATTTCTA +GAATAGGGCCAGATAACCCATACAATACACCAATTTTTGCAATAAAAAAGAAAGACACTTCAAAATGGAG 
+AAAGCTAGTTGATTTCAGAGAATTAAATAAAAGAACACAAGATTTTTGGGAGATACAATTAGGAATTCCT +CATCCAGCCGGATTAAAGCAAAAGAAATCAGTGACAGTATTGGATGTGGGAGATGCCTATTTCTCCATAC +CCTTAGATCAGGACTTTAGAAAATATACAGCTTTCACAATTCCAAGCGTGAACAATGAGACCCCAGGAAT +AAGATATTGTTATAATGTTCTACCACAAGGCTGGAAAGGATCACCAGCTATTTTTCAAGCAAGCATGACC +AAGATCTTAGCACCATTTAGGGATAAGTATCCAGCAGTAGAAATTTATCAATACATGGATGATCTCTATG +TAGGATCTGATATGGAAATTACTGCACATAGAGAAATGATAGAAAAGCTTAGACAACATTTACAGGTCTG +GGGACTAGAGACTCCTGAC---AAAAAGCATCAGAAAGAACCTCCATTTCAATGGATGGGATATGAGTTA +CATCCAGACAAATGGACTGTACAGAAAATAAAGCTACCAGAGCCAGATGATTGGACAGTTAATGACATCC +AGAAACTAGTAGGAAAATTAAATTGGGCAAGTCAGATCTACCCAGGAATCAAAACTAAGCAGTTGTGTAG +ACTCATCAGAGGAGTCAAGAGTCTAACAGATAGAGTACAAATGACTAGGGAAGCAGAATTAGAATTAGAA +GAAAATAAACAAATTAAAAAAAAACTACAGCAGAAAATAGAGGGATACTACTATCAGCCTGGTCTACCAC +TAAAAGCAACCATACAGAAACAAGGGTCAGGACAATGGACCTATCAAATATATCAAAATGAAGGAAAACT +GTTAAAAGCAGGAAAATATGCTAGGCCTACAGGAACTCACACTAATGAGGTTAGGCAATTAGCTGGAGTA +GTGCAAAAAATAGGATTAGAAAGTATAGTTATCTGGGGAGAGGTA---CCAAAATTTCAATTACCCATAA +CTAGGGAAACTTGGGATGCCTGGTGGTCAGACTATTGGCAGGCCACCTGGATACCAGAATGGGAGTTTGT +TAATACACCCCCATTAATTAGACTCTGGTATAATCTATTGGCAGACCCTATTCCAGAGGCTGAAACCTTT +TATGTTGATGGGGCAGCTAATAGAAACTCTCAATTGGGGAAGGCAGGCTATGTGACAGACAGAGGAAGAA +GTAGGGTAAAACACCTACAAAAGACCACCAATCAACAAGCAGAATTACAGGCGATTCTTATGGCTCTAGA +GGATTCAACAGGCCCAGTCAATATAGTCACAGATTCACAATATGCATTGGGAGTCTTGCAAGGTACCCCA +GATCAAAGTGAATCACCCCTAGTGGAAGAAATAATCCAGAAATTAATAAAAAGAGAACAGATTTACCTCT +CCTGGGTACCAGCACATAAAGGCATAGGAGGAAATGAACAGGTAGACAAATTAGTCAGTCAAGGGATCAG +ACAAGTCCTCTTTCTGGAAGGCATAGATAAAGCTCAAGAG---GACCATGATAAATATCATAGCAATTGG +AGATCATTAGCAGATGAATACAATCTTCCCCCTATTGTG---GCTAAAGAAATTATAGCACAGTGTGATA +AATGTCACGTAAAGGGAGAAGCCAGGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAAGTAGATTG +CACCCATTTAGAAGGTAAAGTAATCATAGTAGCAGTGCATGTATCTAGTGGCTTCATAGAAGCTGAAGTA +ATGGCAGATGAGACAGGGAAAAGTACAGCATACTTCCTGTTAAAATTAGCCAGCAGATGGCCAGTAAAAA +CAATACACACTGACAATGGAGCTAATTTCACAAGTGCAGCAGTAAAAGCGGCATGTTGGTGGGCTAATAT 
+CCAACAGGAATTTGGAATACCATACAATCCACAAAGTCAAGGAGTAGTGGAATCCATGAATAAACAATTG +AAGCAAATTATAGGACAAATTAGAGACCAAGCAGAACAATTAAAGACAGCAGTAGTAATGGCAGTGCACA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACACCTGGACAGAGAATATTAGACATACT +AGCAACAGACATACAGACAACTCAATTACAAAATCAAATTTTA---AAAATTCAACAATTTCGGGTTCAT +TACAGGGATAGCAGAGACCCTGTGTGGAAAGGACCAGCACAACTTCTGTGGAAAGGTGAAGGGGCA---G +TAGTAATA-------------------------------------------------------------- +---------------------------------------------------- +>CPZ.GA.CPZGAB +TTTTTTAGGGAAAGGTTGGCCTTCCCGCAG------------CGGGAG---GCCAGGCAACTTTGTGCAG +AACAGAAC------AGAACCAACGGCCCC---------------------------------ACC---GA +TAGAGAGTTATGGGTACCAGGAGGAAGA---------------GAAGAGCCAGGAGAAGAAAGAGGGAGA +GAG---------------CAGTCTATATCCACCAACCTCCCTCAAATCACTCTTTGGCAGCGACCCCTCA +TCCCAGTAAAAGTAGAAGGGCAACTATGTGAAGCTTTGCTAGATACAGGAGCTGATGATACAGTAATAGA +GAGAATACAATTACAAGGACTTTGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTCAAA +CAATTTGATAATGTACACATAGAGATAGAAGGGAGAAAAGTAGTAGGGACAGTATTAGTAGGACCAACAC +CTGTAAATATAATTGGAAGAAATATTTTGACACAATTGGGTTGTACTCTAGTGTTCCCAATTAGTTCAAT +TGAAACTGTCCCAGTCAAATTGAAACCAGGAATG---GATGGTCCAAAGGTAAAGCAATGGCCCTTATCA +GCAGAAAAAATTAAAGCCTTAACAGAAATTTGT------CAAGAAATGGAAAAGGAAGGGAAAATATCGA +AAATAGGACCTGAAAATCCATACAATACTCCTATTTTTGCAATCAAAAAGAAAGACAGTACTAAATGGAG +AAAATTAGTAGACTTCAGAGAACTAAATAAGAGAACACAAGACTTTTGGGAAGTGCAGTTAGGCATACCT +CACCCAGCAGGGTTAAAAAAGAAAAAATCAGTGACAGTATTAGATGTAGGAGATGCCTACTTCTCTTGTC +CCCTGGATAAAGATTTCAGGAAATATACTGCATTTACAATTCCCAGTATAAATAATGAGACCCCAGGAGT +TAGATATCAATATAATGTTTTGCCACAAGGATGGAAAGGGTCTCCATCTATCTTCCAAAGCAGTATGACA +AAAATTCTAGAACCCTTCAGAGAAAAGAATCCTGACATTACTATTTACCAGTACATGGATGACCTATATG +TGGGGTCTGATCTTGAAATTGATCAACATAGAAAAAAGGTGGAAGAACTAAGACAACATTTGCTTAAATG +GGGGTTCACAACCCCAGAC---AAAAAACATCAAAAGGAGCCACCCTTTTTATGGATGGGATATGAGCTC +CACCCAGACAAATGGACAGTCCAGCCTATTCAATTACCAGAAAAAGAGGTATGGACTGTCAATGATATAC +AAAAACTGATAGGAAAGTTAAATTGGGCAAGTCAGATTTATCCAGGAATAAAAATAAAGCAATTATGTAA 
+GCTGATAAGAGGAACAAAGAAACTGACAGATGTAGTTCCTCTCACACCAGAAGCAGAATTAGAATTAGCA +GAAAATAGGGAGATA---------GTAAGCACACCAGTACATGGGGTATACTATGATCCAGACAAAGAGC +TTATAGCAGAAATACAGAAACAGGGCAACTGCCAGTGGACTTATCAGATATTTCAGGAACCACATAAGAA +TTTGAAGACAGGGAAATATGCCAGACAAAGGTCAGCACACACAAATGACATCAGGCAACTGGCGGAAGCA +GTACAAAAAATTGCTACTGAAAGCATAGTAATTTGGGGAAAAACA---CCTAAATTTAGGTTACCAGTAC +AGAAAGAAAGTTGGGAAGCATGGTGGGCAGAATACTGGCAGGCAACCTGGATCCCTGAATGGGAATTTAT +CAATACCCCACCCTTAGTCAAATTATGGTACAGTTTAGAGACAGAACCTATACCAACCACAGATACTTAT +TATGTAGATGGAGCAGCAAATAGGGAAACAAAAACTGGGAAAGCAGGATATGTAACAGACAAAGGGAAAC +AAAAAATCATTAGCTTAGAAAATACCACTAATCAGCAAGCAGAATTAAAGGCTTTGCTTCTGGCCTTGCA +GGATTCAGATCAACAGGTTAACATAGTGACTGACTCACAATATGTGTTAGGGATTATTCAGTCACAGCCA +GATCACAGTGAATCAGAATTAGTCAATCAGATAATAGAAGAGTTAATTAAAAAAGAAAAAATCTACCTCT +CCTGGGTACCAGCACACAAAGGTATAGGAGGAAATGAGCAAGTGGATAAATTAGTCAGTGCAGGAATCAG +GAAAGTGCTATTCCTAGATGGAATAGACAGGGCCCAAGAA---GAACATGAAAGGTATCATAGTAATTGG +AAAGCTATGGCTAGTGATTTTAATTTACCACCCATAGTA---GCAAAAGAAATAGTGGCCCATTGTGATA +AGTGCCAGGTAAAAGGAGAAGCCATGCATGGGCAGGTAGACTGTAGCCCAGGGATTTGGCAAGTAGATTG +TACCCACCTAGAAGGCAAAGTGATCATAGTGGCAGTTCACGTAGCCAGTGGCTATATAGAAGCAGAAGTT +ATCCCAGCTGAGACAGGACAAGAAACAGCTTATTTCCTGTTAAAATTAGCAGGTAGATGGCCAGTAAAAA +CTATTCACACAGATAATGGGCCAAATTTTACAAGTGCTGCAGTCAAGGCTGCCTGTTGGTGGGCAGACAT +CAAGCAGGAATTTGGAATACCCTATAATCCACAGAGTCAAGGAGTGGTAGAATCCTTAAATAAAGAGCTA +AAGAAAATAATAGGACAGGTTAGGGATCAAGCAGAACATTTAAAAACAGCAGTACAAATGGCAGTGTTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTACACTGCAGGGGAAAGAATAATAGACATAAT +AGCAACAGACATACAAACAAGCGAATTACAAAAACAAATTTTA---AAAGTTCAAAAATTTCGGGTTTAT +TACAGAGACAGCAGAGACCCAATTTGGAAAGGACCGGCAACCTTACTGTGGAAAGGTGAAGGGGCA---G +TAGTGATCCAGGATCAAGGGGAACTAAAGGTAGTACCAAGAAGGAAAGCAAAGATCATTAGAGATTATGG +AAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGAATGAGGAT--- +>CPZ.US.CPZUS +TTTTTTAGGGAAACATTGGTCCCCATCGTGGAGCGGGGGATCAAAGAG---ACCAGGGAACTTCCTGGAA +AACAGGAA------GGAGCCCACAGCTCC---------------------------------ACC---AA 
+TAGAGGATTTCGGGTATCAAGAAGAGAC---------------AGTGACACAGGAGAAACAGGGAAAGGA +AAA---------------GGAGCCCTTTCAGCTAACTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCC +TCAGAATAAAAGTAGCAGGGCAGATAGTAGAAGCTCTTTTAGACACAGGAGCTGATGATACAGTACTAGA +CAACATACAAATTGAAGGGACATGGAGACCAAAAATGATGGGGGGAATTGGAGGTTTTATAAAAGTAAAA +CAATATGATCACGTCAATATAGAAATAGAGGGAAGAAAAGCACAGGGTTCAGTTTTAGTGGGACCAACAC +CAGTAAATATTATTGGCAGGAATATTTTAACACAAATTGGATGTACTTTAAATTTTCCTATCAGCTCTAT +TAAAACTGTACCAGTAAGATTAAAACCAGGAATG---GATGGCCCTAGAGTAAAGCAATGGCCATTGACA +GCAGAAAAAATTAAGGCATTAACAGAAATTTGC------CAAGAAATGGAAAAAGAAGGAAAGATTACAA +GAGTAGGGCCAGAAAATCCTTACAATACACCAATTTTTGCTATCAAAAAGAAAGATAGCACTAAATGGAG +AAAATTAGTAGACTTTAGAGAATTAAATAAAAGGACACAAGACTTCTGGGAAGTTCAGTTAGGCATACCA +CACCCAGCAGGATTAAAGAAGAAAAGATCAGTAACAGTCCTAGATGTAGGGGATGCCTATTTTTCTTGTC +CACTGGATAAGGAATTTAGAAAATACACAGCATTTACCATCCCTAGTGTAAACAATGAGACCCCAGGAAT +TAGATACCAGTATAATGTGTTACCACAAGGATGGAAAGGGTCACCAGCAATTTTCCAGAGCAGTATGACA +AAAATTCTAGATCCTTTTAGAAAACAACATCCAGATGTTATAATCTATCAATATATGGATGATCTCTATG +TAGGGTCAGATCTAAACTTAGAAAAGCATAGGGAAAAGGTAGAACTGCTCAGACAATATTTGCTTACTTG +GGGATTCACTACCCCTGAT---AAGAAGCATCAGGAGGAACCACCATTCTTATGGATGGGCTATGAACTC +CATCCAGATAAGTGGACAGTGCAGCCCATACAGTTACCACAAAAAGAAATTTGGACAGTCAATGATATTC +AGAAATTAGTAGGGAAACTGAACTGGGCAAGTCAGATATATCCAGGAATAAAAATAAAGCAGTTATGCAA +ATTAATAAAAGGAGCTAAAGCTCTAACTGAAGTTGTAAATTTTACACATGAGGCAGAAATGGAGTTAGAA +GAAAACAGAGAAATT---------CTAAAGGAACCAGTACATGGGGTCTATTATGACCCAGAAAAAGAAT +TAGTAGCAGAAGTACAAAAACAAGGAAGGAGTCAGTGGACATACCAAATTTTTCAAGAACGGCATAAGAA +TCTAAAGACAGGAAAATATGCCAGACAAAGATCAGCACATACTAATGATATCAGACAGCTAGTTGAAGTG +GTGCAAAAAATAGCTACTGAGAGCATTGTCATTTGGGGAAAGGTA---CCAAAATTTAAATTGCTAGTGC +AGAAGGAAGTCTGGGAAACATGGTGGTCAGAATATTGGCAGGCCACCTGGATTCCAGATTGGGAATTTGT +CAATACCCCTCCCCTCGTAAAACTTTGGTATAAGTTAGAGACAGAGGCCATAGAGGGGGCAGAAACATTT +TATGTGGATGGAGCAGCCCAACGAGAAACCAAGAAAGGAAAGGCAGGATATGTTACTGATAGGGGTAGAC +AAAAAATTATAACTTTAGAAAATACTACTAATCAAAAGGCAGAGCTCACAGCGGTATACTTAGCATTAAA 
+AGATTCAGAGAATACAGTTAATGTAGTCACTGGCTCCCAATATGTCTTAGGAATCATCCACTCTCAGCCA +GACCAAAGTGAATCAGAATTGGTCAATCAGATAATAGAAGAATTAATAAAGAAAGAAAAAAGTTATATCT +CATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAACAAATAGATAAACTAGTCAGTTCAGGGATCAG +AAAAGTTCTTTTCCTAGATGGCATAGATAAAGCACAGGAA---GACCATGACAAATATCATAGTAATTGG +ACAGCTATGGCCAGTGATTTTAACCTGCCACCAGTGGTC---GCTAAGGAGATAGTGGCCAGCTGTGATA +AATGTCAGCCAAAAGGAGAGGCCATACATGGGCAGGTAGATTGCAGTCCAGGTATCTGGCAGCTAGATTG +TACACATTTAGAAGGGAAAATCATTATAGTGGCAGTACATGTGGCCAGTGGATACCTAGAAGCAGAGGTC +ATTCCTGCAGAAACAGGACAAGAGACAGCCTATTTTATCTTAAAATTAGCAGGAAGATGGCCTGTAAAAG +TGATTCATACTGATAATGGATCTAACTTTACAAGTAGTACAGTTAGAGCAGCTTGCTGGTGGGCAGGCAT +ACAACAAGAGTTTGGAATTCCATACAATCCACAAAGTCAAGGAGTGGTAGAATCCATGAATAAAGAATTA +AAGAAAATCATAGGACAAATCAGGGATCAAGCAGAGCATTTAAAGACAGCTGTACAGATGGCAGTATTCA +TTCACAATTTTAAAAGAAAAGGGGGG---ATTGGGGGGTATACTGCAGGAGAAAGAATCATAGACATCAT +AGCATCAGAACTACAAACAGACTTATTACAAAAACAAATTTTA---AAAGTTCAAAATTTTCGGGTCTAT +TACAGGGACAGCAGAGATCCAATTTGGAAAGGACCAGCCAAACTTCTGTGGAAAGGTGAAGGGGCA---G +TAGTAATCAAGGAAAACGAGGAGGTTAAAGTAGTACCCAGAAGAAAAGCAAAAATTATAAAAGACTATGG +AAAACAGATGGCAGGTGCTGATAGTATGGCAGGTAGACAGGATGAGAGT--- diff --git a/db/structprot.fasta b/db/structprot.fasta new file mode 100644 index 0000000..e8b9d52 --- /dev/null +++ b/db/structprot.fasta @@ -0,0 +1,6 @@ +>prot +PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGI +GGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF +>3-struct +----SSS--EEEEEETTEEEEEEE-TT-SS-EE-S----S-EEEEEEEET +TEEEEEEEE-SEEEEETTEEEEE-EEEE--SS-EE-HHHHTTTT----- diff --git a/linux.notes.gde b/linux.notes.gde new file mode 100755 index 0000000..ddf3c56 --- /dev/null +++ b/linux.notes.gde @@ -0,0 +1,44 @@ + +21 January 2001 Tulio de Oliveira & Rob Miller + tuliodna@yahoo.com, rob@inpharmatica.co.uk + +This is veresion 2.2 of GDE which I have attempted to port to +Linux (glibc, kernel 2.0.x, Debian release but no .deb file). 
+ +The basic installation steps are as follows: + +(1) Get the OpenWin XView libraries and include files set up on +your system. Binary versions of the libs are included in this +distribution, but they may not work on your system. If they +do work, you should be able to copy them to /usr/local/lib, +run ldconfig, and have the included executable GDE files work. +Use "ldconfig -v | grep libxv" to see if it's really finding the +shared library, else "man ldconfig" and sort it out. It's +better if you get the xview stuff properly installed on your +system, but if you can't, just grab the precompiled executables +from the various subdirectories -- i.e., skip step (2) below. + +RPM versions of the OpenWin (XView) library are included in the distribution in +the CORE directory: +xview-3.2p1.4-6.i386.rpm , xview-3.2p1.4-6.src.rpm , xview-clients-3.2p1.4-6.i386.rpm , xview-devel-3.2p1.4-6.i386.rpm + +For Red Hat systems: rpm -i xview-xxxx.rpm + +(2) The Linux distribution has already been compiled and provides the executable files. To date, GDE for Linux has been successfully set up on Debian, RedHat, Mandrake and FreeBSD systems. + +(3) Copy all the executables to wherever you want GDE to live, +and set the environment variable GDE_HELP_DIR to that location +in your login/startup files. I use bash, and in my +~/.bash_profile file I have + +export GDE_HELP_DIR=/usr/local/GDE + +M. Zuker's RNA folding code requires that there be a +$GDE_HELP_DIR/ZUKER directory containing the ZUKER/*.dat +files. You will probably want to put the $GDE_HELP_DIR +on your path if it is a new directory. + +That should be it. I have gotten everything to compile, though +still with a few warnings here and there that I couldn't figure +out. I think everything runs ok as well, though I'm not certain +about how Zuker's MFOLD is exactly supposed to work.
diff --git a/tutorial/GDEtutorial-setup.pdf b/tutorial/GDEtutorial-setup.pdf new file mode 100755 index 0000000..fe20e17 Binary files /dev/null and b/tutorial/GDEtutorial-setup.pdf differ