diff --git a/CORE/.GDEmenus.bak b/CORE/.GDEmenus.bak deleted file mode 100644 index 5a3f56f..0000000 --- a/CORE/.GDEmenus.bak +++ /dev/null @@ -1,761 +0,0 @@ -1menu:File - -item:test cmask output -itemmethod: kedit in1 - -in:in1 -informat:colormask - -item:New sequence -itemmethod:echo "$Type$Name" > out1 -itemmeta:n -itemhelp:new_sequence.help - -arg:Name -argtype:text -arglabel:New Sequence name? -argtext:New - -arg:Type -argtype:choice_list -arglabel:Type? -argchoice:DNA/RNA:# -argchoice:Amino Acid:% -argchoice:Text:\" -argchoice:Mask:@ - -out:out1 -outformat:flat - -item:Import Foreign Format -itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp -itemhelp:readseq.help - -arg:INPUTFILE -argtype:text -arglabel:Name of foreign file? - -out:OUTPUTFILE -outformat:genbank - -item:Export Foreign Format -itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE -itemhelp:readseq.help - -arg:FORMAT -argtype:choice_list -argchoice:FASTA:8 -argchoice:NEXUS:17 -argchoice:Phylip v3.3:12 -argchoice:IG/Stanford:1 -argchoice:GenBank:2 -argchoice:NBRF:3 -argchoice:EMBL:4 -argchoice:GCG:5 -argchoice:DNA Strider:6 -argchoice:Fitch:7 -argchoice:Pearson:8 -argchoice:Zuker:9 -argchoice:Olsen:10 -argchoice:Phylip v3.2:11 -argchoice:Phylip v3.3:12 -argchoice:Plain text:13 - -arg:OUTPUTFILE -argtype:text -arglabel:Save as? - -in:INPUTFILE -informat:genbank - - -item:Save Selection -itemmethod: cat $SAVE_FUNC > $Name -itemhelp:save_selection.help - -arg:SAVE_FUNC -argtype:chooser -arglabel:File format -argchoice:Flat:in1 -argchoice:Genbank:in2 -argchoice:GDE/HGL:in3 - -arg:Name -argtype:text -arglabel:File name? - -in:in1 -informat:flat - -in:in2 -informat:genbank - -in:in3 -informat:gde - -item:Print Selection -itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& -itemhelp:print_alignment.help - -arg:SCALE -argtype:slider -arglabel:Reduce printout by? 
-argmin:1 -argmax:20 -argvalue:1 - -arg:CMD -argtype:chooser -argchoice:Lpr:lpr -argchoice:Enscript Gaudy:enscript -G -q -argchoice:Enscript Two column:enscript -2rG - -arg:PRINTER -argtype:text -arglabel:Which printer? -argtext:lp - -in:in1 -informat:gde -insave: - -menu:Edit - -item:Sort -itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& -itemhelp:heapsortHGL.help - -arg:PRIM_KEY -argtype:choice_list -argchoice:Group:group-ID -argchoice:type:type -argchoice:name:name -argchoice:Sequence ID:sequence-ID -argchoice:creator:creator -argchoice:offset:offset -arglabel:Primary sort field? - -arg:SEC_KEY -argtype:choice_list -argchoice:None: -argchoice:Group:group-ID -argchoice:type:type -argchoice:name:name -argchoice:Sequence ID:sequence-ID -argchoice:creator:creator -argchoice:offset:offset -arglabel:Secondary sort field? - -in:in1 -informat:gde -insave: - -item:extract -itemmethod:(gde in1;/bin/rm -f in1)& - -in:in1 -informat:gde -inmask: -insave: - -menu:DNA/RNA - -item:Translate... -itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 - -arg:FRAME -argtype:chooser -arglabel:Which reading frame? -argchoice:First:1 -argchoice:Second:2 -argchoice:Third:3 -argchoice:All six:6 - -arg:MNFRM -arglabel:Minimum length of AA sequence to translate? -argtype:slider -argmin:0 -argmax:100 -argvalue:20 - -arg:LTRCODE -argtype:chooser -arglabel:Translate to: -argchoice:Single letter codes: -argchoice:Triple letter codes:-3 - -arg:TBL -arglabel:Codon table? -argtype:chooser -argchoice:universal:1 -argchoice:mycoplasma:2 -argchoice:yeast:3 -argchoice:Vert. 
mito.:4 -in:in1 -informat:gde - -out:out1 -outformat:gde - -item:Dot plot -itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& -itemhelp:DotPlotTool.help - -in:in1 -informat:gde -insave: - -item:Clustal alignment -itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& - -itemhelp:clustal_help - -arg:KTUP -argtype:slider -arglabel:K-tuple size for pairwise search -argmin:1 -argmax:10 -argvalue:2 - -arg:WIN -argtype:slider -arglabel:Window size -argmin:1 -argmax:10 -argvalue:4 - -arg:Trans -argtype:chooser -arglabel:Transitions weighted? -argchoice:Yes:/TRANSIT -argchoice:No: - -arg:FIXED -argtype:slider -arglabel:Fixed gap penalty -argmin:1 -argmax:100 -argvalue:10 - -arg:FLOAT -arglabel:Floating gap penalty -argtype:slider -argmin:1 -argmax:100 -argvalue:10 - -arg:REPORT -argtype:chooser -arglabel:View assembly report? -argchoice:No: -argchoice:Yes:kedit in1.rpt& - - -in:in1 -informat:flat -insave: - -item:Variable Positions -itemmethod:varpos $REV < in1 > out1 - -arg:REV -argtype:chooser -arglabel:Highlight (darken) -argchoice:Conserved positions: -argchoice:variable positions:-rev - -in:in1 -informat:flat - -out:out1 -outformat:colormask - -item:Phrap -itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; - -in:in1 -informat:genbank - -out:out1 -outformat:genbank - -item:SNAP -itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; - -in:in1 -informat:flat -out:out1 -outformat:text - - - - -item:Find all -itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; -itemhelp:findall.help -itemmeta:f - -arg:SEARCH -argtype:text 
-arglabel:Search String - -arg:PRCNT -argtype:slider -arglabel:Percent mismatch -argmin:0 -argmax:75 -argvalue:10 - -arg:CASE -argtype:chooser -arglabel:Case -argchoice:Upper equals lower: -argchoice:Upper not equal lower:-case - -arg:UT -argtype:chooser -arglabel:U equal T? -argchoice:Yes:-u=t -argchoice:No: -argvalue:0 - -arg:MAT -arglabel:Match color -argtype:choice_list -argchoice:yellow:1 -argchoice:violet:2 -argchoice:red:3 -argchoice:aqua:4 -argchoice:green:5 -argchoice:blue:6 -argchoice:grey:11 -argchoice:black:8 -argvalue:2 - -arg:MIS -argtype:choice_list -arglabel:Mismatch color -argchoice:yellow:1 -argchoice:violet:2 -argchoice:red:3 -argchoice:aqua:4 -argchoice:green:5 -argchoice:blue:6 -argchoice:grey:11 -argchoice:black:8 -argvalue:7 - -in:in1 -informat:flat - -out:out1 -outformat:colormask - -item:Sequence Consensus -itemmethod:(MakeCons in1 $METHOD $MASK > out1) -itemhelp:MakeCons.help - -arg:METHOD -arglabel:Method -argtype:chooser -argchoice:IUPAC:-iupac -argchoice:Majority:-majority $PERCENT - -arg:MASK -argtype:chooser -arglabel:Create a new: -argchoice:Sequence: -argchoice:Selection Mask: | Consto01mask - -arg:PERCENT -arglabel:Minimum Percentage for Majority -argtype:slider -argmin:50 -argmax:100 -argvalue:75 - -in:in1 -informat:gde - -out:out1 -outformat:gde - - -#Menu for DNA/RNA - -item:blastn -itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& - -in:in1 -informat:flat -insave: - -arg:BLASTDBDNA -argtype:choice_list -arglabel:Which Database -argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:4 -argmax:18 -argvalue:12 - -arg:MATCH -argtype:slider -arglabel:Match Score -argmin:1 -argmax:10 -argvalue:5 - -arg:MMSCORE -argtype:slider -arglabel:Mismatch Score -argmin:-10 -argmax:-1 -argvalue:-5 - -item:blastx -itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& - - - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta -argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:1 -argmax:5 -argvalue:3 - -arg:Matrix -arglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:CODE -argtype:choice_list -arglabel:Genetic Code -argchoice:Standard or Universal:0 -argchoice:Vertebrate Mitochondrial:1 -argchoice:Yeast Mitochondrial:2 -argchoice:Mold Mitochondrial and Mycoplasma:3 -argchoice:Invertebrate Mitochondrial:4 -argchoice:Ciliate Macronuclear:5 -argchoice:Protozoan Mitochondrial:6 -argchoice:Plant Mitochondrial:7 -argchoice:Echinodermate Mitochondrial:8 - -item:------------------------ - -item:Add a new DNA blast db -itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname; - -arg:sourcefile -argtype:text -arglabel: enter the file name - -arg:menuname -argtype:text -arglabel: enter the name of the DB - -menu:seq. datasets - -item:------------- -item:add a new dataset -itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file - -arg:name -argtype:text -arglabel:Enter the dataset name ? - -arg:file -argtype:text -arglabel:Enter the dataset file (in FASTA) ? 
- - -#Menu for Protein -menu:protein -item:blastp -itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& - - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta - -arg:Matrix -barglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:1 -argmax:5 -argvalue:3 - -item:tblastn -itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank -argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate - -arg:Matrix -arglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:4 -argmax:18 -argvalue:12 - -arg:CODE -argtype:choice_list -arglabel:Genetic Code -argchoice:Standard or Universal:0 -argchoice:Vertebrate Mitochondrial:1 -argchoice:Yeast Mitochondrial:2 -argchoice:Mold Mitochondrial and Mycoplasma:3 -argchoice:Invertebrate Mitochondrial:4 -argchoice:Ciliate Macronuclear:5 -argchoice:Protozoan Mitochondrial:6 -argchoice:Plant Mitochondrial:7 -argchoice:Echinodermate Mitochondrial:8 - - -item:Map View -itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& -itemhelp:mapview.help - -in:in1 -informat:gde -insave: - -arg:PBL -arglabel:Pixel Between Lines -argtype:slider -argvalue:10 -argmin:1 -argmax:15 - -arg:NPP -arglabel:Nucleotides Per Pixel -argtype:slider -argvalue:1 -argmin:1 -argmax:20 - -arg:LWIDTH -arglabel:Line Thickness -argtype:slider -argvalue:2 -argmin:1 -argmax:5 - 
-item:-------------------------- -item:Add a new DNA blast db -itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname; - -arg:sourcefile -argtype:text -arglabel: Enter the file (in FASTA) - -arg:menuname -argtype:text -arglabel: Enter the name of the DB - -menu:Phylogeny - - -item:Phylip help -itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& - -arg:FILE -argtype:choice_list -arglabel:Which program? -argchoice:clique:clique.html -argchoice:consense:consense.html -argchoice:contchar:contchar.html -argchoice:contml:contml.html -argchoice:contrast:contrast.html -argchoice:discrete:discrete.html -argchoice:distance:distance.html -argchoice:dnaboot:dnaboot.html -argchoice:dnacomp:dnacomp.html -argchoice:dnadist:dnadist.html -argchoice:dnainvar:dnainvar.html -argchoice:dnaml:dnaml.html -argchoice:dnamlk:dnamlk.html -argchoice:dnamove:dnamove.html -argchoice:dnapars:dnapars.html -argchoice:dnapenny:dnapenny.html -argchoice:dollop:dollop.html -argchoice:dolmove:dolmove.html -argchoice:dolpenny:dolpenny.html -argchoice:draw:draw.html -argchoice:drawgram:drawgram.html -argchoice:drawtree:drawtree.html -argchoice:factor:factor.html -argchoice:fitch:fitch.html -argchoice:gendist:gendist.html -argchoice:kitsch:kitsch.html -argchoice:main:main.html -argchoice:mix:mix.html -argchoice:move:move.html -argchoice:neighbor:neighbor.html -argchoice:penny:penny.html -argchoice:protpars:protpars.html -argchoice:read.me.general:read.me.general.html -argchoice:restml:restml.html -argchoice:seqboot:seqboot.html -argchoice:sequence:sequence.html - - - -item:Phylip 3.5 -itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& - -arg:PROGRAM -argtype:choice_list -arglabel:Which program to run? 
-argchoice:DNAPARS:dnapars -argchoice:DNABOOT:dnaboot -argchoice:DNAPENNY:dnapenny -argchoice:DNAML:dnaml -argchoice:DNAMLK:dnamlk -argchoice:DNACOMP:dnacomp -argchoice:DNAMOVE:dnamove -argchoice:DNAINVAR:dnainvar -argchoice:PROTPARS:protpars - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - - - -item:Phylip DNA Distance methods -itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& - -arg:EXPLAIN -argtype:text -arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE - - -arg:PROGRAM -arglabel:Which method? -argtype:chooser -argchoice:DNADIST+NEIGHBOR: -argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; - -arg:PROG -arglabel:Run ? -argtype:chooser -argchoice:Run without Bootstrap: -argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; - -arg:DNA -argtype:text -arglabel:Name of DNADIST outfile? - -arg:NEI -argtype:text -arglabel:Name of NEIGHBOR outfile? - -arg:TREE -argtype:text -arglabel:Name of TREEFILE ? - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - -item:Phylip PROTEIN Distance methods -itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& - -arg:PROGRAM -arglabel:Which method? -argtype:chooser -argchoice:PROTDIST+NEIGHBOR: -argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; - -arg:PROG -arglabel:Which method? 
-argtype:chooser -argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; -argchoice:No Bootstrap: - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - - - - - -menu:On-Line Res. - -item:GDE for Linux resources at Bioafrica.net -itemmethod:netscape http://www.bioafrica.net & - -item:------------------------- -item:add a new website -itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url - -arg:name -argtype:text -arglabel:Enter the site name - -arg:url -argtype:text -arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenusthat~ b/CORE/.GDEmenusthat~ deleted file mode 100644 index ca925b9..0000000 --- a/CORE/.GDEmenusthat~ +++ /dev/null @@ -1,761 +0,0 @@ -1menu:File - -item:test cmask output -itemmethod: kedit in1 - -in:in1 -informat:colormask - -item:New sequence -itemmethod:echo "$Type$Name" > out1 -itemmeta:n -itemhelp:new_sequence.help - -arg:Name -argtype:text -arglabel:New Sequence name? -argtext:New - -arg:Type -argtype:choice_list -arglabel:Type? -argchoice:DNA/RNA:# -argchoice:Amino Acid:% -argchoice:Text:\" -argchoice:Mask:@ - -out:out1 -outformat:flat - -item:Import Foreign Format -itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp -itemhelp:readseq.help - -arg:INPUTFILE -argtype:text -arglabel:Name of foreign file? 
- -out:OUTPUTFILE -outformat:genbank - -item:Export Foreign Format -itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE -itemhelp:readseq.help - -arg:FORMAT -argtype:choice_list -argchoice:FASTA:8 -argchoice:NEXUS:17 -argchoice:Phylip v3.3:12 -argchoice:IG/Stanford:1 -argchoice:GenBank:2 -argchoice:NBRF:3 -argchoice:EMBL:4 -argchoice:GCG:5 -argchoice:DNA Strider:6 -argchoice:Fitch:7 -argchoice:Pearson:8 -argchoice:Zuker:9 -argchoice:Olsen:10 -argchoice:Phylip v3.2:11 -argchoice:Phylip v3.3:12 -argchoice:Plain text:13 - -arg:OUTPUTFILE -argtype:text -arglabel:Save as? - -in:INPUTFILE -informat:genbank - - -item:Save Selection -itemmethod: cat $SAVE_FUNC > $Name -itemhelp:save_selection.help - -arg:SAVE_FUNC -argtype:chooser -arglabel:File format -argchoice:Flat:in1 -argchoice:Genbank:in2 -argchoice:GDE/HGL:in3 - -arg:Name -argtype:text -arglabel:File name? - -in:in1 -informat:flat - -in:in2 -informat:genbank - -in:in3 -informat:gde - -item:Print Selection -itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& -itemhelp:print_alignment.help - -arg:SCALE -argtype:slider -arglabel:Reduce printout by? -argmin:1 -argmax:20 -argvalue:1 - -arg:CMD -argtype:chooser -argchoice:Lpr:lpr -argchoice:Enscript Gaudy:enscript -G -q -argchoice:Enscript Two column:enscript -2rG - -arg:PRINTER -argtype:text -arglabel:Which printer? -argtext:lp - -in:in1 -informat:gde -insave: - -menu:Edit - -item:Sort -itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& -itemhelp:heapsortHGL.help - -arg:PRIM_KEY -argtype:choice_list -argchoice:Group:group-ID -argchoice:type:type -argchoice:name:name -argchoice:Sequence ID:sequence-ID -argchoice:creator:creator -argchoice:offset:offset -arglabel:Primary sort field? 
- -arg:SEC_KEY -argtype:choice_list -argchoice:None: -argchoice:Group:group-ID -argchoice:type:type -argchoice:name:name -argchoice:Sequence ID:sequence-ID -argchoice:creator:creator -argchoice:offset:offset -arglabel:Secondary sort field? - -in:in1 -informat:gde -insave: - -item:extract -itemmethod:(gde in1;/bin/rm -f in1)& - -in:in1 -informat:gde -inmask: -insave: - -menu:DNA/RNA - -item:Translate... -itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 - -arg:FRAME -argtype:chooser -arglabel:Which reading frame? -argchoice:First:1 -argchoice:Second:2 -argchoice:Third:3 -argchoice:All six:6 - -arg:MNFRM -arglabel:Minimum length of AA sequence to translate? -argtype:slider -argmin:0 -argmax:100 -argvalue:20 - -arg:LTRCODE -argtype:chooser -arglabel:Translate to: -argchoice:Single letter codes: -argchoice:Triple letter codes:-3 - -arg:TBL -arglabel:Codon table? -argtype:chooser -argchoice:universal:1 -argchoice:mycoplasma:2 -argchoice:yeast:3 -argchoice:Vert. mito.:4 -in:in1 -informat:gde - -out:out1 -outformat:gde - -item:Dot plot -itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& -itemhelp:DotPlotTool.help - -in:in1 -informat:gde -insave: - -item:Clustal alignment -itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& - -itemhelp:clustal_help - -arg:KTUP -argtype:slider -arglabel:K-tuple size for pairwise search -argmin:1 -argmax:10 -argvalue:2 - -arg:WIN -argtype:slider -arglabel:Window size -argmin:1 -argmax:10 -argvalue:4 - -arg:Trans -argtype:chooser -arglabel:Transitions weighted? -argchoice:Yes:/TRANSIT -argchoice:No: - -arg:FIXED -argtype:slider -arglabel:Fixed gap penalty -argmin:1 -argmax:100 -argvalue:10 - -arg:FLOAT -arglabel:Floating gap penalty -argtype:slider -argmin:1 -argmax:100 -argvalue:10 - -arg:REPORT -argtype:chooser -arglabel:View assembly report? 
-argchoice:No: -argchoice:Yes:kedit in1.rpt& - - -in:in1 -informat:flat -insave: - -item:Variable Positions -itemmethod:varpos $REV < in1 > out1 - -arg:REV -argtype:chooser -arglabel:Highlight (darken) -argchoice:Conserved positions: -argchoice:variable positions:-rev - -in:in1 -informat:flat - -out:out1 -outformat:colormask - -item:Phrap -itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; - -in:in1 -informat:genbank - -out:out1 -outformat:genbank - -item:SNAP -itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; - -in:in1 -informat:flat -out:out1 -outformat:text - - - - -item:Find all -itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; -itemhelp:findall.help -itemmeta:f - -arg:SEARCH -argtype:text -arglabel:Search String - -arg:PRCNT -argtype:slider -arglabel:Percent mismatch -argmin:0 -argmax:75 -argvalue:10 - -arg:CASE -argtype:chooser -arglabel:Case -argchoice:Upper equals lower: -argchoice:Upper not equal lower:-case - -arg:UT -argtype:chooser -arglabel:U equal T? 
-argchoice:Yes:-u=t -argchoice:No: -argvalue:0 - -arg:MAT -arglabel:Match color -argtype:choice_list -argchoice:yellow:1 -argchoice:violet:2 -argchoice:red:3 -argchoice:aqua:4 -argchoice:green:5 -argchoice:blue:6 -argchoice:grey:11 -argchoice:black:8 -argvalue:2 - -arg:MIS -argtype:choice_list -arglabel:Mismatch color -argchoice:yellow:1 -argchoice:violet:2 -argchoice:red:3 -argchoice:aqua:4 -argchoice:green:5 -argchoice:blue:6 -argchoice:grey:11 -argchoice:black:8 -argvalue:7 - -in:in1 -informat:flat - -out:out1 -outformat:colormask - -item:Sequence Consensus -itemmethod:(MakeCons in1 $METHOD $MASK > out1) -itemhelp:MakeCons.help - -arg:METHOD -arglabel:Method -argtype:chooser -argchoice:IUPAC:-iupac -argchoice:Majority:-majority $PERCENT - -arg:MASK -argtype:chooser -arglabel:Create a new: -argchoice:Sequence: -argchoice:Selection Mask: | Consto01mask - -arg:PERCENT -arglabel:Minimum Percentage for Majority -argtype:slider -argmin:50 -argmax:100 -argvalue:75 - -in:in1 -informat:gde - -out:out1 -outformat:gde - - -#Menu for DNA/RNA - -item:blastn -itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& - -in:in1 -informat:flat -insave: - -arg:BLASTDBDNA -argtype:choice_list -arglabel:Which Database -argchoice:HIV-1 Seq. 
Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2 -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:4 -argmax:18 -argvalue:12 - -arg:MATCH -argtype:slider -arglabel:Match Score -argmin:1 -argmax:10 -argvalue:5 - -arg:MMSCORE -argtype:slider -arglabel:Mismatch Score -argmin:-10 -argmax:-1 -argvalue:-5 - -item:blastx -itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& - - - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta -argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:1 -argmax:5 -argvalue:3 - -arg:Matrix -arglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:CODE -argtype:choice_list -arglabel:Genetic Code -argchoice:Standard or Universal:0 -argchoice:Vertebrate Mitochondrial:1 -argchoice:Yeast Mitochondrial:2 -argchoice:Mold Mitochondrial and Mycoplasma:3 -argchoice:Invertebrate Mitochondrial:4 -argchoice:Ciliate Macronuclear:5 -argchoice:Protozoan Mitochondrial:6 -argchoice:Plant Mitochondrial:7 -argchoice:Echinodermate Mitochondrial:8 - -item:------------------------ - -item:Add a new DNA blast db -itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname; - -arg:sourcefile -argtype:text -arglabel: enter the file name - -arg:menuname -argtype:text -arglabel: enter the name of the DB - -menu:seq. datasets - -item:------------- -item:add a new dataset -itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file - -arg:name -argtype:text -arglabel:Enter the dataset name ? - -arg:file -argtype:text -arglabel:Enter the dataset file (in FASTA) ? 
- - -#Menu for Protein -menu:protein -item:blastp -itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& - - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta - -arg:Matrix -barglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:1 -argmax:5 -argvalue:3 - -item:tblastn -itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank -argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate - -arg:Matrix -arglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:4 -argmax:18 -argvalue:12 - -arg:CODE -argtype:choice_list -arglabel:Genetic Code -argchoice:Standard or Universal:0 -argchoice:Vertebrate Mitochondrial:1 -argchoice:Yeast Mitochondrial:2 -argchoice:Mold Mitochondrial and Mycoplasma:3 -argchoice:Invertebrate Mitochondrial:4 -argchoice:Ciliate Macronuclear:5 -argchoice:Protozoan Mitochondrial:6 -argchoice:Plant Mitochondrial:7 -argchoice:Echinodermate Mitochondrial:8 - - -item:Map View -itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& -itemhelp:mapview.help - -in:in1 -informat:gde -insave: - -arg:PBL -arglabel:Pixel Between Lines -argtype:slider -argvalue:10 -argmin:1 -argmax:15 - -arg:NPP -arglabel:Nucleotides Per Pixel -argtype:slider -argvalue:1 -argmin:1 -argmax:20 - -arg:LWIDTH -arglabel:Line Thickness -argtype:slider -argvalue:2 -argmin:1 -argmax:5 - 
-item:-------------------------- -item:Add a new DNA blast db -itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname; - -arg:sourcefile -argtype:text -arglabel: Enter the file (in FASTA) - -arg:menuname -argtype:text -arglabel: Enter the name of the DB - -menu:Phylogeny - - -item:Phylip help -itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)& - -arg:FILE -argtype:choice_list -arglabel:Which program? -argchoice:clique:clique.html -argchoice:consense:consense.html -argchoice:contchar:contchar.html -argchoice:contml:contml.html -argchoice:contrast:contrast.html -argchoice:discrete:discrete.html -argchoice:distance:distance.html -argchoice:dnaboot:dnaboot.html -argchoice:dnacomp:dnacomp.html -argchoice:dnadist:dnadist.html -argchoice:dnainvar:dnainvar.html -argchoice:dnaml:dnaml.html -argchoice:dnamlk:dnamlk.html -argchoice:dnamove:dnamove.html -argchoice:dnapars:dnapars.html -argchoice:dnapenny:dnapenny.html -argchoice:dollop:dollop.html -argchoice:dolmove:dolmove.html -argchoice:dolpenny:dolpenny.html -argchoice:draw:draw.html -argchoice:drawgram:drawgram.html -argchoice:drawtree:drawtree.html -argchoice:factor:factor.html -argchoice:fitch:fitch.html -argchoice:gendist:gendist.html -argchoice:kitsch:kitsch.html -argchoice:main:main.html -argchoice:mix:mix.html -argchoice:move:move.html -argchoice:neighbor:neighbor.html -argchoice:penny:penny.html -argchoice:protpars:protpars.html -argchoice:read.me.general:read.me.general.html -argchoice:restml:restml.html -argchoice:seqboot:seqboot.html -argchoice:sequence:sequence.html - - - -item:Phylip 3.5 -itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& - -arg:PROGRAM -argtype:choice_list -arglabel:Which program to run? 
-argchoice:DNAPARS:dnapars -argchoice:DNABOOT:dnaboot -argchoice:DNAPENNY:dnapenny -argchoice:DNAML:dnaml -argchoice:DNAMLK:dnamlk -argchoice:DNACOMP:dnacomp -argchoice:DNAMOVE:dnamove -argchoice:DNAINVAR:dnainvar -argchoice:PROTPARS:protpars - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - - - -item:Phylip DNA Distance methods -itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& - -arg:EXPLAIN -argtype:text -arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE - - -arg:PROGRAM -arglabel:Which method? -argtype:chooser -argchoice:DNADIST+NEIGHBOR: -argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; - -arg:PROG -arglabel:Run ? -argtype:chooser -argchoice:Run without Bootstrap: -argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; - -arg:DNA -argtype:text -arglabel:Name of DNADIST outfile? - -arg:NEI -argtype:text -arglabel:Name of NEIGHBOR outfile? - -arg:TREE -argtype:text -arglabel:Name of TREEFILE ? - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - -item:Phylip PROTEIN Distance methods -itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& - -arg:PROGRAM -arglabel:Which method? -argtype:chooser -argchoice:PROTDIST+NEIGHBOR: -argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; - -arg:PROG -arglabel:Which method? 
-argtype:chooser -argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; -argchoice:No Bootstrap: - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - - - - - -menu:On-Line Res. - -item:GDE for Linux resources at Bioafrica.net -itemmethod:netscape http://www.bioafrica.net & - -item:------------------------- -item:add a new website -itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url - -arg:name -argtype:text -arglabel:Enter the site name - -arg:url -argtype:text -arglabel:Enter the URL (including http://) diff --git a/CORE/.GDEmenus~ b/CORE/.GDEmenus~ deleted file mode 100644 index fa1cff0..0000000 --- a/CORE/.GDEmenus~ +++ /dev/null @@ -1,791 +0,0 @@ -1menu:File - -item:test cmask output -itemmethod: kedit in1 - -in:in1 -informat:colormask - -item:New sequence -itemmethod:echo "$Type$Name" > out1 -itemmeta:n -itemhelp:new_sequence.help - -arg:Name -argtype:text -arglabel:New Sequence name? -argtext:New - -arg:Type -argtype:choice_list -arglabel:Type? -argchoice:DNA/RNA:# -argchoice:Amino Acid:% -argchoice:Text:\" -argchoice:Mask:@ - -out:out1 -outformat:flat - -item:Import Foreign Format -itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp -itemhelp:readseq.help - -arg:INPUTFILE -argtype:text -arglabel:Name of foreign file? - -out:OUTPUTFILE -outformat:genbank - -item:Export Foreign Format -itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE -itemhelp:readseq.help - -arg:FORMAT -argtype:choice_list -argchoice:FASTA:8 -argchoice:NEXUS:17 -argchoice:Phylip v3.3:12 -argchoice:IG/Stanford:1 -argchoice:GenBank:2 -argchoice:NBRF:3 -argchoice:EMBL:4 -argchoice:GCG:5 -argchoice:DNA Strider:6 -argchoice:Fitch:7 -argchoice:Pearson:8 -argchoice:Zuker:9 -argchoice:Olsen:10 -argchoice:Phylip v3.2:11 -argchoice:Phylip v3.3:12 -argchoice:Plain text:13 - -arg:OUTPUTFILE -argtype:text -arglabel:Save as? 
- -in:INPUTFILE -informat:genbank - - -item:Save Selection -itemmethod: cat $SAVE_FUNC > $Name -itemhelp:save_selection.help - -arg:SAVE_FUNC -argtype:chooser -arglabel:File format -argchoice:Flat:in1 -argchoice:Genbank:in2 -argchoice:GDE/HGL:in3 - -arg:Name -argtype:text -arglabel:File name? - -in:in1 -informat:flat - -in:in2 -informat:genbank - -in:in3 -informat:gde - -item:Print Selection -itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)& -itemhelp:print_alignment.help - -arg:SCALE -argtype:slider -arglabel:Reduce printout by? -argmin:1 -argmax:20 -argvalue:1 - -arg:CMD -argtype:chooser -argchoice:Lpr:lpr -argchoice:Enscript Gaudy:enscript -G -q -argchoice:Enscript Two column:enscript -2rG - -arg:PRINTER -argtype:text -arglabel:Which printer? -argtext:lp - -in:in1 -informat:gde -insave: - -menu:Edit - -item:Sort -itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)& -itemhelp:heapsortHGL.help - -arg:PRIM_KEY -argtype:choice_list -argchoice:Group:group-ID -argchoice:type:type -argchoice:name:name -argchoice:Sequence ID:sequence-ID -argchoice:creator:creator -argchoice:offset:offset -arglabel:Primary sort field? - -arg:SEC_KEY -argtype:choice_list -argchoice:None: -argchoice:Group:group-ID -argchoice:type:type -argchoice:name:name -argchoice:Sequence ID:sequence-ID -argchoice:creator:creator -argchoice:offset:offset -arglabel:Secondary sort field? - -in:in1 -informat:gde -insave: - -item:extract -itemmethod:(gde in1;/bin/rm -f in1)& - -in:in1 -informat:gde -inmask: -insave: - -menu:DNA/RNA - -item:Translate... -itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1 - -arg:FRAME -argtype:chooser -arglabel:Which reading frame? -argchoice:First:1 -argchoice:Second:2 -argchoice:Third:3 -argchoice:All six:6 - -arg:MNFRM -arglabel:Minimum length of AA sequence to translate? 
-argtype:slider -argmin:0 -argmax:100 -argvalue:20 - -arg:LTRCODE -argtype:chooser -arglabel:Translate to: -argchoice:Single letter codes: -argchoice:Triple letter codes:-3 - -arg:TBL -arglabel:Codon table? -argtype:chooser -argchoice:universal:1 -argchoice:mycoplasma:2 -argchoice:yeast:3 -argchoice:Vert. mito.:4 -in:in1 -informat:gde - -out:out1 -outformat:gde - -item:Dot plot -itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)& -itemhelp:DotPlotTool.help - -in:in1 -informat:gde -insave: - -item:Clustal alignment -itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )& - -itemhelp:clustal_help - -arg:KTUP -argtype:slider -arglabel:K-tuple size for pairwise search -argmin:1 -argmax:10 -argvalue:2 - -arg:WIN -argtype:slider -arglabel:Window size -argmin:1 -argmax:10 -argvalue:4 - -arg:Trans -argtype:chooser -arglabel:Transitions weighted? -argchoice:Yes:/TRANSIT -argchoice:No: - -arg:FIXED -argtype:slider -arglabel:Fixed gap penalty -argmin:1 -argmax:100 -argvalue:10 - -arg:FLOAT -arglabel:Floating gap penalty -argtype:slider -argmin:1 -argmax:100 -argvalue:10 - -arg:REPORT -argtype:chooser -arglabel:View assembly report? 
-argchoice:No: -argchoice:Yes:kedit in1.rpt& - - -in:in1 -informat:flat -insave: - -item:Variable Positions -itemmethod:varpos $REV < in1 > out1 - -arg:REV -argtype:chooser -arglabel:Highlight (darken) -argchoice:Conserved positions: -argchoice:variable positions:-rev - -in:in1 -informat:flat - -out:out1 -outformat:colormask - -item:Phrap -itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*; - -in:in1 -informat:genbank - -out:out1 -outformat:genbank - -item:SNAP -itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*; - -in:in1 -informat:flat -out:out1 -outformat:text - - - - -item:Find all -itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1; -itemhelp:findall.help -itemmeta:f - -arg:SEARCH -argtype:text -arglabel:Search String - -arg:PRCNT -argtype:slider -arglabel:Percent mismatch -argmin:0 -argmax:75 -argvalue:10 - -arg:CASE -argtype:chooser -arglabel:Case -argchoice:Upper equals lower: -argchoice:Upper not equal lower:-case - -arg:UT -argtype:chooser -arglabel:U equal T? 
-argchoice:Yes:-u=t -argchoice:No: -argvalue:0 - -arg:MAT -arglabel:Match color -argtype:choice_list -argchoice:yellow:1 -argchoice:violet:2 -argchoice:red:3 -argchoice:aqua:4 -argchoice:green:5 -argchoice:blue:6 -argchoice:grey:11 -argchoice:black:8 -argvalue:2 - -arg:MIS -argtype:choice_list -arglabel:Mismatch color -argchoice:yellow:1 -argchoice:violet:2 -argchoice:red:3 -argchoice:aqua:4 -argchoice:green:5 -argchoice:blue:6 -argchoice:grey:11 -argchoice:black:8 -argvalue:7 - -in:in1 -informat:flat - -out:out1 -outformat:colormask - -item:Sequence Consensus -itemmethod:(MakeCons in1 $METHOD $MASK > out1) -itemhelp:MakeCons.help - -arg:METHOD -arglabel:Method -argtype:chooser -argchoice:IUPAC:-iupac -argchoice:Majority:-majority $PERCENT - -arg:MASK -argtype:chooser -arglabel:Create a new: -argchoice:Sequence: -argchoice:Selection Mask: | Consto01mask - -arg:PERCENT -arglabel:Minimum Percentage for Majority -argtype:slider -argmin:50 -argmax:100 -argvalue:75 - -in:in1 -informat:gde - -out:out1 -outformat:gde - - -#Menu for DNA/RNA - -item:blastn -itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)& - -in:in1 -informat:flat -insave: - -arg:BLASTDBDNA -argtype:choice_list -arglabel:Which Database -argchoice:HIV-1 Seq. 
Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2 -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:4 -argmax:18 -argvalue:12 - -arg:MATCH -argtype:slider -arglabel:Match Score -argmin:1 -argmax:10 -argvalue:5 - -arg:MMSCORE -argtype:slider -arglabel:Mismatch Score -argmin:-10 -argmax:-1 -argvalue:-5 - -item:blastx -itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)& - - - -in:in1 -informat:flat -insave: - -arg:BLASTDBDNA -argtype:choice_list -arglabel:Which Database -argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta -argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:1 -argmax:5 -argvalue:3 - -arg:Matrix -arglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:CODE -argtype:choice_list -arglabel:Genetic Code - -argchoice:Standard or Universal:0 -argchoice:Vertebrate Mitochondrial:1 -argchoice:Yeast Mitochondrial:2 -argchoice:Mold Mitochondrial and Mycoplasma:3 -argchoice:Invertebrate Mitochondrial:4 -argchoice:Ciliate Macronuclear:5 -argchoice:Protozoan Mitochondrial:6 -argchoice:Plant Mitochondrial:7 -argchoice:Echinodermate Mitochondrial:8 - -item:------------------------ - -item:Add a new DNA blast db -itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname; - -arg:sourcefile -argtype:text -arglabel: enter the file name - -arg:menuname -argtype:text -arglabel: enter the name of the DB - -menu:seq. 
datasets -item:tttt -itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp -out:OUTPUTFILE -outformat:genbank - -item:HIV1POLDNA.fasta -itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp -out:OUTPUTFILE -outformat:genbank - -item:structure -itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp -out:OUTPUTFILE -outformat:genbank - -item:------------- -item:add a new dataset -itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file - -arg:name -argtype:text -arglabel:Enter the dataset name ? - -arg:file -argtype:text -arglabel:Enter the dataset file (in FASTA) ? - - -#Menu for Protein -menu:protein -item:blastp -itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)& - - -in:in1 -informat:flat -insave: - -arg:BLASTDBPROT -argtype:choice_list -arglabel:Which Database -argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta -argchoice:ttttt:/usr/local/biotools/db/tttt -argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp -argchoice:yyyy:/usr/local/biotools/db/test - -arg:Matrix -barglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:1 -argmax:5 -argvalue:3 - -item:tblastn -itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM??? 
.; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)& - -in:in1 -informat:flat -insave: - -arg:BLASTDB -argtype:choice_list -arglabel:Which Database -argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank -argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate - -arg:Matrix -arglabel:Substitution Matrix: -argtype:choice_list -argchoice:PAM30:PAM30 -argchoice:PAM70:PAM70 - -arg:WORDLEN -argtype:slider -arglabel:Word Size -argmin:4 -argmax:18 -argvalue:12 - -arg:CODE -argtype:choice_list -arglabel:Genetic Code -argchoice:Standard or Universal:0 -argchoice:Vertebrate Mitochondrial:1 -argchoice:Yeast Mitochondrial:2 -argchoice:Mold Mitochondrial and Mycoplasma:3 -argchoice:Invertebrate Mitochondrial:4 -argchoice:Ciliate Macronuclear:5 -argchoice:Protozoan Mitochondrial:6 -argchoice:Plant Mitochondrial:7 -argchoice:Echinodermate Mitochondrial:8 - - -item:Map View -itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)& -itemhelp:mapview.help - -in:in1 -informat:gde -insave: - -arg:PBL -arglabel:Pixel Between Lines -argtype:slider -argvalue:10 -argmin:1 -argmax:15 - -arg:NPP -arglabel:Nucleotides Per Pixel -argtype:slider -argvalue:1 -argmin:1 -argmax:20 - -arg:LWIDTH -arglabel:Line Thickness -argtype:slider -argvalue:2 -argmin:1 -argmax:5 - -item:-------------------------- -item:Add a new Protein blast db -itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname; - -arg:sourcefile -argtype:text -arglabel: Enter the file (in FASTA) - -arg:menuname -argtype:text -arglabel: Enter the name of the DB - -menu:Phylogeny - - -item:Phylip help -itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)& - -arg:FILE -argtype:choice_list -arglabel:Which program? 
-argchoice:clique:clique.html -argchoice:consense:consense.html -argchoice:contchar:contchar.html -argchoice:contml:contml.html -argchoice:contrast:contrast.html -argchoice:discrete:discrete.html -argchoice:distance:distance.html -argchoice:dnaboot:dnaboot.html -argchoice:dnacomp:dnacomp.html -argchoice:dnadist:dnadist.html -argchoice:dnainvar:dnainvar.html -argchoice:dnaml:dnaml.html -argchoice:dnamlk:dnamlk.html -argchoice:dnamove:dnamove.html -argchoice:dnapars:dnapars.html -argchoice:dnapenny:dnapenny.html -argchoice:dollop:dollop.html -argchoice:dolmove:dolmove.html -argchoice:dolpenny:dolpenny.html -argchoice:draw:draw.html -argchoice:drawgram:drawgram.html -argchoice:drawtree:drawtree.html -argchoice:factor:factor.html -argchoice:fitch:fitch.html -argchoice:gendist:gendist.html -argchoice:kitsch:kitsch.html -argchoice:main:main.html -argchoice:mix:mix.html -argchoice:move:move.html -argchoice:neighbor:neighbor.html -argchoice:penny:penny.html -argchoice:protpars:protpars.html -argchoice:read.me.general:read.me.general.html -argchoice:restml:restml.html -argchoice:seqboot:seqboot.html -argchoice:sequence:sequence.html - - - -item:Phylip 3.5 -itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )& - -arg:PROGRAM -argtype:choice_list -arglabel:Which program to run? -argchoice:DNAPARS:dnapars -argchoice:DNABOOT:dnaboot -argchoice:DNAPENNY:dnapenny -argchoice:DNAML:dnaml -argchoice:DNAMLK:dnamlk -argchoice:DNACOMP:dnacomp -argchoice:DNAMOVE:dnamove -argchoice:DNAINVAR:dnainvar -argchoice:PROTPARS:protpars - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? 
-argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - - - -item:Phylip DNA Distance methods -itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)& - -arg:EXPLAIN -argtype:text -arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE - - -arg:PROGRAM -arglabel:Which method? -argtype:chooser -argchoice:DNADIST+NEIGHBOR: -argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; - -arg:PROG -arglabel:Run ? -argtype:chooser -argchoice:Run without Bootstrap: -argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot; - -arg:DNA -argtype:text -arglabel:Name of DNADIST outfile? - -arg:NEI -argtype:text -arglabel:Name of NEIGHBOR outfile? - -arg:TREE -argtype:text -arglabel:Name of TREEFILE ? - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - -item:Phylip PROTEIN Distance methods -itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)& - -arg:PROGRAM -arglabel:Which method? -argtype:chooser -argchoice:PROTDIST+NEIGHBOR: -argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense; - -arg:PROG -arglabel:Which method? -argtype:chooser -argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot; -argchoice:No Bootstrap: - -arg:PREEDIT -argtype:chooser -arglabel:Edit input before running? -argchoice:No: -argchoice:Yes:kedit infile; - -in:in1 -informat:genbank -inmask: -insave: - - - - - -menu:On-Line Res. 
-item:tytyt -itemmethod:netscape hnu[phoph & -item:SANBI -itemmethod:netscape again & -item:PlasmoDB -itemmethod:netscape http://www.plasmodb.org & -item:NCBI -itemmethod:netscape http://www.ncbi.nlm.nih.gov & -item:sanbi -itemmethod:netscape http://www.sanbi.ac.za & -item:SANBI -itemmethod:netscape http://www.sanbi.ac.za & - -item:GDE for Linux resources at Bioafrica.net -itemmethod:netscape http://www.bioafrica.net & - -item:------------------------- -item:add a new website -itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url - -arg:name -argtype:text -arglabel:Enter the site name - -arg:url -argtype:text -arglabel:Enter the URL (including http://) diff --git a/CORE/BasicDisplay.o b/CORE/BasicDisplay.o deleted file mode 100644 index 5ac3fee..0000000 Binary files a/CORE/BasicDisplay.o and /dev/null differ diff --git a/CORE/BuiltIn.o b/CORE/BuiltIn.o deleted file mode 100644 index 8714e2a..0000000 Binary files a/CORE/BuiltIn.o and /dev/null differ diff --git a/CORE/ChooseFile.o b/CORE/ChooseFile.o deleted file mode 100644 index c21acc0..0000000 Binary files a/CORE/ChooseFile.o and /dev/null differ diff --git a/CORE/CutCopyPaste.o b/CORE/CutCopyPaste.o deleted file mode 100644 index 37e24c2..0000000 Binary files a/CORE/CutCopyPaste.o and /dev/null differ diff --git a/CORE/DrawNA.o b/CORE/DrawNA.o deleted file mode 100644 index 269a7df..0000000 Binary files a/CORE/DrawNA.o and /dev/null differ diff --git a/CORE/Edit.o b/CORE/Edit.o deleted file mode 100644 index 838edab..0000000 Binary files a/CORE/Edit.o and /dev/null differ diff --git a/CORE/EventHandler.o b/CORE/EventHandler.o deleted file mode 100644 index b6a4ed0..0000000 Binary files a/CORE/EventHandler.o and /dev/null differ diff --git a/CORE/FileIO.c~ b/CORE/FileIO.c~ deleted file mode 100755 index 3087c50..0000000 --- a/CORE/FileIO.c~ +++ /dev/null @@ -1,1056 +0,0 @@ -#include -#include -#include -#include -#include -#include "menudefs.h" -#include "defines.h" - -/* -LoadData(): - Load a data 
set from the command line argument. - -Copyright (c) 1989, University of Illinois board of trustees. All rights -reserved. Written by Steven Smith at the Center for Prokaryote Genome -Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. -Carl Woese. - -Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. -All rights reserved. - -*/ - -LoadData(filename) -char *filename; -{ - extern NA_Alignment *DataSet; - extern int DataType,FileFormat,Default_DNA_Trans[],Default_RNA_Trans[]; - extern int Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[]; - - extern Frame frame; - extern Canvas EditCan,EditNameCan; - extern char FileName[]; - FILE *file; - NA_Alignment *DataNaAln; - char temp[1024]; -/* -* Get file name, determine the file type, and away we go.. -*/ - if(Find2(filename,"gde")!=0) - strcpy(FileName,filename); - if( (file=fopen(filename,"r"))!=0 ) - { - FindType(filename,&DataType,&FileFormat); - switch(DataType) - { - case NASEQ_ALIGN: - if(DataSet == NULL) - { - DataSet = (NA_Alignment*)Calloc(1, - sizeof(NA_Alignment)); - DataNaAln =(NA_Alignment*)DataSet; - DataSet->rel_offset = 0; - } - else - DataNaAln = (NA_Alignment*)DataSet; - - LoadFile(filename,DataNaAln, - DataType,FileFormat); - - break; - default: - break; - } - } - fclose(file); - sprintf(temp,"Genetic Data Environment 2.2 (%s)",FileName); - xv_set(frame, - FRAME_LABEL, temp, - 0); - return; -} - - -/* -LoadFile(): - Load the given filename into the given dataset. Handle any -type conversion needed to get the data into the specified data type. -This routine is used in situations where the format and datatype is known. - -Copyright (c) 1989-1990, University of Illinois board of trustees. All -rights reserved. Written by Steven Smith at the Center for Prokaryote Genome -Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. -Carl Woese. - -Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. 
-All rights reserved. -*/ - -LoadFile(filename,dataset,type,format) -char *filename; -char *dataset; -int type,format; -{ - extern int DataType; - - if (DataType != type) - fprintf(stderr,"Warning, datatypes do not match.\n"); -/* -Handle the overwrite/create/merge dialog here. -*/ - switch(format) - { - case NA_FLAT: - ReadNA_Flat(filename,dataset,type); - ((NA_Alignment*)dataset)->format = GDE; - break; - - case GENBANK: - ReadGen(filename,dataset,type); - ((NA_Alignment*)dataset)->format = GENBANK; - break; - - case GDE: - ReadGDE(filename,dataset,type); - ((NA_Alignment*)dataset)->format = GDE; - break; - case COLORMASK: - ReadCMask(filename); - - default: - break; - } - return; -} - - - -/* -* Print error message, and die -*/ -ErrorOut(code,string) -int code; -char *string; -{ - if (code == 0) - { - fprintf(stderr,"Error:%s\n",string); - exit(1); - } - return; -} - - -/* -* More robust memory management routines -*/ -char *Calloc(count,size) -int count,size; -{ - char *temp; -#ifdef SeeAlloc - extern int TotalCalloc; - TotalCalloc += count*size; - fprintf(stderr,"Calloc %d %d\n",count*size,TotalCalloc); -#endif - temp = calloc(count,size); - ErrorOut(temp,"Cannot allocate memory"); - return(temp); -} - -char *Realloc(block,size) -char *block; -int size; -{ - char *temp; -#ifdef SeeAlloc - extern int TotalRealloc; - TotalRealloc += size; - fprintf(stderr,"Realloc %d\n",TotalRealloc); -#endif - temp=realloc(block,size); - ErrorOut(temp,"Cannot change memory size"); - return(temp); -} - -Cfree(block) -char* block; -{ - if (block) - { - /* rtm 18.III.98 - FileIO.c: In function `Cfree': - FileIO.c:181: void value not ignored as it ought to be - - if(cfree(block) == 0) - Warning("Error in Cfree..."); - */ - cfree(block); - } - else - Warning("Error in Cfree, NULL block"); - return; -} - - - -/* -* same as strdup -*/ -char *String(string) -char *string; -{ - char *temp; - - temp = Calloc(strlen(string)+1,sizeof(char)); - strcpy(temp,string); - return(temp); -} - - 
-FindType(name,dtype,ftype) -char *name; -int *dtype,*ftype; -{ - FILE *file; - char Inline[GBUFSIZ]; - - file = fopen(name,"r"); - *dtype=0; - *ftype=0; - - if (file == NULL) - return(1); - - /* -* Is this a flat file? -* Get the first non blank line, see if a type marker shows up. -*/ - fgets(Inline,GBUFSIZ,file); - for(;strlen(Inline)<2 && fgets(Inline,GBUFSIZ,file) != NULL;); - if(Inline[0] == '#' || Inline[0] == '%' || - Inline[0] == '"' || Inline[0] == '@' ) - { - *dtype=NASEQ_ALIGN; - *ftype=NA_FLAT; - } - - /* -* Else, try genbank -*/ - else - { - fclose(file); - file = fopen(name,"r"); - *dtype=0; - *ftype=0; - - if (file == NULL) - return(1); - - for(;fgets(Inline,GBUFSIZ,file) != NULL;) - if(Find(Inline,"LOCUS")) - { - *dtype=NASEQ_ALIGN; - *ftype=GENBANK; - fclose(file); - return(0); - } - /* -* and last, try GDE -*/ - else if(Find(Inline,"sequence")) - { - *dtype = NASEQ_ALIGN; - *ftype = GDE; - fclose(file); - return(0); - } - else if(Find(Inline,"start:")) - { - *dtype = NASEQ_ALIGN; - *ftype = COLORMASK; - fclose(file); - return(0); - } - } - - fclose(file); - return(0); -} - -AppendNA(buffer,len,seq) -NA_Base *buffer; -int len; -NA_Sequence *seq; -{ - int curlen=0,j; - NA_Base *temp; - - if(seq->seqlen+len >= seq->seqmaxlen) - { - if(seq->seqlen>0) - seq->sequence = (NA_Base*)Realloc(seq->sequence, - (seq->seqlen + len+GBUFSIZ) * sizeof(NA_Base)); - else - seq->sequence = (NA_Base*)Calloc(1,(seq->seqlen + - len+GBUFSIZ) * sizeof(NA_Base)); - seq->seqmaxlen = seq->seqlen + len+GBUFSIZ; - } - /* -* seqlen is the length, and the index of the next free -* base -*/ - curlen = seq->seqlen + seq->offset; - for(j=0;jseqlen += len; - return; -} - -Ascii2NA(buffer,len,matrix) -char *buffer; -int len; -int matrix[16]; -{ - /* -* if the translation matrix exists, use it to -* encode the buffer. 
-*/ - register i; - if(matrix != NULL) - for(i=0;inumelements == (int) NULL) - return; - seqs = aln->element; - - file = fopen(filename,"w"); - if(file == NULL) - { - Warning("Cannot open file for output"); - return(1); - } - if(maskable && (method != SELECT_REGION)) - { - for(j=0;jnumelements;j++) - if(seqs[j].elementtype == MASK && - seqs[j].selected) - mask = j; - } - for(j=0;jnumelements;j++) - { - SeqNorm(&(seqs[j])); - } - - for(j=0;jnumelements;j++) - { - if(method != SELECT_REGION) - offset = seqs[j].offset; - else - for(offset=seqs[j].offset; - aln->selection_mask[offset] == '0'; - offset++); - - if(offset+aln->rel_offset != 0) - sprintf(offset_str,"(%d)",offset+aln->rel_offset); - else - offset_str[0] = '\0'; - - if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) - || (method == SELECT_REGION && seqs[j].subselected) - || method == ALL) - { - fprintf(file,"%c%s%s\n", - seqs[j].elementtype == DNA?'#': - seqs[j].elementtype == RNA?'#': - seqs[j].elementtype == PROTEIN?'%': - seqs[j].elementtype == TEXT?'"': - seqs[j].elementtype == MASK?'@':'"', - seqs[j].short_name, - (offset+aln->rel_offset == 0)? 
"":offset_str); - if(seqs[j].tmatrix) - { - if(mask == -1) - for(k=0,kk=0;kk0) - { - buf[60] = '\0'; - fputs(buf,file); - putc('\n',file); - } - if(method == SELECT_REGION) - { - if(aln->selection_mask[kk+offset]=='1') - { - buf[k%60] =((char)seqs[j].tmatrix[ - (int)getelem( &(seqs[j]),kk+offset) ]); - k++; - } - } - else - { - buf[k%60] =((char)seqs[j].tmatrix[ - (int)getelem( &(seqs[j]),kk+offset) ]); - k++; - } - } - else - for(k=0,kk=0;kk1) - { - buf[60] = '\0'; - fputs(buf,file); - putc('\n',file); - } - buf[k%60] = ((char)seqs[j].tmatrix - [getelem(&(seqs[j]),kk+offset)]); - } - } - } - else - { - if(mask == -1) - for(k=0,kk=0;kk0) - { - buf[60] = '\0'; - fputs(buf,file); - putc('\n',file); - } - if(method == SELECT_REGION) - { - if(aln->selection_mask[kk+offset]=='1') - { - buf[k%60] =(getelem( &(seqs[j]),kk+offset)); - k++; - } - } - else - { - buf[k%60] =( getelem( &(seqs[j]),kk+offset) ); - k++; - } - } - else - for(k=0,kk=0;kk1) - { - buf[60] = '\0'; - fputs(buf,file); - putc('\n',file); - } - buf[k%60] =((char)getelem(&(seqs[j]), - kk+offset)); - } - } - } - buf[(k%60)>0 ? 
(k%60):60] = '\0'; - fputs(buf,file); - putc('\n',file); - } - } - fclose(file); - return(0); -} - - -Warning(s) -char *s; -{ - extern Frame frame; - extern Panel_item left_foot,right_foot; - Beep(); - xv_set(frame,FRAME_RIGHT_FOOTER,s,0); - xv_set(right_foot,PANEL_LABEL_STRING,s,0); -} - - -InitNASeq(seq,type) -NA_Sequence *seq; -int type; -{ - extern int Default_RNA_Trans[]; /* rtm 18.III.98 */ - extern int Default_DNA_Trans[],Default_NA_RTrans[]; - extern int - Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[]; - - SetTime(&(seq->t_stamp.origin)); - SetTime(&(seq->t_stamp.modify)); - strncpy(seq->id,uniqueID(),79); - seq->seq_name[0] = '\0'; - seq->barcode[0] = '\0'; - seq->contig[0] = '\0'; - seq->membrane[0] = '\0'; - seq->authority[0] = '\0'; - seq->short_name[0] = '\0'; - seq->sequence = NULL; - seq->offset = 0; - seq->baggage = NULL; - seq->baggage_len = 0; - seq->baggage_maxlen = 0; - seq->comments = NULL; - seq->comments_len = 0; - seq->comments_maxlen = 0; - seq->description[0] = '\0'; - seq->mask = NULL; - seq->seqlen = 0; - seq->seqmaxlen = 0; - seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION; -#ifdef HGL - seq->attr = 0; -#else - seq->attr = IS_5_TO_3 + IS_PRIMARY; -#endif - seq->elementtype = type; - seq->groupid = 0; - seq->groupb = NULL; - seq->groupf = NULL; - seq->cmask = NULL; - seq->selected = 0; - seq->subselected = 0; - - switch (type) - { - case DNA: - seq->tmatrix = Default_DNA_Trans; - seq->rmatrix = Default_NA_RTrans; - seq->col_lut = Default_NAColor_LKUP; - break; - case RNA: - seq->tmatrix = Default_RNA_Trans; - seq->rmatrix = Default_NA_RTrans; - seq->col_lut = Default_NAColor_LKUP; - break; - case PROTEIN: - seq->tmatrix = NULL; - seq->rmatrix = NULL; - seq->col_lut = Default_PROColor_LKUP; - break; - case MASK: - case TEXT: - default: - seq->tmatrix = NULL; - seq->rmatrix = NULL; - seq->col_lut = NULL; - break; - } - return; -} - - -ReadCMask(filename) -char *filename; -{ - extern Frame frame; - extern 
NA_Alignment *DataSet; - - char Inline[GBUFSIZ],head[GBUFSIZ],curname[GBUFSIZ], - temp[GBUFSIZ]; - int IGNORE_DASH = FALSE,offset; - NA_DisplayData *NAdd; - NA_Alignment *aln; - - int i,j,k,curlen = 0,*colors,orig_ctype,jj,indx = 0; - FILE *file; - - if(DataSet == NULL) return; - - NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; - - if(NAdd == NULL) - return; - - aln = (NA_Alignment*)DataSet; - - curname[0] = '\0'; - orig_ctype = NAdd->color_type; - file = fopen(filename,"r"); - if(file == NULL) - { - Warning("File not found"); - Warning(filename); - return; - } - - NAdd->color_type = COLOR_ALN_MASK; - for(;fgets(Inline,GBUFSIZ,file) !=0;) - { - if(Find(Inline,"offset:")) - { - crop(Inline,head,temp); - sscanf(temp,"%d",&(aln->cmask_offset)); - } - else if(Find(Inline,"nodash:")) - IGNORE_DASH = TRUE; - else if(Find(Inline,"dash:")) - IGNORE_DASH = TRUE; - else if(Find(Inline,"name:")) - { - crop(Inline,head,curname); - curname[strlen(curname)-1] = '\0'; - for(j=0;jcolor_type = orig_ctype; - return; - } - if(strlen(curname) != 0) - { - indx = -1; - for(j=0;jnumelements;j++) - if(Find(aln->element[j].short_name,curname) - || Find(aln->element[j].id,curname)) - { - if(aln->element[j].cmask != NULL) - Cfree(aln -> element[j].cmask); - colors=(int*)Calloc(aln->element[j] - .seqmaxlen+1+aln->element[j].offset - ,sizeof(int)); - aln->element[j].cmask = colors; - NAdd->color_type = COLOR_SEQ_MASK; - indx = j; - j = aln->numelements; - } - if(indx == -1) - colors=NULL; - } - else - { - if(aln->cmask != NULL) Cfree(aln->cmask); - colors=(int*)Calloc(curlen,sizeof(int)); - aln->cmask = colors; - aln->cmask_len = curlen; - NAdd->color_type = COLOR_ALN_MASK; - for(j=0;jelement[indx].seqlen);j++,jj++) - { - offset = aln->element[indx].offset; - if(fgets(Inline,GBUFSIZ,file)==NULL) - { - Warning - ("illegal format in colormask"); - NAdd->color_type = orig_ctype; - return; - } -/* -* Fixed so that the keyword nodash causes the colormask to be mapped -* to the 
sequence, not the alignment. -* -* The allocated space is equal the seqlen of the matched sequence. -* -*/ - if(aln->element[indx].tmatrix) - for(;(getelem(&(aln->element[indx]),jj - +offset) - ==(aln->element[indx].tmatrix['-']) - || (getelem(&(aln->element[indx]),jj - +offset) - ==aln->element[indx].tmatrix['~'])) - && jj < aln->element[indx].seqlen;) - colors[jj++] = 12; - else - for(;getelem(&(aln->element[indx]),jj - +offset) - =='-' && jj < aln->element[indx].seqlen;) - colors[jj++] = 12; - - sscanf(Inline,"%d",&(colors[jj])); - } - } - else if((indx == -1) && (strlen(curname) != 0)) - for(j=0;jcolor_type = orig_ctype; - return; - } - sscanf(Inline,"%d",&(colors[j])); - } - IGNORE_DASH = FALSE; - curname[0] = '\0'; - } - - } - RepaintAll(TRUE); - return; -} - - -ReadNA_Flat(filename,dataset,type) -char *filename; -char *dataset; -int type; -{ - int i, j, jj, c, curelem,offset; - char name[GBUFSIZ]; - char buffer[GBUFSIZ]; - char origin[GBUFSIZ],ref[GBUFSIZ]; - char Inline[GBUFSIZ],head[GBUFSIZ],tail[GBUFSIZ],temp[GBUFSIZ]; - char curname[GBUFSIZ]; - - NA_Sequence *this_elem; - NA_Alignment *data; - extern int Default_DNA_Trans[],Default_RNA_Trans[],Default_NA_RTrans[]; - - FILE *file; - - curname[0] = '\0'; - data = (NA_Alignment*)dataset; - - file = fopen(filename,"r"); - if(file == NULL) - { - fprintf(stderr,"Cannot open %s.\n",filename); - return; - } - for(;fgets(Inline,GBUFSIZ,file) !=0;) - { - if( - Inline[0] == '#' || - Inline[0] == '%' || - Inline[0] == '"' || - Inline[0] == '@' - ) - { - offset = 0; - for(j=0;jnumelements++; - if( curelem == 0 ) - { - data->element=(NA_Sequence*) - Calloc(5,sizeof(NA_Sequence)); - data->maxnumelements = 5; - } - else if (curelem==data->maxnumelements) - { - (data->maxnumelements) *= 2; - data->element= - (NA_Sequence*)Realloc(data->element - ,data->maxnumelements*sizeof(NA_Sequence)); - } - - InitNASeq(&(data->element[curelem]), - Inline[0] == '#'?DNA: - Inline[0] == '%'?PROTEIN: - Inline[0] == '"'?TEXT: - Inline[0] 
== '@'?MASK:TEXT); - this_elem= &(data->element[curelem]); - if(Inline[strlen(Inline)-1] == '\n') - Inline[strlen(Inline)-1] = '\0'; - strncpy(this_elem->short_name,(char*)&(Inline[1]),31); - this_elem->offset = offset; - } - else if(Inline[0] != '\n') - { - for(j=0,jj=0;jelement[curelem].rmatrix) - Ascii2NA(buffer,jj,data->element[curelem] - .rmatrix); - AppendNA(buffer,jj,&(data->element[curelem])); - } - } - - for(j=0;jnumelements;j++) - data->maxlen = MAX(data->maxlen,data->element[j].seqlen + - data->element[j].offset); - - for(j=0;jnumelements;j++) - if(data->element[j].seqlen==0) - data->element[j].protect = - PROT_BASE_CHANGES+ PROT_GREY_SPACE+ - PROT_WHITE_SPACE+ PROT_TRANSLATION; - - NormalizeOffset(data); - Regroup(data); - return; -} - - -WriteStatus(aln,filename,method) -NA_Alignment *aln; -char *filename; -int method; -{ - extern int EditMode,FileFormat; - extern NA_Alignment *DataSet; - NA_DisplayData *NAdd; - NA_Sequence *this_seq; - int j; - FILE *file; - - if(DataSet == NULL) - return; - - NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; - if(NAdd == NULL) - return; - - file = fopen(filename,"w"); - if (file == NULL) - { - Warning("Cannot open status file."); - return(1); - } - fprintf(file,"File_format: %s\n",FileFormat==GENBANK?"genbank":"flat"); - /* - fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert": - "check"); -*/ - - this_seq = &(aln->element[NAdd->cursor_y]); - if(this_seq->id != NULL) - fprintf(file,"sequence-ID %s\n",this_seq->id); - fprintf(file,"Column: %d\nPos:%d\n",NAdd->cursor_x,NAdd->position); - switch(this_seq->elementtype) - { - case DNA: - case RNA: - fprintf(file,"#%s\n", - this_seq->short_name); - break; - case PROTEIN: - fprintf(file,"%%%s\n", - this_seq->short_name); - break; - case MASK: - fprintf(file,"@%s\n", - this_seq->short_name); - break; - case TEXT: - fprintf(file,"%c%s\n",'"', - this_seq->short_name); - break; - default: - break; - } - if(this_seq->tmatrix) - for(j=0;jseqlen;j++) - 
putc(this_seq->tmatrix[getelem(this_seq,j)],file); - else - for(j=0;jseqlen;j++) - putc(getelem(this_seq,j),file); - - fclose(file); - return; -} - -ReadStatus(filename) -char *filename; -{ - /* - int i,j; - FILE *file; - char Inline[GBUFSIZ],head[GBUFSIZ]; - file = fopen(filename,"r"); - for(;!DONE;) - { - fgets(Inline,GBUFSIZ,file); - if(strlen(Inline) == 0) - DONE = TRUE; - else - { - sscanf(Inline,"%s",head); - if(strncmp(head,"Col",3) != 0) - { - sscanf(Inline,"%*s %d",head,&(DataSet->nadd-> - cursor_x),&(DataSet->nadd->cursory); - } - else if(strncmp(head,"Pos",3) != 0) - { - } - } - } - -*/ -} - - -NormalizeOffset(aln) -NA_Alignment *aln; -{ - int i,j,offset = 99999999; - - for(j=0;jnumelements;j++) - offset = MIN(offset,aln->element[j].offset); - - for(j=0;jnumelements;j++) - aln->element[j].offset -= offset; - - aln->maxlen = -999999999; - for(j=0;jnumelements;j++) - aln->maxlen = MAX(aln->element[j].seqlen+aln->element[j].offset, - aln->maxlen); - - aln->rel_offset += offset; - - if(aln->numelements == 0) - aln->rel_offset = 0; - - return; -} - -WriteCMask(aln,filename,method,maskable) -NA_Alignment *aln; -char *filename; -int method,maskable; -{ - int j,kk,mask = -1,k,offset,min_offset= -999999; - char offset_str[100]; - int *buf; - NA_Sequence *seqs; - FILE *file; - if(aln == NULL) - return; - if(aln->numelements == (int) NULL) - return; - seqs = aln->element; - - file = fopen(filename,"w"); - if(file == NULL) - { - Warning("Cannot open file for output"); - return(1); - } - if(maskable && (method != SELECT_REGION)) - { - for(j=0;jnumelements;j++) - if(seqs[j].elementtype == MASK && - seqs[j].selected) - mask = j; - } - for(j=0;jnumelements;j++) - { - SeqNorm(&(seqs[j])); - } - - for(j=0;jnumelements;j++) - { - if(method != SELECT_REGION) - offset = seqs[j].offset; - else - for(offset=seqs[j].offset; - aln->selection_mask[offset] == '0'; - offset++); - - if(offset+aln->rel_offset != 0) - sprintf(offset_str,"(%d)",offset+aln->rel_offset); - else - 
offset_str[0] = '\0'; - - if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) - || (method == SELECT_REGION && seqs[j].subselected) - || method == ALL) - { - fprintf(file,"%c%s%s\n", - seqs[j].elementtype == DNA?'#': - seqs[j].elementtype == RNA?'#': - seqs[j].elementtype == PROTEIN?'%': - seqs[j].elementtype == TEXT?'"': - seqs[j].elementtype == MASK?'@':'"', - seqs[j].short_name, - (offset+aln->rel_offset == 0)? "":offset_str); - - if(seqs[j].cmask != NULL) - { - - buf =(int*) Calloc(seqs[j].seqlen,sizeof(int) ); - - if(mask == -1) - { - for(k=0,kk=0;kkselection_mask[kk+offset]=='1') - buf[k++] = (getcmask( &(seqs[j]),kk+offset)); - } - - else - buf[k++] =( getcmask( &(seqs[j]),kk+offset) ); - } - } - else - { - for(k=0,kk=0;kk igfile - - DESCRIPTION - Converts interleaved .aln output from Clustal V into - sequential .ig (IntelliGenetics) format for use by MASE. - - clustalfile: - CLUSTAL V multiple sequence alignment - - name1 AACTTTCG - name2 ATCTTTCG - * ****** - - name1 CCTGCT - name2 CCCGCT - ** *** - - igfile: - ; - name1 - AACTTTCG - CCTGCT - : - name2 - ATCTTTCG - CCCGCT - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/dbstat.doc b/CORE/xylem/dbstat.doc deleted file mode 100644 index fa922c9..0000000 --- a/CORE/xylem/dbstat.doc +++ /dev/null @@ -1,36 +0,0 @@ - dbstat update 3 Feb 94 - - NAME - dbstat - calculates amino acid frequencies in a protein - database - - SYNOPSIS - dbstat - - DESCRIPTION - dbstat reads a file of one or more nucleic acid sequences - and calculates the amino acid frequencies, both in terms of - absolute numbers, and as a fraction of the total. 
- - input - The input file is the standard .wrp (Pearson) format, - such as that produced by getob: - - >name - ; one or more comment lines (optional) - sequence lines - - Comments begin either with semicolon (;) or right arrow (>) - characters. - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/expfile.template b/CORE/xylem/expfile.template deleted file mode 100644 index 9c82cb8..0000000 --- a/CORE/xylem/expfile.template +++ /dev/null @@ -1,30 +0,0 @@ -;--------------------------------------------------------------------------- -; FEATURES/GDE Expression File Instructions 8/7/95 -; -; 1. Type in one or more GenBank expressions below, -; or -; Place cursor at end of this file and choose 'Include File' in the FILE -; menu to read in a file of feature keys. -; or -; Copy expressions from another window and Paste into this window. -; 2. Choose 'Save Current File' in the File menu -; 3. Quit this window -; -; NOTES: -; 1) FEATURES will then extract the appropriate sequences. -; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES. -; 2) All expressions referring to GenBank entries must begin with a '@' -; Literals (ie. sequences to be embedded in the final output) -; do NOT begin with a '@'. -; 3) Put each expression on a separate line. 
-; -; SAMPLE EXPRESSION FILE: -; -; @J05635:83..1813 -; ; EcoRI/NotI adaptor {this is a comment line} -; AATTGCGGCCGC -; @J05635:/product="flagellin A" -; @x17548:singed_trans -; -;--------------------------------------------------------------------------- - diff --git a/CORE/xylem/feafile.template b/CORE/xylem/feafile.template deleted file mode 100644 index 12e8dd9..0000000 --- a/CORE/xylem/feafile.template +++ /dev/null @@ -1,23 +0,0 @@ -;--------------------------------------------------------------------------- -; FEATURES/GDE Feature Key File Instructions -; -; 1. Type in one or more GenBank FEATURE Table feature keys below, -; or -; Place cursor at end of this file and choose 'Include File' in the FILE -; menu to read in a file of feature keys. -; -; 2. Choose 'Save Current File' in the File menu -; 3. Quit this window -; -; FEATURES will then extract the appropriate sequences . YOU DON'T NEED TO EDIT -; OUT THESE COMMENT LINES. -; -; NOTE: Put each feature key on a separate line -; SAMPLE FEATURE KEY FILE: -; -; mRNA -; CDS -; mat_peptide -; -;--------------------------------------------------------------------------- - diff --git a/CORE/xylem/features.doc b/CORE/xylem/features.doc deleted file mode 100644 index 8e1321c..0000000 --- a/CORE/xylem/features.doc +++ /dev/null @@ -1,407 +0,0 @@ - - FEATURES.DOC update 7 Feb 94 - - - NAME - FEATURES - extracts features from GenBank entries - - SYNOPSIS - features - features expression - features [-f featurekey | -F keyfile] - [-n name |-a accession | -e expression | - -N namefile |-A accfile | -E expfile] - [-u dbfile | -U dbfile | -g ] - features -h - - DESCRIPTION - FEATURES extracts sequence objects from GenBank entries, using - the Features Table language. Features can be retrieved either by - specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by - evaluating expressions. 
In practical terms, FEATURES is actually - a user interface for GETOB, which actually performs the parsing - and extraction of sequence objects. FEATURES can be run either as - an interactive program or with command line arguments. - - 'features' with no arguments runs the program interactively. - 'features' followed by an expression retrieves the data directly - from GenBank and evaluates the expression. The third form of - features requires all arguments to be accompanied by their - respective option flags. Finally, 'features -h' prints the - SYNOPSIS. - - - INTERACTIVE EXECUTION - FEATURES executed with no arguments runs interactively. An example of the - FEATURES menu is shown below: - - ___________________________________________________________________ - FEATURES - Version 7 FEB 94 - Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003 - ___________________________________________________________________ - Features: tRNA - Entries: EPFCPCG - Dataset: - ___________________________________________________________________ - Parameter Description Value - ------------------------------------------------------------------- - 1).................... 
FEATURES TO EXTRACT ....................> f - f:Type a feature at the keyboard - F:Read a list of features from a file - 2)....................ENTRIES TO BE PROCESSED (choose one).....> n - Keyboard input - n:name a:accession # e:expression - File input - N:name(s) A:accession #(s) E:expression(s) - 3)....................WHERE TO GET IT .........................> g - u:Genbank dataset g:complete GenBank database - U: same as u, but all entries - 4)....................WHERE TO SEND IT ........................> a - s:Each feature to a separate file a:All output to same file - --------------------------------------------------------------- - Type number of your choice or 0 to continue: - 0 - Messages will be written to EPFCPCG.msg - Final sequence output will be written to EPFCPCG.out - Expressions will be written to EPFCPCG.exp - Extracting features... - - In the example, FEATURES was instructed to retrieve all tRNAs from - the GenBank entry EPFCPCG, which contains the Epifagus plastid - genome. By default, the GenBank database was the source of the - sequence. Messages indicate the progress of the job. A log describing - the extraction of each feature is written to EPFCPCG.msg, while the - extracted features themselves are written to EPFCPCG.out. Feature - expressions which could be used by FEATURES to reconstruct the .out - file, are written to EPFCPCG.exp. - - The first step is to retrieve the EPFCPCG entry from GenBank, which is - accomplished by calling FETCH. Next, FEATURES extracts the specified - features from the entry. - - An excerpt from EPFCPCG.msg is shown below, describing the extraction - of the fifth tRNA found in this entry. To create this tRNA, two exons - had to be joined. The qualifier line associated with this feature - indicates that it is a Histidine tRNA with a gtg anticodon.
- - - EPFCPCG:anticodon gtg - complement - ( - join - ( - 70023 70028 - - 1 69 - - ) - - ) - - - /product="transfer RNA-His" - /gene="His-tRNA" - /label=anticodon gtg - /note="anticodon gtg" - //---------------------------------------------- - - - The actual sequence for this feature, as written to EPFCPCG.out, is - written with each exon beginning a new line: - - >EPFCPCG:anticodon gtg - ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat - gcgggttcaattcccgtcg - ttcgcc - - Finally, the expression that was evaluated to create this feature is - written to EPFCPCG.exp: - - >EPFCPCG:anticodon gtg - @M81884:anticodon gtg - - If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES, - EPFCPCG.out would be recreated. - - OPTIONS - 1) FEATURES - choosing f will cause FEATURES to prompt for - a feature to extract. If you wish to extract several types of - features simultaneously (ie. F), you must construct a file listing the - feature keywords. The following example would retrieve both tRNA and - rRNA sequences: - - OBJECTS - tRNA - rRNA - SITES - - The words 'OBJECTS' and 'SITES' must enclose the feature keywords, - and each keyword must be on a separate line. For a rigorous - definition of the input file format, see the GETOB manual pages - (getob.doc). - - In the menu shown above, f was chosen, and the user entered tRNA at - the prompt. Thus tRNA is now displayed on the Features: line. If - features had been specified from a file (suboption F) then the - filename containing the feature keywords would be displayed instead. - A complete list of legal feature keywords can be found in the GenBank - Release notes (gbrel.txt) under the subheading 'Feature Key Names'. - - 2) ENTRIES - n User is prompted for the name of an entry from which the - feature is to be extracted. The name of the entry will appear - on the 'Entries' line of the menu. - - N User is prompted for a filename containing one or more - entry names. Each name must be on a separate line. 
The filename - will be displayed on the 'Entries' menu line. - - a User is prompted for an accession number, which will appear - on the 'Entries' line of the menu. - - A User is prompted for a filename for accession numbers. The filename - will appear on the 'Entries:' line. - - e User is prompted for a GenBank Features expression of the - form accession:location. 'accession' refers to a GenBank - accession number, while 'location' is any legal feature location. - A brief description of location syntax can be found under the - subheading "Feature Location" in the GenBank release notes - (gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table: - Definition" Version 1.04 for a complete definition. - E User is prompted for a filename containing one or more Feature - expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning - with '@' are processed as expressions, and all other lines are - copied to the output file unchanged. - - Examples: - - The tRNA shown above could have been extracted by choosing - suboption e and entering either of the following expressions: - - M81884:complement(join(70023..70028,1..69)) - M81884:anticodon gtg - - In the first example, the feature line from the original entry - is used as the location. In the second example, the feature is - found by its qualifier line, which also appeared in the - original entry. It must be noted that the qualifier line must - be unique from others in the same entry in its first 15 - characters after the = . - - The flaL protein coding region of B.
licheniformis is described - in GenBank entry BLIFALA, accession number M60287 in the - following feature: - - CDS 305..640 - /note="flaD (sin) homologue" - /gene="flaL" - /label=ORF2 - /codon_start=1 - - This feature could be retrieved using any of the following - expressions: - - M60287:305..640 - M60287:ORF2 - M60287:/label=ORF2 - M60287:/gene="flaL" - M60287:/note="flaD (sin) homologue" - - Note that the /label= qualifier is special, in that labels are - specifically intended as unique tags on a feature. For labels, - only the label itself need be specified. Thus, /label=ORF2 is - equivalent to ORF2. For other qualifiers, the qualifier keyword - (eg. /note=) must be included. - - 3) DATABASE (WHERE TO GET IT) - By default, all entries processed will - be automatically retrieved from GenBank using FETCH. Specifying 'u' - (User-defined database subset) makes it possible to extract features - from GenBank subsets created by the user. Usually, retrieval of - features is much faster with a User-defined subset, so if you - frequently work with sets of genes, it is best to retrieve them - en-masse using FETCH, and work with them directly. For example, if - you had retrieved a set of Beta-globin sequences into a file called - 'globin.gen', you could directly extract features from these entries - by specifying 'globin' or 'globin.gen' as your User-defined database. - If the file extension is '.gen', FEATURES will automatically create - temporary files called globin.ano, globin.wrp and globin.ind, - containing annotation, sequence, and an index, respectively. These - files will be read during feature extraction, and then discarded. If - you have already created such files using SPLITDB, simply specify - any of 'globin', 'globin.ano', etc. ie. anything, as long as it does - not have the .gen file extension. - - 'U' rather than 'u' causes ALL entries in the user-defined - database to be subset.
This means that it is unnecessary to - specify entry options (eg -n, -N etc.), as these will be - ignored, if given. - - One consequence of these conventions is that the individual GenBank - divisions can be processed directly. For example, suppose you were only - interested in rodent globins. You could directly access the rodent - division of GenBank by specifying the base name of that file division - (eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In - this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already - exist. Again, this approach is faster, since FEATURES would not have - to find and retrieve the sequences, but can read directly from the - database files. Finally, if you wanted to process all of the entries - in the database division, simply use -U. The user is warned that a - GenBank division is a huge amount of data, and processing every entry - could take a long time. - - 4) WHERE TO SEND IT - By default (a), the output for all entries goes - to a single set of files, whose names are chosen by FEATURES, - depending on the setting of option 2, Entries. If a single name (n) or - accession number (a) has been chosen, that will be used as - the raw filename. For example, if you were processing the entry - WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names - (N), accession numbers (A) or expressions (E) were read from a file, - the raw name of that file would be used eg. cellulase.nam would result - in cellulase.msg and cellulase.out. Finally, if a single expression - is processed (e), then the primary accession number in that - expression will be used for the filenames. In all cases, FEATURES - will tell you the names of the files being written. - - Choosing suboption s, you can specify that the features created for - each entry be sent to separate files. In this case, each file will - have the name of that entry, with the extension .obj. However, all - messages and expressions will still go to a single file.
While this - can be a convenient way of creating separate files when you need them, - this option still has the limitation of writing all features for a - given entry (if there are more than one) to the same file. Also, - successive resolution of features (anything requiring 'getob -r') - will not work with this option. This may be corrected in future - versions. - - - COMMAND LINE EXECUTION - - There are two ways of running FEATURES from the command line. If only one - argument is supplied, that argument is interpreted as an expression, and - the result of that expression (ie. a sequence ) is written to the - standard output. .msg, .out and .exp files are NOT created. For example, - GenBank entry BACFLALA (M60287) contains the following feature: - - CDS 95..271 - /label=LORF- - /codon_start=1 - /translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK - PSASIERSHSINPF" - Any of - - features M60287:LORF- - features M60287:95..271 - features M60287:/label=LORF- - - would write the open reading frame to the standard output: - - atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga - actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta - tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa - agaagtcattcaataaatcctttctga - - This form of FEATURES is provided to make it easy to pipe output to - other programs for further processing. For example - - features M60287:LORF- |ribosome >LORF.protein - - would write the translation of the open reading frame to a file called - LORF.protein. - - The full functionality of the FEATURES can be accessed using arguments on - the command line. In particular, when there are multiple entries to be - processed, or multiple features within entries, it is much faster to - supply FEATURES with lists of entries, feature keys or expressions. 
- Command line options are similar to suboptions in menu items 1-3 above: - - Feature keys: - -f key {feature key} - -F filename {file of feature keys} - - Entries: - -n name {GenBank LOCUS name} - -N filename {file of GenBank LOCUS names} - -a accession {GenBank ACCESSION number} - -A filename {file of GenBank ACCESSION numbers} - -e expression {Feature Table expression} - -E filename {file of Feature Table expressions, each begin- - ning with '@'} - - Databases: - -u filename {GenBank dataset} - -U filename { " " " " " " , - process all entries ie. -nNaAeE options - will be ignored} - -g {GenBank} - - Examples: - - features -f tRNA -n EPFCPCG - - retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out, - and .exp files. - - features -e M60287:LORF- - - would retrieve the same open reading frame as in the earlier example. - - - Since the most time-consuming operation in FEATURES is sequence retrieval, - it is often best to retrieve frequently-used sequences as database - subsets. For example, a set of GenBank entries for chlorophyll a/b binding - protein genes might be stored in a file called CAB.gen. - - features -f CDS -N CAB.nam -u CAB.gen - - would generate the files CAB.msg, CAB.out and CAB.exp containing output - for all CDS features in the entries listed in the file CAB.nam. - - features -E CAB.exp -u CAB.gen - - would re-create the output file CAB.out. - - - - BUGS - FEATURES does no preliminary error checking for syntax of - GenBank expressions prior to their evaluation. Expressions that can - not be evaluated will be flagged by GETOB in the .msg file. - - At present, little checking is done to test for the presence or - correctness of input files. Some errors may cause the program to - crash. - - For User-defined datasets, filename expansion is not performed.
- - FILES - Temporary files: - X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) } - UNRESOLVED.fea UNRESOLVED.out - FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out - - SEE ALSO - grep(1V) fetch getob splitdb - - TRANSPORTATION NOTES - It should be fairly easy to get FEATURES to work even on systems - in which GenBank has not been formatted for the XYLEM package. - This is because FEATURES does not work directly on the database, but - rather retrieves all necessary sequences by calling FETCH. Thus, - statements like 'fetch FEA.nam FEA.gen' could be replaced with any - command that, given a file containing names or accession numbers, - returns a file containing GenBank entries. In principle, you - could even implement this sort of command to retrieve entries from - the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although - such a setup would undoubtedly be quite slow. - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/fetch.doc b/CORE/xylem/fetch.doc deleted file mode 100644 index 9b4b1a6..0000000 --- a/CORE/xylem/fetch.doc +++ /dev/null @@ -1,320 +0,0 @@ - - FETCH.DOC update 24 Feb 96 - - - NAME - fetch - retrieves database entries by name or accession number - - SYNOPSIS - fetch {interactive mode} - fetch [options] namefile [output file] {batch mode} - - DESCRIPTION - fetch retrieves one or more entries from a database. - - Interactive mode: fetch prompts the user to set search parameters, - using an interactive menu: - ___________________________________________________________________ - FETCH - Version 7 Feb 94 - Please cite: Fristensky (1993) Nucl. Acids Res. 
21:5997-6003 - ___________________________________________________________________ - Namefile: - Outfile: - Database: - ------------------------------------------------------------------- - Parameter Description Value - - 1) Name/Acc Name or Accession sequence to get - 2) Namefile Get list of sequences from Namefile - 3) WhatToGet a:annotation s:sequence b:both b - 4) Database g:GenBank p:PIR v:VecBase l:LiMB g - G:GenBank dataset P:PIR dataset - 5) Outfile Send all output to a single file (Outfile) - 6) Files f:Send each entry to a separate file f - ------------------------------------------------------------- - Type number of your choice or 0 to continue: - - After all parameters have been set, type 0 to commence the search. - Messages regarding the progress of the search will be printed. - - (1,2) Which entries to get? - If you want to get a single entry, option 1 lets you type in the - name of that entry, without having to create a namefile. To get - more than one entry, choose option 2, and specify the name of a - file containing sequence names or accession numbers. - - namefile is a file containing one or more sequence names or - accession numbers, each on a separate line. Names and accession - numbers can even be interspersed, in upper or lowercase, and in - any order. For example, the namefile prp.nam might contain - - ; plant pathogenesis related proteins - ; (these are sample comment lines) - ; note that any line containing a semicolon is ignored - x06362 - x05454 - TOBPR1A1 - ; comments can be interspersed with names. - PUMPR13 - tobpr1ar - - Options 1 & 2 are mutually exclusive. Setting one will negate the - other. If option 2 is chosen, the name of the namefile will appear - at the top of the menu. - - (3) WhatToGet - Use this option to specify whether to get annotation, sequence, - or both (default=both). - - (4) Database - Use this option to select the database. (default=GenBank). 
- G and P select user-created database subsets containing GenBank - or PIR entries, respectively. It is assumed that the database - has been split into .ano, .wrp and .ind files using splitdb. - For example, if you had created a database subset called PR1.pir, - splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are - the files actually read by FETCH. When prompted for the name - of the database, simply type "PR1", without a file extension. - (If you do type a file extension, it will be ignored). - - (5, 6) Where to send output - By default, option 6 is set to f, and each entry will be written to - a separate file, where the name of the file is the name of the - entry, followed by a file extension. If a complete entry is - retrieved, the file extension will indicate the type of database - (GenBank: .gen; PIR: .pir, Vecbase: .vec; LiMB: .LiMB). If only - annotation or sequence are retrieved, the file extensions will be - .ano or .wrp, respectively. Using the default, the namefile above - would create the following files: - - PUMPR13.gen - TOBPR1A1.gen - TOBPR1AR.gen - TOBPR1CR.gen - TOBPR1PS.gen - - By choosing option 5, you can specify the name of an output file - for all entries to go to. This filename will appear at the top - of the menu. Obviously, options 5 & 6 are mutually exclusive. - Note that entries retrieved are written in alphabetical order (sorting by - ASCII values), not the order in which they appeared in namefile. - - (Note for remote users only: -f will only work for a single - name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES - specified in 2).) - - Batch mode: - Although it is transparent to the user, all fetch really does - is call getloc, saving the user the trouble of knowing which - database files to retrieve sequences from, or of having to - execute getloc multiple times to retrieve sequences from - different database files.
Thus, the options are identical to those - for getloc: - - -a Write annotation portions of entries only, terminated by '//'. - -s Write sequence data only, in Pearson (.wrp) format. - -f Write each entry to a separate file. - -g GenBank (default) - -e EMBL {not implemented} - -p PIR (NBRF) - -v Vecbase - -l LiMB - -G GenBank_dataset - -P PIR_dataset - - If -f is not specified, outfile must be specified. - - -L force execution of fetch on local host even if - $XYLEM_RHOST is set. See "REMOTE EXECUTION" below - - - PIR_dataset - GenBank_dataset - This can be either a file of PIR entries, a file of GenBank entries, - or a XYLEM dataset created by splitdb. A file of PIR entries must - have the file extension ".pir". A file of GenBank entries must have - the file extension ".gen". A XYLEM dataset contains PIR entries split - among three files by splitdb: annotation (.ano), sequence (.wrp) - and index (.ind). These file extensions must be used! - - When specifying a split dataset, only the base name needs to be - used. For example, given a XYLEM dataset consisting of the files - myset.ano, myset.wrp and myset.ind, the following two commands - are equivalent: - - fetch -P myset something.nam something.pir - fetch -P myset.ano something.nam something.pir - - If the original .pir file had been used, the command would have - been - - fetch -P myset.pir something.nam something.pir - - The ability to work directly with .gen or .pir files is quite - convenient. However, since FETCH needs to work with a split - dataset, FETCH automatically splits .pir or .gen files into .ano, .wrp - and .ind files, which are removed when finished. This requires - extra disk space and execution time, which could be significant - for large datasets. - - EXAMPLES - Batch example: - fetch -f chitinase.nam - will retrieve annotation and sequence for sequences listed in - chitinase.nam from GenBank, writing each entry to a separate file - with the extension .gen.
- - fetch -s -v pbr.nam pbr.wrp - will retrieve sequence data only for the entries listed in pbr.nam, - from VecBase, and write all sequences to a Pearson format file - (ie. readable by fasta) with the name pbr.wrp. - - fetch -G sample sample.nam new.gen - fetch -G sample.ano sample.nam new.gen - Assumes that a set of GenBank entries has been split by splitdb - into sample.ano sample.wrp and sample.ind. The entries listed in - sample.nam are written to new.gen. - - - FILES - Database files: - The directories for database files are specified by the environment - variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB - (LiMB). - - Index files are $GB/gbacc.idx for GenBank (this file is supplied - with each GenBank release), while the other databases - use .ind files generated by splitdb. Split database files MUST - have the following file extensions: .ano {annotation}, .wrp - {sequence} and .ind {index}. Thus, when creating database files - for pir1.dat with splitdb, the output files should be pir1.ano, - pir1.wrp and pir1.ind. - - Temporary files: - NAMEFILE.fetch - PRELIMINARY.fetch - TMP.fetch - FOUND.fetch - FETCHDIR {temporary directory} - - REMOTE EXECUTION - Where the databases can not be stored locally, FETCH can call - FETCH on another system and retrieve the results. To run - FETCH remotely, your .cshrc file should contain the following - lines: - - setenv XYLEM_RHOST remotehostname - setenv XYLEM_USERID remoteuserid - - where remotehostname is the name of the host on which the - databases reside (in XYLEM split format) and remoteuserid - is your userid on the remote system. When run remotely, - your local copy of FETCH will generate the following - commands: - - rcp filename $XYLEM_USERID@$XYLEM_HOST:filename - rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ... 
- rcp $XYLEM_USERID@$XYLEM_HOST:outputfilename outputfilename - rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files - - Because FETCH uses rsh and rcp, your home directory on both - the local and remote systems must have a world-readable - file called .rhosts, containing the names of trusted remote - hosts and your userid on each host. Before trying to get - FETCH to work remotely, make sure that you can rcp and - rsh to the remote host. - - Obviously, remote execution of FETCH implies that FETCH - must already be installed on the remote host. When FETCH - runs another copy of FETCH remotely, it uses the -L option - (findkey -L) to insure that the remote FETCH job executes, - rather than calling yet another FETCH on another host. - - - ---------- Remote execution on more than 1 host ----------- - If more than 1 remote host is available for running FINDKEY - (say, in a clustered environment where many servers mount - a common filesystem) the choice of a host can be determined - by the csh script choosehost, such that execution of - choosehost returns the name of a remote server. To use this - approach, the following script, called 'choosehost' should - be in your bin directory: - - #!/bin/csh - # choosehost - choose a host to use for a remote job. - # This script rotates among servers listed in .rexhosts, - # by choosing the host at the top of the list and moving - # it to the bottom. - - #Rotate the list, putting the current host to the bottom. 
- set HOST = `head -1 $home/.rexhosts` - set JOBID = $$ - tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID - echo $HOST >> /tmp/.rexhosts.$JOBID - /usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts - - # Write out the current host name - echo $HOST - - You must also have a file in your home directory called - .rexhosts, listing remote hosts, such as - - graucho.cc.umanitoba.ca - harpo.cc.umanitoba.ca - chico.cc.umanitoba.ca - zeppo.cc.umanitoba.ca - - Each time choosehost is called, choosehost will rotate the - names in the file. For example, starting with the .rexhosts - as shown, it will move graucho.cc.umanitoba.ca to the bottom - of the file, and write the line 'graucho.cc.umanitoba.ca' - to the standard output. The next time choosehosts is - run, it would write 'harpo.cc.umanitoba.ca', and so on. - - Depending on your local configuration, you may wish to - rewrite choosehosts. All that is really necessary is that - echo `choosehost` should return the name of a valid host. - - Once you have installed choosehost and tested it, you can - get FINDKEY to use choosehost simply by setting - - setenv XYLEM_RHOST choosehost - - in your .cshrc file. - - --------------- Remote filesystems ----------------------- - Finally, an alternative to remote execution is to remotely mount - the file system containing the databases across the network. - This has the advantage of simplicity, and means that the - databases are available for ALL programs on your local - workstation. However, it may still be advantageous to run - FETCH remotely, since that will shift much of the computational - load to another host. - - BUGS - When retrieving entries directly from GenBank, FETCH uses the - Accession Number index file gbacc.idx. In this case, FETCH - can retrieve all entries containing a given accession number. - This capability makes it possible to retrieve an entry using a - secondary accession number. 
However, if more than one entry - shares a secondary accession number, all of those entries will - be retrieved. While this behavior might be a bit of an - annoyance at times, it can also be useful because it alerts - the user to the presence of other, related entries that might - be of interest. - - SEE ALSO - getloc features - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/findkey.doc b/CORE/xylem/findkey.doc deleted file mode 100644 index c3197c7..0000000 --- a/CORE/xylem/findkey.doc +++ /dev/null @@ -1,365 +0,0 @@ - - FINDKEY.DOC update 13 Mar 97 - - - NAME - findkey - finds database entries containing one or more keywords - - SYNOPSIS - findkey - findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile] - findkey [-P PIR_dataset] keywordfile [namefile findfile] - findkey [-G GenBank_dataset] keywordfile [namefile findfile] - - DESCRIPTION - findkey uses the grep family of commands to find lines in database - annotation files containing one or more keywords. Next, identify - is called to create a .nam file, containing the names of entries - containing the keywords, and a .fnd file, containing the actual - lines from each entry containing hits. A PIR or GenBank dataset is - either a file containing one or more GenBank or PIR entries, or - the name of a XYLEM dataset created by splitdb. See FILES below - for a more detailed description. - - INTERACTIVE USE - findkey prompts the user to set search parameters, using an interactive - menu: - - ___________________________________________________________________ - FINDKEY - Version 12 Aug 94 - Please cite: Fristensky (1993) Nucl. Acids Res. 
21:5997-6003 - ___________________________________________________________________ - Keyfile: - Dataset: - ------------------------------------------------------------------- - Parameter Description Value - ------------------------------------------------------------------- - 1) Keyword Keyword to find thionin - 2) Keyfile Get list of keywords from Keyfile - 3) WhereToLook p:PIR v:VecBase p - GenBank - b:bacterial i:invertebrate - m:mamalian e:expressed seq. tag - g:phage l:plant - r:primate n:rna - d:rodent s:synthetic - u:unannotated a:viral - t:vertebrate x:patented - z:STS - G: GenBank dataset P: PIR dataset - ------------------------------------------------------------- - Type number of your choice or 0 to continue: - 0 - Searching /home/psgendb/PIR/pir1.ano... - Sequence names will be written to thionin~pir.nam - Lines containing keyword(s) will be written to thionin~pir.fnd - Searching /home/psgendb/PIR/pir2.ano... - Sequence names will be written to thionin~pir.nam - Lines containing keyword(s) will be written to thionin~pir.fnd - Searching /home/psgendb/PIR/pir3.ano... - Sequence names will be written to thionin~pir.nam - Lines containing keyword(s) will be written to thionin~pir.fnd - - As shown in the example above, the keyword thionin was specified - as the keyword to search for. By default, option 3 is set to p, - and the PIR protein database is searched. Messages describe the - progress of the search. Since PIR is broken up into two divisions - (new and protein) both are searched, but all output is written to - thionin.pir.nam and thionin.pir.fnd - - OPTIONS - (1,2) Which keywords to search for? - If you want to search for a single keyword, option 1 lets you type - the keyword, without having to create a file. To search for more - than one keyword, choose option 2, and specify the name of a - file containing the keywords. 
For example, entries containing - genes for antibiotic resistance might be found using the - following keyword file: - - ampicillin - chloramphenicol - kanamycin - neomycin - tetracycline - - Note: keyword searches are case insensitive. - - As you might expect, it takes longer to search for multiple - keywords than a single keyword. - - Options 1 & 2 are mutually exclusive. Setting one will negate the - other. If option 2 is chosen, the name of the keyword file will - appear at the top of the menu. - - Finally, it is probably not a good idea to search GenBank - entries using very short keywords consisting only of letters. - This is because GenBank entries now include a /translation - field containing the amino acid sequence of each protein - coding sequence. Consequently, 3 or 4 letter keywords - consisting of legal amino acid symbols (eg. CAP, recA) will - turn up fairly often in protein translations. - - (3) WhereToLook - Use this option to specify the database to be searched In the - case of GenBank, only one division at a time may be searched. - User-created database subsets containing PIR (P) or GenBank (G) - entries may also be searched. User-created database subsets - must be in the .ano/.wrp/.ind form created by splitdb. - - OUTPUT - The output filenames take the following form: - - name_ex1.ex2 - - The 'name' part of the filename is either the keyword searched for, - if option 1 was chosen, or the name of the keyword file,if option 2 - obtains. 'ex1' indicates the database division that was searched. For - PIR and VecBase, ex1 is 'pir' and 'vec', respectively. 
For GenBank, - ex1 is as follows: - - bct - bacterial - inv - invertebrate - mam - other mammalian - est - expressed sequence tag - phg - phage - pln - plant (includes fungi) - pri - primate - rna - structural RNAs - rod - rodent - syn - synthetic sequences - sts - sequence tagged sites - una - unannotated (new) sequences - vrl - viral - vrt - other vertebrate - - 'ex2' distinguishes the files containing the names of entries - containing keywords (.nam) and the files containing the lines found - in each entry (.fnd). - - The .nam file can be used directly as a namefile for fetch, getloc, - or getob. - - COMMAND LINE USE - - OPTIONS - p search PIR (default) - P PIR dataset search dbfile, containing PIR entries - v search VecBase - b search Genbank bacterial division - m search Genbank mammalian division - g search Genbank phage division - r search Genbank primate division - d search Genbank rodent division - u search Genbank unannotated division - t search Genbank vertebrate division - i search Genbank invertebrate division - l search Genbank plant division - n search Genbank rna division - s search Genbank synthetic division - a search Genbank viral division - x search Genbank patented division - e search Genbank exp.seq.tag division - z search GenBank STS division - S search GenBank Genom. Survey division - h search GenBank High Thrput. division - G GenBank dataset search dbfile, containing GenBank entries - - L force execution of findkey on local host - even if $XYLEM_RHOST is set. See "REMOTE - EXECUTION" below - - FILES - - keywordfile - contains keywords to search for - - namefile - LOCUS names of hits are written to this file - - findfile - for each hit, a report listing the LOCUS name and the - lines matching the keyword is written to this file. - - If namefile and findfile are not specified on the command line, - filenames will be created as described above for interactive - use. 
- - PIR_dataset - GenBank_dataset - This can be either a file of PIR entries, a file of GenBank entries, - or a XYLEM dataset created by splitdb. A file of PIR entries must - have the file extension ".pir". A file of GenBank entries must have - the file extension ".gen". A XYLEM dataset contains PIR or GenBank entries split - among three files by splitdb: annotation (.ano), sequence (.wrp) - and index (.ind). These file extensions must be used! - - When specifying a split dataset, only the base name needs to be - used. For example, given a XYLEM dataset consisting of the files - myset.ano, myset.wrp and myset.ind, the following two commands - are equivalent: - - findkey -P myset something.kw - findkey -P myset.ano something.kw - - If the original .pir file had been used, the command would have - been - - findkey -P myset.pir something.kw - - The ability to work directly with .gen or .pir files is quite - convenient. However, since FINDKEY needs to work with a split dataset, - FINDKEY automatically splits .pir or .gen files into .ano, .wrp - and .ind files, which are removed when finished. This requires - extra disk space and execution time, which could be significant - for large datasets. - - EXAMPLES - If the list of antibiotics shown above was stored in the file - antibiotic.kw, and option 3 was set to 'b', then the annotation - portion of the GenBank bacterial division would be searched, and - all lines containing any of these keywords would be written to - antibiotic~bac.fnd. The corresponding GenBank entry names would - appear in antibiotic~bac.nam. - - The same keyword file could be used to search other database files. - If VecBase was searched, the output files would be antibiotic~vec.fnd - and antibiotic~vec.nam. These filename conventions make it easy - to search different database divisions, and to keep track of where - data came from. - - Command line examples: - - findkey thionin.kw - - would be equivalent to the interactive example shown above. 
In - this case, the file thionin.kw contains the word 'thionin'. - (Note that since PIR is the default, -p need not be supplied.) - - findkey -b antibiotic.kw drugs.nam drugs.fnd - - would search the GenBank bacterial division for the keywords - contained in antibiotic.kw, and write the output to drugs.nam - and drugs.fnd. - - FILES - Database files: - The directories for database files are specified by the environment - variables $GB (GenBank), $PIR (PIR/NBRF) and $VEC (VecBase). - Annotation (.ano) and index (.ind) files are those generated by splitdb. - - Temporary files: - $jobid.fnd - $jobid.nam - $jobid.grep - - where $jobid is a unique jobid generated by the shell - - REMOTE EXECUTION - Where the databases can not be stored locally, FINDKEY can call - FINDKEY on another system and retrieve the results. To run - FINDKEY remotely, your .cshrc file should contain the following - lines: - - setenv XYLEM_RHOST remotehostname - setenv XYLEM_USERID remoteuserid - - where remotehostname is the name of the host on which the - databases reside (in XYLEM split format) and remoteuserid - is your userid on the remote system. When run remotely, - your local copy of FINDKEY will generate the following - commands: - - rcp filename $XYLEM_USERID@$XYLEM_HOST:filename - rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ... - rcp $XYLEM_USERID@$XYLEM_HOST:outputfilename outputfilename - rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files - - Because FINDKEY uses rsh and rcp, your home directory on both - the local and remote systems must have a world-readable - file called .rhosts, containing the names of trusted remote - hosts and your userid on each host. Before trying to get - FINDKEY to work remotely, make sure that you can rcp and - rsh to the remote host. - - Obviously, remote execution of FINDKEY implies that FINDKEY - must already be installed on the remote host. 
When FINDKEY - runs another copy of FINDKEY remotely, it uses the -L option - (findkey -L) to insure that the remote FINDKEY job executes, - rather than calling yet another FINDKEY on another host. - - ---------- Remote execution on more than 1 host ----------- - If more than 1 remote host is available for running FINDKEY - (say, in a clustered environment where many servers mount - a common filesystem) the choice of a host can be determined - by the csh script choosehost, such that execution of - choosehost returns the name of a remote server. To use this - approach, the following script, called 'choosehost' should - be in your bin directory: - - #!/bin/csh - # choosehost - choose a host to use for a remote job. - # This script rotates among servers listed in .rexhosts, - # by choosing the host at the top of the list and moving - # it to the bottom. - - #Rotate the list, putting the current host to the bottom. - set HOST = `head -1 $home/.rexhosts` - set JOBID = $$ - tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID - echo $HOST >> /tmp/.rexhosts.$JOBID - /usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts - - # Write out the current host name - echo $HOST - - You must also have a file in your home directory called - .rexhosts, listing remote hosts, such as - - graucho.cc.umanitoba.ca - harpo.cc.umanitoba.ca - chico.cc.umanitoba.ca - zeppo.cc.umanitoba.ca - - Each time choosehost is called, choosehost will rotate the - names in the file. For example, starting with the .rexhosts - as shown, it will move graucho.cc.umanitoba.ca to the bottom - of the file, and write the line 'graucho.cc.umanitoba.ca' - to the standard output. The next time choosehosts is - run, it would write 'harpo.cc.umanitoba.ca', and so on. - - Depending on your local configuration, you may wish to - rewrite choosehosts. All that is really necessary is that - echo `choosehost` should return the name of a valid host. 
- - Once you have installed choosehost and tested it, you can - get FINDKEY to use choosehost simply by setting - - setenv XYLEM_RHOST choosehost - - in your .cshrc file. - - --------------- Remote filesystems ----------------------- - Finally, an alternative to remote execution is to remotely mount - the file system containing the databases across the network. - This has the advantage of simplicity, and means that the - databases are available for ALL programs on your local - workstation. However, it may still be advantageous to run - XYLEM remotely, since that will shift much of the computational - load to another host. - - - BUGS - At present, regular expression characters cannot be used for - keyword searches. - - SEE ALSO - grep(1V) identify splitdb - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/getloc.doc b/CORE/xylem/getloc.doc deleted file mode 100644 index f1c1bc1..0000000 --- a/CORE/xylem/getloc.doc +++ /dev/null @@ -1,65 +0,0 @@ - - GETLOC.DOC update 30 May 95 - - - NAME - getloc - retrieve database entries listed in namefile to outfile. - - SYNOPSIS - getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile - - DESCRIPTION - getloc reads a list of names from namefile and recreates - entries by combining the annotation and sequence portions of each - entry from anofile and seqfile. getloc will work most quickly - when the namefile is in alphabetical order, but it will also - work on unsorted lists. The following options affect the output: - - a Write annotation portions of entries only, terminated by '//'. - seqfile is not included on command line. - - s Write sequence data only, in Pearson (.wrp) format. - anofile is not included on commandline. 
- - f Write each entry to a separate file. The filename will - consist of the LOCUS name, followed by .ano for annotation - only, .wrp for sequence only, or gen for complete GenBank - format. - - c namefile contains accession numbers, rather than names - - The following options identify the type of database being read: - - g GenBank (default) - e EMBL - p PIR (NBRF) - v Vecbase - l LiMB - - namefile consists of an alphabetically ordered list of LOCUS names, - each on a separate line. Indfile could be used to create a - namefile by simply editing out some subset of names. (This can also - be done using the Unix comm command.) If the entire indfile was - used, the entire database would be recreated, minus the header - information that might have been present in the original, but - deleted by splitdb. - - NOTE - Getloc automatically expands leading blanks that have been - compressed using splitdb -c. See splitdb.doc for more information. - - SEE ALSO - splitdb, comm(1). - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/getob.doc b/CORE/xylem/getob.doc deleted file mode 100644 index 895bd17..0000000 --- a/CORE/xylem/getob.doc +++ /dev/null @@ -1,327 +0,0 @@ - - GETOB 21 Dec 94 - - - NAME - getob - Get an object from GenBank - - SYNOPSIS - getob [-frcn] infile namefile anofile seqfile indfile message - [outfile] expfile - - DESCRIPTION - getob extracts 'objects' (subsequences) from GenBank entries, using - the features table, and writes them to outfile (.out). A log - describing the construction of each object is written to message - (.msg). 
If -r is not set, a list of expressions that would recreate - the .out file if evaluated by getob -r, is written to expfile (.exp) - - The following options are available: - - f Write each entry to a separate file. The name will consist - of the entry name, and the extension '.obj'. - - r Resolve expressions from namefile into objects. - Expressions take the form: - - @[::]: - - In effect, r makes it possible to use getob to resolve - features that span more than one entry, such as segmented - files. In the first run of the program, features that require - data from outside the entry in which they are defined will be - written to outfile with those externally-defined parts rep- - resented using the '@' notation described above. During a - subsequent run, the outfile from the previous run is used as - namefile. When r is set, all lines not beginning with '@' (ie. - name lines and sequence lines) are simply copied to the new - outfile. When an '@' is encountered, the expression is parsed - into accession number and location. The entry with the - specified accession number is located in indfile, and read from - anofile and seqfile. It is then evaluated, and the result - written to outfile in place of the '@' expression. - - getob can also be used to get specific labeled objects from - a given entry. Examples: - - @k30576:polyprotein - @k30576:/label=polyprotein - @x10345:/product="hsp70" - @j00879:group(1..2200,mutation_37) - - The first two constructs given above are equivalent. Both - will extract the feature called polyprotein. The third - construct shows that any feature label can be specified. If - none is specified, as in the first example, then /label= is - assumed. One limitation, however, is that the label sought - must be unique within the entry in its first 15 characters - including double quotes ("). Otherwise, only the first - matching label expression will be evaluated. 
Finally, the - last example shows that a mutant sequence can be constructed - by first specifying an expression that evaluates to a - sequence (ie. 1..2200) and then a labeled expression that - upon evaluation, uses replace() to modify that sequence. The - usage shown in examples 3 & 4 above represent extensions to - the DDBJ/EMBL/GenBank Features Table Format. - - As touched on briefly above, the r option makes it possible - to construct objects that include recursive references to - other entries (eg. segmented files) by iterative calls to - getob. The 'features' command automates this process. The basic - algorithm is as follows: - - getob infile namefile anofile seqfile indfile ... - - #Pull out all lines containing indirect references - grep '@' outfile > unresolved.grep - - while (unresolved.grep is not empty) - - #extract accession numbers to be retrieved - cut -c2-7 unresolved.grep > unresolved.nam - - #retrieve the sequences into a new file, and create - #a database subset to be used by getob - fetch unresolved.nam new.gen - splitdb new.gen new.ano new.wrp new.ind - - #run getob again to resolve indirect references - getob -r infile outfile new.ano new.wrp new.ind ... - - #Pull out all lines containing indirect references - grep '@' outfile > unresolved.grep - end - - c NAMEFILE contains accession numbers, rather than locus names - - n By default, the qualifier 'codon_start' is used to determine - how many n's, if necessary, must be added to the 5' end of - CDS, mat_peptide, or sig_peptide, to preserve the reading - frame. To turn OFF this feature, -n must be set. -n must be set - for GenBank Releases 67.0 and earlier. - - infile contains commands indicating what data is to be pulled from - each entry. Two types of output may be presented, GenBank or - OBJECTS. These are described below: - - 1) GenBank output - If the word 'GENBANK' is the first line in - infile, a pseudo-GenBank entry will be recreated. 
This option - is only intended for debugging purposes and will probably be - removed in later releases. - - 2) Object format - This option instructs getob to write part or - all of each sequence, along with site annotation, by specifying - feature key names. The syntax for infile is shown below: - - Backus-Naur format: Example: - ---------------------------------------------------------- - OBJECTS OBJECTS - tRNA - { rRNA - . . . SITES - } stem_loop - SITES - { - . . . - } - - In the example above, getob is instructed to extract all tRNA or - rRNA sequences from each entry, and annotate the position of each - stem/loop structure. Note that the SITES coordinates written to the - file tell the positions of those SITES relative to the start of the - object, rather than the original location in the sequence. As above, - each word begins a separate line. - - While the -r option does not use infile, at least a dummy infile - must be included in the command line. This dummy file need only - contain two lines: - - OBJECTS - SITES - - NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in - the input file will have no effect, the word SITES must still be - present after the last feature key. - - - namefile - namefile consists of a list of LOCUS names or accession numbers, - each on a separate line. Names or accession numbers should appear - in the order in which they appear in the database file. Unordered - namefiles will slow the progress of the search. Since only the - first non-blank field of each line in namefile is read, indfile - could be used to create a namefile. If the entire indfile was - used, the entire database file would be processed. A sample - namefile requesting four sequences by LOCUS name is shown below: - - POTPR1A - POTPSTH2 - POTPSTH21 - POTSTHA - - anofile, seqfile, and indfile - The database subset containing GenBank entries must be divided - among annotation, sequence and an index by splitdb. 
- - message - message contains a log describing the parsing of each object. - For annotative purposes, qualifier lines from the object are - included in along with the location expression being parsed. - The beginning of a typical message file is shown below: - - GETOB Version 0.962 14 May 1992 - - POTPR1A:CDS1 - join - ( - 295 603 - - 1011 1355 - - ) - - - /note="pathogenesis-related protein (prp1)" - /codon_start=1 - /translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL - QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED - KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY - LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK" - //---------------------------------------------- - - In the example above, getob was instructed to retrieve all CDS - features from the database subset. The message for the entry - POTPR1A is shown, along with a reconstruction of the location - expression that was evaluated to create the object. In this - case, protien coding sequences from two exons had to be joined - to create the object. - - outfile - outfile contains the actual objects constructed, consisting of - sites found and sequences. 
The beginning of a typical output file - is shown below: - - >POTPR1A:CDS1 - atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag - agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg - aagatttacaaaataagagccctttacttcttcaatctaatccaattcac - aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat - ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc - ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc - gaagataag - ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa - agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca - aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt - gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat - tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg - aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt - atccgttaccgagcctacattcagcctgttgatgcttcaaaatga - - In the example, the CDS from entry POTPR1A has been written in - two chunks, corresponding to the two exon portions of the coding - sequence. Each location retrieved in constructing the object is - written as a separate block of sequence. By comparing message file - to outfile, it is possible to verify the correctness of the - operation. - - Numbers are appended to the sequence names to indicate - which CDS in the entry has been retrieved. Thus, if two CDS - features were present, the second one would be named >POTPR1A:2. - For compatiblility with the FASTA programs of Pearson, the name line - begins with a '>'. - - expfile - The expression evaluated to create this feature is written - to expfile: - - >POTPR1A:CDS1 - @J03679:join(295..603,1011..1355) - - expfile is only created if -r is not set. It is itended as a way - of automating the creation of a feature expression file for use - in generating customized datasets. Expressions in expfile can be - deleted or modified, or new expressions added, to tailor the - dataset to individual needs. 
To generate a dataset from expfile: - - getob -r infile expfile anofile seqfile indfile message outfile - - EXTENSIONS TO THE FEATURE TABLE LANGUAGE - - 1) poly(||,x) - - This operator evaluates an absolute location, literal, or - feature name (ie. any location not containing functional - operators) and writes it x times. The most obvious - application of poly is to create spacers to represent regions - of unknown sequence between sequences that are known. For - example, the restriction map of a 4kb EcoR1 fragment with a - Hind3 site 1000 bp from one end could be represented as follows: - - join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc") - - 2) The following feature keys are recognized by GETOB, although - not included in the language definition. While they will not - appear in GenBank entries, they could be used in user-created - GenBank-format files: - - contig - This feature key is meant to be used to assemble large - sequence segments from smaller segments, possibly using the - poly() operator. - - chromosome - Intended to annotate the complete sequence of a chromosome. This - feature may be constructed by a join of two or more contigs. 
- - Use of these keywords is illustrated in the features table - shown below, which could be used to construct a model of part - of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb: - - contig join(J01619:1..13063,poly("n",7140), - J03939:1..1363,poly("n",14380), - X02306:complement(1..1622),poly("n",14710), - J04423:1..5793,poly("n",22500), - X03722:1..2400,poly("n",123750), - one-of(X05017:complement(1..1854),X05017:1..1854)) - /label=Eco_contig8 - /map=763.4-950.6kb - contig join(V00352:1..2412,poly("n",28800),M15273:1..3409) - /label=Eco_contig9 - /map=972.9-1001.7kb - contig join(X02826:1..1357,poly("n",13540), - J01654:complement(1..2270)) - /label=Eco_contig10 - /map=1016.5-1031.4kb - chromosome join(Eco_contig8,poly("n",22300), - Eco_contig9,poly("n",14800), - Eco_contig10) - /label=Ecoli_chromosome - - NOTES - 1) If the const DEBUG is set to true in the Pascal source code, getob - writes messages to the standard output, indicating the progress of - processing for each entry read in. By default, DEBUG=false. - This feature is solely for debugging purposes and will be removed in - later releases. - - 2) GETOB automatically expands leading blanks that have been - compressed using splitdb -c. See splitdb.doc for more information. - - SEE ALSO - features, splitdb, getloc - The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04 - September 1, 1992 - GenBank Release Notes for Release 79.0. - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. 
diff --git a/CORE/xylem/identify.doc b/CORE/xylem/identify.doc deleted file mode 100644 index 56ced71..0000000 --- a/CORE/xylem/identify.doc +++ /dev/null @@ -1,83 +0,0 @@ - - IDENTIFY update 3 Feb 94 - - - NAME - identify - creates a file of locus names corresponding to lines - found by grep in a GenBank annotation file. - - SYNOPSIS - identify grepfile indfile namefile findfile - - DESCRIPTION - grepfile is created using the Unix grep command to search a .ano - file created by splitgb. For example, to find all lines containing - the word 'chlorophyll' in plant.ano, use - - grep -n -i 'chlorophyll' plant.ano > plant.grep - - In the example shown, the -n option causes each line written to - plant.grep to be preceded by the number of that line in plant.ano. - (The -i option causes grep to ignore case.) Identify can use the - indfile to determine which entry a given numbered line was found - in, and writes the corresponding LOCUS name to namefile. In - addition, all lines found in a given entry are re-written to - findfile without the line numbers, and preceded by the LOCUS name - for that entry. - - EXAMPLES - Suppose you wanted to obtain a list of names for all plant - sequences which code for proteins. The task is complicated by the - fact that many fungal sequences are included in the GenBank plant - file. You could begin by searching plant.ano (containing all - GenBank plant entries) for the word 'Planta': - - grep -n 'Planta' plant.ano > Planta.grep - - However, we want to eliminate all fungal sequences, as well as all - sequences for RNAs other than mRNAs. If we create the file - bad.str containing the keywords - - Mycophyta - tRNA - rRNA - uRNA - - we can then type - - grep -n -f bad.str plant.ano > bad.grep - - bad.grep now contains all lines containing the offending keywords. - We next use identify to find the names of the entries found by - grep. 
- - identify Planta.grep plant.ind Planta.nam Planta.fnd - identify bad.grep plant.ind bad.nam bad.fnd - - Next, we can use the Unix comm command to compare the two .nam - files and produce an output file containing only names which are - present in Planta.nam but not bad.nam: - - comm -23 Planta.nam bad.nam > plants.nam - - The file plants.nam now contains names of either plant cDNA or - genomic sequences which do not code for structural RNAs. - At this point, getloc could be used to create a sub-database containing - only those entries listed in plants.nam. See documentation for - getloc for a more detailed discussion. - - SEE ALSO - grep, fgrep, egrep, ngrep, comm, splitgb, getloc - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/keyfile.template b/CORE/xylem/keyfile.template deleted file mode 100644 index 66ac651..0000000 --- a/CORE/xylem/keyfile.template +++ /dev/null @@ -1,23 +0,0 @@ -;--------------------------------------------------------------------------- -; FINDKEY/GDE Keyword File Instructions -; -; 1. Type in one or more keywords below, -; or -; Place cursor at end of this file and choose 'Include File' in the FILE -; menu to read in a file of keywords. -; -; 2. Choose 'Save Current File' in the File menu -; 3. Quit this window -; -; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT -; OUT THESE COMMENT LINES.
-; -; NOTE: Put each keyword on a separate line -; SAMPLE KEYWORD FILE: -; -; maize -; corn -; Z.mays -; Zea -;--------------------------------------------------------------------------- - diff --git a/CORE/xylem/namefile.template b/CORE/xylem/namefile.template deleted file mode 100644 index cd63482..0000000 --- a/CORE/xylem/namefile.template +++ /dev/null @@ -1,25 +0,0 @@ -;--------------------------------------------------------------------------- -; FETCH/GDE Name/Accession File Instructions -; -; 1. Type in one or more LOCUS names or Accession #'s below, -; or -; Place cursor at end of this file and choose 'Include File' in the FILE -; menu to read in a file of names or accession #'s. -; or -; Copy names or accession #'s from another window and Paste into this window. -; -; 2. Choose 'Save Current File' in the File menu -; 3. Quit this window -; -; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT -; OUT THESE COMMENT LINES. -; -; NOTE: Put each name on a separate line -; SAMPLE NAME/ACCESSION FILE: -; -; X30412 -; PSDRR1 -; PEADRRG -; -;--------------------------------------------------------------------------- - diff --git a/CORE/xylem/names.template b/CORE/xylem/names.template deleted file mode 100644 index e2e4f23..0000000 --- a/CORE/xylem/names.template +++ /dev/null @@ -1,25 +0,0 @@ -;--------------------------------------------------------------------------- -; FEATURES/GDE Name File Instructions -; -; 1. Type in one or more GenBank LOCUS names below, -; or -; Place cursor at end of this file and choose 'Include File' in the FILE -; menu to read in a file of names. -; -; (NOTE: File can not contain accession numbers.) -; -; 2. Choose 'Save Current File' in the File menu -; 3. Quit this window -; -; FEATURES will then extract the appropriate sequences . YOU DON'T NEED TO EDIT -; OUT THESE COMMENT LINES. 
-; -; NOTE: Put each name on a separate line -; SAMPLE NAME FILE: -; -; PEADRRA -; PSDRR1 -; PEADRRG -; -;--------------------------------------------------------------------------- - diff --git a/CORE/xylem/printdoc.doc b/CORE/xylem/printdoc.doc deleted file mode 100644 index 8ca092d..0000000 --- a/CORE/xylem/printdoc.doc +++ /dev/null @@ -1,56 +0,0 @@ - printdoc update 3 Feb 94 - - NAME - printdoc - prints documentation files - - SYNOPSIS - printdoc filename - - DESCRIPTION - printdoc uses the file extension to decide how to print a - documentation file. If necessary, a filter such as pr or nroff - is used to format the file before sending to the appropriate - printer. A list of file extensions recognized by printdoc is - given below. If no file extension is given, or the extension is - not in the list, printdoc assumes .doc. - - .doc - (default) Uses pr to print the text, using the default - settings provided by pr (56 text lines per page plus a 5 line - header and footer). Printing is at 12 cpi, front only. This works - reasonably well for most unformatted documentation files, - provided that the line length doesn't exceed 80 char. This - option assumes that a half-inch left margin is automatically - provided by the printer. - - .tex - Assumes that document is already pre-formatted. Thus, - no headers or footers are provided, and it is assumed that - the top and bottom of pages are padded with blanks or header/ - footer lines as needed. Form-feed characters (^L) may be - included in the text to force page breaks. - - .ps - Assumes file is in PostScript format. Sends it to the - PostScript printer. - - .nroff - Assumes file is formatted for use by nroff, using the - standard macro set (nroff -ms). - - .nroff.me - Assumes file is formatted for use by nroff, using the - e macro set (nroff -me).
- - TRANSPORTATION NOTES - For reasons which should be obvious, this script needs major - rewriting at each site, since the available printers will - be of different types and have different names. - - SEE ALSO - pr, pr(V), xlp, nroff - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - diff --git a/CORE/xylem/prot2nuc.doc b/CORE/xylem/prot2nuc.doc deleted file mode 100644 index 0212a58..0000000 --- a/CORE/xylem/prot2nuc.doc +++ /dev/null @@ -1,123 +0,0 @@ - prot2nuc update 10 Aug 94 - - NAME - prot2nuc - reverse translates protein into nucleic acid - - SYNOPSIS - prot2nuc [-ln -gn] < input > output - - DESCRIPTION - prot2nuc reads a file containing an amino acid sequence - and writes the corresponding reverse translated nucleic acid - sequence, using the standard IUPAC-IUB ambiguity codes to output. - The amino acid sequence may contain internal stop '*' characters. - That is, all legal amino acid characters will be processed. - - -ln print n amino acids/codons per line. (default = 25) - - -gn number the amino acid sequence every n amino acids/codons. - (default = 5) - - If l is not evenly divisible by g, the defaults are used. - - input - If the first line of the file begins with '>' or ';', - input will be read as the standard .wrp (Pearson) format, - such as that produced by getob: - - >name - sequence lines - - - Otherwise, it will be assumed that the file ONLY contains - sequence, and all legal IUPAC/IUB DNA characters will be - read as sequence. - - output - The output begins with a header, listing both the - 1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559 - (1968)], as well as the nucleic acid ambiguity codes [Cornish- - Bowden (1985) Nucl. Acids Res. 13:3021-3030.].
The amino acid - sequence, along with its reverse translation, are then printed on - lines of l amino acids/codons, numbering every g amino acids/codons. - Non-ambiguous nucleotides appear capitalized, while ambiguous - nucleotides are in lowercase. A sample output file appears below: - - PROT2NUC Version 8/10/94 - - IUPAC-IUP AMINO ACID SYMBOLS - [J. Biol. Chem. 243, 3557-3559 (1968)] - - Phe F Leu L Ile I - Met M Val V Ser S - Pro P Thr T Ala A - Tyr Y His H Gln Q - Asn N Lys K Asp D - Glu E Cys C Trp W - Arg R Gly G STOP * - Asx B Glx Z UNKNOWN X - - - IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE - [Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.] - - Symbol Meaning | Symbol Meaning - ------------------------------------+--------------------------------- - G Guanine | k G or T - A Adenine | s G or C - C Cytosine | w A or T - T Thymine | h A or C or T - U Uracil | b G or T or C - r Purine (A or G) | v G or C or A - y Pyrimidine (C or T) | d G or T or A - m A or C | n G or A or T or C - - pI39 - 5 10 15 20 - M E K K S L A A L S F L L L L V L F V A - ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn - AGyTTr TTrAGy TTrTTrTTrTTr TTr - - 25 30 35 40 - Q E I V V T E A N T C E H L A D T Y R G - CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn - TTr AGr - - 45 50 55 60 - V C F T N A S C D D H C K N K A H L I S - GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn - AGy TTr AGy - - 65 70 - G T C H D W K C F C T Q N C - GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy - - - With the Universal Genetic code, ambiguity symbols make it possible - to represent all possible codons for an amino acid using two output - lines. It is important to realize that the ambiguities on each line - can not be combined. For example, CTn and TTr represent all codons for - Leucine. However, attempting to combine them into a single triplet, - yTn, would be incorrect. For example, TTT and TTC are codons for - Phenylalanine, not Leucine. - - FUTURE PLANS - 1. 
It wouldn't be hard to have the output printed as nucleic acid - sequences in Pearson format, so that the output could be read back - into GDE. I don't know why you would want to do this, but it could - be done. - 2. Right now, only the Universal Genetic Code is used, but it should - be possible to read in alternative genetic codes, have prot2nuc - figure out the ambiguity rules (as is already done in ribosome) and - print out the appropriate ambiguous codons. - 3. It might be useful to have each possible codon printed out, rather - than ambiguous codons. This would take up a lot more space and - wouldn't be as pretty. If there's a lot of demand I could do this. - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - diff --git a/CORE/xylem/reform.doc b/CORE/xylem/reform.doc deleted file mode 100644 index add7a38..0000000 --- a/CORE/xylem/reform.doc +++ /dev/null @@ -1,107 +0,0 @@ - reform update 3 Feb 94 - - NAME - reform - reformats multiply-aligned sequences for printing. - - SYNOPSIS - reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}] - or - ralign file parameters | reform [-gpcn] [-sn] [-ln] file - - DESCRIPTION - - g Gaps are to be represented by dashes (-). - p Bases which agree with the consensus are - represented by periods (.). - c Positions at which all sequences agree are - capitalized in the consensus. - n Sequence data is nucleic acid. Protein default - fx Specify input file format, where x is - r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics) - m Input file contains multiline format sequences already aligned, - as opposed to ralign output. This option is obsolete, and is - equivalent to -fp. - ln The output linelength is set to n. - Default is 70. - sn numbering starts with n (default=0) - - file Sequence file as described in ralign docu- - mentation.
reform needs to re-read the - sequence file read by ralign to get the - names of the sequences, which ralign ignores. - This filename is only included for ralign output. - If -m is set, file is ignored, and sequence names - must be read from the input. - - Note that positions in the consensus at which no nucleotide is in the - majority are represented by n's (for nucleic acids) or x's (for proteins), - rather than periods, as in ralign. - - Gaps in the input sequences may be represented by either blanks or dashes. - - INPUT FILE FORMATS - - (a) ralign (default, -fr) - As described in ralign documentation, the input file (which is assumed to - be ralign output) must have each sequence on a single long line. All - characters on a given line will be included in the alignment. All lines - must be exactly the same length. For example, if ralign had read - sequences from a file called 'allcab.seq' and written output to 'allcab.ral', - the following command might be used: - - reform allcab.seq < allcab.ral > allcab.ref - - (b) Pearson (-fp, -m) - Compatible with sequence files used by Pearson's fasta programs as shown: - >name1 - sequence1 - >name2 - sequence2 - ... - >namen - sequencen - - Sequences may run over many lines and line length does not have to be - uniform. However, both dashes ('-') and blanks (' ') will be read in - as gaps in the alignment. A right arrow (>) at the beginning of a line - indicates the name line at the beginning of a new sequence. - - Any line beginning with a semicolon (';') will be considered a comment, - and will be ignored. - - (c) MBCRR-MASE (Intelligenetics) (-fi) - Compatible with .mase files produced by MBCRR's mase and pima programs, - which use the Intelligenetics format as shown: - - ;one or more comment lines - name1 - sequence1 - ;one or more comment lines - name2 - sequence2 - ... - ;one or more comment lines - namen - sequencen - - Sequences may run over many lines and line length does not have to be - uniform.
However, both dashes ('-') and blanks (' ') will be read in - as gaps in the alignment. Each sequence MUST begin with at least one - comment line. When a comment line is encountered, that signals the - beginning of a new sequence. The first line after the comment is read - as the name, and the sequence begins on the next line after that. - - SEE ALSO ralign, mase - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/ribosome.doc b/CORE/xylem/ribosome.doc deleted file mode 100644 index df13855..0000000 --- a/CORE/xylem/ribosome.doc +++ /dev/null @@ -1,84 +0,0 @@ - ribosome update 3 Feb 94 - - NAME - ribosome - translates nucleic acid into protein - - SYNOPSIS - ribosome [-g gcfile] < input > output - - DESCRIPTION - ribosome reads a file of one or more nucleic acid sequences - and writes the corresponding amino acid sequence, in the standard - one letter code, to output. Ribosome begins translating at the - first nucleotide in each input sequence and continues to the end. - If the length of the translated sequence is not divisible by 3, - ribosome pads the final codon with N's and attempts to use ambi- - guity rules to translate the final codon. Based on the genetic - code used, ribosome derives a set of rules to resolve all ambi- - guities that can possibly be resolved. - - -g read in an alternative genetic code from gcfile. If this - option is not specified, ribosome uses the universal - genetic code. - - gcfile - This file specifies an alternative genetic code. An - example is shown below. ribosome reads the first 64 legal - capital letters as amino acids. Consequently, lowercase letters - can be used for annotation purposes, as shown in the example. 
- All non-amino acid characters are ignored. - - sgc2 - yeast mitochondrial genetic code - - second position - first position ------------------------------- third position - (5' end) u c a g (3' end) - ----------------------------------------------------------------- - u F S Y C u - F S Y C c - L S * W a - L S * W g - ----------------------------------------------------------------- - c T P H R u - T P H R c - T P Q R a - T P Q R g - ----------------------------------------------------------------- - a I T N S u - I T N S c - M T K R a - M T K R g - ----------------------------------------------------------------- - g V A D G u - V A D G c - V A E G a - V A E G g - - - input - If the first line of the file begins with '>' or ';', - input will be read as the standard .wrp (Pearson) format, - such as that produced by getob: - - >name - ; one or more comment lines (optional) - sequence lines - - - Otherwise, it will be assumed that the file ONLY contains - sequence, and all legal IUPAC/IUB DNA characters will be - read as sequence. - - SEE ALSO - getob - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/shuffle.doc b/CORE/xylem/shuffle.doc deleted file mode 100644 index 77c69e8..0000000 --- a/CORE/xylem/shuffle.doc +++ /dev/null @@ -1,66 +0,0 @@ - shuffle.doc update 3 Feb 94 - - SYNOPSIS - shuffle -sn [-wn -on] - - DESCRIPTION - Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF - and Waterman MS (1984) On the statistical significance of nucleic - acid similarities. Nucl. Acids Res. 12:215-226. - -sn n is a random integer between 0 and 32767. This number - must be provided for each run. 
- - -wn n is an integer, indicating the width of the window for - random localization. If w exceeds the length of a sequence, - or is negative, the entire sequence is scrambled as a single - window. This is also the case if w is not specified. - - -on n is an integer, indicating the number of nucleotides - overlap between adjacent windows. It should never exceed - the window size. o defaults to 0 if not specified. - - If w and o are specified, overlapping windows of w nucleotides - are shuffled, thus preserving the local characteristic base - composition. Windows overlap by o nucleotides. - If w and o are not specified, each sequence is shuffled globally, - thus preserving the overall base composition, but not the local - variations in comp. - - Any number of sequences may be processed from a single input - file. In Pearson-format files, each new sequence begins with a - '>' comment line, indicating the name and a short description of - the sequence. - - No distinction is made between protein or nucleic acid sequences. - That is, shuffle will read any of the following characters as - sequence: - - T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,- - - where '*' is the result of translating a stop codon, and '-' - is a gap generated during sequence alignment. Lowercase is - also accepted. - - EXAMPLE - A sample output file is shown below. Note that the first two - lines of output are comment lines, listing the version of the - program and the parameters used in the run. - - >SHUFFLE VERSION 11/ 8/93 - >RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3 - >BAZFAZ - Borborigmus azerbi F-actin-zeta gene - ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt - cacccttgggcagtg.....(etc.) - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. 
Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/splitdb.doc b/CORE/xylem/splitdb.doc deleted file mode 100644 index 49e97c2..0000000 --- a/CORE/xylem/splitdb.doc +++ /dev/null @@ -1,141 +0,0 @@ - - SPLITDB update 28 Mar 98 - - - NAME - splitdb - split GenBank files into annotation, sequence, and index - - SYNOPSIS - splitdb [-gepvlct] dbfile anofile seqfile indfile - - DESCRIPTION - Splitdb splits a database (dbfile) among three files: anofile, seqfile - and indfile. Splitdb ignores any header information that might be in the - file and begins processing at the first entry. - - anofile contains the annotation portion of each entry. Entries are - terminated with '//' or '///' (PIR only). Trailing blanks present in - dbfile are omitted in anofile. - - seqfile contains the sequence data for each entry. Each sequence - entry begins with a header line, followed by sequence data on - succeeding lines of 75 characters per line. The header line - includes the header flag character '>' in column 1, followed by the - name, followed by the first 50 characters of the 1st - DEFINITION line. An example is shown below: - - >UNHOR1 - Unicorn horn protein 1, complete cDNA sequence - attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc - ... - - Removal of blanks and numbers from sequence lines makes split - datasets about 8-9% smaller than the original GenBank files. - - indfile is an index which tells the line numbers for each entry in - anofile and seqfile. It is assumed to be in alphabetical order by - name. Each line contains a name and accession number, followed by the - line numbers on which the annotation and sequence data begin in anofile - and seqfile, respectively. Thus the file plants.ind might contain: - - - A15660 TA156608 1 1 - A15671 A15671 33 11 - A15673 A15673 65 25 - A15675 AK156751 97 36 - A15677 BA156770 128 46 - A16780 BA167807 160 57 - A16782 A16782 192 70 - ATHRPRP1C GM905105 225 83 - etc...
- - Note that indfile is a perfectly legitimate .nam file, for use with - programs such as getloc, getob, or comm. - - - The following options identify the type of database being read: - - -g GenBank (default) - -e EMBL - -p PIR (NBRF) - -v Vecbase - -l LiMB - - Other options: - -c Compress 3 or more leading blanks in annotation lines - to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG - is the ASCII character specified by the Pascal const - CRUNCHOFFSET, which is set to 33 ("!") in the current - implementation. For each annotation line read, if the - number of leading blanks is >=3, splitdb sets CRUNCHCHAR - to CRUNCHOFFSET+the number of blanks. Thus, for lines - with 3, 4, or 5 leading blanks, CRUNCHCHAR would be - '$', '%' and '&', respectively. GETLOC and GETOB - automatically expand crunched blanks when CRUNCHFLAG - is encountered on an input line. Empirical observations - indicate that the -c option decreases the size of - GenBank files by about 10%. - - This compression method may fail when the number of - leading blanks exceeds 127-CRUNCHOFFSET. However, - none of the above mentioned databases currently - supports any datafield with anywhere near that number - of leading blanks. - - -t (GenBank only) Append all information in the first - ORGANISM to the end of each line in indfile. For example, - the entry which begins: - - LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996 - DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon - Zoological Gardens) mitochondrial D-loop DNA. - ACCESSION L76759 - NID g1222584 - KEYWORDS D-loop. - SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom, - ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male - DNA. - ORGANISM Mitochondrion Gorilla gorilla gorilla - Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata; - Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla.
- - might be indexed as - - GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla - - This is useful for taxonomic studies, or as a way of making - it easy to create subsets from a single index. Thus, - 'grep gorilla primates.ind' would print all lines in the - file that contained the word gorilla. The output from - this command could be used as a .nam file for extracting - just gorilla sequences from a larger dataset using - fetch. - - - NOTES - 1. Header lines that aren't part of entries are automatically - stripped out during processing. For example, in a file containing - GenBank entries, all lines up to the first occurrence of 'LOCUS' - starting in column 1, are ignored. Similarly for PIR, processing - begins on the first line containing 'ENTRY' beginning in column 1. - 2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996, - have accession numbers of 8 characters, rather than 6. Previously - assigned accession numbers will remain at 6 characters. Splitdb has - been updated to write all accession numbers to the .ind file, left - justified in a field of 8 characters, in columns 14-21 of the .ind - file. - - SEE ALSO - getloc, getob, comm(1) (Unix command). - - AUTHOR - Dr. Brian Fristensky - Dept. of Plant Science - University of Manitoba - Winnipeg, MB Canada R3T 2N2 - Phone: 204-474-6085 - FAX: 204-261-5732 - frist@cc.umanitoba.ca - - REFERENCE - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. diff --git a/CORE/xylem/xylem.doc b/CORE/xylem/xylem.doc deleted file mode 100644 index e8bf2cd..0000000 --- a/CORE/xylem/xylem.doc +++ /dev/null @@ -1,125 +0,0 @@ - - - XYLEM.DOC update 10 Aug 1994 - - XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES - Brian Fristensky, University of Manitoba - - Fristensky, B. (1993) Feature expressions: creating and manipulating - sequence datasets. Nucleic Acids Research 21:5997-6003. 
- - SPLITDB - Splits files containing one or more GenBank entries into - annotation, sequence, and index files. Indexfiles can also serve as - namefiles for GETLOC. Sequence files are in the format required for - use with the Pearson programs (FASTA,LFASTA etc.). - - GETLOC - Reads a file containing LOCUS names (namefile) and - retrieves either annotation, sequence, or both from a split - database or database subset created by SPLITDB. - - FETCH - A c-shell script that provides a convenient menu-driven - front end for retrieval of database entries using GETLOC. - - FINDKEY - A c-shell script that provides a convenient menu-driven - front end for keyword searches of database annotation files, - using IDENTIFY. - - IDENTIFY- Given line-numbered output from grep, IDENTIFY uses the - index file to determine which entries contained the keywords - searched for by grep. It then produces a namefile for use by - GETLOC. Namefiles can serve as logical databases, and utilities - such as the Unix comm command can perform logical operations on - these namefiles to produce database subsets. - - FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS - etc.) from each of the named entries, using the new - DDBJ/EMBL/GenBank International Features Table Format. A future - version will also allow the annotation of sites within objects that - are extracted. - - DBSTAT - Calculates amino acid frequencies in a protein database. - - RIBOSOME - Given a file of one or more nucleic acids (eg. output - from GETOB) , RIBOSOME translates them into protein, using either - the universal genetic code or an alternative genetic code supplied - by the user. All ambiguities that can be resolved are translated. - - PROT2NUC - reverse translates a sequence from protein to nucleic - acid, using IUPAC-IUB ambiguity codes. - - SHUFFLE - Given a random seed, shuffles each sequence in a Pearson- - format (.wrp) file. 
Shuffling is done locally in overlapping windows - across the length of a given sequence. The window size and overlap - length can be specified by the user. - - REFORM - Reformats multiply aligned nucleic acid or protein - sequences for publication. Output for M. Waterman's RALIGN - program, or the MBCRR MASE editor, can be directly used as input. - A variety of options are available for representing gaps, consensus - sequences and other features. - - Fristensky (Cornell) Sequence Analysis Package - General purpose - sequence analysis package written in Standard Pascal. Features - include: sequence numbering, formatting, & translation, restriction - site searches & mapping, matrix similarity searches, TESTCODE - analysis, base composition analysis. All programs are interactive - and read free-format, BIONET, and GenBank files. - - - - - - - - XYLEM DATABASE TOOLS - - - - ---------- - | .gen | getloc - |----------|<-------------------------- - | GenBank | | - ---------- | - | | - | splitgb | - /|\ | - / | \ | - / | \ | - / | \ | - / | \ | - / | \ | - v v v | - ---------- ---------- ---------- | - | .ano | | .wrp | | .ind | | - |----------| |----------| |----------| | - |annotation| | sequence | | index | | - ---------- ---------- ---------- | - | \ | / | - | \ | / | - | \ | / | - | \ | / | - grep -n | \ | / | - | \ | / | - | | | - | | -------------------------------+ - | ^ | - v | getob | - ---------- ---------- v - | .grep | identify | .nam | ---------- - |----------| --------->|----------| | .wrp | - | numbered | | LOCUS | ---------- - |file lines| ---------- | eg. mRNA | - ---------- | ^ | tRNA | - | | | rRNA | - | | | CDS | - --comm-- ---------- - (logical operations on - sets of names) - - Dr. Brian Fristensky - Dept. 
of Plant Science - University of Manitoba - Winnipeg, MB R3T 2N2 CANADA - 204-474-6085 - frist@cc.umanitoba.ca - diff --git a/HGL_SRC/Alloc.o b/HGL_SRC/Alloc.o deleted file mode 100755 index 0269c43..0000000 Binary files a/HGL_SRC/Alloc.o and /dev/null differ diff --git a/HGL_SRC/Consto01mask b/HGL_SRC/Consto01mask deleted file mode 100755 index 16b4c92..0000000 Binary files a/HGL_SRC/Consto01mask and /dev/null differ diff --git a/HGL_SRC/DotPlotTool b/HGL_SRC/DotPlotTool deleted file mode 100755 index b0dfb05..0000000 Binary files a/HGL_SRC/DotPlotTool and /dev/null differ diff --git a/HGL_SRC/HGLfuncs.o b/HGL_SRC/HGLfuncs.o deleted file mode 100755 index 05f85b1..0000000 Binary files a/HGL_SRC/HGLfuncs.o and /dev/null differ diff --git a/HGL_SRC/MAP_ChooseFile.o b/HGL_SRC/MAP_ChooseFile.o deleted file mode 100755 index 2bbac68..0000000 Binary files a/HGL_SRC/MAP_ChooseFile.o and /dev/null differ diff --git a/HGL_SRC/MakeCons b/HGL_SRC/MakeCons deleted file mode 100755 index a1777f5..0000000 Binary files a/HGL_SRC/MakeCons and /dev/null differ diff --git a/HGL_SRC/Makefile b/HGL_SRC/Makefile index 95a04e1..9632d10 100755 --- a/HGL_SRC/Makefile +++ b/HGL_SRC/Makefile @@ -1,10 +1,10 @@ CC = cc -#FLAGS = -g -OPENWINHOME = /usr/openwin +FLAGS = -m32 +OPENWINHOME = ../usr MFILE = -INCDIR = -I$(OPENWINHOME)/include -LIBDIR = -L$(OPENWINHOME)/lib +INCDIR = -I/usr/include/xview +LIBDIR = -L/usr/lib32 LIBS = -lxview -lolgx -lX11 libs.o = Alloc.o HGLfuncs.o diff --git a/HGL_SRC/PrintStrat b/HGL_SRC/PrintStrat deleted file mode 100755 index 0087fa7..0000000 Binary files a/HGL_SRC/PrintStrat and /dev/null differ diff --git a/HGL_SRC/Translate b/HGL_SRC/Translate deleted file mode 100755 index 91e68fc..0000000 Binary files a/HGL_SRC/Translate and /dev/null differ diff --git a/HGL_SRC/heapsortHGL b/HGL_SRC/heapsortHGL deleted file mode 100755 index b692971..0000000 Binary files a/HGL_SRC/heapsortHGL and /dev/null differ diff --git a/HGL_SRC/install.csh b/HGL_SRC/install.csh 
deleted file mode 100755 index eee9c3d..0000000 --- a/HGL_SRC/install.csh +++ /dev/null @@ -1,5 +0,0 @@ -#/bin/csh -make all -cp Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool ../bin -rm Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool -rm *.o diff --git a/HGL_SRC/mapview b/HGL_SRC/mapview deleted file mode 100755 index 52453f6..0000000 Binary files a/HGL_SRC/mapview and /dev/null differ diff --git a/ZUKER/rfd.inc b/ZUKER/rfd.inc index bab54f7..691f11f 100755 --- a/ZUKER/rfd.inc +++ b/ZUKER/rfd.inc @@ -1,33 +1,33 @@ implicit integer (a-z) - parameter (maxn=1500,maxn2=3000) - parameter (fldmax=maxn2) + +c parameter (maxn=625,fldmax=2*maxn) + parameter (maxn=1500,maxn2=3000) + parameter (fldmax=maxn2) parameter (infinity=16000,sortmax=30000) parameter (mxbits=(maxn*(maxn+1)+31)/32) parameter (maxtloops=40) parameter (maxsiz=10000) - integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn) + integer*2 vst(maxn*maxn),wst(maxn*maxn) integer poppen(4),maxpen real prelog - - dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2), - . work1(maxn2,0:2),work2(maxn2), + dimension newnum(maxsiz),hstnum(fldmax),force(fldmax), + . numseq(fldmax), work(fldmax,0:2), . stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30) dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2) -c common /main/ newnum,hstnum,force,work1,work2, - common /main/ newnum,hstnum,force,work1,work2, - . stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n, - . numseq,poppen,prelog,maxpen,vst,wst1,wst2 + common /main/ vst,wst,newnum,hstnum,force,numseq,work,stack,tstk, + . 
dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog character*1 seq(maxsiz) c character*5 inbuf character*10 progtitle character*30 seqlab common /seq/ seq,seqlab + data progtitle/'crna'/ + dimension list(100,4) common /list/ list,listsz - common /nm/ vmin - data progtitle/'lrna'/ + common /nm/ n,vmin dimension basepr(maxn) common /traceback/ basepr @@ -40,21 +40,3 @@ c character*5 inbuf integer*2 tloop(maxtloops,2),numoftloops common/tloops/tloop,numoftloops - - - - - - - - - - - - - - - - - -