clean: repo

This commit is contained in:
kuoi 2023-04-12 00:08:58 +08:00
parent a80e729e21
commit cd056bb91b
63 changed files with 17 additions and 6147 deletions

View File

@ -1,761 +0,0 @@
menu:File
item:test cmask output
itemmethod: kedit in1
in:in1
informat:colormask
item:New sequence <meta N>
itemmethod:echo "$Type$Name" > out1
itemmeta:n
itemhelp:new_sequence.help
arg:Name
argtype:text
arglabel:New Sequence name?
argtext:New
arg:Type
argtype:choice_list
arglabel:Type?
argchoice:DNA/RNA:#
argchoice:Amino Acid:%
argchoice:Text:\"
argchoice:Mask:@
out:out1
outformat:flat
item:Import Foreign Format
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
itemhelp:readseq.help
arg:INPUTFILE
argtype:text
arglabel:Name of foreign file?
out:OUTPUTFILE
outformat:genbank
item:Export Foreign Format
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
itemhelp:readseq.help
arg:FORMAT
argtype:choice_list
argchoice:FASTA:8
argchoice:NEXUS:17
argchoice:Phylip v3.3:12
argchoice:IG/Stanford:1
argchoice:GenBank:2
argchoice:NBRF:3
argchoice:EMBL:4
argchoice:GCG:5
argchoice:DNA Strider:6
argchoice:Fitch:7
argchoice:Pearson:8
argchoice:Zuker:9
argchoice:Olsen:10
argchoice:Phylip v3.2:11
argchoice:Phylip v3.3:12
argchoice:Plain text:13
arg:OUTPUTFILE
argtype:text
arglabel:Save as?
in:INPUTFILE
informat:genbank
item:Save Selection
itemmethod: cat $SAVE_FUNC > $Name
itemhelp:save_selection.help
arg:SAVE_FUNC
argtype:chooser
arglabel:File format
argchoice:Flat:in1
argchoice:Genbank:in2
argchoice:GDE/HGL:in3
arg:Name
argtype:text
arglabel:File name?
in:in1
informat:flat
in:in2
informat:genbank
in:in3
informat:gde
item:Print Selection
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
itemhelp:print_alignment.help
arg:SCALE
argtype:slider
arglabel:Reduce printout by?
argmin:1
argmax:20
argvalue:1
arg:CMD
argtype:chooser
argchoice:Lpr:lpr
argchoice:Enscript Gaudy:enscript -G -q
argchoice:Enscript Two column:enscript -2rG
arg:PRINTER
argtype:text
arglabel:Which printer?
argtext:lp
in:in1
informat:gde
insave:
menu:Edit
item:Sort
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
itemhelp:heapsortHGL.help
arg:PRIM_KEY
argtype:choice_list
argchoice:Group:group-ID
argchoice:type:type
argchoice:name:name
argchoice:Sequence ID:sequence-ID
argchoice:creator:creator
argchoice:offset:offset
arglabel:Primary sort field?
arg:SEC_KEY
argtype:choice_list
argchoice:None:
argchoice:Group:group-ID
argchoice:type:type
argchoice:name:name
argchoice:Sequence ID:sequence-ID
argchoice:creator:creator
argchoice:offset:offset
arglabel:Secondary sort field?
in:in1
informat:gde
insave:
item:extract
itemmethod:(gde in1;/bin/rm -f in1)&
in:in1
informat:gde
inmask:
insave:
menu:DNA/RNA
item:Translate...
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
arg:FRAME
argtype:chooser
arglabel:Which reading frame?
argchoice:First:1
argchoice:Second:2
argchoice:Third:3
argchoice:All six:6
arg:MNFRM
arglabel:Minimum length of AA sequence to translate?
argtype:slider
argmin:0
argmax:100
argvalue:20
arg:LTRCODE
argtype:chooser
arglabel:Translate to:
argchoice:Single letter codes:
argchoice:Triple letter codes:-3
arg:TBL
arglabel:Codon table?
argtype:chooser
argchoice:universal:1
argchoice:mycoplasma:2
argchoice:yeast:3
argchoice:Vert. mito.:4
in:in1
informat:gde
out:out1
outformat:gde
item:Dot plot
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
itemhelp:DotPlotTool.help
in:in1
informat:gde
insave:
item:Clustal alignment
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
itemhelp:clustal_help
arg:KTUP
argtype:slider
arglabel:K-tuple size for pairwise search
argmin:1
argmax:10
argvalue:2
arg:WIN
argtype:slider
arglabel:Window size
argmin:1
argmax:10
argvalue:4
arg:Trans
argtype:chooser
arglabel:Transitions weighted?
argchoice:Yes:/TRANSIT
argchoice:No:
arg:FIXED
argtype:slider
arglabel:Fixed gap penalty
argmin:1
argmax:100
argvalue:10
arg:FLOAT
arglabel:Floating gap penalty
argtype:slider
argmin:1
argmax:100
argvalue:10
arg:REPORT
argtype:chooser
arglabel:View assembly report?
argchoice:No:
argchoice:Yes:kedit in1.rpt&
in:in1
informat:flat
insave:
item:Variable Positions
itemmethod:varpos $REV < in1 > out1
arg:REV
argtype:chooser
arglabel:Highlight (darken)
argchoice:Conserved positions:
argchoice:variable positions:-rev
in:in1
informat:flat
out:out1
outformat:colormask
item:Phrap
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
in:in1
informat:genbank
out:out1
outformat:genbank
item:SNAP
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
in:in1
informat:flat
out:out1
outformat:text
item:Find all <meta-f>
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
itemhelp:findall.help
itemmeta:f
arg:SEARCH
argtype:text
arglabel:Search String
arg:PRCNT
argtype:slider
arglabel:Percent mismatch
argmin:0
argmax:75
argvalue:10
arg:CASE
argtype:chooser
arglabel:Case
argchoice:Upper equals lower:
argchoice:Upper not equal lower:-case
arg:UT
argtype:chooser
arglabel:U equal T?
argchoice:Yes:-u=t
argchoice:No:
argvalue:0
arg:MAT
arglabel:Match color
argtype:choice_list
argchoice:yellow:1
argchoice:violet:2
argchoice:red:3
argchoice:aqua:4
argchoice:green:5
argchoice:blue:6
argchoice:grey:11
argchoice:black:8
argvalue:2
arg:MIS
argtype:choice_list
arglabel:Mismatch color
argchoice:yellow:1
argchoice:violet:2
argchoice:red:3
argchoice:aqua:4
argchoice:green:5
argchoice:blue:6
argchoice:grey:11
argchoice:black:8
argvalue:7
in:in1
informat:flat
out:out1
outformat:colormask
item:Sequence Consensus
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
itemhelp:MakeCons.help
arg:METHOD
arglabel:Method
argtype:chooser
argchoice:IUPAC:-iupac
argchoice:Majority:-majority $PERCENT
arg:MASK
argtype:chooser
arglabel:Create a new:
argchoice:Sequence:
argchoice:Selection Mask: | Consto01mask
arg:PERCENT
arglabel:Minimum Percentage for Majority
argtype:slider
argmin:50
argmax:100
argvalue:75
in:in1
informat:gde
out:out1
outformat:gde
#Menu for DNA/RNA
item:blastn
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -r $MATCH -q $MMSCORE > in1.tmp; kedit in1.tmp; rm in1*)&
in:in1
informat:flat
insave:
arg:BLASTDBDNA
argtype:choice_list
arglabel:Which Database
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:4
argmax:18
argvalue:12
arg:MATCH
argtype:slider
arglabel:Match Score
argmin:1
argmax:10
argvalue:5
arg:MMSCORE
argtype:slider
arglabel:Mismatch Score
argmin:-10
argmax:-1
argvalue:-5
item:blastx
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:1
argmax:5
argvalue:3
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:CODE
argtype:choice_list
arglabel:Genetic Code
argchoice:Standard or Universal:0
argchoice:Vertebrate Mitochondrial:1
argchoice:Yeast Mitochondrial:2
argchoice:Mold Mitochondrial and Mycoplasma:3
argchoice:Invertebrate Mitochondrial:4
argchoice:Ciliate Macronuclear:5
argchoice:Protozoan Mitochondrial:6
argchoice:Plant Mitochondrial:7
argchoice:Echinoderm Mitochondrial:8
item:------------------------
item:Add a new DNA blast db
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
arg:sourcefile
argtype:text
arglabel: enter the file name
arg:menuname
argtype:text
arglabel: enter the name of the DB
menu:seq. datasets
item:-------------
item:add a new dataset
itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
arg:name
argtype:text
arglabel:Enter the dataset name ?
arg:file
argtype:text
arglabel:Enter the dataset file (in FASTA) ?
#Menu for Protein
menu:protein
item:blastp
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30 .; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:1
argmax:5
argvalue:3
item:tblastn
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:4
argmax:18
argvalue:12
arg:CODE
argtype:choice_list
arglabel:Genetic Code
argchoice:Standard or Universal:0
argchoice:Vertebrate Mitochondrial:1
argchoice:Yeast Mitochondrial:2
argchoice:Mold Mitochondrial and Mycoplasma:3
argchoice:Invertebrate Mitochondrial:4
argchoice:Ciliate Macronuclear:5
argchoice:Protozoan Mitochondrial:6
argchoice:Plant Mitochondrial:7
argchoice:Echinoderm Mitochondrial:8
item:Map View
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
itemhelp:mapview.help
in:in1
informat:gde
insave:
arg:PBL
arglabel:Pixel Between Lines
argtype:slider
argvalue:10
argmin:1
argmax:15
arg:NPP
arglabel:Nucleotides Per Pixel
argtype:slider
argvalue:1
argmin:1
argmax:20
arg:LWIDTH
arglabel:Line Thickness
argtype:slider
argvalue:2
argmin:1
argmax:5
item:--------------------------
item:Add a new protein blast db
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
arg:sourcefile
argtype:text
arglabel: Enter the file (in FASTA)
arg:menuname
argtype:text
arglabel: Enter the name of the DB
menu:Phylogeny
item:Phylip help
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
arg:FILE
argtype:choice_list
arglabel:Which program?
argchoice:clique:clique.html
argchoice:consense:consense.html
argchoice:contchar:contchar.html
argchoice:contml:contml.html
argchoice:contrast:contrast.html
argchoice:discrete:discrete.html
argchoice:distance:distance.html
argchoice:dnaboot:dnaboot.html
argchoice:dnacomp:dnacomp.html
argchoice:dnadist:dnadist.html
argchoice:dnainvar:dnainvar.html
argchoice:dnaml:dnaml.html
argchoice:dnamlk:dnamlk.html
argchoice:dnamove:dnamove.html
argchoice:dnapars:dnapars.html
argchoice:dnapenny:dnapenny.html
argchoice:dollop:dollop.html
argchoice:dolmove:dolmove.html
argchoice:dolpenny:dolpenny.html
argchoice:draw:draw.html
argchoice:drawgram:drawgram.html
argchoice:drawtree:drawtree.html
argchoice:factor:factor.html
argchoice:fitch:fitch.html
argchoice:gendist:gendist.html
argchoice:kitsch:kitsch.html
argchoice:main:main.html
argchoice:mix:mix.html
argchoice:move:move.html
argchoice:neighbor:neighbor.html
argchoice:penny:penny.html
argchoice:protpars:protpars.html
argchoice:read.me.general:read.me.general.html
argchoice:restml:restml.html
argchoice:seqboot:seqboot.html
argchoice:sequence:sequence.html
item:Phylip 3.5
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
arg:PROGRAM
argtype:choice_list
arglabel:Which program to run?
argchoice:DNAPARS:dnapars
argchoice:DNABOOT:dnaboot
argchoice:DNAPENNY:dnapenny
argchoice:DNAML:dnaml
argchoice:DNAMLK:dnamlk
argchoice:DNACOMP:dnacomp
argchoice:DNAMOVE:dnamove
argchoice:DNAINVAR:dnainvar
argchoice:PROTPARS:protpars
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
item:Phylip DNA Distance methods
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
arg:EXPLAIN
argtype:text
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE
arg:PROGRAM
arglabel:Which method?
argtype:chooser
argchoice:DNADIST+NEIGHBOR:
argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
arg:PROG
arglabel:Run ?
argtype:chooser
argchoice:Run without Bootstrap:
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
arg:DNA
argtype:text
arglabel:Name of DNADIST outfile?
arg:NEI
argtype:text
arglabel:Name of NEIGHBOR outfile?
arg:TREE
argtype:text
arglabel:Name of TREEFILE ?
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
item:Phylip PROTEIN Distance methods
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
arg:PROGRAM
arglabel:Which method?
argtype:chooser
argchoice:PROTDIST+NEIGHBOR:
argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
arg:PROG
arglabel:Which method?
argtype:chooser
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
argchoice:No Bootstrap:
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
menu:On-Line Res.
item:GDE for Linux resources at Bioafrica.net
itemmethod:netscape http://www.bioafrica.net &
item:-------------------------
item:add a new website
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
arg:name
argtype:text
arglabel:Enter the site name
arg:url
argtype:text
arglabel:Enter the URL (including http://)

View File

@ -1,761 +0,0 @@
menu:File
item:test cmask output
itemmethod: kedit in1
in:in1
informat:colormask
item:New sequence <meta N>
itemmethod:echo "$Type$Name" > out1
itemmeta:n
itemhelp:new_sequence.help
arg:Name
argtype:text
arglabel:New Sequence name?
argtext:New
arg:Type
argtype:choice_list
arglabel:Type?
argchoice:DNA/RNA:#
argchoice:Amino Acid:%
argchoice:Text:\"
argchoice:Mask:@
out:out1
outformat:flat
item:Import Foreign Format
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
itemhelp:readseq.help
arg:INPUTFILE
argtype:text
arglabel:Name of foreign file?
out:OUTPUTFILE
outformat:genbank
item:Export Foreign Format
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
itemhelp:readseq.help
arg:FORMAT
argtype:choice_list
argchoice:FASTA:8
argchoice:NEXUS:17
argchoice:Phylip v3.3:12
argchoice:IG/Stanford:1
argchoice:GenBank:2
argchoice:NBRF:3
argchoice:EMBL:4
argchoice:GCG:5
argchoice:DNA Strider:6
argchoice:Fitch:7
argchoice:Pearson:8
argchoice:Zuker:9
argchoice:Olsen:10
argchoice:Phylip v3.2:11
argchoice:Phylip v3.3:12
argchoice:Plain text:13
arg:OUTPUTFILE
argtype:text
arglabel:Save as?
in:INPUTFILE
informat:genbank
item:Save Selection
itemmethod: cat $SAVE_FUNC > $Name
itemhelp:save_selection.help
arg:SAVE_FUNC
argtype:chooser
arglabel:File format
argchoice:Flat:in1
argchoice:Genbank:in2
argchoice:GDE/HGL:in3
arg:Name
argtype:text
arglabel:File name?
in:in1
informat:flat
in:in2
informat:genbank
in:in3
informat:gde
item:Print Selection
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
itemhelp:print_alignment.help
arg:SCALE
argtype:slider
arglabel:Reduce printout by?
argmin:1
argmax:20
argvalue:1
arg:CMD
argtype:chooser
argchoice:Lpr:lpr
argchoice:Enscript Gaudy:enscript -G -q
argchoice:Enscript Two column:enscript -2rG
arg:PRINTER
argtype:text
arglabel:Which printer?
argtext:lp
in:in1
informat:gde
insave:
menu:Edit
item:Sort
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
itemhelp:heapsortHGL.help
arg:PRIM_KEY
argtype:choice_list
argchoice:Group:group-ID
argchoice:type:type
argchoice:name:name
argchoice:Sequence ID:sequence-ID
argchoice:creator:creator
argchoice:offset:offset
arglabel:Primary sort field?
arg:SEC_KEY
argtype:choice_list
argchoice:None:
argchoice:Group:group-ID
argchoice:type:type
argchoice:name:name
argchoice:Sequence ID:sequence-ID
argchoice:creator:creator
argchoice:offset:offset
arglabel:Secondary sort field?
in:in1
informat:gde
insave:
item:extract
itemmethod:(gde in1;/bin/rm -f in1)&
in:in1
informat:gde
inmask:
insave:
menu:DNA/RNA
item:Translate...
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
arg:FRAME
argtype:chooser
arglabel:Which reading frame?
argchoice:First:1
argchoice:Second:2
argchoice:Third:3
argchoice:All six:6
arg:MNFRM
arglabel:Minimum length of AA sequence to translate?
argtype:slider
argmin:0
argmax:100
argvalue:20
arg:LTRCODE
argtype:chooser
arglabel:Translate to:
argchoice:Single letter codes:
argchoice:Triple letter codes:-3
arg:TBL
arglabel:Codon table?
argtype:chooser
argchoice:universal:1
argchoice:mycoplasma:2
argchoice:yeast:3
argchoice:Vert. mito.:4
in:in1
informat:gde
out:out1
outformat:gde
item:Dot plot
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
itemhelp:DotPlotTool.help
in:in1
informat:gde
insave:
item:Clustal alignment
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
itemhelp:clustal_help
arg:KTUP
argtype:slider
arglabel:K-tuple size for pairwise search
argmin:1
argmax:10
argvalue:2
arg:WIN
argtype:slider
arglabel:Window size
argmin:1
argmax:10
argvalue:4
arg:Trans
argtype:chooser
arglabel:Transitions weighted?
argchoice:Yes:/TRANSIT
argchoice:No:
arg:FIXED
argtype:slider
arglabel:Fixed gap penalty
argmin:1
argmax:100
argvalue:10
arg:FLOAT
arglabel:Floating gap penalty
argtype:slider
argmin:1
argmax:100
argvalue:10
arg:REPORT
argtype:chooser
arglabel:View assembly report?
argchoice:No:
argchoice:Yes:kedit in1.rpt&
in:in1
informat:flat
insave:
item:Variable Positions
itemmethod:varpos $REV < in1 > out1
arg:REV
argtype:chooser
arglabel:Highlight (darken)
argchoice:Conserved positions:
argchoice:variable positions:-rev
in:in1
informat:flat
out:out1
outformat:colormask
item:Phrap
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
in:in1
informat:genbank
out:out1
outformat:genbank
item:SNAP
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
in:in1
informat:flat
out:out1
outformat:text
item:Find all <meta-f>
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
itemhelp:findall.help
itemmeta:f
arg:SEARCH
argtype:text
arglabel:Search String
arg:PRCNT
argtype:slider
arglabel:Percent mismatch
argmin:0
argmax:75
argvalue:10
arg:CASE
argtype:chooser
arglabel:Case
argchoice:Upper equals lower:
argchoice:Upper not equal lower:-case
arg:UT
argtype:chooser
arglabel:U equal T?
argchoice:Yes:-u=t
argchoice:No:
argvalue:0
arg:MAT
arglabel:Match color
argtype:choice_list
argchoice:yellow:1
argchoice:violet:2
argchoice:red:3
argchoice:aqua:4
argchoice:green:5
argchoice:blue:6
argchoice:grey:11
argchoice:black:8
argvalue:2
arg:MIS
argtype:choice_list
arglabel:Mismatch color
argchoice:yellow:1
argchoice:violet:2
argchoice:red:3
argchoice:aqua:4
argchoice:green:5
argchoice:blue:6
argchoice:grey:11
argchoice:black:8
argvalue:7
in:in1
informat:flat
out:out1
outformat:colormask
item:Sequence Consensus
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
itemhelp:MakeCons.help
arg:METHOD
arglabel:Method
argtype:chooser
argchoice:IUPAC:-iupac
argchoice:Majority:-majority $PERCENT
arg:MASK
argtype:chooser
arglabel:Create a new:
argchoice:Sequence:
argchoice:Selection Mask: | Consto01mask
arg:PERCENT
arglabel:Minimum Percentage for Majority
argtype:slider
argmin:50
argmax:100
argvalue:75
in:in1
informat:gde
out:out1
outformat:gde
#Menu for DNA/RNA
item:blastn
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -r $MATCH -q $MMSCORE > in1.tmp; kedit in1.tmp; rm in1*)&
in:in1
informat:flat
insave:
arg:BLASTDBDNA
argtype:choice_list
arglabel:Which Database
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:4
argmax:18
argvalue:12
arg:MATCH
argtype:slider
arglabel:Match Score
argmin:1
argmax:10
argvalue:5
arg:MMSCORE
argtype:slider
arglabel:Mismatch Score
argmin:-10
argmax:-1
argvalue:-5
item:blastx
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:1
argmax:5
argvalue:3
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:CODE
argtype:choice_list
arglabel:Genetic Code
argchoice:Standard or Universal:0
argchoice:Vertebrate Mitochondrial:1
argchoice:Yeast Mitochondrial:2
argchoice:Mold Mitochondrial and Mycoplasma:3
argchoice:Invertebrate Mitochondrial:4
argchoice:Ciliate Macronuclear:5
argchoice:Protozoan Mitochondrial:6
argchoice:Plant Mitochondrial:7
argchoice:Echinoderm Mitochondrial:8
item:------------------------
item:Add a new DNA blast db
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
arg:sourcefile
argtype:text
arglabel: enter the file name
arg:menuname
argtype:text
arglabel: enter the name of the DB
menu:seq. datasets
item:-------------
item:add a new dataset
itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file
arg:name
argtype:text
arglabel:Enter the dataset name ?
arg:file
argtype:text
arglabel:Enter the dataset file (in FASTA) ?
#Menu for Protein
menu:protein
item:blastp
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30 .; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:1
argmax:5
argvalue:3
item:tblastn
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:4
argmax:18
argvalue:12
arg:CODE
argtype:choice_list
arglabel:Genetic Code
argchoice:Standard or Universal:0
argchoice:Vertebrate Mitochondrial:1
argchoice:Yeast Mitochondrial:2
argchoice:Mold Mitochondrial and Mycoplasma:3
argchoice:Invertebrate Mitochondrial:4
argchoice:Ciliate Macronuclear:5
argchoice:Protozoan Mitochondrial:6
argchoice:Plant Mitochondrial:7
argchoice:Echinoderm Mitochondrial:8
item:Map View
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
itemhelp:mapview.help
in:in1
informat:gde
insave:
arg:PBL
arglabel:Pixel Between Lines
argtype:slider
argvalue:10
argmin:1
argmax:15
arg:NPP
arglabel:Nucleotides Per Pixel
argtype:slider
argvalue:1
argmin:1
argmax:20
arg:LWIDTH
arglabel:Line Thickness
argtype:slider
argvalue:2
argmin:1
argmax:5
item:--------------------------
item:Add a new protein blast db
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
arg:sourcefile
argtype:text
arglabel: Enter the file (in FASTA)
arg:menuname
argtype:text
arglabel: Enter the name of the DB
menu:Phylogeny
item:Phylip help
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
arg:FILE
argtype:choice_list
arglabel:Which program?
argchoice:clique:clique.html
argchoice:consense:consense.html
argchoice:contchar:contchar.html
argchoice:contml:contml.html
argchoice:contrast:contrast.html
argchoice:discrete:discrete.html
argchoice:distance:distance.html
argchoice:dnaboot:dnaboot.html
argchoice:dnacomp:dnacomp.html
argchoice:dnadist:dnadist.html
argchoice:dnainvar:dnainvar.html
argchoice:dnaml:dnaml.html
argchoice:dnamlk:dnamlk.html
argchoice:dnamove:dnamove.html
argchoice:dnapars:dnapars.html
argchoice:dnapenny:dnapenny.html
argchoice:dollop:dollop.html
argchoice:dolmove:dolmove.html
argchoice:dolpenny:dolpenny.html
argchoice:draw:draw.html
argchoice:drawgram:drawgram.html
argchoice:drawtree:drawtree.html
argchoice:factor:factor.html
argchoice:fitch:fitch.html
argchoice:gendist:gendist.html
argchoice:kitsch:kitsch.html
argchoice:main:main.html
argchoice:mix:mix.html
argchoice:move:move.html
argchoice:neighbor:neighbor.html
argchoice:penny:penny.html
argchoice:protpars:protpars.html
argchoice:read.me.general:read.me.general.html
argchoice:restml:restml.html
argchoice:seqboot:seqboot.html
argchoice:sequence:sequence.html
item:Phylip 3.5
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
arg:PROGRAM
argtype:choice_list
arglabel:Which program to run?
argchoice:DNAPARS:dnapars
argchoice:DNABOOT:dnaboot
argchoice:DNAPENNY:dnapenny
argchoice:DNAML:dnaml
argchoice:DNAMLK:dnamlk
argchoice:DNACOMP:dnacomp
argchoice:DNAMOVE:dnamove
argchoice:DNAINVAR:dnainvar
argchoice:PROTPARS:protpars
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
item:Phylip DNA Distance methods
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
arg:EXPLAIN
argtype:text
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE
arg:PROGRAM
arglabel:Which method?
argtype:chooser
argchoice:DNADIST+NEIGHBOR:
argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
arg:PROG
arglabel:Run ?
argtype:chooser
argchoice:Run without Bootstrap:
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
arg:DNA
argtype:text
arglabel:Name of DNADIST outfile?
arg:NEI
argtype:text
arglabel:Name of NEIGHBOR outfile?
arg:TREE
argtype:text
arglabel:Name of TREEFILE ?
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
item:Phylip PROTEIN Distance methods
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
arg:PROGRAM
arglabel:Which method?
argtype:chooser
argchoice:PROTDIST+NEIGHBOR:
argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
arg:PROG
arglabel:Which method?
argtype:chooser
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
argchoice:No Bootstrap:
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
menu:On-Line Res.
item:GDE for Linux resources at Bioafrica.net
itemmethod:netscape http://www.bioafrica.net &
item:-------------------------
item:add a new website
itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url
arg:name
argtype:text
arglabel:Enter the site name
arg:url
argtype:text
arglabel:Enter the URL (including http://)

View File

@ -1,791 +0,0 @@
1menu:File
item:test cmask output
itemmethod: kedit in1
in:in1
informat:colormask
item:New sequence <meta N>
itemmethod:echo "$Type$Name" > out1
itemmeta:n
itemhelp:new_sequence.help
arg:Name
argtype:text
arglabel:New Sequence name?
argtext:New
arg:Type
argtype:choice_list
arglabel:Type?
argchoice:DNA/RNA:#
argchoice:Amino Acid:%
argchoice:Text:\"
argchoice:Mask:@
out:out1
outformat:flat
item:Import Foreign Format
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
itemhelp:readseq.help
arg:INPUTFILE
argtype:text
arglabel:Name of foreign file?
out:OUTPUTFILE
outformat:genbank
item:Export Foreign Format
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
itemhelp:readseq.help
arg:FORMAT
argtype:choice_list
argchoice:FASTA:8
argchoice:NEXUS:17
argchoice:Phylip v3.3:12
argchoice:IG/Stanford:1
argchoice:GenBank:2
argchoice:NBRF:3
argchoice:EMBL:4
argchoice:GCG:5
argchoice:DNA Strider:6
argchoice:Fitch:7
argchoice:Pearson:8
argchoice:Zuker:9
argchoice:Olsen:10
argchoice:Phylip v3.2:11
argchoice:Phylip v3.3:12
argchoice:Plain text:13
arg:OUTPUTFILE
argtype:text
arglabel:Save as?
in:INPUTFILE
informat:genbank
item:Save Selection
itemmethod: cat $SAVE_FUNC > $Name
itemhelp:save_selection.help
arg:SAVE_FUNC
argtype:chooser
arglabel:File format
argchoice:Flat:in1
argchoice:Genbank:in2
argchoice:GDE/HGL:in3
arg:Name
argtype:text
arglabel:File name?
in:in1
informat:flat
in:in2
informat:genbank
in:in3
informat:gde
item:Print Selection
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
itemhelp:print_alignment.help
arg:SCALE
argtype:slider
arglabel:Reduce printout by?
argmin:1
argmax:20
argvalue:1
arg:CMD
argtype:chooser
argchoice:Lpr:lpr
argchoice:Enscript Gaudy:enscript -G -q
argchoice:Enscript Two column:enscript -2rG
arg:PRINTER
argtype:text
arglabel:Which printer?
argtext:lp
in:in1
informat:gde
insave:
menu:Edit
item:Sort
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
itemhelp:heapsortHGL.help
arg:PRIM_KEY
argtype:choice_list
argchoice:Group:group-ID
argchoice:type:type
argchoice:name:name
argchoice:Sequence ID:sequence-ID
argchoice:creator:creator
argchoice:offset:offset
arglabel:Primary sort field?
arg:SEC_KEY
argtype:choice_list
argchoice:None:
argchoice:Group:group-ID
argchoice:type:type
argchoice:name:name
argchoice:Sequence ID:sequence-ID
argchoice:creator:creator
argchoice:offset:offset
arglabel:Secondary sort field?
in:in1
informat:gde
insave:
item:extract
itemmethod:(gde in1;/bin/rm -f in1)&
in:in1
informat:gde
inmask:
insave:
menu:DNA/RNA
item:Translate...
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
arg:FRAME
argtype:chooser
arglabel:Which reading frame?
argchoice:First:1
argchoice:Second:2
argchoice:Third:3
argchoice:All six:6
arg:MNFRM
arglabel:Minimum length of AA sequence to translate?
argtype:slider
argmin:0
argmax:100
argvalue:20
arg:LTRCODE
argtype:chooser
arglabel:Translate to:
argchoice:Single letter codes:
argchoice:Triple letter codes:-3
arg:TBL
arglabel:Codon table?
argtype:chooser
argchoice:universal:1
argchoice:mycoplasma:2
argchoice:yeast:3
argchoice:Vert. mito.:4
in:in1
informat:gde
out:out1
outformat:gde
item:Dot plot
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
itemhelp:DotPlotTool.help
in:in1
informat:gde
insave:
item:Clustal alignment
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
itemhelp:clustal_help
arg:KTUP
argtype:slider
arglabel:K-tuple size for pairwise search
argmin:1
argmax:10
argvalue:2
arg:WIN
argtype:slider
arglabel:Window size
argmin:1
argmax:10
argvalue:4
arg:Trans
argtype:chooser
arglabel:Transitions weighted?
argchoice:Yes:/TRANSIT
argchoice:No:
arg:FIXED
argtype:slider
arglabel:Fixed gap penalty
argmin:1
argmax:100
argvalue:10
arg:FLOAT
arglabel:Floating gap penalty
argtype:slider
argmin:1
argmax:100
argvalue:10
arg:REPORT
argtype:chooser
arglabel:View assembly report?
argchoice:No:
argchoice:Yes:kedit in1.rpt&
in:in1
informat:flat
insave:
item:Variable Positions
itemmethod:varpos $REV < in1 > out1
arg:REV
argtype:chooser
arglabel:Highlight (darken)
argchoice:Conserved positions:
argchoice:variable positions:-rev
in:in1
informat:flat
out:out1
outformat:colormask
item:Phrap
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
in:in1
informat:genbank
out:out1
outformat:genbank
item:SNAP
itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
in:in1
informat:flat
out:out1
outformat:text
item:Find all <meta-f>
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
itemhelp:findall.help
itemmeta:f
arg:SEARCH
argtype:text
arglabel:Search String
arg:PRCNT
argtype:slider
arglabel:Percent mismatch
argmin:0
argmax:75
argvalue:10
arg:CASE
argtype:chooser
arglabel:Case
argchoice:Upper equals lower:
argchoice:Upper not equal lower:-case
arg:UT
argtype:chooser
arglabel:U equal T?
argchoice:Yes:-u=t
argchoice:No:
argvalue:0
arg:MAT
arglabel:Match color
argtype:choice_list
argchoice:yellow:1
argchoice:violet:2
argchoice:red:3
argchoice:aqua:4
argchoice:green:5
argchoice:blue:6
argchoice:grey:11
argchoice:black:8
argvalue:2
arg:MIS
argtype:choice_list
arglabel:Mismatch color
argchoice:yellow:1
argchoice:violet:2
argchoice:red:3
argchoice:aqua:4
argchoice:green:5
argchoice:blue:6
argchoice:grey:11
argchoice:black:8
argvalue:7
in:in1
informat:flat
out:out1
outformat:colormask
item:Sequence Consensus
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
itemhelp:MakeCons.help
arg:METHOD
arglabel:Method
argtype:chooser
argchoice:IUPAC:-iupac
argchoice:Majority:-majority $PERCENT
arg:MASK
argtype:chooser
arglabel:Create a new:
argchoice:Sequence:
argchoice:Selection Mask: | Consto01mask
arg:PERCENT
arglabel:Minimum Percentage for Majority
argtype:slider
argmin:50
argmax:100
argvalue:75
in:in1
informat:gde
out:out1
outformat:gde
#Menu for DNA/RNA
item:blastn
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
in:in1
informat:flat
insave:
arg:BLASTDBDNA
argtype:choice_list
arglabel:Which Database
argchoice:HIV-1 Seq. Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:4
argmax:18
argvalue:12
arg:MATCH
argtype:slider
arglabel:Match Score
argmin:1
argmax:10
argvalue:5
arg:MMSCORE
argtype:slider
arglabel:Mismatch Score
argmin:-10
argmax:-1
argvalue:-5
item:blastx
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDBDNA -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
in:in1
informat:flat
insave:
arg:BLASTDBDNA
argtype:choice_list
arglabel:Which Database
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:1
argmax:5
argvalue:3
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:CODE
argtype:choice_list
arglabel:Genetic Code
argchoice:Standard or Universal:0
argchoice:Vertebrate Mitochondrial:1
argchoice:Yeast Mitochondrial:2
argchoice:Mold Mitochondrial and Mycoplasma:3
argchoice:Invertebrate Mitochondrial:4
argchoice:Ciliate Macronuclear:5
argchoice:Protozoan Mitochondrial:6
argchoice:Plant Mitochondrial:7
argchoice:Echinoderm Mitochondrial:8
item:------------------------
item:Add a new DNA blast db
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname;
arg:sourcefile
argtype:text
arglabel: enter the file name
arg:menuname
argtype:text
arglabel: enter the name of the DB
menu:seq. datasets
item:tttt
itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
out:OUTPUTFILE
outformat:genbank
item:HIV1POLDNA.fasta
itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
out:OUTPUTFILE
outformat:genbank
item:structure
itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
out:OUTPUTFILE
outformat:genbank
item:-------------
item:add a new dataset
itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
arg:name
argtype:text
arglabel:Enter the dataset name ?
arg:file
argtype:text
arglabel:Enter the dataset file (in FASTA) ?
#Menu for Protein
menu:protein
item:blastp
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM30 .; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
in:in1
informat:flat
insave:
arg:BLASTDBPROT
argtype:choice_list
arglabel:Which Database
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
argchoice:ttttt:/usr/local/biotools/db/tttt
argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp
argchoice:yyyy:/usr/local/biotools/db/test
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:1
argmax:5
argvalue:3
item:tblastn
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
in:in1
informat:flat
insave:
arg:BLASTDB
argtype:choice_list
arglabel:Which Database
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
arg:Matrix
arglabel:Substitution Matrix:
argtype:choice_list
argchoice:PAM30:PAM30
argchoice:PAM70:PAM70
arg:WORDLEN
argtype:slider
arglabel:Word Size
argmin:4
argmax:18
argvalue:12
arg:CODE
argtype:choice_list
arglabel:Genetic Code
argchoice:Standard or Universal:0
argchoice:Vertebrate Mitochondrial:1
argchoice:Yeast Mitochondrial:2
argchoice:Mold Mitochondrial and Mycoplasma:3
argchoice:Invertebrate Mitochondrial:4
argchoice:Ciliate Macronuclear:5
argchoice:Protozoan Mitochondrial:6
argchoice:Plant Mitochondrial:7
argchoice:Echinoderm Mitochondrial:8
item:Map View
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
itemhelp:mapview.help
in:in1
informat:gde
insave:
arg:PBL
arglabel:Pixel Between Lines
argtype:slider
argvalue:10
argmin:1
argmax:15
arg:NPP
arglabel:Nucleotides Per Pixel
argtype:slider
argvalue:1
argmin:1
argmax:20
arg:LWIDTH
arglabel:Line Thickness
argtype:slider
argvalue:2
argmin:1
argmax:5
item:--------------------------
item:Add a new Protein blast db
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname;
arg:sourcefile
argtype:text
arglabel: Enter the file (in FASTA)
arg:menuname
argtype:text
arglabel: Enter the name of the DB
menu:Phylogeny
item:Phylip help
itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)&
arg:FILE
argtype:choice_list
arglabel:Which program?
argchoice:clique:clique.html
argchoice:consense:consense.html
argchoice:contchar:contchar.html
argchoice:contml:contml.html
argchoice:contrast:contrast.html
argchoice:discrete:discrete.html
argchoice:distance:distance.html
argchoice:dnaboot:dnaboot.html
argchoice:dnacomp:dnacomp.html
argchoice:dnadist:dnadist.html
argchoice:dnainvar:dnainvar.html
argchoice:dnaml:dnaml.html
argchoice:dnamlk:dnamlk.html
argchoice:dnamove:dnamove.html
argchoice:dnapars:dnapars.html
argchoice:dnapenny:dnapenny.html
argchoice:dollop:dollop.html
argchoice:dolmove:dolmove.html
argchoice:dolpenny:dolpenny.html
argchoice:draw:draw.html
argchoice:drawgram:drawgram.html
argchoice:drawtree:drawtree.html
argchoice:factor:factor.html
argchoice:fitch:fitch.html
argchoice:gendist:gendist.html
argchoice:kitsch:kitsch.html
argchoice:main:main.html
argchoice:mix:mix.html
argchoice:move:move.html
argchoice:neighbor:neighbor.html
argchoice:penny:penny.html
argchoice:protpars:protpars.html
argchoice:read.me.general:read.me.general.html
argchoice:restml:restml.html
argchoice:seqboot:seqboot.html
argchoice:sequence:sequence.html
item:Phylip 3.5
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
arg:PROGRAM
argtype:choice_list
arglabel:Which program to run?
argchoice:DNAPARS:dnapars
argchoice:DNABOOT:dnaboot
argchoice:DNAPENNY:dnapenny
argchoice:DNAML:dnaml
argchoice:DNAMLK:dnamlk
argchoice:DNACOMP:dnacomp
argchoice:DNAMOVE:dnamove
argchoice:DNAINVAR:dnainvar
argchoice:PROTPARS:protpars
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
item:Phylip DNA Distance methods
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
arg:EXPLAIN
argtype:text
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE
arg:PROGRAM
arglabel:Which method?
argtype:chooser
argchoice:DNADIST+NEIGHBOR:
argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
arg:PROG
arglabel:Run ?
argtype:chooser
argchoice:Run without Bootstrap:
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
arg:DNA
argtype:text
arglabel:Name of DNADIST outfile?
arg:NEI
argtype:text
arglabel:Name of NEIGHBOR outfile?
arg:TREE
argtype:text
arglabel:Name of TREEFILE ?
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
item:Phylip PROTEIN Distance methods
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
arg:PROGRAM
arglabel:Which method?
argtype:chooser
argchoice:PROTDIST+NEIGHBOR:
argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
arg:PROG
arglabel:Bootstrap?
argtype:chooser
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
argchoice:No Bootstrap:
arg:PREEDIT
argtype:chooser
arglabel:Edit input before running?
argchoice:No:
argchoice:Yes:kedit infile;
in:in1
informat:genbank
inmask:
insave:
menu:On-Line Res.
item:tytyt
itemmethod:netscape hnu[phoph &
item:SANBI
itemmethod:netscape again &
item:PlasmoDB
itemmethod:netscape http://www.plasmodb.org &
item:NCBI
itemmethod:netscape http://www.ncbi.nlm.nih.gov &
item:sanbi
itemmethod:netscape http://www.sanbi.ac.za &
item:SANBI
itemmethod:netscape http://www.sanbi.ac.za &
item:GDE for Linux resources at Bioafrica.net
itemmethod:netscape http://www.bioafrica.net &
item:-------------------------
item:add a new website
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
arg:name
argtype:text
arglabel:Enter the site name
arg:url
argtype:text
arglabel:Enter the URL (including http://)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -6,7 +6,7 @@ DrawNA.c Free.c BuiltIn.c Edit.c Genbank.c Scroll.c ChooseFile.c \
CutCopyPaste.c HGLfile.c
LIBS= -lm -lxview -lolgx -lX11
CFLAGS= -g -L/usr/openwin/lib -I/usr/openwin/include
CFLAGS= -g -m32 -L/usr/lib32 -I/usr/include/xview
CC = cc
# Possible defines, SUN4 SGI DEC HGL
DEFINES = -DLINUX

Binary file not shown.

Binary file not shown.

View File

@ -1,8 +0,0 @@
========================[ Feb 1, 2002 1:57 PM ]========================
NOTE: CoreLib [002.003] FileOpen("HIV1POLDNA.fasta","r") failed
Cannot open input database file. Formating failed...
========================[ Feb 1, 2002 7:27 PM ]========================
NOTE: CoreLib [002.003] FileOpen("SIVPOLPRO.fasta","r") failed
Cannot open input database file. Formating failed...

View File

@ -1,191 +0,0 @@
10 916
contig GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
W22140 AAAAANGCCC NNTTCNAAGN GGGGGGGGGG GGGGGGGATA TTTTGCNNAG
R.C.W27436 GGGNNNNGNN NNNNNNNNNN NNNNNNAANN NNNNNNNNNN NNNNNNNNNN
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
W28762 TCTTGACATT TGTCTCCATT TCAGCAAAAC GANACCTGTG GTGAAGGGAT
#10005_2 2 GGnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
W28762 ---------- ---------- ---------- ---------- ----------
W28762(165 GGGNNGGNGN GGNNNGNNGN NNNGGNNNNN NNNTNTGTNT GNNGGNAGGG
#10005_2 2 GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
GGGGGCATGA TGNNGAGANC NAAAGAAAGN NCNGGGNGGG AAAAAAGAAG
NNNANNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
TTGTGTGCTG GCACTG---- ---------- ---------- ----------
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
---------- ---------- ---------- ---------- ----------
NNTNTNANNN NNTTNTANAG TNAAAGNTTG GTNNNNGTNN NTTTGANGAA
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
GAGGNCCCTG GNGGGAGGGG GGNNCGNNTT TNNTGCNCCG GATGGAGGGN
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
---------- ---------- ---------- ---------- ----------
nnnnnnnnGn AAnnnnnnnn nnnnnnnnnn nnnnnnnnnT TGAAAACTGT
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
---------- ---------- ---------- ---------- ----------
GNTCAANNTG GGGNNNANAN NNGNNNTTGA NTGAAAATGG GGNAANCCCC
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC TGAA-n--Tc TACT---CCG
GGGGNTTTTN AAGNNTGTTT NTTTANAAGN AAGAGGGGGA NAAAATTTTT
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAACCGAAA
---------- ---------- ---------- ---------- ----------
TAnCCAAnTG GAATCCTAAG ACAATTTTCT -cCAwTTCA- sCAAC-CGAA
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAAC-CGAA
---------- ---------- ---------- ---------- ----------
CNTTTTNCCA GTCANCTGGT AAGTCCAAGC TGAA-N--TC TACTC--C-G
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC Tgaa----Tc TACTC--C-G
CATGTAA-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
TTNNTTCTNT NNCTNGNNNG GGGGGGGGGG GGGGCCCCCA ATAAGNNNTT
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
CCCTGTGGTG GAGGGAATTN CGTTCTTGGC NCTTCAGACT NCAGGGCAGG
---------- ---------- ---------- ----CAGACT GCAGGGNAGG
ACCCTGTGGT GrAGGGATTT GTGTGCT-GG CACTGCAGAC TGCAGGGCAG
ACCCTGTGGT GGAGGGAATT NCGTTCTTGG CNCTTCAGAC TNCAGGGCAG
---------- ---------- ---------- ---------- ----------
CATGTAACCC C-NAAAGAGT TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
CATGTAa-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
GNGCNCAGAA NNAGGGGGGG GNGGGGGGGC CCCTTTNCTC CNAAAAATTT
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
AA-------- ---------- ---------- ---------- ----------
AA-------- ---------- ---------- ---------- ----------
GAAAGGGCTA GGGCCCAGGG GCTGGGAmAT GCATGAGGT- gCTCGGAGGA
GAAAGGGCTA GGGCCCAGGG GCTGGGAAAT GCATGAGGTT GCTCGGAGGA
---------- ---------- ---------- ---------- ----------
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
CTaAGCAGAT AGCAAAGaAG ATaATGGAGG AgCAATTGGT CATGGCCtTG
CCCCCCNTTT TGGGNAAGGG TGGGGGAAAN NNTTTGGGCA AANAGGGGAA
NNNNNNNNNN NNNAANNAGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
---------- -------AGG GCTAGGGCCC AGGGGCTGGG AAATGCATGA
---------- -------AGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
---------- ---------- ---------- ---------- ----------
CTAAGCAGAT AGCAAAGNAG ATNATGGAGG ANCAATTGGT CATGGCCNTG
CTAAGCAGAT AGCAAAGAAG ATAATGGAGG AGCAATTGGT CATGGCCTTG
GTTTCCCTCk AAACaACgCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
AAAAAAAGNG GGGGGGGGCG GNTTCCANAA AANAANAAAG GGTNCACCCN
GG-TTCTNGG NGGAGCCTGG CTAAANCCAA GCACCAGCAC CTGTGAGTCT
GGTTGCTCGG AGGAGCCTGG CTAAATCCAA GCACCAGCAC CTGTGAGTCT
GG-TGCTCGG AGGAGCCTGG NTAAATCCAA GCACCAGCAC CTGTGAGTCT
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
---------- ---------- ---------- ---------- ----------
GTTTCCCTCC AAACNACNCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
GTTTCCCTCk AAACAACGCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
tmGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
TNGGGGGNCN CCCCCCCCNC NNGNAAATCN TCCCTTTTTT TGANGGGCNA
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT NCCTCTTCTC
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCAAAAG
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCCNAAG
---------- ---------- ---------- ---------- ----------
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
ACGAGAGCTG GGAGAAGAGG cAAAGCTACA GGTTTACTTG GGAGCCAGCT
ANNNCATTTN CTTGNCCTTG AAGATTGACC NTGACTGCTC TGGCAAGAAG
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTkGA GGGAAACCAA
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTTGA GGGAAACCAA
---------- ---------- ---------- ---------- ----------
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGaTTTA gCCAGGCTCC
AAGAGGTGTC CTTACAGAGA CCTCTTTACT GACCAACTGA AGNATAGACT
CTTTCCCCCN AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
CTTTCCCCCC NAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
CTTTCCCCCA AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGNGTNGTT
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
---------- ---------- ---------- ---------- ----------
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA NCCAGGCTCC
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA GCCAGGCTCC
tCCgAGkA-- CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
TACTGCTGGA CAATCTGCAT GGGCATCACC CCTCCCCGCA TGTAACCC-A
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
TGGAGGGAAA CCANGGCCAT GACCAATTGN TCCTCCATNA TCTNCTTTGC
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
---------- ---------- ---------- ---------- ----------
TCCGAGC--A CCTCATGCAT GTCCCAGCCC CTGGGCCCTA GCCCT-----
TCCGAGc--A CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
---------- ---------- ---------- ---------- ----------
AAAGAGGTGT CCAGAGCCAA GGCTTCTACC TTCATTGTCC CTCTCTGTGC
TATCTGCTNA GAGNANNCAA NNNAANNNA- ---------- ----------
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
TGAAGGTAGA AGCCTTGGCT CTGGACAmCT CTTTTGGG-t TACATGCG--
TGAAGGTAGA AGCCTTGGCT CTGGACACCT CTTTTGGG-T TACATGCGGT
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- TTCCTgCCCT GcAGTCTGAA GnGCCAAG-A -ACGnAATTC
TCAAGGAGTT CCATTCCAGG AGGAAGAGAT CTATACCCT- ----------
---------- ---------- ---------- ---------- ----------
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC ACCTCTTTT-
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC AACTCTTTNG
GAGTAgA-tt cAGCTTGGAC TTACCAGnTG ACTGGnAAAA nGGGGGnTTn
GAGTANA-NN NA-------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- TTCCTNCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
---------- TTCCTGCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
CCTCCACCAC AGGGTTTCG- GTTGGGTGGn TTGGAAGA-A AATTGTCTTA
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
GGGTTACATG CGGTGAGTAN ANNNA----- ---------- ----------
GGGTTACATG CGG--AGTAG ANTTCAGCTT GGACTTACCA GNTGACTGGN
CCCCATTTTC AnTCAAnnnC nnnTnTnnnC CCCAnnTTGA nCTTCnTCAA
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
CCTTCACCAC A-GGTNTCGT TTTGC-TGAA ATGG-AGACA AAT-GTCa-a
CCTrCACCAC AGGGTTTCG- GTTGs-TGAA wTGg-AGA-A AATTGTCTTA
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
AAAANGGGGG NTTNCCCCAT TTTCANTCAA NNNCNNNTNT NNNCCCCANN
AnnnACnnnn ACCAAnCTTT nACTnTAnAA nnnnnTnAnA nnCCCTnCCn
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
g-a------- ---------- ---------- ---------- ----------
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
TTGANCTTCN TCAAANNNAC NNNNACCAAN CTTTNACTNT ANAANNNNNT
nCAnACAnAn nnnnnnnCCn nnnCnnCnnn CCnCnCCnnC CC--------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
NANANNCCCT NCCNNCANAC ANANNNNNNN NCCNNNNCNN CNNNCCNCNC
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
---------- ---------- ---------- ---------- ----------
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
nnnnnnnnnn nnnnCC
---------- ------
---------- ------
---------- ------
CNNCCC---- ------
---------- ------
---------- ------
---------- ------
---------- ------
nnnnnnnnnn nnnnCC

View File

@ -1,2 +0,0 @@
make
cp gde ../bin

Binary file not shown.

Binary file not shown.

View File

@ -1,34 +0,0 @@
DNA parsimony algorithm, version 3.51c
One most parsimonious tree found:
+-----------------------#10005_2 2
!
! +--------------------W28762(165
+--9 !
! ! ! +--R.C.W27652
! ! ! +-----------6
! ! ! ! +--#10005_2 2
! +--8 !
! ! +--5 +--W28762
! ! ! ! +--7
--1 ! ! ! +--4 +--W28762
! ! ! ! ! !
! +--2 +-----3 +-----R.C.W27652
! ! !
! ! +--------R.C.W27436
! !
! +-----------------W22140
!
+--------------------------contig
remember: this is an unrooted tree!
requires a total of 2453.000

View File

@ -1,2 +0,0 @@
((#10005_2_2,(W28762(165,(((R.C.W27652,#10005_2_2),(((W28762,W28762),
R.C.W27652),R.C.W27436)),W22140))),contig);

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,25 +0,0 @@
;---------------------------------------------------------------------------
; FEATURES/GDE Accession File Instructions
;
; 1. Type in one or more GenBank Accession #'s below,
; or
; Place cursor at end of this file and choose 'Include File' in the FILE
; menu to read in a file of numbers.
;
; (NOTE: File cannot contain LOCUS names.)
;
; 2. Choose 'Save Current File' in the File menu
; 3. Quit this window
;
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
; OUT THESE COMMENT LINES.
;
; NOTE: Put each accession # on a separate line
; SAMPLE ACCESSION FILE:
;
; M18249
; X13383
; J03680
;
;---------------------------------------------------------------------------

View File

@ -1,45 +0,0 @@
clu2ig update 3 Feb 94
NAME
clu2ig
SYNOPSIS
clu2ig clustalfile > igfile
DESCRIPTION
Converts interleaved .aln output from Clustal V into
sequential .ig (IntelliGenetics) format for use by MASE.
clustalfile:
CLUSTAL V multiple sequence alignment
name1 AACTTTCG
name2 ATCTTTCG
* ******
name1 CCTGCT
name2 CCCGCT
** ***
igfile:
;
name1
AACTTTCG
CCTGCT
;
name2
ATCTTTCG
CCCGCT
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,36 +0,0 @@
dbstat update 3 Feb 94
NAME
dbstat - calculates amino acid frequencies in a protein
database
SYNOPSIS
dbstat
DESCRIPTION
dbstat reads a file of one or more protein sequences
and calculates the amino acid frequencies, both in terms of
absolute numbers, and as a fraction of the total.
input - The input file is the standard .wrp (Pearson) format,
such as that produced by getob:
>name
; one or more comment lines (optional)
sequence lines
Comments begin either with semicolon (;) or right arrow (>)
characters.
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,30 +0,0 @@
;---------------------------------------------------------------------------
; FEATURES/GDE Expression File Instructions 8/7/95
;
; 1. Type in one or more GenBank expressions below,
; or
; Place cursor at end of this file and choose 'Include File' in the FILE
; menu to read in a file of expressions.
; or
; Copy expressions from another window and Paste into this window.
; 2. Choose 'Save Current File' in the File menu
; 3. Quit this window
;
; NOTES:
; 1) FEATURES will then extract the appropriate sequences.
; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES.
; 2) All expressions referring to GenBank entries must begin with a '@'
; Literals (ie. sequences to be embedded in the final output)
; do NOT begin with a '@'.
; 3) Put each expression on a separate line.
;
; SAMPLE EXPRESSION FILE:
;
; @J05635:83..1813
; ; EcoRI/NotI adaptor {this is a comment line}
; AATTGCGGCCGC
; @J05635:/product="flagellin A"
; @x17548:singed_trans
;
;---------------------------------------------------------------------------

View File

@ -1,23 +0,0 @@
;---------------------------------------------------------------------------
; FEATURES/GDE Feature Key File Instructions
;
; 1. Type in one or more GenBank FEATURE Table feature keys below,
; or
; Place cursor at end of this file and choose 'Include File' in the FILE
; menu to read in a file of feature keys.
;
; 2. Choose 'Save Current File' in the File menu
; 3. Quit this window
;
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
; OUT THESE COMMENT LINES.
;
; NOTE: Put each feature key on a separate line
; SAMPLE FEATURE KEY FILE:
;
; mRNA
; CDS
; mat_peptide
;
;---------------------------------------------------------------------------

View File

@ -1,407 +0,0 @@
FEATURES.DOC update 7 Feb 94
NAME
FEATURES - extracts features from GenBank entries
SYNOPSIS
features
features expression
features [-f featurekey | -F keyfile]
[-n name |-a accession | -e expression |
-N namefile |-A accfile | -E expfile]
[-u dbfile | -U dbfile | -g ]
features -h
DESCRIPTION
FEATURES extracts sequence objects from GenBank entries, using
the Features Table language. Features can be retrieved either by
specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by
evaluating expressions. In practical terms, FEATURES is
a user interface for GETOB, which actually performs the parsing
and extraction of sequence objects. FEATURES can be run either as
an interactive program or with command line arguments.
'features' with no arguments runs the program interactively.
'features' followed by an expression retrieves the data directly
from GenBank and evaluates the expression. The third form of
features requires all arguments to be accompanied by their
respective option flags. Finally, 'features -h' prints the
SYNOPSIS.
INTERACTIVE EXECUTION
FEATURES executed with no arguments runs interactively. An example of the
FEATURES menu is shown below:
___________________________________________________________________
FEATURES - Version 7 FEB 94
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
___________________________________________________________________
Features: tRNA
Entries: EPFCPCG
Dataset:
___________________________________________________________________
Parameter Description Value
-------------------------------------------------------------------
1).................... FEATURES TO EXTRACT ....................> f
f:Type a feature at the keyboard
F:Read a list of features from a file
2)....................ENTRIES TO BE PROCESSED (choose one).....> n
Keyboard input - n:name a:accession # e:expression
File input - N:name(s) A:accession #(s) E:expression(s)
3)....................WHERE TO GET IT .........................> g
u:Genbank dataset g:complete GenBank database
U: same as u, but all entries
4)....................WHERE TO SEND IT ........................> a
s:Each feature to a separate file a:All output to same file
---------------------------------------------------------------
Type number of your choice or 0 to continue:
0
Messages will be written to EPFCPCG.msg
Final sequence output will be written to EPFCPCG.out
Expressions will be written to EPFCPCG.exp
Extracting features...
In the example, FEATURES was instructed to retrieve all tRNAs from
the GenBank entry EPFCPCG, which contains the Epifagus plastid
genome. By default, the GenBank database was the source of the
sequence. Messages indicate the progress of the job. A log describing
the extraction of each feature is written to EPFCPCG.msg, while the
extracted features themselves are written to EPFCPCG.out. Feature
expressions which could be used by FEATURES to reconstruct the .out
file, are written to EPFCPCG.exp.
The first step is to retrieve the EPFCPCG entry from GenBank, which is
accomplished by calling FETCH. Next, FEATURES extracts the specified
features from the entry.
An excerpt from EPFCPCG.msg is shown below, describing the extraction
of the fifth tRNA found in this entry. To create this tRNA, two exons
had to be joined. The qualifier line associated with this feature
indicates that it is a Histidine tRNA with a gtg anticodon.
EPFCPCG:anticodon gtg
complement
(
join
(
70023 70028
1 69
)
)
/product="transfer RNA-His"
/gene="His-tRNA"
/label=anticodon gtg
/note="anticodon gtg"
//----------------------------------------------
The actual sequence for this feature, as written to EPFCPCG.out, is
written with each exon beginning a new line:
>EPFCPCG:anticodon gtg
ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat
gcgggttcaattcccgtcg
ttcgcc
Finally, the expression that was evaluated to create this feature is
written to EPFCPCG.exp:
>EPFCPCG:anticodon gtg
@M81884:anticodon gtg
If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES,
EPFCPCG.out would be recreated.
OPTIONS
1) FEATURES - choosing f will cause FEATURES to prompt for
a feature to extract. If you wish to extract several types of
features simultaneously (ie. F), you must construct a file listing the
feature keywords. The following example would retrieve both tRNA and
rRNA sequences:
OBJECTS
tRNA
rRNA
SITES
The words 'OBJECTS' and 'SITES' must enclose the feature keywords,
and each keyword must be on a separate line. For a rigorous
definition of the input file format, see the GETOB manual pages
(getob.doc).
In the menu shown above, f was chosen, and the user entered tRNA at
the prompt. Thus tRNA is now displayed on the Features: line. If
features had been specified from a file (suboption F) then the
filename containing the feature keywords would be displayed instead.
A complete list of legal feature keywords can be found in the GenBank
Release notes (gbrel.txt) under the subheading 'Feature Key Names'.
2) ENTRIES
n User is prompted for the name of an entry from which the
feature is to be extracted. The name of the entry will appear
on the 'Entries' line of the menu.
N User is prompted for a filename containing one or more
entry names. Each name must be on a separate line. The filename
will be displayed on the 'Entries' menu line.
a User is prompted for an accession number, which will appear
on the 'Entries' line of the menu.
A User is prompted for a filename for accession numbers. The filename
will appear on the 'Entries:' line.
e User is prompted for a GenBank Features expression of the
form accession:location. 'accession' refers to a GenBank
accession number, while 'location' is any legal feature location.
A brief description of location syntax can be found under the
subheading "Feature Location" in the GenBank release notes
(gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table:
Definition" Version 1.04 for a complete definition.
E User is prompted for a filename containing one or more Feature
expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning
with '@' are processed as expressions, and all other lines are
copied to the output file unchanged.
Examples:
The tRNA shown above could have been extracted by choosing
suboption e and entering either of the following expressions:
M81884:complement(join(70023..70028,1..69))
M81884:anticodon gtg
In the first example, the feature line from the original entry
is used as the location. In the second example, the feature is
found by its qualifier line, which also appeared in the
original entry. It must be noted that the qualifier line must
be unique from others in the same entry in its first 15
characters after the = .
The flaL protein coding region of B. licheniformis is described
in GenBank entry BLIFALA, accession number M60287 in the
following feature:
CDS 305..640
/note="flaD (sin) homologue"
/gene="flaL"
/label=ORF2
/codon_start=1
This feature could be retrieved using any of the following
expressions:
M60287:305..640
M60287:ORF2
M60287:/label=ORF2
M60287:/gene="flaL"
M60287:/note="flaD (sin) homologue"
Note that the /label= qualifier is special, in that labels are
specifically intended as unique tags on a feature. For labels,
only the label itself need be specified. Thus, /label=ORF2 is
equivalent to ORF2. For other qualifiers, the qualifier keyword
(eg. /note=) must be included.
3) DATABASE (WHERE TO GET IT) - By default, all entries processed will
be automatically retrieved from GenBank using FETCH. Specifying 'u'
(User-defined database subset) makes it possible to extract features
from GenBank subsets created by the user. Usually, retrieval of
features is much faster with a User-defined subset, so if you
frequently work with sets of genes, it is best to retrieve them
en-masse using FETCH, and work with them directly. For example, if
you had retrieved a set of Beta-globin sequences into a file called
'globin.gen', you could directly extract features from these entries
by specifying 'globin' or 'globin.gen' as your User-defined database.
If the file extension is '.gen', FEATURES will automatically create
temporary files called globin.ano, globin.wrp and globin.ind,
containing annotation, sequence, and an index, respectively. These
files will be read during feature extraction, and then discarded. If
you have already created such files using SPLITDB, simply specify
any of 'globin', 'globin.ano', etc. ie. anything, as long as it does
not have the .gen file extension.
'U' rather than 'u' causes ALL entries in the user-defined
database to be subset. This means that it is unnecessary to
specify entry options (eg -n, -N etc.), as these will be
ignored, if given.
One consequence of these conventions is that the individual GenBank
divisions can be processed directly. For example, suppose you were only
interested in rodent globins. You could directly access the rodent
division of GenBank by specifying the base name of that file division
(eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In
this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already
exist. Again, this approach is faster, since FEATURES would not have
to find and retrieve the sequences, but can read directly from the
database files. Finally, if you wanted to process all of the entries
in the database division, simply use -U. The user is warned that a
GenBank division is a huge amount of data, and processing every entry
could take a long time.
4) WHERE TO SEND IT - By default (a), the output for all entries goes
to a single set of files, whose names are chosen by FEATURES,
depending on the setting of option 2, Entries. If a single name (n) or
accession number (a) has been chosen, that will be used as
the raw filename. For example, if you were processing the entry
WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names
(N), accession numbers (A) or expressions (E) were read from a file,
the raw name of that file would be used eg. cellulase.nam would result
in cellulase.msg and cellulase.out. Finally, if a single expression
is processed (e), then the primary accession number in that
expression will be used for the filenames. In all cases, FEATURES
will tell you the names of the files being written.
Choosing suboption s, you can specify that the features created for
each entry be sent to separate files. In this case, each file will
have the name of that entry, with the extension .obj. However, all
messages and expressions will still go to a single file. While this
can be a convenient way of creating separate files when you need them,
this option still has the limitation of writing all features for a
given entry (if there are more than one) to the same file. Also,
successive resolution of features (anything requiring 'getob -r')
will not work with this option. This may be corrected in future
versions.
COMMAND LINE EXECUTION
There are two ways of running FEATURES from the command line. If only one
argument is supplied, that argument is interpreted as an expression, and
the result of that expression (ie. a sequence ) is written to the
standard output. .msg, .out and .exp files are NOT created. For example,
GenBank entry BACFLALA (M60287) contains the following feature:
CDS 95..271
/label=LORF-
/codon_start=1
/translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK
PSASIERSHSINPF"
Any of
features M60287:LORF-
features M60287:95..271
features M60287:/label=LORF-
would write the open reading frame to the standard output:
atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga
actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta
tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa
agaagtcattcaataaatcctttctga
This form of FEATURES is provided to make it easy to pipe output to
other programs for further processing. For example
features M60287:LORF- |ribosome >LORF.protein
would write the translation of the open reading frame to a file called
LORF.protein.
The full functionality of FEATURES can be accessed using arguments on
the command line. In particular, when there are multiple entries to be
processed, or multiple features within entries, it is much faster to
supply FEATURES with lists of entries, feature keys or expressions.
Command line options are similar to suboptions in menu items 1-3 above:
Feature keys:
-f key {feature key}
-F filename {file of feature keys}
Entries:
-n name {GenBank LOCUS name}
-N filename {file of GenBank LOCUS names}
-a accession {GenBank ACCESSION number}
-A filename {file of GenBank ACCESSION numbers}
-e expression {Feature Table expression}
-E filename {file of Feature Table expressions, each begin-
ning with '@'}
Databases:
-u filename {GenBank dataset}
-U filename { " " " " " " ,
process all entries ie. -nNaAeE options
will be ignored}
-g {GenBank}
Examples:
features -f tRNA -n EPFCPCG
retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out,
and .exp files.
features -e M60287:LORF-
would retrieve the same open reading frame as in the earlier example.
Since the most time-consuming operation in FEATURES is sequence retrieval,
it is often best to retrieve frequently-used sequences as database
subsets. For example, a set of GenBank entries for chlorophyll a/b binding
protein genes might be stored in a file called CAB.gen.
features -f CDS -N CAB.nam -u CAB.gen
would generate the files CAB.msg, CAB.out and CAB.exp containing output
for all CDS features in the entries listed in the file CAB.nam.
features -E CAB.exp -u CAB.gen
would re-create the output file CAB.out.
BUGS
FEATURES does no preliminary error checking for syntax of
GenBank expressions prior to their evaluation. Expressions that can
not be evaluated will be flagged by GETOB in the .msg file.
At present, little checking is done to test for the presence or
correctness of input files. Some errors may cause the program to
crash.
For User-defined datasets, filename expansion is not performed.
FILES
Temporary files:
X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) }
UNRESOLVED.fea UNRESOLVED.out
FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out
SEE ALSO
grep(1V) fetch getob splitdb
TRANSPORTATION NOTES
It should be fairly easy to get FEATURES to work even on systems
in which GenBank has not been formatted for the XYLEM package.
This is because FEATURES does not work directly on the database, but
rather retrieves all necessary sequences by calling FETCH. Thus,
statements like 'fetch FEA.nam FEA.gen' could be replaced with any
command that, given a file containing names or accession numbers,
returns a file containing GenBank entries. In principle, you
could even implement this sort of command to retrieve entries from
the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although
such a setup would undoubtedly be quite slow.
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,320 +0,0 @@
FETCH.DOC update 24 Feb 96
NAME
fetch - retrieves database entries by name or accession number
SYNOPSIS
fetch {interactive mode}
fetch [options] namefile [output file] {batch mode}
DESCRIPTION
fetch retrieves one or more entries from a database.
Interactive mode: fetch prompts the user to set search parameters,
using an interactive menu:
___________________________________________________________________
FETCH - Version 7 Feb 94
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
___________________________________________________________________
Namefile:
Outfile:
Database:
-------------------------------------------------------------------
Parameter Description Value
1) Name/Acc Name or Accession sequence to get
2) Namefile Get list of sequences from Namefile
3) WhatToGet a:annotation s:sequence b:both b
4) Database g:GenBank p:PIR v:VecBase l:LiMB g
G:GenBank dataset P:PIR dataset
5) Outfile Send all output to a single file (Outfile)
6) Files f:Send each entry to a separate file f
-------------------------------------------------------------
Type number of your choice or 0 to continue:
After all parameters have been set, type 0 to commence the search.
Messages regarding the progress of the search will be printed.
(1,2) Which entries to get?
If you want to get a single entry, option 1 lets you type in the
name of that entry, without having to create a namefile. To get
more than one entry, choose option 2, and specify the name of a
file containing sequence names or accession numbers.
namefile is a file containing one or more sequence names or
accession numbers, each on a separate line. Names and accession
numbers can even be interspersed, in upper or lowercase, and in
any order. For example, the namefile prp.nam might contain
; plant pathogenesis related proteins
; (these are sample comment lines)
; note that any line containing a semicolon is ignored
x06362
x05454
TOBPR1A1
; comments can be interspersed with names.
PUMPR13
tobpr1ar
Options 1 & 2 are mutually exclusive. Setting one will negate the
other. If option 2 is chosen, the name of the namefile will appear
at the top of the menu.
(3) WhatToGet
Use this option to specify whether to get annotation, sequence,
or both (default=both).
(4) Database
Use this option to select the database. (default=GenBank).
G and P select user-created database subsets containing GenBank
or PIR entries, respectively. It is assumed that the database
has been split into .ano, .wrp and .ind files using splitdb.
For example, if you had created a database subset called PR1.pir,
splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are
the files actually read by FETCH. When prompted for the name
of the database, simply type "PR1", without a file extension.
(If you do type a file extension, it will be ignored).
(5, 6) Where to send output
By default, option 6 is set to f, and each entry will be written to
a separate file, where the name of the file is the name of the
entry, followed by a file extension. If a complete entry is
retrieved, the file extension will indicate the type of database
(GenBank: .gen; PIR: .pir, Vecbase: .vec; LiMB: .LiMB). If only
annotation or sequence are retrieved, the file extensions will be
.ano or .wrp, respectively. Using the default, the namefile above
would create the following files:
PUMPR13.gen
TOBPR1A1.gen
TOBPR1AR.gen
TOBPR1CR.gen
TOBPR1PS.gen
By choosing option 5, you can specify the name of an output file
for all entries to go to. This filename will appear at the top
of the menu. Obviously, options 5 & 6 are mutually exclusive.
Note that entries retrieved are written in alphabetical order (sorting by
ASCII values), not the order in which they appeared in namefile.
(Note for remote users only: -f will only work for a single
name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES
specified in 2).)
Batch mode:
Although it is transparent to the user, all fetch really does
is call getloc, saving the user the trouble of knowing which
database files to retrieve sequences from, or of having to
execute getloc multiple times to retrieve sequences from
different database files. Thus, the options are identical to those
for getloc:
-a Write annotation portions of entries only, terminated by '//'.
-s Write sequence data only, in Pearson (.wrp) format.
-f Write each entry to a separate file.
-g GenBank (default)
-e EMBL {not implemented}
-p PIR (NBRF)
-v Vecbase
-l LiMB
-G GenBank_dataset
-P PIR_dataset
If -f is not specified, outfile must be specified.
-L force execution of fetch on local host even if
$XYLEM_RHOST is set. See "REMOTE EXECUTION" below
PIR_dataset
GenBank_dataset
This can be either a file of PIR entries, a file of GenBank entries,
or a XYLEM dataset created by splitdb. A file of PIR entries must
have the file extension ".pir". A file of GenBank entries must have
the file extension ".gen". A XYLEM dataset contains PIR entries split
among three files by splitdb: annotation (.ano), sequence (.wrp)
and index (.ind). These file extensions must be used!
When specifying a split dataset, only the base name needs to be
used. For example given a XYLEM dataset consisting of the files
myset.ano, myset.wrp and myset.ind, the following two commands
are equivalent:
fetch -P myset something.nam something.pir
fetch -P myset.ano something.nam something.pir
If the original .pir file had been used, the command would have
been
fetch -P myset.pir something.nam something.pir
The ability to work directly with .gen or .pir files is quite
convenient. However, since FETCH needs to work with a split
dataset, FETCH automatically splits .pir or .gen files into .ano, .wrp
and .ind files, which are removed when finished. This requires
extra disk space and execution time, which could be significant
for large datasets.
EXAMPLES
Batch example:
fetch -f chitinase.nam
will retrieve annotation and sequence for sequences listed in
chitinase.nam from GenBank, writing each entry to a separate file
with the extension .gen.
fetch -s -v pbr.nam pbr.wrp
will retrieve sequence data only for the entries listed in pbr.nam,
from VecBase, and write all sequences to a Pearson format file
(ie. readable by fasta) with the name pbr.wrp.
fetch -G sample sample.nam new.gen
fetch -G sample.ano sample.nam new.gen
Assumes that a set of GenBank entries has been split by splitdb
into sample.ano sample.wrp and sample.ind. The entries listed in
sample.nam are written to new.gen.
FILES
Database files:
The directories for database files are specified by the environment
variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB
(LiMB).
Index files are $GB/gbacc.idx for GenBank (this file is supplied
with each GenBank release), while the other databases
use .ind files generated by splitdb. Split database files MUST
have the following file extensions: .ano {annotation}, .wrp
{sequence} and .ind {index}. Thus, when creating database files
for pir1.dat with splitdb, the output files should be pir1.ano,
pir1.wrp and pir1.ind.
Temporary files:
NAMEFILE.fetch
PRELIMINARY.fetch
TMP.fetch
FOUND.fetch
FETCHDIR {temporary directory}
REMOTE EXECUTION
Where the databases can not be stored locally, FETCH can call
FETCH on another system and retrieve the results. To run
FETCH remotely, your .cshrc file should contain the following
lines:
setenv XYLEM_RHOST remotehostname
setenv XYLEM_USERID remoteuserid
where remotehostname is the name of the host on which the
databases reside (in XYLEM split format) and remoteuserid
is your userid on the remote system. When run remotely,
your local copy of FETCH will generate the following
commands:
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ...
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files
Because FETCH uses rsh and rcp, your home directory on both
the local and remote systems must have a world-readable
file called .rhosts, containing the names of trusted remote
hosts and your userid on each host. Before trying to get
FETCH to work remotely, make sure that you can rcp and
rsh to the remote host.
Obviously, remote execution of FETCH implies that FETCH
must already be installed on the remote host. When FETCH
runs another copy of FETCH remotely, it uses the -L option
(fetch -L) to ensure that the remote FETCH job executes,
rather than calling yet another FETCH on another host.
---------- Remote execution on more than 1 host -----------
If more than 1 remote host is available for running FETCH
(say, in a clustered environment where many servers mount
a common filesystem) the choice of a host can be determined
by the csh script choosehost, such that execution of
choosehost returns the name of a remote server. To use this
approach, the following script, called 'choosehost' should
be in your bin directory:
#!/bin/csh
# choosehost - choose a host to use for a remote job.
# This script rotates among servers listed in .rexhosts,
# by choosing the host at the top of the list and moving
# it to the bottom.
#Rotate the list, putting the current host to the bottom.
set HOST = `head -1 $home/.rexhosts`
set JOBID = $$
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
echo $HOST >> /tmp/.rexhosts.$JOBID
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
# Write out the current host name
echo $HOST
You must also have a file in your home directory called
.rexhosts, listing remote hosts, such as
graucho.cc.umanitoba.ca
harpo.cc.umanitoba.ca
chico.cc.umanitoba.ca
zeppo.cc.umanitoba.ca
Each time choosehost is called, choosehost will rotate the
names in the file. For example, starting with the .rexhosts
as shown, it will move graucho.cc.umanitoba.ca to the bottom
of the file, and write the line 'graucho.cc.umanitoba.ca'
to the standard output. The next time choosehost is
run, it would write 'harpo.cc.umanitoba.ca', and so on.
Depending on your local configuration, you may wish to
rewrite choosehost. All that is really necessary is that
echo `choosehost` should return the name of a valid host.
Once you have installed choosehost and tested it, you can
get FETCH to use choosehost simply by setting
setenv XYLEM_RHOST choosehost
in your .cshrc file.
--------------- Remote filesystems -----------------------
Finally, an alternative to remote execution is to remotely mount
the file system containing the databases across the network.
This has the advantage of simplicity, and means that the
databases are available for ALL programs on your local
workstation. However, it may still be advantageous to run
FETCH remotely, since that will shift much of the computational
load to another host.
BUGS
When retrieving entries directly from GenBank, FETCH uses the
Accession Number index file gbacc.idx. In this case, FETCH
can retrieve all entries containing a given accession number.
This capability makes it possible to retrieve an entry using a
secondary accession number. However if more than one entry
share a secondary accession number, all of those entries will
be retrieved. While this behavior might be a bit of an
annoyance at times, it can also be useful because it alerts
the user to the presence of other, related entries that might
be of interest.
SEE ALSO
getloc features
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,365 +0,0 @@
FINDKEY.DOC update 13 Mar 97
NAME
findkey - finds database entries containing one or more keywords
SYNOPSIS
findkey
findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile]
findkey [-P PIR_dataset] keywordfile [namefile findfile]
findkey [-G GenBank_dataset] keywordfile [namefile findfile]
DESCRIPTION
findkey uses the grep family of commands to find lines in database
annotation files containing one or more keywords. Next, identify
is called to create a .nam file, containing the names of entries
containing the keywords, and a .fnd file, containing the actual
lines from each entry containing hits. A PIR or GenBank dataset is
either a file containing one or more GenBank or PIR entries, or
the name of a XYLEM dataset created by splitdb. See FILES below
for a more detailed description.
INTERACTIVE USE
findkey prompts the user to set search parameters, using an interactive
menu:
___________________________________________________________________
FINDKEY - Version 12 Aug 94
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
___________________________________________________________________
Keyfile:
Dataset:
-------------------------------------------------------------------
Parameter Description Value
-------------------------------------------------------------------
1) Keyword Keyword to find thionin
2) Keyfile Get list of keywords from Keyfile
3) WhereToLook p:PIR v:VecBase p
GenBank - b:bacterial i:invertebrate
m:mamalian e:expressed seq. tag
g:phage l:plant
r:primate n:rna
d:rodent s:synthetic
u:unannotated a:viral
t:vertebrate x:patented
z:STS
G: GenBank dataset P: PIR dataset
-------------------------------------------------------------
Type number of your choice or 0 to continue:
0
Searching /home/psgendb/PIR/pir1.ano...
Sequence names will be written to thionin~pir.nam
Lines containing keyword(s) will be written to thionin~pir.fnd
Searching /home/psgendb/PIR/pir2.ano...
Sequence names will be written to thionin~pir.nam
Lines containing keyword(s) will be written to thionin~pir.fnd
Searching /home/psgendb/PIR/pir3.ano...
Sequence names will be written to thionin~pir.nam
Lines containing keyword(s) will be written to thionin~pir.fnd
As shown in the example above, the keyword thionin was specified
as the keyword to search for. By default, option 3 is set to p,
and the PIR protein database is searched. Messages describe the
progress of the search. Since PIR is broken up into two divisions
(new and protein) both are searched, but all output is written to
thionin~pir.nam and thionin~pir.fnd.
OPTIONS
(1,2) Which keywords to search for?
If you want to search for a single keyword, option 1 lets you type
the keyword, without having to create a file. To search for more
than one keyword, choose option 2, and specify the name of a
file containing the keywords. For example, entries containing
genes for antibiotic resistance might be found using the
following keyword file:
ampicillin
chloramphenicol
kanamycin
neomycin
tetracycline
Note: keyword searches are case insensitive.
As you might expect, it takes longer to search for multiple
keywords than a single keyword.
Options 1 & 2 are mutually exclusive. Setting one will negate the
other. If option 2 is chosen, the name of the keyword file will
appear at the top of the menu.
Finally, it is probably not a good idea to search GenBank
entries using very short keywords consisting only of letters.
This is because GenBank entries now include a /translation
field containing the amino acid sequence of each protein
coding sequence. Consequently, 3 or 4 letter keywords
consisting of legal amino acid symbols (eg. CAP, recA) will
turn up fairly often in protein translations.
(3) WhereToLook
Use this option to specify the database to be searched. In the
case of GenBank, only one division at a time may be searched.
User-created database subsets containing PIR (P) or GenBank (G)
entries may also be searched. User-created database subsets
must be in the .ano/.wrp/.ind form created by splitdb.
OUTPUT
The output filenames take the following form:
name~ex1.ex2
The 'name' part of the filename is either the keyword searched for,
if option 1 was chosen, or the name of the keyword file, if option 2
obtains. 'ex1' indicates the database division that was searched. For
PIR and VecBase, ex1 is 'pir' and 'vec', respectively. For GenBank,
ex1 is as follows:
bct - bacterial
inv - invertebrate
mam - other mammalian
est - expressed sequence tag
phg - phage
pln - plant (includes fungi)
pri - primate
rna - structural RNAs
rod - rodent
syn - synthetic sequences
sts - sequence tagged sites
una - unannotated (new) sequences
vrl - viral
vrt - other vertebrate
'ex2' distinguishes the files containing the names of entries
containing keywords (.nam) and the files containing the lines found
in each entry (.fnd).
The .nam file can be used directly as a namefile for fetch, getloc,
or getob.
COMMAND LINE USE
OPTIONS
p search PIR (default)
P PIR dataset search dbfile, containing PIR entries
v search VecBase
b search Genbank bacterial division
m search Genbank mammalian division
g search Genbank phage division
r search Genbank primate division
d search Genbank rodent division
u search Genbank unannotated division
t search Genbank vertebrate division
i search Genbank invertebrate division
l search Genbank plant division
n search Genbank rna division
s search Genbank synthetic division
a search Genbank viral division
x search Genbank patented division
e search Genbank exp.seq.tag division
z search GenBank STS division
S search GenBank Genom. Survey division
h search GenBank High Thrput. division
G GenBank dataset search dbfile, containing GenBank entries
L force execution of findkey on local host
even if $XYLEM_RHOST is set. See "REMOTE
EXECUTION" below
FILES
keywordfile - contains keywords to search for
namefile - LOCUS names of hits are written to this file
findfile - for each hit, a report listing the LOCUS name and the
lines matching the keyword is written to this file.
If namefile and findfile are not specified on the command line,
filenames will be created as described above for interactive
use.
PIR_dataset
GenBank_dataset
This can be either a file of PIR entries, a file of GenBank entries,
or a XYLEM dataset created by splitdb. A file of PIR entries must
have the file extension ".pir". A file of GenBank entries must have
the file extension ".gen". A XYLEM dataset contains PIR entries split
among three files by splitdb: annotation (.ano), sequence (.wrp)
and index (.ind). These file extensions must be used!
When specifying a split dataset, only the base name needs to be
used. For example given a XYLEM dataset consisting of the files
myset.ano, myset.wrp and myset.ind, the following two commands
are equivalent:
findkey -P myset something.kw
findkey -P myset.ano something.kw
If the original .pir file had been used, the command would have
been
findkey -P myset.pir something.kw
The ability to work directly with .gen or .pir files is quite
convenient. However, since FINDKEY needs to work with a split
dataset, FINDKEY automatically splits .pir or .gen files into .ano, .wrp
and .ind files, which are removed when finished. This requires
extra disk space and execution time, which could be significant
for large datasets.
EXAMPLES
If the list of antibiotics shown above was stored in the file
antibiotic.kw, and option 3 was set to 'b', then the annotation
portion of the GenBank bacterial division would be searched, and
all lines containing any of these keywords would be written to
antibiotic~bac.fnd. The corresponding GenBank entry names would
appear in antibiotic~bac.nam.
The same keyword file could be used to search other database files.
If VecBase was searched, the output files would be antibiotic~vec.fnd
and antibiotic~vec.nam. These filename conventions make it easy
to search different database divisions, and to keep track of where
data came from.
Command line examples:
findkey thionin.kw
would be equivalent to the interactive example shown above. In
this case, the file thionin.kw contains the word 'thionin'.
(Note that since PIR is the default, -p need not be supplied.)
findkey -b antibiotic.kw drugs.nam drugs.fnd
would search the GenBank bacterial division for the keywords
contained in antibiotic.kw, and write the output to drugs.nam
and drugs.fnd.
FILES
Database files:
The directories for database files are specified by the environment
variables $GB (GenBank), $PIR (PIR/NBRF) and $VEC (VecBase).
Annotation (.ano) and index (.ind) are those generated by splitdb.
Temporary files:
$jobid.fnd
$jobid.nam
$jobid.grep
where $jobid is a unique jobid generated by the shell
REMOTE EXECUTION
Where the databases can not be stored locally, FINDKEY can call
FINDKEY on another system and retrieve the results. To run
FINDKEY remotely, your .cshrc file should contain the following
lines:
setenv XYLEM_RHOST remotehostname
setenv XYLEM_USERID remoteuserid
where remotehostname is the name of the host on which the
databases reside (in XYLEM split format) and remoteuserid
is your userid on the remote system. When run remotely,
your local copy of FINDKEY will generate the following
commands:
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ...
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files
Because FINDKEY uses rsh and rcp, your home directory on both
the local and remote systems must have a world-readable
file called .rhosts, containing the names of trusted remote
hosts and your userid on each host. Before trying to get
FINDKEY to work remotely, make sure that you can rcp and
rsh to the remote host.
Obviously, remote execution of FINDKEY implies that FINDKEY
must already be installed on the remote host. When FINDKEY
runs another copy of FINDKEY remotely, it uses the -L option
(findkey -L) to ensure that the remote FINDKEY job executes,
rather than calling yet another FINDKEY on another host.
---------- Remote execution on more than 1 host -----------
If more than 1 remote host is available for running FINDKEY
(say, in a clustered environment where many servers mount
a common filesystem) the choice of a host can be determined
by the csh script choosehost, such that execution of
choosehost returns the name of a remote server. To use this
approach, the following script, called 'choosehost' should
be in your bin directory:
#!/bin/csh
# choosehost - choose a host to use for a remote job.
# This script rotates among servers listed in .rexhosts,
# by choosing the host at the top of the list and moving
# it to the bottom.
#Rotate the list, putting the current host to the bottom.
set HOST = `head -1 $home/.rexhosts`
set JOBID = $$
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
echo $HOST >> /tmp/.rexhosts.$JOBID
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
# Write out the current host name
echo $HOST
You must also have a file in your home directory called
.rexhosts, listing remote hosts, such as
graucho.cc.umanitoba.ca
harpo.cc.umanitoba.ca
chico.cc.umanitoba.ca
zeppo.cc.umanitoba.ca
Each time choosehost is called, choosehost will rotate the
names in the file. For example, starting with the .rexhosts
as shown, it will move graucho.cc.umanitoba.ca to the bottom
of the file, and write the line 'graucho.cc.umanitoba.ca'
to the standard output. The next time choosehost is
run, it would write 'harpo.cc.umanitoba.ca', and so on.
Depending on your local configuration, you may wish to
rewrite choosehost. All that is really necessary is that
echo `choosehost` should return the name of a valid host.
Once you have installed choosehost and tested it, you can
get FINDKEY to use choosehost simply by setting
setenv XYLEM_RHOST choosehost
in your .cshrc file.
--------------- Remote filesystems -----------------------
Finally, an alternative to remote execution is to remotely mount
the file system containing the databases across the network.
This has the advantage of simplicity, and means that the
databases are available for ALL programs on your local
workstation. However, it may still be advantageous to run
XYLEM remotely, since that will shift much of the computational
load to another host.
BUGS
At present, regular expression characters cannot be used for
keyword searches.
SEE ALSO
grep(1V) identify splitdb
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,65 +0,0 @@
GETLOC.DOC update 30 May 95
NAME
getloc - retrieve database entries listed in namefile to outfile.
SYNOPSIS
getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile
DESCRIPTION
getloc reads a list of names from namefile and recreates
entries by combining the annotation and sequence portions of each
entry from anofile and seqfile. getloc will work most quickly
when the namefile is in alphabetical order, but it will also
work on unsorted lists. The following options affect the output:
a Write annotation portions of entries only, terminated by '//'.
seqfile is not included on command line.
s Write sequence data only, in Pearson (.wrp) format.
anofile is not included on commandline.
f Write each entry to a separate file. The filename will
consist of the LOCUS name, followed by .ano for annotation
only, .wrp for sequence only, or .gen for complete GenBank
format.
c namefile contains accession numbers, rather than names
The following options identify the type of database being read:
g GenBank (default)
e EMBL
p PIR (NBRF)
v Vecbase
l LiMB
namefile consists of an alphabetically ordered list of LOCUS names,
each on a separate line. Indfile could be used to create a
namefile by simply editing out some subset of names. (This can also
be done using the Unix comm command.) If the entire indfile was
used, the entire database would be recreated, minus the header
information that might have been present in the original, but
deleted by splitdb.
NOTE
Getloc automatically expands leading blanks that have been
compressed using splitdb -c. See splitdb.doc for more information.
SEE ALSO
splitdb, comm(1).
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,327 +0,0 @@
GETOB 21 Dec 94
NAME
getob - Get an object from GenBank
SYNOPSIS
getob [-frcn] infile namefile anofile seqfile indfile message
[outfile] expfile
DESCRIPTION
getob extracts 'objects' (subsequences) from GenBank entries, using
the features table, and writes them to outfile (.out). A log
describing the construction of each object is written to message
(.msg). If -r is not set, a list of expressions that would recreate
the .out file if evaluated by getob -r, is written to expfile (.exp).
The following options are available:
f Write each entry to a separate file. The name will consist
of the entry name, and the extension '.obj'.
r Resolve expressions from namefile into objects.
Expressions take the form:
@[<database>::]<accession>:<location>
In effect, r makes it possible to use getob to resolve
features that span more than one entry, such as segmented
files. In the first run of the program, features that require
data from outside the entry in which they are defined will be
written to outfile with those externally-defined parts rep-
resented using the '@' notation described above. During a
subsequent run, the outfile from the previous run is used as
namefile. When r is set, all lines not beginning with '@' (ie.
name lines and sequence lines) are simply copied to the new
outfile. When an '@' is encountered, the expression is parsed
into accession number and location. The entry with the
specified accession number is located in indfile, and read from
anofile and seqfile. It is then evaluated, and the result
written to outfile in place of the '@' expression.
getob can also be used to get specific labeled objects from
a given entry. Examples:
@k30576:polyprotein
@k30576:/label=polyprotein
@x10345:/product="hsp70"
@j00879:group(1..2200,mutation_37)
The first two constructs given above are equivalent. Both
will extract the feature called polyprotein. The third
construct shows that any feature label can be specified. If
none is specified, as in the first example, then /label= is
assumed. One limitation, however, is that the label sought
must be unique within the entry in its first 15 characters
including double quotes ("). Otherwise, only the first
matching label expression will be evaluated. Finally, the
last example shows that a mutant sequence can be constructed
by first specifying an expression that evaluates to a
sequence (ie. 1..2200) and then a labeled expression that
upon evaluation, uses replace() to modify that sequence. The
usage shown in examples 3 & 4 above represent extensions to
the DDBJ/EMBL/GenBank Features Table Format.
As touched on briefly above, the r option makes it possible
to construct objects that include recursive references to
other entries (eg. segmented files) by iterative calls to
getob. The 'features' command automates this process. The basic
algorithm is as follows:
getob infile namefile anofile seqfile indfile ...
#Pull out all lines containing indirect references
grep '@' outfile > unresolved.grep
while (unresolved.grep is not empty)
#extract accession numbers to be retrieved
cut -c2-7 unresolved.grep > unresolved.nam
#retrieve the sequences into a new file, and create
#a database subset to be used by getob
fetch unresolved.nam new.gen
splitdb new.gen new.ano new.wrp new.ind
#run getob again to resolve indirect references
getob -r infile outfile new.ano new.wrp new.ind ...
#Pull out all lines containing indirect references
grep '@' outfile > unresolved.grep
end
c NAMEFILE contains accession numbers, rather than locus names
n By default, the qualifier 'codon_start' is used to determine
how many n's, if necessary, must be added to the 5' end of
CDS, mat_peptide, or sig_peptide, to preserve the reading
frame. To turn OFF this feature, -n must be set. -n must be set
for GenBank Releases 67.0 and earlier.
infile contains commands indicating what data is to be pulled from
each entry. Two types of output may be presented, GenBank or
OBJECTS. These are described below:
1) GenBank output - If the word 'GENBANK' is the first line in
infile, a pseudo-GenBank entry will be recreated. This option
is only intended for debugging purposes and will probably be
removed in later releases.
2) Object format - This option instructs getob to write part or
all of each sequence, along with site annotation, by specifying
feature key names. The syntax for infile is shown below:
Backus-Naur format: Example:
----------------------------------------------------------
OBJECTS OBJECTS
<feature key> tRNA
{<feature key> rRNA
. . . SITES
<feature key>} stem_loop
SITES
{<feature key>
. . .
<feature key>}
In the example above, getob is instructed to extract all tRNA or
rRNA sequences from each entry, and annotate the position of each
stem/loop structure. Note that the SITES coordinates written to the
file tell the positions of those SITES relative to the start of the
object, rather than the original location in the sequence. As above,
each word begins a separate line.
While the -r option does not use infile, at least a dummy infile
must be included in the command line. This dummy file need only
contain two lines:
OBJECTS
SITES
NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in
the input file will have no effect, the word SITES must still be
present after the last feature key.
namefile
namefile consists of a list of LOCUS names or accession numbers,
each on a separate line. Names or accession numbers should appear
in the order in which they appear in the database file. Unordered
namefiles will slow the progress of the search. Since only the
first non-blank field of each line in namefile is read, indfile
could be used to create a namefile. If the entire indfile was
used, the entire database file would be processed. A sample
namefile requesting four sequences by LOCUS name is shown below:
POTPR1A
POTPSTH2
POTPSTH21
POTSTHA
anofile, seqfile, and indfile
The database subset containing GenBank entries must be divided
among annotation, sequence and an index by splitdb.
message
message contains a log describing the parsing of each object.
For annotative purposes, qualifier lines from the object are
included along with the location expression being parsed.
The beginning of a typical message file is shown below:
GETOB Version 0.962 14 May 1992
POTPR1A:CDS1
join
(
295 603
1011 1355
)
/note="pathogenesis-related protein (prp1)"
/codon_start=1
/translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL
QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED
KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY
LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK"
//----------------------------------------------
In the example above, getob was instructed to retrieve all CDS
features from the database subset. The message for the entry
POTPR1A is shown, along with a reconstruction of the location
expression that was evaluated to create the object. In this
case, protein coding sequences from two exons had to be joined
to create the object.
outfile
outfile contains the actual objects constructed, consisting of
sites found and sequences. The beginning of a typical output file
is shown below:
>POTPR1A:CDS1
atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag
agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg
aagatttacaaaataagagccctttacttcttcaatctaatccaattcac
aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat
ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc
ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc
gaagataag
ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa
agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca
aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt
gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat
tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg
aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt
atccgttaccgagcctacattcagcctgttgatgcttcaaaatga
In the example, the CDS from entry POTPR1A has been written in
two chunks, corresponding to the two exon portions of the coding
sequence. Each location retrieved in constructing the object is
written as a separate block of sequence. By comparing message file
to outfile, it is possible to verify the correctness of the
operation.
Numbers are appended to the sequence names to indicate
which CDS in the entry has been retrieved. Thus, if two CDS
features were present, the second one would be named >POTPR1A:CDS2.
For compatibility with the FASTA programs of Pearson, the name line
begins with a '>'.
expfile
The expression evaluated to create this feature is written
to expfile:
>POTPR1A:CDS1
@J03679:join(295..603,1011..1355)
expfile is only created if -r is not set. It is intended as a way
of automating the creation of a feature expression file for use
in generating customized datasets. Expressions in expfile can be
deleted or modified, or new expressions added, to tailor the
dataset to individual needs. To generate a dataset from expfile:
getob -r infile expfile anofile seqfile indfile message outfile
EXTENSIONS TO THE FEATURE TABLE LANGUAGE
1) poly(<absolute_location>|<literal>|<feature_name>,x)
This operator evaluates an absolute location, literal, or
feature name (ie. any location not containing functional
operators) and writes it x times. The most obvious
application of poly is to create spacers to represent regions
of unknown sequence between sequences that are known. For
example, the restriction map of a 4kb EcoR1 fragment with a
Hind3 site 1000 bp from one end could be represented as follows:
join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc")
2) The following feature keys are recognized by GETOB, although
not included in the language definition. While they will not
appear in GenBank entries, they could be used in user-created
GenBank-format files:
contig
This feature key is meant to be used to assemble large
sequence segments from smaller segments, possibly using the
poly() operator.
chromosome
Intended to annotate the complete sequence of a chromosome. This
feature may be constructed by a join of two or more contigs.
Use of these keywords is illustrated in the features table
shown below, which could be used to construct a model of part
of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb:
contig join(J01619:1..13063,poly("n",7140),
J03939:1..1363,poly("n",14380),
X02306:complement(1..1622),poly("n",14710),
J04423:1..5793,poly("n",22500),
X03722:1..2400,poly("n",123750),
one-of(X05017:complement(1..1854),X05017:1..1854))
/label=Eco_contig8
/map=763.4-950.6kb
contig join(V00352:1..2412,poly("n",28800),M15273:1..3409)
/label=Eco_contig9
/map=972.9-1001.7kb
contig join(X02826:1..1357,poly("n",13540),
J01654:complement(1..2270))
/label=Eco_contig10
/map=1016.5-1031.4kb
chromosome join(Eco_contig8,poly("n",22300),
Eco_contig9,poly("n",14800),
Eco_contig10)
/label=Ecoli_chromosome
NOTES
1) If the const DEBUG is set to true in the Pascal source code, getob
writes messages to the standard output, indicating the progress of
processing for each entry read in. By default, DEBUG=false.
This feature is solely for debugging purposes and will be removed in
later releases.
2) GETOB automatically expands leading blanks that have been
compressed using splitdb -c. See splitdb.doc for more information.
SEE ALSO
features, splitdb, getloc
The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04
September 1, 1992
GenBank Release Notes for Release 79.0.
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,83 +0,0 @@
IDENTIFY update 3 Feb 94
NAME
identify - creates a file of locus names corresponding to lines
found by grep in a GenBank annotation file.
SYNOPSIS
identify grepfile indfile namefile findfile
DESCRIPTION
grepfile is created using the Unix grep command to search a .ano
file created by splitgb. For example, to find all lines containing
the word 'chlorophyll' in plant.ano, use
grep -n -i 'chlorophyll' plant.ano > plant.grep
In the example shown, the -n option causes each line written to
plant.grep to be preceded by the number of that line in plant.ano.
(The -i option causes grep to ignore case.) Identify can use the
indfile to determine which entry a given numbered line was found
in, and writes the corresponding LOCUS name to namefile. In
addition, all lines found in a given entry are re-written to
findfile without the line numbers, and preceded by the LOCUS name
for that entry.
EXAMPLES
Suppose you wanted to obtain a list of names for all plant
sequences which code for proteins. The task is complicated by the
fact that many fungal sequences are included in the GenBank plant
file. You could begin by searching plant.ano (containing all
GenBank plant entries) for the word 'Planta':
grep -n 'Planta' plant.ano > Planta.grep
However, we want to eliminate all fungal sequences, as well as all
sequences for RNAs other than mRNAs. If we create the file
bad.str containing the keywords
Mycophyta
tRNA
rRNA
uRNA
we can then type
grep -n -f bad.str plant.ano > bad.grep
bad.grep now contains all lines containing the offending keywords.
We next use identify to find the names of the entries found by
grep.
identify Planta.grep plant.ind Planta.nam Planta.fnd
identify bad.grep plant.ind bad.nam bad.fnd
Next, we can use the Unix comm command to compare the two .nam
files and produce an output file containing only names which are
present in Planta.nam but not bad.nam:
comm -23 Planta.nam bad.nam > plants.nam
The file plants.nam now contains names of either plant cDNA or
genomic sequences which do not code for structural RNAs.
At this point, getloc could be used to create a sub-database containing
only those entries listed in plants.nam. See documentation for
getloc for a more detailed discussion.
SEE ALSO
grep, fgrep, egrep, ngrep, comm, splitgb, getloc
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,23 +0,0 @@
;---------------------------------------------------------------------------
; FINDKEY/GDE Keyword File Instructions
;
; 1. Type in one or more keywords below,
; or
; Place cursor at end of this file and choose 'Include File' in the FILE
; menu to read in a file of keywords.
;
; 2. Choose 'Save Current File' in the File menu
; 3. Quit this window
;
; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT
; OUT THESE COMMENT LINES.
;
; NOTE: Put each keyword on a separate line
; SAMPLE KEYWORD FILE:
;
; maize
; corn
; Z.mays
; Zea
;---------------------------------------------------------------------------

View File

@ -1,25 +0,0 @@
;---------------------------------------------------------------------------
; FETCH/GDE Name/Accession File Instructions
;
; 1. Type in one or more LOCUS names or Accession #'s below,
; or
; Place cursor at end of this file and choose 'Include File' in the FILE
; menu to read in a file of names or accession #'s.
; or
; Copy names or accession #'s from another window and Paste into this window.
;
; 2. Choose 'Save Current File' in the File menu
; 3. Quit this window
;
; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT
; OUT THESE COMMENT LINES.
;
; NOTE: Put each name on a separate line
; SAMPLE NAME/ACCESSION FILE:
;
; X30412
; PSDRR1
; PEADRRG
;
;---------------------------------------------------------------------------

View File

@ -1,25 +0,0 @@
;---------------------------------------------------------------------------
; FEATURES/GDE Name File Instructions
;
; 1. Type in one or more GenBank LOCUS names below,
; or
; Place cursor at end of this file and choose 'Include File' in the FILE
; menu to read in a file of names.
;
; (NOTE: File can not contain accession numbers.)
;
; 2. Choose 'Save Current File' in the File menu
; 3. Quit this window
;
; FEATURES will then extract the appropriate sequences . YOU DON'T NEED TO EDIT
; OUT THESE COMMENT LINES.
;
; NOTE: Put each name on a separate line
; SAMPLE NAME FILE:
;
; PEADRRA
; PSDRR1
; PEADRRG
;
;---------------------------------------------------------------------------

View File

@ -1,56 +0,0 @@
printdoc update 3 Feb 94
NAME
printdoc - prints documentation files
SYNOPSIS
printdoc filename
DESCRIPTION
printdoc uses the file extension to decide how to print a
documentation file. If necessary, a filter such as pr or nroff
is used to format the file before sending to the appropriate
printer. A list of file extensions recognized by printdoc is
given below. If no file extension is given, or the extension is
not in the list, printdoc assumes .doc.
.doc - (default) Uses pr to print the text, using the default
settings provided by pr (56 text lines per page plus a 5 line
header and footer). Printing is at 12 cpi, front only. This works
reasonably well for most unformatted documentation files,
provided that the line length doesn't exceed 80 char. This
option assumes that a half-inch left margin is automatically
provided by the printer.
.tex - Assumes that document is already pre-formatted. Thus,
no headers or footers are provided, and it is assumed that
the top and bottom of pages are padded with blanks or header/
footer lines as needed. Form-feed characters (^L) may be
included in the text to force page breaks.
.ps - Assumes file is in PostScript format. Sends it to the
PostScript printer.
.nroff - Assumes file is formatted for use by nroff, using the
standard macro set (nroff -ms).
.nroff.me - Assumes file is formatted for use by nroff, using the
e macro set (nroff -me).
TRANSPORTATION NOTES
For reasons which should be obvious, this script needs major
rewriting at each site, since the available printers will
be of different types and have different names.
SEE ALSO
pr, pr(V), xlp, nroff
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca

View File

@ -1,123 +0,0 @@
prot2nuc update 10 Aug 94
NAME
prot2nuc - reverse translates protein into nucleic acid
SYNOPSIS
prot2nuc [-ln -gn] < input > output
DESCRIPTION
prot2nuc reads a file containing an amino acid sequence
and writes the corresponding reverse translated nucleic acid
sequence, using the standard IUPAC-IUB ambiguity codes to output.
The amino acid sequence may contain internal stop '*' characters.
That is, all legal amino acid characters will be processed.
-ln print n amino acids/codons per line. (default = 25)
-gn number the amino acid sequence every n amino acids/codons.
(default = 5)
If l is not evenly divisible by g, the defaults are used.
input - If the first line of the file begins with '>' or ';',
input will be read as the standard .wrp (Pearson) format,
such as that produced by getob:
>name
sequence lines
Otherwise, it will be assumed that the file ONLY contains
sequence, and all legal IUPAC/IUB DNA characters will be
read as sequence.
output - The output begins with a header, listing both the
1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559
(1968)], as well as the nucleic acid ambiguity codes [Cornish-
Bowden (1985) Nucl. Acids Res. 13:3021-3030.]. The amino acid
sequence, along with its reverse translation, are then printed on
lines of l amino acids/codons, numbering every g amino acids/codons.
Non-ambiguous nucleotides appear capitalized, while ambiguous
nucleotides are in lowercase. A sample output file appears below:
PROT2NUC Version 8/10/94
IUPAC-IUP AMINO ACID SYMBOLS
[J. Biol. Chem. 243, 3557-3559 (1968)]
Phe F Leu L Ile I
Met M Val V Ser S
Pro P Thr T Ala A
Tyr Y His H Gln Q
Asn N Lys K Asp D
Glu E Cys C Trp W
Arg R Gly G STOP *
Asx B Glx Z UNKNOWN X
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE
[Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.]
Symbol Meaning | Symbol Meaning
------------------------------------+---------------------------------
G Guanine | k G or T
A Adenine | s G or C
C Cytosine | w A or T
T Thymine | h A or C or T
U Uracil | b G or T or C
r Purine (A or G) | v G or C or A
y Pyrimidine (C or T) | d G or T or A
m A or C | n G or A or T or C
pI39
5 10 15 20
M E K K S L A A L S F L L L L V L F V A
ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn
AGyTTr TTrAGy TTrTTrTTrTTr TTr
25 30 35 40
Q E I V V T E A N T C E H L A D T Y R G
CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn
TTr AGr
45 50 55 60
V C F T N A S C D D H C K N K A H L I S
GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn
AGy TTr AGy
65 70
G T C H D W K C F C T Q N C
GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy
With the Universal Genetic code, ambiguity symbols make it possible
to represent all possible codons for an amino acid using two output
lines. It is important to realize that the ambiguities on each line
can not be combined. For example, CTn and TTr represent all codons for
Leucine. However, attempting to combine them into a single triplet,
yTn, would be incorrect. For example, TTT and TTC are codons for
Phenylalanine, not Leucine.
FUTURE PLANS
1. It wouldn't be hard to have the output printed as nucleic acid
sequences in Pearson format, so that the output could be read back
into GDE. I don't know why you would want to do this, but it could
be done.
2. Right now, only the Universal Genetic Code is used, but it should
be possible to read in alternative genetic codes, have prot2nuc
figure out the ambiguity rules (as is already done in ribosome) and
print out the appropriate ambiguous codons.
3. It might be useful to have each possible codon printed out, rather
than ambiguous codons. This would take up a lot more space and
wouldn't be as pretty. If there's a lot of demand I could do this.
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca

View File

@ -1,107 +0,0 @@
reform update 3 Feb 94
NAME
reform - reformats multiply-aligned sequences for printing.
SYNOPSIS
reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}]
or
ralign file parameters | reform [-gpcn] [-sn] [-ln] file
DESCRIPTION
g Gaps are to be represented by dashes (-).
p Bases which agree with the consensus are
represented by periods (.).
c Positions at which all sequences agree are
capitalized in the consensus.
n Sequence data is nucleic acid. The default is protein.
fx Specify input file format, where x is
r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics)
m Input file contains multiline format sequences already aligned,
as opposed to ralign output. This option is obsolete, and is
equivalent to -fp.
ln The output linelength is set to n.
Default is 70.
sn numbering starts with n (default=0)
file Sequence file as described in ralign docu-
mentation. reform needs to re-read the
sequence file read by ralign to get the
names of the sequences, which ralign ignores.
This filename is only included for ralign output.
If -m is set, file is ignored, and sequence names
must be read from the input.
Note that positions in the consensus at which no nucleotide is in the
majority are represented by n's (for nucleic acids) or x's (for proteins),
rather than periods, as in ralign.
Gaps in the input sequences may be represented by either blanks or dashes.
INPUT FILE FORMATS
(a) ralign (default, -fr)
As described in ralign documentation, the input file (which is assumed to
be ralign output) must have each sequence on a single long line. All
characters on a given line will be included in the alignment. All lines
must be exactly the same length. For example, if ralign had read
sequences from a file called 'allcab.seq' and written output to 'allcab.ralign',
the following command might be used:
reform allcab.seq <allcab.ralign >allcab.ref
(b) Pearson (-fp, -m)
Compatible with sequence files used by Pearson's fasta programs as shown:
>name1
sequence1
>name2
sequence2
...
>namen
sequencen
Sequences may run over many lines and line length does not have to be
uniform. However, both dashes ('-') and blanks (' ') will be read in
as gaps in the alignment. A right arrow (>) at the beginning of a line
indicates the name line at the beginning of a new sequence.
Any line beginning with a semicolon (';') will be considered a comment,
and will be ignored.
(c) MBCRR-MASE (Intelligenetics) (-fi)
Compatible with .mase files produced by MBCRR's mase and pima programs,
which use the Intelligenetics format as shown:
;one or more comment lines
name1
sequence1
;one or more comment lines
name2
sequence2
...
;one or more comment lines
namen
sequencen
Sequences may run over many lines and line length does not have to be
uniform. However, both dashes ('-') and blanks (' ') will be read in
as gaps in the alignment. Each sequence MUST begin with at least one
comment line. When a comment line is encountered, that signals the
beginning of a new sequence. The first line after the comment is read
as the name, and the sequence begins on the next line after that.
SEE ALSO ralign, mase
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,84 +0,0 @@
ribosome update 3 Feb 94
NAME
ribosome - translates nucleic acid into protein
SYNOPSIS
ribosome [-g gcfile] < input > output
DESCRIPTION
ribosome reads a file of one or more nucleic acid sequences
and writes the corresponding amino acid sequence, in the standard
one letter code, to output. Ribosome begins translating at the
first nucleotide in each input sequence and continues to the end.
If the length of the translated sequence is not divisible by 3,
ribosome pads the final codon with N's and attempts to use ambi-
guity rules to translate the final codon. Based on the genetic
code used, ribosome derives a set of rules to resolve all ambi-
guities that can possibly be resolved.
-g read in an alternative genetic code from gcfile. If this
option is not specified, ribosome uses the universal
genetic code.
gcfile - This file specifies an alternative genetic code. An
example is shown below. ribosome reads the first 64 legal
capital letters as amino acids. Consequently, lowercase letters
can be used for annotation purposes, as shown in the example.
All non-amino acid characters are ignored.
sgc2 - yeast mitochondrial genetic code
second position
first position ------------------------------- third position
(5' end) u c a g (3' end)
-----------------------------------------------------------------
u F S Y C u
F S Y C c
L S * W a
L S * W g
-----------------------------------------------------------------
c T P H R u
T P H R c
T P Q R a
T P Q R g
-----------------------------------------------------------------
a I T N S u
I T N S c
M T K R a
M T K R g
-----------------------------------------------------------------
g V A D G u
V A D G c
V A E G a
V A E G g
input - If the first line of the file begins with '>' or ';',
input will be read as the standard .wrp (Pearson) format,
such as that produced by getob:
>name
; one or more comment lines (optional)
sequence lines
Otherwise, it will be assumed that the file ONLY contains
sequence, and all legal IUPAC/IUB DNA characters will be
read as sequence.
SEE ALSO
getob
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,66 +0,0 @@
shuffle.doc update 3 Feb 94
SYNOPSIS
shuffle -sn [-wn -on]
DESCRIPTION
Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF
and Waterman MS (1984) On the statistical significance of nucleic
acid similarities. Nucl. Acids Res. 12:215-226.
-sn n is a random integer between 0 and 32767. This number
must be provided for each run.
-wn n is an integer, indicating the width of the window for
random localization. If w exceeds the length of a sequence,
or is negative, the entire sequence is scrambled as a single
window. This is also the case if w is not specified.
-on n is an integer, indicating the number of nucleotides
overlap between adjacent windows. It should never exceed
the window size. o defaults to 0 if not specified.
If w and o are specified, overlapping windows of w nucleotides
are shuffled, thus preserving the local characteristic base
composition. Windows overlap by o nucleotides.
If w and o are not specified, each sequence is shuffled globally,
thus preserving the overall base composition, but not the local
variations in composition.
Any number of sequences may be processed from a single input
file. In Pearson-format files, each new sequence begins with a
'>' comment line, indicating the name and a short description of
the sequence.
No distinction is made between protein or nucleic acid sequences.
That is, shuffle will read any of the following characters as
sequence:
T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,-
where '*' is the result of translating a stop codon, and '-'
is a gap generated during sequence alignment. Lowercase is
also accepted.
EXAMPLE
A sample output file is shown below. Note that the first two
lines of output are comment lines, listing the version of the
program and the parameters used in the run.
>SHUFFLE VERSION 11/ 8/93
>RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3
>BAZFAZ - Borborigmus azerbi F-actin-zeta gene
ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt
cacccttgggcagtg.....(etc.)
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,141 +0,0 @@
SPLITDB update 28 Mar 98
NAME
splitdb - split GenBank files into annotation, sequence, and index
SYNOPSIS
splitdb [-gepvlct] dbfile anofile seqfile indfile
DESCRIPTION
Splitdb splits a database (dbfile) among three files: anofile, seqfile
and indfile. Splitdb ignores any header information that might be in the
file and begins processing at the first entry.
anofile contains the annotation portion of each entry. Entries are
terminated with '//' or '///' (PIR only). Trailing blanks present in
dbfile are omitted in anofile.
seqfile contains the sequence data for each entry. Each sequence
entry begins with a header line, followed by sequence data on
succeeding lines of 75 characters per line. The header line
includes the header flag character '>' in column 1, followed by the
name, followed by the first 50 characters of the 1st
DEFINITION line. An example is shown below:
>UNHOR1 - Unicorn horn protein 1, complete cDNA sequence
attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc
...
Removal of blanks and numbers from sequence lines makes split
datasets about 8-9% smaller than the original GenBank files.
indfile is an index which tells the line numbers for each entry in
anofile and seqfile. It is assumed to be in alphabetical order by
name. Each line contains a name and accession number, followed by the
line numbers on which the annotation and sequence data begin in anofile
and seqfile, respectively. Thus the file plants.ind might contain:
A15660 TA156608 1 1
A15671 A15671 33 11
A15673 A15673 65 25
A15675 AK156751 97 36
A15677 BA156770 128 46
A16780 BA167807 160 57
A16782 A16782 192 70
ATHRPRP1C GM905105 225 83
etc...
Note that indfile is a perfectly legitimate .nam file, for use with
programs such as getloc, getob, or comm.
The following options identify the type of database being read:
-g GenBank (default)
-e EMBL
-p PIR (NBRF)
-v Vecbase
-l LiMB
Other options:
-c Compress 3 or more leading blanks in annotation lines
to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG
is the ASCII character specified by the Pascal const
CRUNCHOFFSET, which is set to 33 ("!") in the current
implementation. For each annotation line read, if the
number of leading blanks is >=3, splitdb sets CRUNCHCHAR
to CRUNCHOFFSET+the number of blanks. Thus, for lines
with 3, 4, or 5 leading blanks, CRUNCHCHAR would be
'$', '%' and '&', respectively. GETLOC and GETOB
automatically expand crunched blanks when CRUNCHFLAG
is encountered on an input line. Empirical observations
indicate that the -c option decreases the size of
GenBank files by about 10%.
This compression method may fail when the number of
leading blanks exceeds 127-CRUNCHOFFSET. However,
none of the above mentioned databases currently
supports any datafield with anywhere near that number
of leading blanks.
-t (GenBank only) Append all information in the first
ORGANISM to the end of each line in indfile. For example,
the entry which begins:
LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996
DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon
Zoological Gardens) mitochondrial D-loop DNA.
ACCESSION L76759
NID g1222584
KEYWORDS D-loop.
SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom,
ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male
DNA.
ORGANISM Mitochondrion Gorilla gorilla gorilla
Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata;
Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla.
might be indexed as
GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla
This is useful for taxonomic studies, or as a way of making
it easy to create subsets from a single index. Thus,
'grep gorilla primates.ind' would print all lines in the
file that contained the word gorilla. The output from
this command could be used as a .nam file for extracting
just gorilla sequences from a larger dataset using
fetch.
NOTES
1. Header lines that aren't part of entries are automatically
stripped out during processing. For example, in a file containing
GenBank entries, all lines up to the first occurrence of 'LOCUS'
starting in column 1, are ignored. Similarly for PIR, processing
begins on the first line containing 'ENTRY' beginning in column 1.
2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996,
have accession numbers of 8 characters, rather than 6. Previously
assigned accession numbers will remain at 6 characters. Splitdb has
been updated to write all accession numbers to the .ind file, left
justified in a field of 8 characters, in columns 14-21 of the .ind
file.
SEE ALSO
getloc, getob, comm(1) (Unix command).
AUTHOR
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB Canada R3T 2N2
Phone: 204-474-6085
FAX: 204-261-5732
frist@cc.umanitoba.ca
REFERENCE
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.

View File

@ -1,125 +0,0 @@
XYLEM.DOC update 10 Aug 1994
XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES
Brian Fristensky, University of Manitoba
Fristensky, B. (1993) Feature expressions: creating and manipulating
sequence datasets. Nucleic Acids Research 21:5997-6003.
SPLITDB - Splits files containing one or more GenBank entries into
annotation, sequence, and index files. Indexfiles can also serve as
namefiles for GETLOC. Sequence files are in the format required for
use with the Pearson programs (FASTA,LFASTA etc.).
GETLOC - Reads a file containing LOCUS names (namefile) and
retrieves either annotation, sequence, or both from a split
database or database subset created by SPLITDB.
FETCH - A c-shell script that provides a convenient menu-driven
front end for retrieval of database entries using GETLOC.
FINDKEY - A c-shell script that provides a convenient menu-driven
front end for keyword searches of database annotation files,
using IDENTIFY.
IDENTIFY- Given line-numbered output from grep, IDENTIFY uses the
index file to determine which entries contained the keywords
searched for by grep. It then produces a namefile for use by
GETLOC. Namefiles can serve as logical databases, and utilities
such as the Unix comm command can perform logical operations on
these namefiles to produce database subsets.
FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS
etc.) from each of the named entries, using the new
DDBJ/EMBL/GenBank International Features Table Format. A future
version will also allow the annotation of sites within objects that
are extracted.
DBSTAT - Calculates amino acid frequencies in a protein database.
RIBOSOME - Given a file of one or more nucleic acids (eg. output
from GETOB) , RIBOSOME translates them into protein, using either
the universal genetic code or an alternative genetic code supplied
by the user. All ambiguities that can be resolved are translated.
PROT2NUC - reverse translates a sequence from protein to nucleic
acid, using IUPAC-IUB ambiguity codes.
SHUFFLE - Given a random seed, shuffles each sequence in a Pearson-
format (.wrp) file. Shuffling is done locally in overlapping windows
across the length of a given sequence. The window size and overlap
length can be specified by the user.
REFORM - Reformats multiply aligned nucleic acid or protein
sequences for publication. Output from M. Waterman's RALIGN
program, or the MBCRR MASE editor, can be directly used as input.
A variety of options are available for representing gaps, consensus
sequences and other features.
Fristensky (Cornell) Sequence Analysis Package - General purpose
sequence analysis package written in Standard Pascal. Features
include: sequence numbering, formatting, & translation, restriction
site searches & mapping, matrix similarity searches, TESTCODE
analysis, base composition analysis. All programs are interactive
and read free-format, BIONET, and GenBank files.
XYLEM DATABASE TOOLS
----------
| .gen | getloc
|----------|<--------------------------
| GenBank | |
---------- |
| |
| splitgb |
/|\ |
/ | \ |
/ | \ |
/ | \ |
/ | \ |
/ | \ |
v v v |
---------- ---------- ---------- |
| .ano | | .wrp | | .ind | |
|----------| |----------| |----------| |
|annotation| | sequence | | index | |
---------- ---------- ---------- |
| \ | / |
| \ | / |
| \ | / |
| \ | / |
grep -n | \ | / |
| \ | / |
| | |
| | -------------------------------+
| ^ |
v | getob |
---------- ---------- v
| .grep | identify | .nam | ----------
|----------| --------->|----------| | .wrp |
| numbered | | LOCUS | ----------
|file lines| ---------- | eg. mRNA |
---------- | ^ | tRNA |
| | | rRNA |
| | | CDS |
--comm-- ----------
(logical operations on
sets of names)
Dr. Brian Fristensky
Dept. of Plant Science
University of Manitoba
Winnipeg, MB R3T 2N2 CANADA
204-474-6085
frist@cc.umanitoba.ca

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,10 +1,10 @@
CC = cc
#FLAGS = -g
OPENWINHOME = /usr/openwin
FLAGS = -m32
OPENWINHOME = ../usr
MFILE =
INCDIR = -I$(OPENWINHOME)/include
LIBDIR = -L$(OPENWINHOME)/lib
INCDIR = -I/usr/include/xview
LIBDIR = -L/usr/lib32
LIBS = -lxview -lolgx -lX11
libs.o = Alloc.o HGLfuncs.o

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,5 +0,0 @@
#/bin/csh
make all
cp Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool ../bin
rm Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool
rm *.o

Binary file not shown.

View File

@ -1,33 +1,33 @@
implicit integer (a-z)
parameter (maxn=1500,maxn2=3000)
parameter (fldmax=maxn2)
c parameter (maxn=625,fldmax=2*maxn)
parameter (maxn=1500,maxn2=3000)
parameter (fldmax=maxn2)
parameter (infinity=16000,sortmax=30000)
parameter (mxbits=(maxn*(maxn+1)+31)/32)
parameter (maxtloops=40)
parameter (maxsiz=10000)
integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn)
integer*2 vst(maxn*maxn),wst(maxn*maxn)
integer poppen(4),maxpen
real prelog
dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2),
. work1(maxn2,0:2),work2(maxn2),
dimension newnum(maxsiz),hstnum(fldmax),force(fldmax),
. numseq(fldmax), work(fldmax,0:2),
. stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30)
dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2)
c common /main/ newnum,hstnum,force,work1,work2,
common /main/ newnum,hstnum,force,work1,work2,
. stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n,
. numseq,poppen,prelog,maxpen,vst,wst1,wst2
common /main/ vst,wst,newnum,hstnum,force,numseq,work,stack,tstk,
. dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog
character*1 seq(maxsiz)
c character*5 inbuf
character*10 progtitle
character*30 seqlab
common /seq/ seq,seqlab
data progtitle/'crna'/
dimension list(100,4)
common /list/ list,listsz
common /nm/ vmin
data progtitle/'lrna'/
common /nm/ n,vmin
dimension basepr(maxn)
common /traceback/ basepr
@ -40,21 +40,3 @@ c character*5 inbuf
integer*2 tloop(maxtloops,2),numoftloops
common/tloops/tloop,numoftloops