clean: repo
This commit is contained in:
parent
a80e729e21
commit
cd056bb91b
63 changed files with 17 additions and 6147 deletions
|
@ -1,761 +0,0 @@
|
||||||
menu:File
|
|
||||||
|
|
||||||
item:test cmask output
|
|
||||||
itemmethod: kedit in1
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:colormask
|
|
||||||
|
|
||||||
item:New sequence <meta N>
|
|
||||||
itemmethod:echo "$Type$Name" > out1
|
|
||||||
itemmeta:n
|
|
||||||
itemhelp:new_sequence.help
|
|
||||||
|
|
||||||
arg:Name
|
|
||||||
argtype:text
|
|
||||||
arglabel:New Sequence name?
|
|
||||||
argtext:New
|
|
||||||
|
|
||||||
arg:Type
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Type?
|
|
||||||
argchoice:DNA/RNA:#
|
|
||||||
argchoice:Amino Acid:%
|
|
||||||
argchoice:Text:\"
|
|
||||||
argchoice:Mask:@
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:flat
|
|
||||||
|
|
||||||
item:Import Foreign Format
|
|
||||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
|
||||||
itemhelp:readseq.help
|
|
||||||
|
|
||||||
arg:INPUTFILE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of foreign file?
|
|
||||||
|
|
||||||
out:OUTPUTFILE
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:Export Foreign Format
|
|
||||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
|
||||||
itemhelp:readseq.help
|
|
||||||
|
|
||||||
arg:FORMAT
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:FASTA:8
|
|
||||||
argchoice:NEXUS:17
|
|
||||||
argchoice:Phylip v3.3:12
|
|
||||||
argchoice:IG/Stanford:1
|
|
||||||
argchoice:GenBank:2
|
|
||||||
argchoice:NBRF:3
|
|
||||||
argchoice:EMBL:4
|
|
||||||
argchoice:GCG:5
|
|
||||||
argchoice:DNA Strider:6
|
|
||||||
argchoice:Fitch:7
|
|
||||||
argchoice:Pearson:8
|
|
||||||
argchoice:Zuker:9
|
|
||||||
argchoice:Olsen:10
|
|
||||||
argchoice:Phylip v3.2:11
|
|
||||||
argchoice:Phylip v3.3:12
|
|
||||||
argchoice:Plain text:13
|
|
||||||
|
|
||||||
arg:OUTPUTFILE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Save as?
|
|
||||||
|
|
||||||
in:INPUTFILE
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
|
|
||||||
item:Save Selection
|
|
||||||
itemmethod: cat $SAVE_FUNC > $Name
|
|
||||||
itemhelp:save_selection.help
|
|
||||||
|
|
||||||
arg:SAVE_FUNC
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:File format
|
|
||||||
argchoice:Flat:in1
|
|
||||||
argchoice:Genbank:in2
|
|
||||||
argchoice:GDE/HGL:in3
|
|
||||||
|
|
||||||
arg:Name
|
|
||||||
argtype:text
|
|
||||||
arglabel:File name?
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
in:in2
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
in:in3
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
item:Print Selection
|
|
||||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
|
||||||
itemhelp:print_alignment.help
|
|
||||||
|
|
||||||
arg:SCALE
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Reduce printout by?
|
|
||||||
argmin:1
|
|
||||||
argmax:20
|
|
||||||
argvalue:1
|
|
||||||
|
|
||||||
arg:CMD
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Lpr:lpr
|
|
||||||
argchoice:Enscript Gaudy:enscript -G -q
|
|
||||||
argchoice:Enscript Two column:enscript -2rG
|
|
||||||
|
|
||||||
arg:PRINTER
|
|
||||||
argtype:text
|
|
||||||
arglabel:Which printer?
|
|
||||||
argtext:lp
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
menu:Edit
|
|
||||||
|
|
||||||
item:Sort
|
|
||||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
|
||||||
itemhelp:heapsortHGL.help
|
|
||||||
|
|
||||||
arg:PRIM_KEY
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:Group:group-ID
|
|
||||||
argchoice:type:type
|
|
||||||
argchoice:name:name
|
|
||||||
argchoice:Sequence ID:sequence-ID
|
|
||||||
argchoice:creator:creator
|
|
||||||
argchoice:offset:offset
|
|
||||||
arglabel:Primary sort field?
|
|
||||||
|
|
||||||
arg:SEC_KEY
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:None:
|
|
||||||
argchoice:Group:group-ID
|
|
||||||
argchoice:type:type
|
|
||||||
argchoice:name:name
|
|
||||||
argchoice:Sequence ID:sequence-ID
|
|
||||||
argchoice:creator:creator
|
|
||||||
argchoice:offset:offset
|
|
||||||
arglabel:Secondary sort field?
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:extract
|
|
||||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
menu:DNA/RNA
|
|
||||||
|
|
||||||
item:Translate...
|
|
||||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
|
||||||
|
|
||||||
arg:FRAME
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Which reading frame?
|
|
||||||
argchoice:First:1
|
|
||||||
argchoice:Second:2
|
|
||||||
argchoice:Third:3
|
|
||||||
argchoice:All six:6
|
|
||||||
|
|
||||||
arg:MNFRM
|
|
||||||
arglabel:Minimum length of AA sequence to translate?
|
|
||||||
argtype:slider
|
|
||||||
argmin:0
|
|
||||||
argmax:100
|
|
||||||
argvalue:20
|
|
||||||
|
|
||||||
arg:LTRCODE
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Translate to:
|
|
||||||
argchoice:Single letter codes:
|
|
||||||
argchoice:Triple letter codes:-3
|
|
||||||
|
|
||||||
arg:TBL
|
|
||||||
arglabel:Codon table?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:universal:1
|
|
||||||
argchoice:mycoplasma:2
|
|
||||||
argchoice:yeast:3
|
|
||||||
argchoice:Vert. mito.:4
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:gde
|
|
||||||
|
|
||||||
item:Dot plot
|
|
||||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
|
||||||
itemhelp:DotPlotTool.help
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Clustal alignment
|
|
||||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
|
||||||
|
|
||||||
itemhelp:clustal_help
|
|
||||||
|
|
||||||
arg:KTUP
|
|
||||||
argtype:slider
|
|
||||||
arglabel:K-tuple size for pairwise search
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:2
|
|
||||||
|
|
||||||
arg:WIN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Window size
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:4
|
|
||||||
|
|
||||||
arg:Trans
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Transitions weighted?
|
|
||||||
argchoice:Yes:/TRANSIT
|
|
||||||
argchoice:No:
|
|
||||||
|
|
||||||
arg:FIXED
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Fixed gap penalty
|
|
||||||
argmin:1
|
|
||||||
argmax:100
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:FLOAT
|
|
||||||
arglabel:Floating gap penalty
|
|
||||||
argtype:slider
|
|
||||||
argmin:1
|
|
||||||
argmax:100
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:REPORT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:View assembly report?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit in1.rpt&
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Variable Positions
|
|
||||||
itemmethod:varpos $REV < in1 > out1
|
|
||||||
|
|
||||||
arg:REV
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Highlight (darken)
|
|
||||||
argchoice:Conserved positions:
|
|
||||||
argchoice:variable positions:-rev
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:colormask
|
|
||||||
|
|
||||||
item:Phrap
|
|
||||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:SNAP
|
|
||||||
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
out:out1
|
|
||||||
outformat:text
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Find all <meta-f>
|
|
||||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
|
||||||
itemhelp:findall.help
|
|
||||||
itemmeta:f
|
|
||||||
|
|
||||||
arg:SEARCH
|
|
||||||
argtype:text
|
|
||||||
arglabel:Search String
|
|
||||||
|
|
||||||
arg:PRCNT
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Percent mismatch
|
|
||||||
argmin:0
|
|
||||||
argmax:75
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:CASE
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Case
|
|
||||||
argchoice:Upper equals lower:
|
|
||||||
argchoice:Upper not equal lower:-case
|
|
||||||
|
|
||||||
arg:UT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:U equal T?
|
|
||||||
argchoice:Yes:-u=t
|
|
||||||
argchoice:No:
|
|
||||||
argvalue:0
|
|
||||||
|
|
||||||
arg:MAT
|
|
||||||
arglabel:Match color
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:yellow:1
|
|
||||||
argchoice:violet:2
|
|
||||||
argchoice:red:3
|
|
||||||
argchoice:aqua:4
|
|
||||||
argchoice:green:5
|
|
||||||
argchoice:blue:6
|
|
||||||
argchoice:grey:11
|
|
||||||
argchoice:black:8
|
|
||||||
argvalue:2
|
|
||||||
|
|
||||||
arg:MIS
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Mismatch color
|
|
||||||
argchoice:yellow:1
|
|
||||||
argchoice:violet:2
|
|
||||||
argchoice:red:3
|
|
||||||
argchoice:aqua:4
|
|
||||||
argchoice:green:5
|
|
||||||
argchoice:blue:6
|
|
||||||
argchoice:grey:11
|
|
||||||
argchoice:black:8
|
|
||||||
argvalue:7
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:colormask
|
|
||||||
|
|
||||||
item:Sequence Consensus
|
|
||||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
|
||||||
itemhelp:MakeCons.help
|
|
||||||
|
|
||||||
arg:METHOD
|
|
||||||
arglabel:Method
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:IUPAC:-iupac
|
|
||||||
argchoice:Majority:-majority $PERCENT
|
|
||||||
|
|
||||||
arg:MASK
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Create a new:
|
|
||||||
argchoice:Sequence:
|
|
||||||
argchoice:Selection Mask: | Consto01mask
|
|
||||||
|
|
||||||
arg:PERCENT
|
|
||||||
arglabel:Minimum Percentage for Majority
|
|
||||||
argtype:slider
|
|
||||||
argmin:50
|
|
||||||
argmax:100
|
|
||||||
argvalue:75
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:gde
|
|
||||||
|
|
||||||
|
|
||||||
#Menu for DNA/RNA
|
|
||||||
|
|
||||||
item:blastn
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDBDNA
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:4
|
|
||||||
argmax:18
|
|
||||||
argvalue:12
|
|
||||||
|
|
||||||
arg:MATCH
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Match Score
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:5
|
|
||||||
|
|
||||||
arg:MMSCORE
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Mismatch Score
|
|
||||||
argmin:-10
|
|
||||||
argmax:-1
|
|
||||||
argvalue:-5
|
|
||||||
|
|
||||||
item:blastx
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
|
||||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
argvalue:3
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:CODE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Genetic Code
|
|
||||||
argchoice:Standard or Universal:0
|
|
||||||
argchoice:Vertebrate Mitochondrial:1
|
|
||||||
argchoice:Yeast Mitochondrial:2
|
|
||||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
|
||||||
argchoice:Invertebrate Mitochondrial:4
|
|
||||||
argchoice:Ciliate Macronuclear:5
|
|
||||||
argchoice:Protozoan Mitochondrial:6
|
|
||||||
argchoice:Plant Mitochondrial:7
|
|
||||||
argchoice:Echinodermate Mitochondrial:8
|
|
||||||
|
|
||||||
item:------------------------
|
|
||||||
|
|
||||||
item:Add a new DNA blast db
|
|
||||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
|
|
||||||
|
|
||||||
arg:sourcefile
|
|
||||||
argtype:text
|
|
||||||
arglabel: enter the file name
|
|
||||||
|
|
||||||
arg:menuname
|
|
||||||
argtype:text
|
|
||||||
arglabel: enter the name of the DB
|
|
||||||
|
|
||||||
menu:seq. datasets
|
|
||||||
|
|
||||||
item:-------------
|
|
||||||
item:add a new dataset
|
|
||||||
itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
|
|
||||||
|
|
||||||
arg:name
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the dataset name ?
|
|
||||||
|
|
||||||
arg:file
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the dataset file (in FASTA) ?
|
|
||||||
|
|
||||||
|
|
||||||
#Menu for Protein
|
|
||||||
menu:protein
|
|
||||||
item:blastp
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30 .; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
argvalue:3
|
|
||||||
|
|
||||||
item:tblastn
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
|
||||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:4
|
|
||||||
argmax:18
|
|
||||||
argvalue:12
|
|
||||||
|
|
||||||
arg:CODE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Genetic Code
|
|
||||||
argchoice:Standard or Universal:0
|
|
||||||
argchoice:Vertebrate Mitochondrial:1
|
|
||||||
argchoice:Yeast Mitochondrial:2
|
|
||||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
|
||||||
argchoice:Invertebrate Mitochondrial:4
|
|
||||||
argchoice:Ciliate Macronuclear:5
|
|
||||||
argchoice:Protozoan Mitochondrial:6
|
|
||||||
argchoice:Plant Mitochondrial:7
|
|
||||||
argchoice:Echinodermate Mitochondrial:8
|
|
||||||
|
|
||||||
|
|
||||||
item:Map View
|
|
||||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
|
||||||
itemhelp:mapview.help
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:PBL
|
|
||||||
arglabel:Pixel Between Lines
|
|
||||||
argtype:slider
|
|
||||||
argvalue:10
|
|
||||||
argmin:1
|
|
||||||
argmax:15
|
|
||||||
|
|
||||||
arg:NPP
|
|
||||||
arglabel:Nucleotides Per Pixel
|
|
||||||
argtype:slider
|
|
||||||
argvalue:1
|
|
||||||
argmin:1
|
|
||||||
argmax:20
|
|
||||||
|
|
||||||
arg:LWIDTH
|
|
||||||
arglabel:Line Thickness
|
|
||||||
argtype:slider
|
|
||||||
argvalue:2
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
|
|
||||||
item:--------------------------
|
|
||||||
item:Add a new protein blast db
|
|
||||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
|
|
||||||
|
|
||||||
arg:sourcefile
|
|
||||||
argtype:text
|
|
||||||
arglabel: Enter the file (in FASTA)
|
|
||||||
|
|
||||||
arg:menuname
|
|
||||||
argtype:text
|
|
||||||
arglabel: Enter the name of the DB
|
|
||||||
|
|
||||||
menu:Phylogeny
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip help
|
|
||||||
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
|
|
||||||
|
|
||||||
arg:FILE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which program?
|
|
||||||
argchoice:clique:clique.html
|
|
||||||
argchoice:consense:consense.html
|
|
||||||
argchoice:contchar:contchar.html
|
|
||||||
argchoice:contml:contml.html
|
|
||||||
argchoice:contrast:contrast.html
|
|
||||||
argchoice:discrete:discrete.html
|
|
||||||
argchoice:distance:distance.html
|
|
||||||
argchoice:dnaboot:dnaboot.html
|
|
||||||
argchoice:dnacomp:dnacomp.html
|
|
||||||
argchoice:dnadist:dnadist.html
|
|
||||||
argchoice:dnainvar:dnainvar.html
|
|
||||||
argchoice:dnaml:dnaml.html
|
|
||||||
argchoice:dnamlk:dnamlk.html
|
|
||||||
argchoice:dnamove:dnamove.html
|
|
||||||
argchoice:dnapars:dnapars.html
|
|
||||||
argchoice:dnapenny:dnapenny.html
|
|
||||||
argchoice:dollop:dollop.html
|
|
||||||
argchoice:dolmove:dolmove.html
|
|
||||||
argchoice:dolpenny:dolpenny.html
|
|
||||||
argchoice:draw:draw.html
|
|
||||||
argchoice:drawgram:drawgram.html
|
|
||||||
argchoice:drawtree:drawtree.html
|
|
||||||
argchoice:factor:factor.html
|
|
||||||
argchoice:fitch:fitch.html
|
|
||||||
argchoice:gendist:gendist.html
|
|
||||||
argchoice:kitsch:kitsch.html
|
|
||||||
argchoice:main:main.html
|
|
||||||
argchoice:mix:mix.html
|
|
||||||
argchoice:move:move.html
|
|
||||||
argchoice:neighbor:neighbor.html
|
|
||||||
argchoice:penny:penny.html
|
|
||||||
argchoice:protpars:protpars.html
|
|
||||||
argchoice:read.me.general:read.me.general.html
|
|
||||||
argchoice:restml:restml.html
|
|
||||||
argchoice:seqboot:seqboot.html
|
|
||||||
argchoice:sequence:sequence.html
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip 3.5
|
|
||||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which program to run?
|
|
||||||
argchoice:DNAPARS:dnapars
|
|
||||||
argchoice:DNABOOT:dnaboot
|
|
||||||
argchoice:DNAPENNY:dnapenny
|
|
||||||
argchoice:DNAML:dnaml
|
|
||||||
argchoice:DNAMLK:dnamlk
|
|
||||||
argchoice:DNACOMP:dnacomp
|
|
||||||
argchoice:DNAMOVE:dnamove
|
|
||||||
argchoice:DNAINVAR:dnainvar
|
|
||||||
argchoice:PROTPARS:protpars
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip DNA Distance methods
|
|
||||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
|
||||||
|
|
||||||
arg:EXPLAIN
|
|
||||||
argtype:text
|
|
||||||
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE
|
|
||||||
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:DNADIST+NEIGHBOR:
|
|
||||||
argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
|
||||||
|
|
||||||
arg:PROG
|
|
||||||
arglabel:Run ?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Run without Bootstrap:
|
|
||||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
|
||||||
|
|
||||||
arg:DNA
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of DNADIST outfile?
|
|
||||||
|
|
||||||
arg:NEI
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of NEIGHBOR outfile?
|
|
||||||
|
|
||||||
arg:TREE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of TREEFILE ?
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Phylip PROTEIN Distance methods
|
|
||||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:PROTDIST+NEIGHBOR:
|
|
||||||
argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
|
||||||
|
|
||||||
arg:PROG
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
|
||||||
argchoice:No Bootstrap:
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
menu:On-Line Res.
|
|
||||||
|
|
||||||
item:GDE for Linux resources at Bioafrica.net
|
|
||||||
itemmethod:netscape http://www.bioafrica.net &
|
|
||||||
|
|
||||||
item:-------------------------
|
|
||||||
item:add a new website
|
|
||||||
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
|
|
||||||
|
|
||||||
arg:name
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the site name
|
|
||||||
|
|
||||||
arg:url
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the URL (including http://)
|
|
|
@ -1,761 +0,0 @@
|
||||||
menu:File
|
|
||||||
|
|
||||||
item:test cmask output
|
|
||||||
itemmethod: kedit in1
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:colormask
|
|
||||||
|
|
||||||
item:New sequence <meta N>
|
|
||||||
itemmethod:echo "$Type$Name" > out1
|
|
||||||
itemmeta:n
|
|
||||||
itemhelp:new_sequence.help
|
|
||||||
|
|
||||||
arg:Name
|
|
||||||
argtype:text
|
|
||||||
arglabel:New Sequence name?
|
|
||||||
argtext:New
|
|
||||||
|
|
||||||
arg:Type
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Type?
|
|
||||||
argchoice:DNA/RNA:#
|
|
||||||
argchoice:Amino Acid:%
|
|
||||||
argchoice:Text:\"
|
|
||||||
argchoice:Mask:@
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:flat
|
|
||||||
|
|
||||||
item:Import Foreign Format
|
|
||||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
|
||||||
itemhelp:readseq.help
|
|
||||||
|
|
||||||
arg:INPUTFILE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of foreign file?
|
|
||||||
|
|
||||||
out:OUTPUTFILE
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:Export Foreign Format
|
|
||||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
|
||||||
itemhelp:readseq.help
|
|
||||||
|
|
||||||
arg:FORMAT
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:FASTA:8
|
|
||||||
argchoice:NEXUS:17
|
|
||||||
argchoice:Phylip v3.3:12
|
|
||||||
argchoice:IG/Stanford:1
|
|
||||||
argchoice:GenBank:2
|
|
||||||
argchoice:NBRF:3
|
|
||||||
argchoice:EMBL:4
|
|
||||||
argchoice:GCG:5
|
|
||||||
argchoice:DNA Strider:6
|
|
||||||
argchoice:Fitch:7
|
|
||||||
argchoice:Pearson:8
|
|
||||||
argchoice:Zuker:9
|
|
||||||
argchoice:Olsen:10
|
|
||||||
argchoice:Phylip v3.2:11
|
|
||||||
argchoice:Phylip v3.3:12
|
|
||||||
argchoice:Plain text:13
|
|
||||||
|
|
||||||
arg:OUTPUTFILE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Save as?
|
|
||||||
|
|
||||||
in:INPUTFILE
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
|
|
||||||
item:Save Selection
|
|
||||||
itemmethod: cat $SAVE_FUNC > $Name
|
|
||||||
itemhelp:save_selection.help
|
|
||||||
|
|
||||||
arg:SAVE_FUNC
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:File format
|
|
||||||
argchoice:Flat:in1
|
|
||||||
argchoice:Genbank:in2
|
|
||||||
argchoice:GDE/HGL:in3
|
|
||||||
|
|
||||||
arg:Name
|
|
||||||
argtype:text
|
|
||||||
arglabel:File name?
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
in:in2
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
in:in3
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
item:Print Selection
|
|
||||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
|
||||||
itemhelp:print_alignment.help
|
|
||||||
|
|
||||||
arg:SCALE
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Reduce printout by?
|
|
||||||
argmin:1
|
|
||||||
argmax:20
|
|
||||||
argvalue:1
|
|
||||||
|
|
||||||
arg:CMD
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Lpr:lpr
|
|
||||||
argchoice:Enscript Gaudy:enscript -G -q
|
|
||||||
argchoice:Enscript Two column:enscript -2rG
|
|
||||||
|
|
||||||
arg:PRINTER
|
|
||||||
argtype:text
|
|
||||||
arglabel:Which printer?
|
|
||||||
argtext:lp
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
menu:Edit
|
|
||||||
|
|
||||||
item:Sort
|
|
||||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
|
||||||
itemhelp:heapsortHGL.help
|
|
||||||
|
|
||||||
arg:PRIM_KEY
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:Group:group-ID
|
|
||||||
argchoice:type:type
|
|
||||||
argchoice:name:name
|
|
||||||
argchoice:Sequence ID:sequence-ID
|
|
||||||
argchoice:creator:creator
|
|
||||||
argchoice:offset:offset
|
|
||||||
arglabel:Primary sort field?
|
|
||||||
|
|
||||||
arg:SEC_KEY
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:None:
|
|
||||||
argchoice:Group:group-ID
|
|
||||||
argchoice:type:type
|
|
||||||
argchoice:name:name
|
|
||||||
argchoice:Sequence ID:sequence-ID
|
|
||||||
argchoice:creator:creator
|
|
||||||
argchoice:offset:offset
|
|
||||||
arglabel:Secondary sort field?
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:extract
|
|
||||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
menu:DNA/RNA
|
|
||||||
|
|
||||||
item:Translate...
|
|
||||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
|
||||||
|
|
||||||
arg:FRAME
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Which reading frame?
|
|
||||||
argchoice:First:1
|
|
||||||
argchoice:Second:2
|
|
||||||
argchoice:Third:3
|
|
||||||
argchoice:All six:6
|
|
||||||
|
|
||||||
arg:MNFRM
|
|
||||||
arglabel:Minimum length of AA sequence to translate?
|
|
||||||
argtype:slider
|
|
||||||
argmin:0
|
|
||||||
argmax:100
|
|
||||||
argvalue:20
|
|
||||||
|
|
||||||
arg:LTRCODE
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Translate to:
|
|
||||||
argchoice:Single letter codes:
|
|
||||||
argchoice:Triple letter codes:-3
|
|
||||||
|
|
||||||
arg:TBL
|
|
||||||
arglabel:Codon table?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:universal:1
|
|
||||||
argchoice:mycoplasma:2
|
|
||||||
argchoice:yeast:3
|
|
||||||
argchoice:Vert. mito.:4
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:gde
|
|
||||||
|
|
||||||
item:Dot plot
|
|
||||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
|
||||||
itemhelp:DotPlotTool.help
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Clustal alignment
|
|
||||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
|
||||||
|
|
||||||
itemhelp:clustal_help
|
|
||||||
|
|
||||||
arg:KTUP
|
|
||||||
argtype:slider
|
|
||||||
arglabel:K-tuple size for pairwise search
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:2
|
|
||||||
|
|
||||||
arg:WIN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Window size
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:4
|
|
||||||
|
|
||||||
arg:Trans
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Transitions weighted?
|
|
||||||
argchoice:Yes:/TRANSIT
|
|
||||||
argchoice:No:
|
|
||||||
|
|
||||||
arg:FIXED
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Fixed gap penalty
|
|
||||||
argmin:1
|
|
||||||
argmax:100
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:FLOAT
|
|
||||||
arglabel:Floating gap penalty
|
|
||||||
argtype:slider
|
|
||||||
argmin:1
|
|
||||||
argmax:100
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:REPORT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:View assembly report?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit in1.rpt&
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Variable Positions
|
|
||||||
itemmethod:varpos $REV < in1 > out1
|
|
||||||
|
|
||||||
arg:REV
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Highlight (darken)
|
|
||||||
argchoice:Conserved positions:
|
|
||||||
argchoice:variable positions:-rev
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:colormask
|
|
||||||
|
|
||||||
item:Phrap
|
|
||||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:SNAP
|
|
||||||
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
out:out1
|
|
||||||
outformat:text
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Find all <meta-f>
|
|
||||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
|
||||||
itemhelp:findall.help
|
|
||||||
itemmeta:f
|
|
||||||
|
|
||||||
arg:SEARCH
|
|
||||||
argtype:text
|
|
||||||
arglabel:Search String
|
|
||||||
|
|
||||||
arg:PRCNT
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Percent mismatch
|
|
||||||
argmin:0
|
|
||||||
argmax:75
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:CASE
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Case
|
|
||||||
argchoice:Upper equals lower:
|
|
||||||
argchoice:Upper not equal lower:-case
|
|
||||||
|
|
||||||
arg:UT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:U equal T?
|
|
||||||
argchoice:Yes:-u=t
|
|
||||||
argchoice:No:
|
|
||||||
argvalue:0
|
|
||||||
|
|
||||||
arg:MAT
|
|
||||||
arglabel:Match color
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:yellow:1
|
|
||||||
argchoice:violet:2
|
|
||||||
argchoice:red:3
|
|
||||||
argchoice:aqua:4
|
|
||||||
argchoice:green:5
|
|
||||||
argchoice:blue:6
|
|
||||||
argchoice:grey:11
|
|
||||||
argchoice:black:8
|
|
||||||
argvalue:2
|
|
||||||
|
|
||||||
arg:MIS
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Mismatch color
|
|
||||||
argchoice:yellow:1
|
|
||||||
argchoice:violet:2
|
|
||||||
argchoice:red:3
|
|
||||||
argchoice:aqua:4
|
|
||||||
argchoice:green:5
|
|
||||||
argchoice:blue:6
|
|
||||||
argchoice:grey:11
|
|
||||||
argchoice:black:8
|
|
||||||
argvalue:7
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:colormask
|
|
||||||
|
|
||||||
item:Sequence Consensus
|
|
||||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
|
||||||
itemhelp:MakeCons.help
|
|
||||||
|
|
||||||
arg:METHOD
|
|
||||||
arglabel:Method
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:IUPAC:-iupac
|
|
||||||
argchoice:Majority:-majority $PERCENT
|
|
||||||
|
|
||||||
arg:MASK
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Create a new:
|
|
||||||
argchoice:Sequence:
|
|
||||||
argchoice:Selection Mask: | Consto01mask
|
|
||||||
|
|
||||||
arg:PERCENT
|
|
||||||
arglabel:Minimum Percentage for Majority
|
|
||||||
argtype:slider
|
|
||||||
argmin:50
|
|
||||||
argmax:100
|
|
||||||
argvalue:75
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:gde
|
|
||||||
|
|
||||||
|
|
||||||
#Menu for DNA/RNA
|
|
||||||
|
|
||||||
item:blastn
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDBDNA
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:4
|
|
||||||
argmax:18
|
|
||||||
argvalue:12
|
|
||||||
|
|
||||||
arg:MATCH
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Match Score
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:5
|
|
||||||
|
|
||||||
arg:MMSCORE
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Mismatch Score
|
|
||||||
argmin:-10
|
|
||||||
argmax:-1
|
|
||||||
argvalue:-5
|
|
||||||
|
|
||||||
item:blastx
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
|
||||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
argvalue:3
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:CODE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Genetic Code
|
|
||||||
argchoice:Standard or Universal:0
|
|
||||||
argchoice:Vertebrate Mitochondrial:1
|
|
||||||
argchoice:Yeast Mitochondrial:2
|
|
||||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
|
||||||
argchoice:Invertebrate Mitochondrial:4
|
|
||||||
argchoice:Ciliate Macronuclear:5
|
|
||||||
argchoice:Protozoan Mitochondrial:6
|
|
||||||
argchoice:Plant Mitochondrial:7
|
|
||||||
argchoice:Echinodermate Mitochondrial:8
|
|
||||||
|
|
||||||
item:------------------------
|
|
||||||
|
|
||||||
item:Add a new DNA blast db
|
|
||||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
|
|
||||||
|
|
||||||
arg:sourcefile
|
|
||||||
argtype:text
|
|
||||||
arglabel: enter the file name
|
|
||||||
|
|
||||||
arg:menuname
|
|
||||||
argtype:text
|
|
||||||
arglabel: enter the name of the DB
|
|
||||||
|
|
||||||
menu:seq. datasets
|
|
||||||
|
|
||||||
item:-------------
|
|
||||||
item:add a new dataset
|
|
||||||
itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file
|
|
||||||
|
|
||||||
arg:name
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the dataset name ?
|
|
||||||
|
|
||||||
arg:file
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the dataset file (in FASTA) ?
|
|
||||||
|
|
||||||
|
|
||||||
#Menu for Protein
|
|
||||||
menu:protein
|
|
||||||
item:blastp
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
argvalue:3
|
|
||||||
|
|
||||||
item:tblastn
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
|
||||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:4
|
|
||||||
argmax:18
|
|
||||||
argvalue:12
|
|
||||||
|
|
||||||
arg:CODE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Genetic Code
|
|
||||||
argchoice:Standard or Universal:0
|
|
||||||
argchoice:Vertebrate Mitochondrial:1
|
|
||||||
argchoice:Yeast Mitochondrial:2
|
|
||||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
|
||||||
argchoice:Invertebrate Mitochondrial:4
|
|
||||||
argchoice:Ciliate Macronuclear:5
|
|
||||||
argchoice:Protozoan Mitochondrial:6
|
|
||||||
argchoice:Plant Mitochondrial:7
|
|
||||||
argchoice:Echinodermate Mitochondrial:8
|
|
||||||
|
|
||||||
|
|
||||||
item:Map View
|
|
||||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
|
||||||
itemhelp:mapview.help
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:PBL
|
|
||||||
arglabel:Pixel Between Lines
|
|
||||||
argtype:slider
|
|
||||||
argvalue:10
|
|
||||||
argmin:1
|
|
||||||
argmax:15
|
|
||||||
|
|
||||||
arg:NPP
|
|
||||||
arglabel:Nucleotides Per Pixel
|
|
||||||
argtype:slider
|
|
||||||
argvalue:1
|
|
||||||
argmin:1
|
|
||||||
argmax:20
|
|
||||||
|
|
||||||
arg:LWIDTH
|
|
||||||
arglabel:Line Thickness
|
|
||||||
argtype:slider
|
|
||||||
argvalue:2
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
|
|
||||||
item:--------------------------
|
|
||||||
item:Add a new Protein blast db
|
|
||||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
|
|
||||||
|
|
||||||
arg:sourcefile
|
|
||||||
argtype:text
|
|
||||||
arglabel: Enter the file (in FASTA)
|
|
||||||
|
|
||||||
arg:menuname
|
|
||||||
argtype:text
|
|
||||||
arglabel: Enter the name of the DB
|
|
||||||
|
|
||||||
menu:Phylogeny
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip help
|
|
||||||
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
|
|
||||||
|
|
||||||
arg:FILE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which program?
|
|
||||||
argchoice:clique:clique.html
|
|
||||||
argchoice:consense:consense.html
|
|
||||||
argchoice:contchar:contchar.html
|
|
||||||
argchoice:contml:contml.html
|
|
||||||
argchoice:contrast:contrast.html
|
|
||||||
argchoice:discrete:discrete.html
|
|
||||||
argchoice:distance:distance.html
|
|
||||||
argchoice:dnaboot:dnaboot.html
|
|
||||||
argchoice:dnacomp:dnacomp.html
|
|
||||||
argchoice:dnadist:dnadist.html
|
|
||||||
argchoice:dnainvar:dnainvar.html
|
|
||||||
argchoice:dnaml:dnaml.html
|
|
||||||
argchoice:dnamlk:dnamlk.html
|
|
||||||
argchoice:dnamove:dnamove.html
|
|
||||||
argchoice:dnapars:dnapars.html
|
|
||||||
argchoice:dnapenny:dnapenny.html
|
|
||||||
argchoice:dollop:dollop.html
|
|
||||||
argchoice:dolmove:dolmove.html
|
|
||||||
argchoice:dolpenny:dolpenny.html
|
|
||||||
argchoice:draw:draw.html
|
|
||||||
argchoice:drawgram:drawgram.html
|
|
||||||
argchoice:drawtree:drawtree.html
|
|
||||||
argchoice:factor:factor.html
|
|
||||||
argchoice:fitch:fitch.html
|
|
||||||
argchoice:gendist:gendist.html
|
|
||||||
argchoice:kitsch:kitsch.html
|
|
||||||
argchoice:main:main.html
|
|
||||||
argchoice:mix:mix.html
|
|
||||||
argchoice:move:move.html
|
|
||||||
argchoice:neighbor:neighbor.html
|
|
||||||
argchoice:penny:penny.html
|
|
||||||
argchoice:protpars:protpars.html
|
|
||||||
argchoice:read.me.general:read.me.general.html
|
|
||||||
argchoice:restml:restml.html
|
|
||||||
argchoice:seqboot:seqboot.html
|
|
||||||
argchoice:sequence:sequence.html
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip 3.5
|
|
||||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which program to run?
|
|
||||||
argchoice:DNAPARS:dnapars
|
|
||||||
argchoice:DNABOOT:dnaboot
|
|
||||||
argchoice:DNAPENNY:dnapenny
|
|
||||||
argchoice:DNAML:dnaml
|
|
||||||
argchoice:DNAMLK:dnamlk
|
|
||||||
argchoice:DNACOMP:dnacomp
|
|
||||||
argchoice:DNAMOVE:dnamove
|
|
||||||
argchoice:DNAINVAR:dnainvar
|
|
||||||
argchoice:PROTPARS:protpars
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip DNA Distance methods
|
|
||||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
|
||||||
|
|
||||||
arg:EXPLAIN
|
|
||||||
argtype:text
|
|
||||||
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE
|
|
||||||
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:DNADIST+NEIGHBOR:
|
|
||||||
argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
|
||||||
|
|
||||||
arg:PROG
|
|
||||||
arglabel:Run ?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Run without Bootstrap:
|
|
||||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
|
||||||
|
|
||||||
arg:DNA
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of DNADIST outfile?
|
|
||||||
|
|
||||||
arg:NEI
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of NEIGHBOR outfile?
|
|
||||||
|
|
||||||
arg:TREE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of TREEFILE ?
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Phylip PROTEIN Distance methods
|
|
||||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:PROTDIST+NEIGHBOR:
|
|
||||||
argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
|
||||||
|
|
||||||
arg:PROG
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
|
||||||
argchoice:No Bootstrap:
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
menu:On-Line Res.
|
|
||||||
|
|
||||||
item:GDE for Linux resources at Bioafrica.net
|
|
||||||
itemmethod:netscape http://www.bioafrica.net &
|
|
||||||
|
|
||||||
item:-------------------------
|
|
||||||
item:add a new website
|
|
||||||
itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url
|
|
||||||
|
|
||||||
arg:name
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the site name
|
|
||||||
|
|
||||||
arg:url
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the URL (including http://)
|
|
791
CORE/.GDEmenus~
791
CORE/.GDEmenus~
|
@ -1,791 +0,0 @@
|
||||||
1menu:File
|
|
||||||
|
|
||||||
item:test cmask output
|
|
||||||
itemmethod: kedit in1
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:colormask
|
|
||||||
|
|
||||||
item:New sequence <meta N>
|
|
||||||
itemmethod:echo "$Type$Name" > out1
|
|
||||||
itemmeta:n
|
|
||||||
itemhelp:new_sequence.help
|
|
||||||
|
|
||||||
arg:Name
|
|
||||||
argtype:text
|
|
||||||
arglabel:New Sequence name?
|
|
||||||
argtext:New
|
|
||||||
|
|
||||||
arg:Type
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Type?
|
|
||||||
argchoice:DNA/RNA:#
|
|
||||||
argchoice:Amino Acid:%
|
|
||||||
argchoice:Text:\"
|
|
||||||
argchoice:Mask:@
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:flat
|
|
||||||
|
|
||||||
item:Import Foreign Format
|
|
||||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
|
||||||
itemhelp:readseq.help
|
|
||||||
|
|
||||||
arg:INPUTFILE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of foreign file?
|
|
||||||
|
|
||||||
out:OUTPUTFILE
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:Export Foreign Format
|
|
||||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
|
||||||
itemhelp:readseq.help
|
|
||||||
|
|
||||||
arg:FORMAT
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:FASTA:8
|
|
||||||
argchoice:NEXUS:17
|
|
||||||
argchoice:Phylip v3.3:12
|
|
||||||
argchoice:IG/Stanford:1
|
|
||||||
argchoice:GenBank:2
|
|
||||||
argchoice:NBRF:3
|
|
||||||
argchoice:EMBL:4
|
|
||||||
argchoice:GCG:5
|
|
||||||
argchoice:DNA Strider:6
|
|
||||||
argchoice:Fitch:7
|
|
||||||
argchoice:Pearson:8
|
|
||||||
argchoice:Zuker:9
|
|
||||||
argchoice:Olsen:10
|
|
||||||
argchoice:Phylip v3.2:11
|
|
||||||
argchoice:Phylip v3.3:12
|
|
||||||
argchoice:Plain text:13
|
|
||||||
|
|
||||||
arg:OUTPUTFILE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Save as?
|
|
||||||
|
|
||||||
in:INPUTFILE
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
|
|
||||||
item:Save Selection
|
|
||||||
itemmethod: cat $SAVE_FUNC > $Name
|
|
||||||
itemhelp:save_selection.help
|
|
||||||
|
|
||||||
arg:SAVE_FUNC
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:File format
|
|
||||||
argchoice:Flat:in1
|
|
||||||
argchoice:Genbank:in2
|
|
||||||
argchoice:GDE/HGL:in3
|
|
||||||
|
|
||||||
arg:Name
|
|
||||||
argtype:text
|
|
||||||
arglabel:File name?
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
in:in2
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
in:in3
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
item:Print Selection
|
|
||||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
|
||||||
itemhelp:print_alignment.help
|
|
||||||
|
|
||||||
arg:SCALE
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Reduce printout by?
|
|
||||||
argmin:1
|
|
||||||
argmax:20
|
|
||||||
argvalue:1
|
|
||||||
|
|
||||||
arg:CMD
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Lpr:lpr
|
|
||||||
argchoice:Enscript Gaudy:enscript -G -q
|
|
||||||
argchoice:Enscript Two column:enscript -2rG
|
|
||||||
|
|
||||||
arg:PRINTER
|
|
||||||
argtype:text
|
|
||||||
arglabel:Which printer?
|
|
||||||
argtext:lp
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
menu:Edit
|
|
||||||
|
|
||||||
item:Sort
|
|
||||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
|
||||||
itemhelp:heapsortHGL.help
|
|
||||||
|
|
||||||
arg:PRIM_KEY
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:Group:group-ID
|
|
||||||
argchoice:type:type
|
|
||||||
argchoice:name:name
|
|
||||||
argchoice:Sequence ID:sequence-ID
|
|
||||||
argchoice:creator:creator
|
|
||||||
argchoice:offset:offset
|
|
||||||
arglabel:Primary sort field?
|
|
||||||
|
|
||||||
arg:SEC_KEY
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:None:
|
|
||||||
argchoice:Group:group-ID
|
|
||||||
argchoice:type:type
|
|
||||||
argchoice:name:name
|
|
||||||
argchoice:Sequence ID:sequence-ID
|
|
||||||
argchoice:creator:creator
|
|
||||||
argchoice:offset:offset
|
|
||||||
arglabel:Secondary sort field?
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:extract
|
|
||||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
menu:DNA/RNA
|
|
||||||
|
|
||||||
item:Translate...
|
|
||||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
|
||||||
|
|
||||||
arg:FRAME
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Which reading frame?
|
|
||||||
argchoice:First:1
|
|
||||||
argchoice:Second:2
|
|
||||||
argchoice:Third:3
|
|
||||||
argchoice:All six:6
|
|
||||||
|
|
||||||
arg:MNFRM
|
|
||||||
arglabel:Minimum length of AA sequence to translate?
|
|
||||||
argtype:slider
|
|
||||||
argmin:0
|
|
||||||
argmax:100
|
|
||||||
argvalue:20
|
|
||||||
|
|
||||||
arg:LTRCODE
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Translate to:
|
|
||||||
argchoice:Single letter codes:
|
|
||||||
argchoice:Triple letter codes:-3
|
|
||||||
|
|
||||||
arg:TBL
|
|
||||||
arglabel:Codon table?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:universal:1
|
|
||||||
argchoice:mycoplasma:2
|
|
||||||
argchoice:yeast:3
|
|
||||||
argchoice:Vert. mito.:4
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:gde
|
|
||||||
|
|
||||||
item:Dot plot
|
|
||||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
|
||||||
itemhelp:DotPlotTool.help
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Clustal alignment
|
|
||||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
|
||||||
|
|
||||||
itemhelp:clustal_help
|
|
||||||
|
|
||||||
arg:KTUP
|
|
||||||
argtype:slider
|
|
||||||
arglabel:K-tuple size for pairwise search
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:2
|
|
||||||
|
|
||||||
arg:WIN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Window size
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:4
|
|
||||||
|
|
||||||
arg:Trans
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Transitions weighted?
|
|
||||||
argchoice:Yes:/TRANSIT
|
|
||||||
argchoice:No:
|
|
||||||
|
|
||||||
arg:FIXED
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Fixed gap penalty
|
|
||||||
argmin:1
|
|
||||||
argmax:100
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:FLOAT
|
|
||||||
arglabel:Floating gap penalty
|
|
||||||
argtype:slider
|
|
||||||
argmin:1
|
|
||||||
argmax:100
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:REPORT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:View assembly report?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit in1.rpt&
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Variable Positions
|
|
||||||
itemmethod:varpos $REV < in1 > out1
|
|
||||||
|
|
||||||
arg:REV
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Highlight (darken)
|
|
||||||
argchoice:Conserved positions:
|
|
||||||
argchoice:variable positions:-rev
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:colormask
|
|
||||||
|
|
||||||
item:Phrap
|
|
||||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:SNAP
|
|
||||||
itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
out:out1
|
|
||||||
outformat:text
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Find all <meta-f>
|
|
||||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
|
||||||
itemhelp:findall.help
|
|
||||||
itemmeta:f
|
|
||||||
|
|
||||||
arg:SEARCH
|
|
||||||
argtype:text
|
|
||||||
arglabel:Search String
|
|
||||||
|
|
||||||
arg:PRCNT
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Percent mismatch
|
|
||||||
argmin:0
|
|
||||||
argmax:75
|
|
||||||
argvalue:10
|
|
||||||
|
|
||||||
arg:CASE
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Case
|
|
||||||
argchoice:Upper equals lower:
|
|
||||||
argchoice:Upper not equal lower:-case
|
|
||||||
|
|
||||||
arg:UT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:U equal T?
|
|
||||||
argchoice:Yes:-u=t
|
|
||||||
argchoice:No:
|
|
||||||
argvalue:0
|
|
||||||
|
|
||||||
arg:MAT
|
|
||||||
arglabel:Match color
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:yellow:1
|
|
||||||
argchoice:violet:2
|
|
||||||
argchoice:red:3
|
|
||||||
argchoice:aqua:4
|
|
||||||
argchoice:green:5
|
|
||||||
argchoice:blue:6
|
|
||||||
argchoice:grey:11
|
|
||||||
argchoice:black:8
|
|
||||||
argvalue:2
|
|
||||||
|
|
||||||
arg:MIS
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Mismatch color
|
|
||||||
argchoice:yellow:1
|
|
||||||
argchoice:violet:2
|
|
||||||
argchoice:red:3
|
|
||||||
argchoice:aqua:4
|
|
||||||
argchoice:green:5
|
|
||||||
argchoice:blue:6
|
|
||||||
argchoice:grey:11
|
|
||||||
argchoice:black:8
|
|
||||||
argvalue:7
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:colormask
|
|
||||||
|
|
||||||
item:Sequence Consensus
|
|
||||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
|
||||||
itemhelp:MakeCons.help
|
|
||||||
|
|
||||||
arg:METHOD
|
|
||||||
arglabel:Method
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:IUPAC:-iupac
|
|
||||||
argchoice:Majority:-majority $PERCENT
|
|
||||||
|
|
||||||
arg:MASK
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Create a new:
|
|
||||||
argchoice:Sequence:
|
|
||||||
argchoice:Selection Mask: | Consto01mask
|
|
||||||
|
|
||||||
arg:PERCENT
|
|
||||||
arglabel:Minimum Percentage for Majority
|
|
||||||
argtype:slider
|
|
||||||
argmin:50
|
|
||||||
argmax:100
|
|
||||||
argvalue:75
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
|
|
||||||
out:out1
|
|
||||||
outformat:gde
|
|
||||||
|
|
||||||
|
|
||||||
#Menu for DNA/RNA
|
|
||||||
|
|
||||||
item:blastn
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDBDNA
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV-1 Seq. Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:4
|
|
||||||
argmax:18
|
|
||||||
argvalue:12
|
|
||||||
|
|
||||||
arg:MATCH
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Match Score
|
|
||||||
argmin:1
|
|
||||||
argmax:10
|
|
||||||
argvalue:5
|
|
||||||
|
|
||||||
arg:MMSCORE
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Mismatch Score
|
|
||||||
argmin:-10
|
|
||||||
argmax:-1
|
|
||||||
argvalue:-5
|
|
||||||
|
|
||||||
item:blastx
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDBDNA
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
|
|
||||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
argvalue:3
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:CODE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Genetic Code
|
|
||||||
|
|
||||||
argchoice:Standard or Universal:0
|
|
||||||
argchoice:Vertebrate Mitochondrial:1
|
|
||||||
argchoice:Yeast Mitochondrial:2
|
|
||||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
|
||||||
argchoice:Invertebrate Mitochondrial:4
|
|
||||||
argchoice:Ciliate Macronuclear:5
|
|
||||||
argchoice:Protozoan Mitochondrial:6
|
|
||||||
argchoice:Plant Mitochondrial:7
|
|
||||||
argchoice:Echinodermate Mitochondrial:8
|
|
||||||
|
|
||||||
item:------------------------
|
|
||||||
|
|
||||||
item:Add a new DNA blast db
|
|
||||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname;
|
|
||||||
|
|
||||||
arg:sourcefile
|
|
||||||
argtype:text
|
|
||||||
arglabel: enter the file name
|
|
||||||
|
|
||||||
arg:menuname
|
|
||||||
argtype:text
|
|
||||||
arglabel: enter the name of the DB
|
|
||||||
|
|
||||||
menu:seq. datasets
|
|
||||||
item:tttt
|
|
||||||
itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
|
||||||
out:OUTPUTFILE
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:HIV1POLDNA.fasta
|
|
||||||
itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
|
||||||
out:OUTPUTFILE
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:structure
|
|
||||||
itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
|
||||||
out:OUTPUTFILE
|
|
||||||
outformat:genbank
|
|
||||||
|
|
||||||
item:-------------
|
|
||||||
item:add a new dataset
|
|
||||||
itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
|
|
||||||
|
|
||||||
arg:name
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the dataset name ?
|
|
||||||
|
|
||||||
arg:file
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the dataset file (in FASTA) ?
|
|
||||||
|
|
||||||
|
|
||||||
#Menu for Protein
|
|
||||||
menu:protein
|
|
||||||
item:blastp
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
|
||||||
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDBPROT
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
|
|
||||||
argchoice:ttttt:/usr/local/biotools/db/tttt
|
|
||||||
argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp
|
|
||||||
argchoice:yyyy:/usr/local/biotools/db/test
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
argvalue:3
|
|
||||||
|
|
||||||
item:tblastn
|
|
||||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:flat
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:BLASTDB
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which Database
|
|
||||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
|
||||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
|
||||||
|
|
||||||
arg:Matrix
|
|
||||||
arglabel:Substitution Matrix:
|
|
||||||
argtype:choice_list
|
|
||||||
argchoice:PAM30:PAM30
|
|
||||||
argchoice:PAM70:PAM70
|
|
||||||
|
|
||||||
arg:WORDLEN
|
|
||||||
argtype:slider
|
|
||||||
arglabel:Word Size
|
|
||||||
argmin:4
|
|
||||||
argmax:18
|
|
||||||
argvalue:12
|
|
||||||
|
|
||||||
arg:CODE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Genetic Code
|
|
||||||
argchoice:Standard or Universal:0
|
|
||||||
argchoice:Vertebrate Mitochondrial:1
|
|
||||||
argchoice:Yeast Mitochondrial:2
|
|
||||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
|
||||||
argchoice:Invertebrate Mitochondrial:4
|
|
||||||
argchoice:Ciliate Macronuclear:5
|
|
||||||
argchoice:Protozoan Mitochondrial:6
|
|
||||||
argchoice:Plant Mitochondrial:7
|
|
||||||
argchoice:Echinodermate Mitochondrial:8
|
|
||||||
|
|
||||||
|
|
||||||
item:Map View
|
|
||||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
|
||||||
itemhelp:mapview.help
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:gde
|
|
||||||
insave:
|
|
||||||
|
|
||||||
arg:PBL
|
|
||||||
arglabel:Pixel Between Lines
|
|
||||||
argtype:slider
|
|
||||||
argvalue:10
|
|
||||||
argmin:1
|
|
||||||
argmax:15
|
|
||||||
|
|
||||||
arg:NPP
|
|
||||||
arglabel:Nucleotides Per Pixel
|
|
||||||
argtype:slider
|
|
||||||
argvalue:1
|
|
||||||
argmin:1
|
|
||||||
argmax:20
|
|
||||||
|
|
||||||
arg:LWIDTH
|
|
||||||
arglabel:Line Thickness
|
|
||||||
argtype:slider
|
|
||||||
argvalue:2
|
|
||||||
argmin:1
|
|
||||||
argmax:5
|
|
||||||
|
|
||||||
item:--------------------------
|
|
||||||
item:Add a new Protein blast db
|
|
||||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname;
|
|
||||||
|
|
||||||
arg:sourcefile
|
|
||||||
argtype:text
|
|
||||||
arglabel: Enter the file (in FASTA)
|
|
||||||
|
|
||||||
arg:menuname
|
|
||||||
argtype:text
|
|
||||||
arglabel: Enter the name of the DB
|
|
||||||
|
|
||||||
menu:Phylogeny
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip help
|
|
||||||
itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)&
|
|
||||||
|
|
||||||
arg:FILE
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which program?
|
|
||||||
argchoice:clique:clique.html
|
|
||||||
argchoice:consense:consense.html
|
|
||||||
argchoice:contchar:contchar.html
|
|
||||||
argchoice:contml:contml.html
|
|
||||||
argchoice:contrast:contrast.html
|
|
||||||
argchoice:discrete:discrete.html
|
|
||||||
argchoice:distance:distance.html
|
|
||||||
argchoice:dnaboot:dnaboot.html
|
|
||||||
argchoice:dnacomp:dnacomp.html
|
|
||||||
argchoice:dnadist:dnadist.html
|
|
||||||
argchoice:dnainvar:dnainvar.html
|
|
||||||
argchoice:dnaml:dnaml.html
|
|
||||||
argchoice:dnamlk:dnamlk.html
|
|
||||||
argchoice:dnamove:dnamove.html
|
|
||||||
argchoice:dnapars:dnapars.html
|
|
||||||
argchoice:dnapenny:dnapenny.html
|
|
||||||
argchoice:dollop:dollop.html
|
|
||||||
argchoice:dolmove:dolmove.html
|
|
||||||
argchoice:dolpenny:dolpenny.html
|
|
||||||
argchoice:draw:draw.html
|
|
||||||
argchoice:drawgram:drawgram.html
|
|
||||||
argchoice:drawtree:drawtree.html
|
|
||||||
argchoice:factor:factor.html
|
|
||||||
argchoice:fitch:fitch.html
|
|
||||||
argchoice:gendist:gendist.html
|
|
||||||
argchoice:kitsch:kitsch.html
|
|
||||||
argchoice:main:main.html
|
|
||||||
argchoice:mix:mix.html
|
|
||||||
argchoice:move:move.html
|
|
||||||
argchoice:neighbor:neighbor.html
|
|
||||||
argchoice:penny:penny.html
|
|
||||||
argchoice:protpars:protpars.html
|
|
||||||
argchoice:read.me.general:read.me.general.html
|
|
||||||
argchoice:restml:restml.html
|
|
||||||
argchoice:seqboot:seqboot.html
|
|
||||||
argchoice:sequence:sequence.html
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip 3.5
|
|
||||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
argtype:choice_list
|
|
||||||
arglabel:Which program to run?
|
|
||||||
argchoice:DNAPARS:dnapars
|
|
||||||
argchoice:DNABOOT:dnaboot
|
|
||||||
argchoice:DNAPENNY:dnapenny
|
|
||||||
argchoice:DNAML:dnaml
|
|
||||||
argchoice:DNAMLK:dnamlk
|
|
||||||
argchoice:DNACOMP:dnacomp
|
|
||||||
argchoice:DNAMOVE:dnamove
|
|
||||||
argchoice:DNAINVAR:dnainvar
|
|
||||||
argchoice:PROTPARS:protpars
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
item:Phylip DNA Distance methods
|
|
||||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
|
||||||
|
|
||||||
arg:EXPLAIN
|
|
||||||
argtype:text
|
|
||||||
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHOR+CONSENSE
|
|
||||||
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:DNADIST+NEIGHBOR:
|
|
||||||
argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
|
||||||
|
|
||||||
arg:PROG
|
|
||||||
arglabel:Run ?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Run without Bootstrap:
|
|
||||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
|
||||||
|
|
||||||
arg:DNA
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of DNADIST outfile?
|
|
||||||
|
|
||||||
arg:NEI
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of NEIGHBOR outfile?
|
|
||||||
|
|
||||||
arg:TREE
|
|
||||||
argtype:text
|
|
||||||
arglabel:Name of TREEFILE ?
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
item:Phylip PROTEIN Distance methods
|
|
||||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
|
||||||
|
|
||||||
arg:PROGRAM
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:PROTDIST+NEIGHBOR:
|
|
||||||
argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
|
||||||
|
|
||||||
arg:PROG
|
|
||||||
arglabel:Which method?
|
|
||||||
argtype:chooser
|
|
||||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
|
||||||
argchoice:No Bootstrap:
|
|
||||||
|
|
||||||
arg:PREEDIT
|
|
||||||
argtype:chooser
|
|
||||||
arglabel:Edit input before running?
|
|
||||||
argchoice:No:
|
|
||||||
argchoice:Yes:kedit infile;
|
|
||||||
|
|
||||||
in:in1
|
|
||||||
informat:genbank
|
|
||||||
inmask:
|
|
||||||
insave:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
menu:On-Line Res.
|
|
||||||
item:tytyt
|
|
||||||
itemmethod:netscape hnu[phoph &
|
|
||||||
item:SANBI
|
|
||||||
itemmethod:netscape again &
|
|
||||||
item:PlasmoDB
|
|
||||||
itemmethod:netscape http://www.plasmodb.org &
|
|
||||||
item:NCBI
|
|
||||||
itemmethod:netscape http://www.ncbi.nlm.nih.gov &
|
|
||||||
item:sanbi
|
|
||||||
itemmethod:netscape http://www.sanbi.ac.za &
|
|
||||||
item:SANBI
|
|
||||||
itemmethod:netscape http://www.sanbi.ac.za &
|
|
||||||
|
|
||||||
item:GDE for Linux resources at Bioafrica.net
|
|
||||||
itemmethod:netscape http://www.bioafrica.net &
|
|
||||||
|
|
||||||
item:-------------------------
|
|
||||||
item:add a new website
|
|
||||||
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
|
|
||||||
|
|
||||||
arg:name
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the site name
|
|
||||||
|
|
||||||
arg:url
|
|
||||||
argtype:text
|
|
||||||
arglabel:Enter the URL (including http://)
|
|
Binary file not shown.
BIN
CORE/BuiltIn.o
BIN
CORE/BuiltIn.o
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
CORE/DrawNA.o
BIN
CORE/DrawNA.o
Binary file not shown.
BIN
CORE/Edit.o
BIN
CORE/Edit.o
Binary file not shown.
Binary file not shown.
1056
CORE/FileIO.c~
1056
CORE/FileIO.c~
File diff suppressed because it is too large
Load diff
BIN
CORE/FileIO.o
BIN
CORE/FileIO.o
Binary file not shown.
BIN
CORE/Free.o
BIN
CORE/Free.o
Binary file not shown.
BIN
CORE/Genbank.o
BIN
CORE/Genbank.o
Binary file not shown.
BIN
CORE/HGLfile.o
BIN
CORE/HGLfile.o
Binary file not shown.
|
@ -6,7 +6,7 @@ DrawNA.c Free.c BuiltIn.c Edit.c Genbank.c Scroll.c ChooseFile.c \
|
||||||
CutCopyPaste.c HGLfile.c
|
CutCopyPaste.c HGLfile.c
|
||||||
|
|
||||||
LIBS= -lm -lxview -lolgx -lX11
|
LIBS= -lm -lxview -lolgx -lX11
|
||||||
CFLAGS= -g -L/usr/openwin/lib -I/usr/openwin/include
|
CFLAGS= -g -m32 -L/usr/lib32 -I/usr/include/xview
|
||||||
CC = cc
|
CC = cc
|
||||||
# Possible defines, SUN4 SGI DEC HGL
|
# Possible defines, SUN4 SGI DEC HGL
|
||||||
DEFINES = -DLINUX
|
DEFINES = -DLINUX
|
||||||
|
|
BIN
CORE/ParseMenu.o
BIN
CORE/ParseMenu.o
Binary file not shown.
BIN
CORE/Scroll.o
BIN
CORE/Scroll.o
Binary file not shown.
|
@ -1,8 +0,0 @@
|
||||||
|
|
||||||
========================[ Feb 1, 2002 1:57 PM ]========================
|
|
||||||
NOTE: CoreLib [002.003] FileOpen("HIV1POLDNA.fasta","r") failed
|
|
||||||
Cannot open input database file. Formating failed...
|
|
||||||
|
|
||||||
========================[ Feb 1, 2002 7:27 PM ]========================
|
|
||||||
NOTE: CoreLib [002.003] FileOpen("SIVPOLPRO.fasta","r") failed
|
|
||||||
Cannot open input database file. Formating failed...
|
|
191
CORE/infile
191
CORE/infile
|
@ -1,191 +0,0 @@
|
||||||
10 916
|
|
||||||
contig GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
|
|
||||||
W22140 AAAAANGCCC NNTTCNAAGN GGGGGGGGGG GGGGGGGATA TTTTGCNNAG
|
|
||||||
R.C.W27436 GGGNNNNGNN NNNNNNNNNN NNNNNNAANN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
W28762 TCTTGACATT TGTCTCCATT TCAGCAAAAC GANACCTGTG GTGAAGGGAT
|
|
||||||
#10005_2 2 GGnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
W28762 ---------- ---------- ---------- ---------- ----------
|
|
||||||
W28762(165 GGGNNGGNGN GGNNNGNNGN NNNGGNNNNN NNNTNTGTNT GNNGGNAGGG
|
|
||||||
#10005_2 2 GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
|
|
||||||
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
|
|
||||||
GGGGGCATGA TGNNGAGANC NAAAGAAAGN NCNGGGNGGG AAAAAAGAAG
|
|
||||||
NNNANNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
TTGTGTGCTG GCACTG---- ---------- ---------- ----------
|
|
||||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
NNTNTNANNN NNTTNTANAG TNAAAGNTTG GTNNNNGTNN NTTTGANGAA
|
|
||||||
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
|
|
||||||
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
|
|
||||||
GAGGNCCCTG GNGGGAGGGG GGNNCGNNTT TNNTGCNCCG GATGGAGGGN
|
|
||||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
nnnnnnnnGn AAnnnnnnnn nnnnnnnnnn nnnnnnnnnT TGAAAACTGT
|
|
||||||
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
GNTCAANNTG GGGNNNANAN NNGNNNTTGA NTGAAAATGG GGNAANCCCC
|
|
||||||
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
|
|
||||||
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC TGAA-n--Tc TACT---CCG
|
|
||||||
GGGGNTTTTN AAGNNTGTTT NTTTANAAGN AAGAGGGGGA NAAAATTTTT
|
|
||||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAACCGAAA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
TAnCCAAnTG GAATCCTAAG ACAATTTTCT -cCAwTTCA- sCAAC-CGAA
|
|
||||||
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAAC-CGAA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
CNTTTTNCCA GTCANCTGGT AAGTCCAAGC TGAA-N--TC TACTC--C-G
|
|
||||||
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC Tgaa----Tc TACTC--C-G
|
|
||||||
CATGTAA-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
|
||||||
TTNNTTCTNT NNCTNGNNNG GGGGGGGGGG GGGGCCCCCA ATAAGNNNTT
|
|
||||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
CCCTGTGGTG GAGGGAATTN CGTTCTTGGC NCTTCAGACT NCAGGGCAGG
|
|
||||||
---------- ---------- ---------- ----CAGACT GCAGGGNAGG
|
|
||||||
ACCCTGTGGT GrAGGGATTT GTGTGCT-GG CACTGCAGAC TGCAGGGCAG
|
|
||||||
ACCCTGTGGT GGAGGGAATT NCGTTCTTGG CNCTTCAGAC TNCAGGGCAG
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
CATGTAACCC C-NAAAGAGT TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
|
||||||
CATGTAa-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
|
||||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
|
||||||
GNGCNCAGAA NNAGGGGGGG GNGGGGGGGC CCCTTTNCTC CNAAAAATTT
|
|
||||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
|
||||||
AA-------- ---------- ---------- ---------- ----------
|
|
||||||
AA-------- ---------- ---------- ---------- ----------
|
|
||||||
GAAAGGGCTA GGGCCCAGGG GCTGGGAmAT GCATGAGGT- gCTCGGAGGA
|
|
||||||
GAAAGGGCTA GGGCCCAGGG GCTGGGAAAT GCATGAGGTT GCTCGGAGGA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
|
||||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
|
||||||
CTaAGCAGAT AGCAAAGaAG ATaATGGAGG AgCAATTGGT CATGGCCtTG
|
|
||||||
CCCCCCNTTT TGGGNAAGGG TGGGGGAAAN NNTTTGGGCA AANAGGGGAA
|
|
||||||
NNNNNNNNNN NNNAANNAGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
|
|
||||||
---------- -------AGG GCTAGGGCCC AGGGGCTGGG AAATGCATGA
|
|
||||||
---------- -------AGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
|
|
||||||
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
|
|
||||||
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
CTAAGCAGAT AGCAAAGNAG ATNATGGAGG ANCAATTGGT CATGGCCNTG
|
|
||||||
CTAAGCAGAT AGCAAAGAAG ATAATGGAGG AGCAATTGGT CATGGCCTTG
|
|
||||||
GTTTCCCTCk AAACaACgCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
|
||||||
AAAAAAAGNG GGGGGGGGCG GNTTCCANAA AANAANAAAG GGTNCACCCN
|
|
||||||
GG-TTCTNGG NGGAGCCTGG CTAAANCCAA GCACCAGCAC CTGTGAGTCT
|
|
||||||
GGTTGCTCGG AGGAGCCTGG CTAAATCCAA GCACCAGCAC CTGTGAGTCT
|
|
||||||
GG-TGCTCGG AGGAGCCTGG NTAAATCCAA GCACCAGCAC CTGTGAGTCT
|
|
||||||
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
|
|
||||||
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
GTTTCCCTCC AAACNACNCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
|
||||||
GTTTCCCTCk AAACAACGCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
|
||||||
tmGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
|
||||||
TNGGGGGNCN CCCCCCCCNC NNGNAAATCN TCCCTTTTTT TGANGGGCNA
|
|
||||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT NCCTCTTCTC
|
|
||||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
|
|
||||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
|
|
||||||
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCAAAAG
|
|
||||||
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCCNAAG
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
|
||||||
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
|
||||||
ACGAGAGCTG GGAGAAGAGG cAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
|
||||||
ANNNCATTTN CTTGNCCTTG AAGATTGACC NTGACTGCTC TGGCAAGAAG
|
|
||||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
|
||||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
|
||||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
|
||||||
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTkGA GGGAAACCAA
|
|
||||||
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTTGA GGGAAACCAA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
|
||||||
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
|
||||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGaTTTA gCCAGGCTCC
|
|
||||||
AAGAGGTGTC CTTACAGAGA CCTCTTTACT GACCAACTGA AGNATAGACT
|
|
||||||
CTTTCCCCCN AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
|
|
||||||
CTTTCCCCCC NAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
|
|
||||||
CTTTCCCCCA AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGNGTNGTT
|
|
||||||
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
|
|
||||||
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA NCCAGGCTCC
|
|
||||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA GCCAGGCTCC
|
|
||||||
tCCgAGkA-- CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
|
|
||||||
TACTGCTGGA CAATCTGCAT GGGCATCACC CCTCCCCGCA TGTAACCC-A
|
|
||||||
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
|
|
||||||
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
|
|
||||||
TGGAGGGAAA CCANGGCCAT GACCAATTGN TCCTCCATNA TCTNCTTTGC
|
|
||||||
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
|
|
||||||
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
TCCGAGC--A CCTCATGCAT GTCCCAGCCC CTGGGCCCTA GCCCT-----
|
|
||||||
TCCGAGc--A CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
AAAGAGGTGT CCAGAGCCAA GGCTTCTACC TTCATTGTCC CTCTCTGTGC
|
|
||||||
TATCTGCTNA GAGNANNCAA NNNAANNNA- ---------- ----------
|
|
||||||
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
|
|
||||||
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
|
|
||||||
TGAAGGTAGA AGCCTTGGCT CTGGACAmCT CTTTTGGG-t TACATGCG--
|
|
||||||
TGAAGGTAGA AGCCTTGGCT CTGGACACCT CTTTTGGG-T TACATGCGGT
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- TTCCTgCCCT GcAGTCTGAA GnGCCAAG-A -ACGnAATTC
|
|
||||||
TCAAGGAGTT CCATTCCAGG AGGAAGAGAT CTATACCCT- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC ACCTCTTTT-
|
|
||||||
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC AACTCTTTNG
|
|
||||||
GAGTAgA-tt cAGCTTGGAC TTACCAGnTG ACTGGnAAAA nGGGGGnTTn
|
|
||||||
GAGTANA-NN NA-------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- TTCCTNCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
|
|
||||||
---------- TTCCTGCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
|
|
||||||
CCTCCACCAC AGGGTTTCG- GTTGGGTGGn TTGGAAGA-A AATTGTCTTA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
GGGTTACATG CGGTGAGTAN ANNNA----- ---------- ----------
|
|
||||||
GGGTTACATG CGG--AGTAG ANTTCAGCTT GGACTTACCA GNTGACTGGN
|
|
||||||
CCCCATTTTC AnTCAAnnnC nnnTnTnnnC CCCAnnTTGA nCTTCnTCAA
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
CCTTCACCAC A-GGTNTCGT TTTGC-TGAA ATGG-AGACA AAT-GTCa-a
|
|
||||||
CCTrCACCAC AGGGTTTCG- GTTGs-TGAA wTGg-AGA-A AATTGTCTTA
|
|
||||||
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
AAAANGGGGG NTTNCCCCAT TTTCANTCAA NNNCNNNTNT NNNCCCCANN
|
|
||||||
AnnnACnnnn ACCAAnCTTT nACTnTAnAA nnnnnTnAnA nnCCCTnCCn
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
g-a------- ---------- ---------- ---------- ----------
|
|
||||||
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
TTGANCTTCN TCAAANNNAC NNNNACCAAN CTTTNACTNT ANAANNNNNT
|
|
||||||
nCAnACAnAn nnnnnnnCCn nnnCnnCnnn CCnCnCCnnC CC--------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
NANANNCCCT NCCNNCANAC ANANNNNNNN NCCNNNNCNN CNNNCCNCNC
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
---------- ---------- ---------- ---------- ----------
|
|
||||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
|
||||||
nnnnnnnnnn nnnnCC
|
|
||||||
---------- ------
|
|
||||||
---------- ------
|
|
||||||
---------- ------
|
|
||||||
CNNCCC---- ------
|
|
||||||
---------- ------
|
|
||||||
---------- ------
|
|
||||||
---------- ------
|
|
||||||
---------- ------
|
|
||||||
nnnnnnnnnn nnnnCC
|
|
|
@ -1,2 +0,0 @@
|
||||||
make
|
|
||||||
cp gde ../bin
|
|
BIN
CORE/libxview.a
BIN
CORE/libxview.a
Binary file not shown.
BIN
CORE/main.o
BIN
CORE/main.o
Binary file not shown.
34
CORE/outfile
34
CORE/outfile
|
@ -1,34 +0,0 @@
|
||||||
|
|
||||||
DNA parsimony algorithm, version 3.51c
|
|
||||||
|
|
||||||
|
|
||||||
One most parsimonious tree found:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
+-----------------------#10005_2 2
|
|
||||||
!
|
|
||||||
! +--------------------W28762(165
|
|
||||||
+--9 !
|
|
||||||
! ! ! +--R.C.W27652
|
|
||||||
! ! ! +-----------6
|
|
||||||
! ! ! ! +--#10005_2 2
|
|
||||||
! +--8 !
|
|
||||||
! ! +--5 +--W28762
|
|
||||||
! ! ! ! +--7
|
|
||||||
--1 ! ! ! +--4 +--W28762
|
|
||||||
! ! ! ! ! !
|
|
||||||
! +--2 +-----3 +-----R.C.W27652
|
|
||||||
! ! !
|
|
||||||
! ! +--------R.C.W27436
|
|
||||||
! !
|
|
||||||
! +-----------------W22140
|
|
||||||
!
|
|
||||||
+--------------------------contig
|
|
||||||
|
|
||||||
remember: this is an unrooted tree!
|
|
||||||
|
|
||||||
|
|
||||||
requires a total of 2453.000
|
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
((#10005_2_2,(W28762(165,(((R.C.W27652,#10005_2_2),(((W28762,W28762),
|
|
||||||
R.C.W27652),R.C.W27436)),W22140))),contig);
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,25 +0,0 @@
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
; FEATURES/GDE Accession File Instructions
|
|
||||||
;
|
|
||||||
; 1. Type in one or more GenBank Accession #'s below,
|
|
||||||
; or
|
|
||||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
|
||||||
; menu to read in a file of numbers.
|
|
||||||
;
|
|
||||||
; (NOTE: File can not contain LOCUS names.)
|
|
||||||
;
|
|
||||||
; 2. Choose 'Save Current File' in the File menu
|
|
||||||
; 3. Quit this window
|
|
||||||
;
|
|
||||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
|
||||||
; OUT THESE COMMENT LINES.
|
|
||||||
;
|
|
||||||
; NOTE: Put each accession # on a separate line
|
|
||||||
; SAMPLE ACCESSION FILE:
|
|
||||||
;
|
|
||||||
; M18249
|
|
||||||
; X13383
|
|
||||||
; J03680
|
|
||||||
;
|
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
|
|
|
@ -1,45 +0,0 @@
|
||||||
clu2ig update 3 Feb 94
|
|
||||||
|
|
||||||
NAME
|
|
||||||
clu2ig
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
clu2ig clustalfile > igfile
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
Converts interleaved .aln output from Clustal V into
|
|
||||||
sequential .ig (IntelliGenetics) format for use by MASE.
|
|
||||||
|
|
||||||
clustalfile:
|
|
||||||
CLUSTAL V multiple sequence alignment
|
|
||||||
|
|
||||||
name1 AACTTTCG
|
|
||||||
name2 ATCTTTCG
|
|
||||||
* ******
|
|
||||||
|
|
||||||
name1 CCTGCT
|
|
||||||
name2 CCCGCT
|
|
||||||
** ***
|
|
||||||
|
|
||||||
igfile:
|
|
||||||
;
|
|
||||||
name1
|
|
||||||
AACTTTCG
|
|
||||||
CCTGCT
|
|
||||||
:
|
|
||||||
name2
|
|
||||||
ATCTTTCG
|
|
||||||
CCCGCT
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,36 +0,0 @@
|
||||||
dbstat update 3 Feb 94
|
|
||||||
|
|
||||||
NAME
|
|
||||||
dbstat - calculates amino acid frequencies in a protein
|
|
||||||
database
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
dbstat
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
dbstat reads a file of one or more protein sequences
|
|
||||||
and calculates the amino acid frequencies, both in terms of
|
|
||||||
absolute numbers, and as a fraction of the total.
|
|
||||||
|
|
||||||
input - The input file is the standard .wrp (Pearson) format,
|
|
||||||
such as that produced by getob:
|
|
||||||
|
|
||||||
>name
|
|
||||||
; one or more comment lines (optional)
|
|
||||||
sequence lines
|
|
||||||
|
|
||||||
Comments begin either with semicolon (;) or right arrow (>)
|
|
||||||
characters.
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,30 +0,0 @@
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
; FEATURES/GDE Expression File Instructions 8/7/95
|
|
||||||
;
|
|
||||||
; 1. Type in one or more GenBank expressions below,
|
|
||||||
; or
|
|
||||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
|
||||||
; menu to read in a file of feature keys.
|
|
||||||
; or
|
|
||||||
; Copy expressions from another window and Paste into this window.
|
|
||||||
; 2. Choose 'Save Current File' in the File menu
|
|
||||||
; 3. Quit this window
|
|
||||||
;
|
|
||||||
; NOTES:
|
|
||||||
; 1) FEATURES will then extract the appropriate sequences.
|
|
||||||
; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES.
|
|
||||||
; 2) All expressions referring to GenBank entries must begin with a '@'
|
|
||||||
; Literals (ie. sequences to be embedded in the final output)
|
|
||||||
; do NOT begin with a '@'.
|
|
||||||
; 3) Put each expression on a separate line.
|
|
||||||
;
|
|
||||||
; SAMPLE EXPRESSION FILE:
|
|
||||||
;
|
|
||||||
; @J05635:83..1813
|
|
||||||
; ; EcoRI/NotI adaptor {this is a comment line}
|
|
||||||
; AATTGCGGCCGC
|
|
||||||
; @J05635:/product="flagellin A"
|
|
||||||
; @x17548:singed_trans
|
|
||||||
;
|
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
; FEATURES/GDE Feature Key File Instructions
|
|
||||||
;
|
|
||||||
; 1. Type in one or more GenBank FEATURE Table feature keys below,
|
|
||||||
; or
|
|
||||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
|
||||||
; menu to read in a file of feature keys.
|
|
||||||
;
|
|
||||||
; 2. Choose 'Save Current File' in the File menu
|
|
||||||
; 3. Quit this window
|
|
||||||
;
|
|
||||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
|
||||||
; OUT THESE COMMENT LINES.
|
|
||||||
;
|
|
||||||
; NOTE: Put each feature key on a separate line
|
|
||||||
; SAMPLE FEATURE KEY FILE:
|
|
||||||
;
|
|
||||||
; mRNA
|
|
||||||
; CDS
|
|
||||||
; mat_peptide
|
|
||||||
;
|
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
|
|
|
@ -1,407 +0,0 @@
|
||||||
|
|
||||||
FEATURES.DOC update 7 Feb 94
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
FEATURES - extracts features from GenBank entries
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
features
|
|
||||||
features expression
|
|
||||||
features [-f featurekey | -F keyfile]
|
|
||||||
[-n name |-a accession | -e expression |
|
|
||||||
-N namefile |-A accfile | -E expfile]
|
|
||||||
[-u dbfile | -U dbfile | -g ]
|
|
||||||
features -h
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
FEATURES extracts sequence objects from GenBank entries, using
|
|
||||||
the Features Table language. Features can be retrieved either by
|
|
||||||
specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by
|
|
||||||
evaluating expressions. In practical terms, FEATURES is actually
|
|
||||||
a user interface for GETOB, which performs the parsing
|
|
||||||
and extraction of sequence objects. FEATURES can be run either as
|
|
||||||
an interactive program or with command line arguments.
|
|
||||||
|
|
||||||
'features' with no arguments runs the program interactively.
|
|
||||||
'features' followed by an expression retrieves the data directly
|
|
||||||
from GenBank and evaluates the expression. The third form of
|
|
||||||
features requires all arguments to be accompanied by their
|
|
||||||
respective option flags. Finally, 'features -h' prints the
|
|
||||||
SYNOPSIS.
|
|
||||||
|
|
||||||
|
|
||||||
INTERACTIVE EXECUTION
|
|
||||||
FEATURES executed with no arguments runs interactively. An example of the
|
|
||||||
FEATURES menu is shown below:
|
|
||||||
|
|
||||||
___________________________________________________________________
|
|
||||||
FEATURES - Version 7 FEB 94
|
|
||||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
|
||||||
___________________________________________________________________
|
|
||||||
Features: tRNA
|
|
||||||
Entries: EPFCPCG
|
|
||||||
Dataset:
|
|
||||||
___________________________________________________________________
|
|
||||||
Parameter Description Value
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
1).................... FEATURES TO EXTRACT ....................> f
|
|
||||||
f:Type a feature at the keyboard
|
|
||||||
F:Read a list of features from a file
|
|
||||||
2)....................ENTRIES TO BE PROCESSED (choose one).....> n
|
|
||||||
Keyboard input - n:name a:accession # e:expression
|
|
||||||
File input - N:name(s) A:accession #(s) E:expression(s)
|
|
||||||
3)....................WHERE TO GET IT .........................> g
|
|
||||||
u:Genbank dataset g:complete GenBank database
|
|
||||||
U: same as u, but all entries
|
|
||||||
4)....................WHERE TO SEND IT ........................> a
|
|
||||||
s:Each feature to a separate file a:All output to same file
|
|
||||||
---------------------------------------------------------------
|
|
||||||
Type number of your choice or 0 to continue:
|
|
||||||
0
|
|
||||||
Messages will be written to EPFCPCG.msg
|
|
||||||
Final sequence output will be written to EPFCPCG.out
|
|
||||||
Expressions will be written to EPFCPCG.exp
|
|
||||||
Extracting features...
|
|
||||||
|
|
||||||
In the example, FEATURES was instructed to retrieve all tRNAs from
|
|
||||||
the GenBank entry EPFCPCG, which contains the Epifagus plastid
|
|
||||||
genome. By default, the GenBank database was the source of the
|
|
||||||
sequence. Messages indicate the progress of the job. A log describing
|
|
||||||
the extraction of each feature is written to EPFCPCG.msg, while the
|
|
||||||
extracted features themselves are written to EPFCPCG.out. Feature
|
|
||||||
expressions which could be used by FEATURES to reconstruct the .out
|
|
||||||
file, are written to EPFCPCG.exp.
|
|
||||||
|
|
||||||
The first step is to retrieve the EPFCPCG entry from GenBank, which is
|
|
||||||
accomplished by calling FETCH. Next, FEATURES extracts the specified
|
|
||||||
features from the entry.
|
|
||||||
|
|
||||||
An excerpt from EPFCPCG.msg is shown below, describing the extraction
|
|
||||||
of the fifth tRNA found in this entry. To create this tRNA, two exons
|
|
||||||
had to be joined. The qualifier line associated with this feature
|
|
||||||
indicates that it is a Histidine tRNA with a gtg anticodon.
|
|
||||||
|
|
||||||
|
|
||||||
EPFCPCG:anticodon gtg
|
|
||||||
complement
|
|
||||||
(
|
|
||||||
join
|
|
||||||
(
|
|
||||||
70023 70028
|
|
||||||
|
|
||||||
1 69
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
/product="transfer RNA-His"
|
|
||||||
/gene="His-tRNA"
|
|
||||||
/label=anticodon gtg
|
|
||||||
/note="anticodon gtg"
|
|
||||||
//----------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
The actual sequence for this feature, as written to EPFCPCG.out, is
|
|
||||||
written with each exon beginning a new line:
|
|
||||||
|
|
||||||
>EPFCPCG:anticodon gtg
|
|
||||||
ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat
|
|
||||||
gcgggttcaattcccgtcg
|
|
||||||
ttcgcc
|
|
||||||
|
|
||||||
Finally, the expression that was evaluated to create this feature is
|
|
||||||
written to EPFCPCG.exp:
|
|
||||||
|
|
||||||
>EPFCPCG:anticodon gtg
|
|
||||||
@M81884:anticodon gtg
|
|
||||||
|
|
||||||
If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES,
|
|
||||||
EPFCPCG.out would be recreated.
|
|
||||||
|
|
||||||
OPTIONS
|
|
||||||
1) FEATURES - choosing f will cause FEATURES to prompt for
|
|
||||||
a feature to extract. If you wish to extract several types of
|
|
||||||
features simultaneously (ie. F), you must construct a file listing the
|
|
||||||
feature keywords. The following example would retrieve both tRNA and
|
|
||||||
rRNA sequences:
|
|
||||||
|
|
||||||
OBJECTS
|
|
||||||
tRNA
|
|
||||||
rRNA
|
|
||||||
SITES
|
|
||||||
|
|
||||||
The words 'OBJECTS' and 'SITES' must enclose the feature keywords,
|
|
||||||
and each keyword must be on a separate line. For a rigorous
|
|
||||||
definition of the input file format, see the GETOB manual pages
|
|
||||||
(getob.doc).
|
|
||||||
|
|
||||||
In the menu shown above, f was chosen, and the user entered tRNA at
|
|
||||||
the prompt. Thus tRNA is now displayed on the Features: line. If
|
|
||||||
features had been specified from a file (suboption F) then the
|
|
||||||
filename containing the feature keywords would be displayed instead.
|
|
||||||
A complete list of legal feature keywords can be found in the GenBank
|
|
||||||
Release notes (gbrel.txt) under the subheading 'Feature Key Names'.
|
|
||||||
|
|
||||||
2) ENTRIES
|
|
||||||
n User is prompted for the name of an entry from which the
|
|
||||||
feature is to be extracted. The name of the entry will appear
|
|
||||||
on the 'Entries' line of the menu.
|
|
||||||
|
|
||||||
N User is prompted for a filename containing one or more
|
|
||||||
entry names. Each name must be on a separate line. The filename
|
|
||||||
will be displayed on the 'Entries' menu line.
|
|
||||||
|
|
||||||
a User is prompted for an accession number, which will appear
|
|
||||||
on the 'Entries' line of the menu.
|
|
||||||
|
|
||||||
A User is prompted for a filename for accession numbers. The filename
|
|
||||||
will appear on the 'Entries:' line.
|
|
||||||
|
|
||||||
e User is prompted for a GenBank Features expression of the
|
|
||||||
form accession:location. 'accession' refers to a GenBank
|
|
||||||
accession number, while 'location' is any legal feature location.
|
|
||||||
A brief description of location syntax can be found under the
|
|
||||||
subheading "Feature Location" in the GenBank release notes
|
|
||||||
(gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table:
|
|
||||||
Definition" Version 1.04 for a complete definition.
|
|
||||||
E User is prompted for a filename containing one or more Feature
|
|
||||||
expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning
|
|
||||||
with '@' are processed as expressions, and all other lines are
|
|
||||||
copied to the output file unchanged.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
|
|
||||||
The tRNA shown above could have been extracted by choosing
|
|
||||||
suboption e and entering either of the following expressions:
|
|
||||||
|
|
||||||
M81884:complement(join(70023..70028,1..69))
|
|
||||||
M81884:anticodon gtg
|
|
||||||
|
|
||||||
In the first example, the feature line from the original entry
|
|
||||||
is used as the location. In the second example, the feature is
|
|
||||||
found by its qualifier line, which also appeared in the
|
|
||||||
original entry. It must be noted that the qualifier line must
|
|
||||||
be unique from others in the same entry in its first 15
|
|
||||||
characters after the = .
|
|
||||||
|
|
||||||
The flaL protein coding region of B. licheniformis is described
|
|
||||||
in GenBank entry BLIFALA, accession number M60287 in the
|
|
||||||
following feature:
|
|
||||||
|
|
||||||
CDS 305..640
|
|
||||||
/note="flaD (sin) homologue"
|
|
||||||
/gene="flaL"
|
|
||||||
/label=ORF2
|
|
||||||
/codon_start=1
|
|
||||||
|
|
||||||
This feature could be retrieved using any of the following
|
|
||||||
expressions:
|
|
||||||
|
|
||||||
M60287:305..640
|
|
||||||
M60287:ORF2
|
|
||||||
M60287:/label=ORF2
|
|
||||||
M60287:/gene="flaL"
|
|
||||||
M60287:/note="flaD (sin) homologue"
|
|
||||||
|
|
||||||
Note that the /label= qualifier is special, in that labels are
|
|
||||||
specifically intended as unique tags on a feature. For labels,
|
|
||||||
only the label itself need be specified. Thus, /label=ORF2 is
|
|
||||||
equivalent to ORF2. For other qualifiers, the qualifier keyword
|
|
||||||
(eg. /note=) must be included.
|
|
||||||
|
|
||||||
3) DATABASE (WHERE TO GET IT) - By default, all entries processed will
|
|
||||||
be automatically retrieved from GenBank using FETCH. Specifying 'u'
|
|
||||||
(User-defined database subset) makes it possible to extract features
|
|
||||||
from GenBank subsets created by the user. Usually, retrieval of
|
|
||||||
features is much faster with a User-defined subset, so if you
|
|
||||||
frequently work with sets of genes, it is best to retrieve them
|
|
||||||
en-masse using FETCH, and work with them directly. For example, if
|
|
||||||
you had retrieved a set of Beta-globin sequences into a file called
|
|
||||||
'globin.gen', you could directly extract features from these entries
|
|
||||||
by specifying 'globin' or 'globin.gen' as your User-defined database.
|
|
||||||
If the file extension is '.gen', FEATURES will automatically create
|
|
||||||
temporary files called globin.ano, globin.wrp and globin.ind,
|
|
||||||
containing annotation, sequence, and an index, respectively. These
|
|
||||||
files will be read during feature extraction, and then discarded. If
|
|
||||||
you have already created such files using SPLITDB, simply specify
|
|
||||||
any of 'globin', 'globin.ano', etc. ie. anything, as long as it does
|
|
||||||
not have the .gen file extension.
|
|
||||||
|
|
||||||
'U' rather than 'u' causes ALL entries in the user-defined
|
|
||||||
database to be subset. This means that it is unnecessary to
|
|
||||||
specify entry options (eg -n, -N etc.), as these will be
|
|
||||||
ignored, if given.
|
|
||||||
|
|
||||||
One consequence of these conventions is that the individual GenBank
|
|
||||||
divisions can be processed directly. For example, suppose you were only
|
|
||||||
interested in rodent globins. You could directly access the rodent
|
|
||||||
division of GenBank by specifying the base name of that file division
|
|
||||||
(eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In
|
|
||||||
this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already
|
|
||||||
exist. Again, this approach is faster, since FEATURES would not have
|
|
||||||
to find and retrieve the sequences, but can read directly from the
|
|
||||||
database files. Finally, if you wanted to process all of the entries
|
|
||||||
in the database division, simply use -U. The user is warned that a
|
|
||||||
GenBank division is a huge amount of data, and processing every entry
|
|
||||||
could take a long time.
|
|
||||||
|
|
||||||
4) WHERE TO SEND IT - By default (a), the output for all entries goes
|
|
||||||
to a single set of files, whose names are chosen by FEATURES,
|
|
||||||
depending on the setting of option 2, Entries. If a single name (n) or
|
|
||||||
accession number (a) has been chosen, that will be used as
|
|
||||||
the raw filename. For example, if you were processing the entry
|
|
||||||
WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names
|
|
||||||
(N), accession numbers (A) or expressions (E) were read from a file,
|
|
||||||
the raw name of that file would be used eg. cellulase.nam would result
|
|
||||||
in cellulase.msg and cellulase.out. Finally, if a single expression
|
|
||||||
is processed (e), then the primary accession number in that
|
|
||||||
expression will be used for the filenames. In all cases, FEATURES
|
|
||||||
will tell you the names of the files being written.
|
|
||||||
|
|
||||||
Choosing suboption s, you can specify that the features created for
|
|
||||||
each entry be sent to separate files. In this case, each file will
|
|
||||||
have the name of that entry, with the extension .obj. However, all
|
|
||||||
messages and expressions will still go to a single file. While this
|
|
||||||
can be a convenient way of creating separate files when you need them,
|
|
||||||
this option still has the limitation of writing all features for a
|
|
||||||
given entry (if there are more than one) to the same file. Also,
|
|
||||||
successive resolution of features (anything requiring 'getob -r')
|
|
||||||
will not work with this option. This may be corrected in future
|
|
||||||
versions.
|
|
||||||
|
|
||||||
|
|
||||||
COMMAND LINE EXECUTION
|
|
||||||
|
|
||||||
There are two ways of running FEATURES from the command line. If only one
|
|
||||||
argument is supplied, that argument is interpreted as an expression, and
|
|
||||||
the result of that expression (ie. a sequence ) is written to the
|
|
||||||
standard output. .msg, .out and .exp files are NOT created. For example,
|
|
||||||
GenBank entry BACFLALA (M60287) contains the following feature:
|
|
||||||
|
|
||||||
CDS 95..271
|
|
||||||
/label=LORF-
|
|
||||||
/codon_start=1
|
|
||||||
/translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK
|
|
||||||
PSASIERSHSINPF"
|
|
||||||
Any of
|
|
||||||
|
|
||||||
features M60287:LORF-
|
|
||||||
features M60287:95..271
|
|
||||||
features M60287:/label=LORF-
|
|
||||||
|
|
||||||
would write the open reading frame to the standard output:
|
|
||||||
|
|
||||||
atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga
|
|
||||||
actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta
|
|
||||||
tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa
|
|
||||||
agaagtcattcaataaatcctttctga
|
|
||||||
|
|
||||||
This form of FEATURES is provided to make it easy to pipe output to
|
|
||||||
other programs for further processing. For example
|
|
||||||
|
|
||||||
features M60287:LORF- |ribosome >LORF.protein
|
|
||||||
|
|
||||||
would write the translation of the open reading frame to a file called
|
|
||||||
LORF.protein.
|
|
||||||
|
|
||||||
The full functionality of FEATURES can be accessed using arguments on
|
|
||||||
the command line. In particular, when there are multiple entries to be
|
|
||||||
processed, or multiple features within entries, it is much faster to
|
|
||||||
supply FEATURES with lists of entries, feature keys or expressions.
|
|
||||||
Command line options are similar to suboptions in menu items 1-3 above:
|
|
||||||
|
|
||||||
Feature keys:
|
|
||||||
-f key {feature key}
|
|
||||||
-F filename {file of feature keys}
|
|
||||||
|
|
||||||
Entries:
|
|
||||||
-n name {GenBank LOCUS name}
|
|
||||||
-N filename {file of GenBank LOCUS names}
|
|
||||||
-a accession {GenBank ACCESSION number}
|
|
||||||
-A filename {file of GenBank ACCESSION numbers}
|
|
||||||
-e expression {Feature Table expression}
|
|
||||||
-E filename {file of Feature Table expressions, each begin-
|
|
||||||
ning with '@'}
|
|
||||||
|
|
||||||
Databases:
|
|
||||||
-u filename {GenBank dataset}
|
|
||||||
-U filename { " " " " " " ,
|
|
||||||
process all entries ie. -nNaAeE options
|
|
||||||
will be ignored}
|
|
||||||
-g {GenBank}
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
|
|
||||||
features -f tRNA -n EPFCPCG
|
|
||||||
|
|
||||||
retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out,
|
|
||||||
and .exp files.
|
|
||||||
|
|
||||||
features -e M60287:LORF-
|
|
||||||
|
|
||||||
would retrieve the same open reading frame as in the earlier example.
|
|
||||||
|
|
||||||
|
|
||||||
Since the most time-consuming operation in FEATURES is sequence retrieval,
|
|
||||||
it is often best to retrieve frequently-used sequences as database
|
|
||||||
subsets. For example, a set of GenBank entries for chlorophyll a/b binding
|
|
||||||
protein genes might be stored in a file called CAB.gen.
|
|
||||||
|
|
||||||
features -f CDS -N CAB.nam -u CAB.gen
|
|
||||||
|
|
||||||
would generate the files CAB.msg, CAB.out and CAB.exp containing output
|
|
||||||
for all CDS features in the entries listed in the file CAB.nam.
|
|
||||||
|
|
||||||
features -E CAB.exp -u CAB.gen
|
|
||||||
|
|
||||||
would re-create the output file CAB.out.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BUGS
|
|
||||||
FEATURES does no preliminary error checking for syntax of
|
|
||||||
GenBank expressions prior to their evaluation. Expressions that can
|
|
||||||
not be evaluated will be flagged by GETOB in the .msg file.
|
|
||||||
|
|
||||||
At present, little checking is done to test for the presence or
|
|
||||||
correctness of input files. Some errors may cause the program to
|
|
||||||
crash.
|
|
||||||
|
|
||||||
For User-defined datasets, filename expansion is not performed.
|
|
||||||
|
|
||||||
FILES
|
|
||||||
Temporary files:
|
|
||||||
X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) }
|
|
||||||
UNRESOLVED.fea UNRESOLVED.out
|
|
||||||
FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
grep(1V) fetch getob splitdb
|
|
||||||
|
|
||||||
TRANSPORTATION NOTES
|
|
||||||
It should be fairly easy to get FEATURES to work even on systems
|
|
||||||
in which GenBank has not been formatted for the XYLEM package.
|
|
||||||
This is because FEATURES does not work directly on the database, but
|
|
||||||
rather retrieves all necessary sequences by calling FETCH. Thus,
|
|
||||||
statements like 'fetch FEA.nam FEA.gen' could be replaced with any
|
|
||||||
command that, given a file containing names or accession numbers,
|
|
||||||
returns a file containing GenBank entries. In principle, you
|
|
||||||
could even implement this sort of command to retrieve entries from
|
|
||||||
the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although
|
|
||||||
such a setup would undoubtedly be quite slow.
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,320 +0,0 @@
|
||||||
|
|
||||||
FETCH.DOC update 24 Feb 96
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
fetch - retrieves database entries by name or accession number
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
fetch {interactive mode}
|
|
||||||
fetch [options] namefile [output file] {batch mode}
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
fetch retrieves one or more entries from a database.
|
|
||||||
|
|
||||||
Interactive mode: fetch prompts the user to set search parameters,
|
|
||||||
using an interactive menu:
|
|
||||||
___________________________________________________________________
|
|
||||||
FETCH - Version 7 Feb 94
|
|
||||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
|
||||||
___________________________________________________________________
|
|
||||||
Namefile:
|
|
||||||
Outfile:
|
|
||||||
Database:
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Parameter Description Value
|
|
||||||
|
|
||||||
1) Name/Acc Name or Accession sequence to get
|
|
||||||
2) Namefile Get list of sequences from Namefile
|
|
||||||
3) WhatToGet a:annotation s:sequence b:both b
|
|
||||||
4) Database g:GenBank p:PIR v:VecBase l:LiMB g
|
|
||||||
G:GenBank dataset P:PIR dataset
|
|
||||||
5) Outfile Send all output to a single file (Outfile)
|
|
||||||
6) Files f:Send each entry to a separate file f
|
|
||||||
-------------------------------------------------------------
|
|
||||||
Type number of your choice or 0 to continue:
|
|
||||||
|
|
||||||
After all parameters have been set, type 0 to commence the search.
|
|
||||||
Messages regarding the progress of the search will be printed.
|
|
||||||
|
|
||||||
(1,2) Which entries to get?
|
|
||||||
If you want to get a single entry, option 1 lets you type in the
|
|
||||||
name of that entry, without having to create a namefile. To get
|
|
||||||
more than one entry, choose option 2, and specify the name of a
|
|
||||||
file containing sequence names or accession numbers.
|
|
||||||
|
|
||||||
namefile is a file containing one or more sequence names or
|
|
||||||
accession numbers, each on a separate line. Names and accession
|
|
||||||
numbers can even be interspersed, in upper or lowercase, and in
|
|
||||||
any order. For example, the namefile prp.nam might contain
|
|
||||||
|
|
||||||
; plant pathogenesis related proteins
|
|
||||||
; (these are sample comment lines)
|
|
||||||
; note that any line containing a semicolon is ignored
|
|
||||||
x06362
|
|
||||||
x05454
|
|
||||||
TOBPR1A1
|
|
||||||
; comments can be interspersed with names.
|
|
||||||
PUMPR13
|
|
||||||
tobpr1ar
|
|
||||||
|
|
||||||
Options 1 & 2 are mutually exclusive. Setting one will negate the
|
|
||||||
other. If option 2 is chosen, the name of the namefile will appear
|
|
||||||
at the top of the menu.
|
|
||||||
|
|
||||||
(3) WhatToGet
|
|
||||||
Use this option to specify whether to get annotation, sequence,
|
|
||||||
or both (default=both).
|
|
||||||
|
|
||||||
(4) Database
|
|
||||||
Use this option to select the database. (default=GenBank).
|
|
||||||
G and P select user-created database subsets containing GenBank
|
|
||||||
or PIR entries, respectively. It is assumed that the database
|
|
||||||
has been split into .ano, .wrp and .ind files using splitdb.
|
|
||||||
For example, if you had created a database subset called PR1.pir,
|
|
||||||
splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are
|
|
||||||
the files actually read by FETCH. When prompted for the name
|
|
||||||
of the database, simply type "PR1", without a file extension.
|
|
||||||
(If you do type a file extension, it will be ignored).
|
|
||||||
|
|
||||||
(5, 6) Where to send output
|
|
||||||
By default, option 6 is set to f, and each entry will be written to
|
|
||||||
a separate file, where the name of the file is the name of the
|
|
||||||
entry, followed by a file extension. If a complete entry is
|
|
||||||
retrieved, the file extension will indicate the type of database
|
|
||||||
(GenBank: .gen; PIR: .pir; Vecbase: .vec; LiMB: .LiMB). If only
|
|
||||||
annotation or sequence are retrieved, the file extensions will be
|
|
||||||
.ano or .wrp, respectively. Using the default, the namefile above
|
|
||||||
would create the following files:
|
|
||||||
|
|
||||||
PUMPR13.gen
|
|
||||||
TOBPR1A1.gen
|
|
||||||
TOBPR1AR.gen
|
|
||||||
TOBPR1CR.gen
|
|
||||||
TOBPR1PS.gen
|
|
||||||
|
|
||||||
By choosing option 5, you can specify the name of an output file
|
|
||||||
for all entries to go to. This filename will appear at the top
|
|
||||||
of the menu. Obviously, options 5 & 6 are mutually exclusive.
|
|
||||||
Note that entries retrieved are written in alphabetical order (sorting by
|
|
||||||
ASCII values), not the order in which they appeared in namefile.
|
|
||||||
|
|
||||||
(Note for remote users only: -f will only work for a single
|
|
||||||
name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES
|
|
||||||
specified in 2).)
|
|
||||||
|
|
||||||
Batch mode:
|
|
||||||
Although it is transparent to the user, all fetch really does
|
|
||||||
is call getloc, saving the user the trouble of knowing which
|
|
||||||
database files to retrieve sequences from, or of having to
|
|
||||||
execute getloc multiple times to retrieve sequences from
|
|
||||||
different database files. Thus, the options are identical to those
|
|
||||||
for getloc:
|
|
||||||
|
|
||||||
-a Write annotation portions of entries only, terminated by '//'.
|
|
||||||
-s Write sequence data only, in Pearson (.wrp) format.
|
|
||||||
-f Write each entry to a separate file.
|
|
||||||
-g GenBank (default)
|
|
||||||
-e EMBL {not implemented}
|
|
||||||
-p PIR (NBRF)
|
|
||||||
-v Vecbase
|
|
||||||
-l LiMB
|
|
||||||
-G GenBank_dataset
|
|
||||||
-P PIR_dataset
|
|
||||||
|
|
||||||
If -f is not specified, outfile must be specified.
|
|
||||||
|
|
||||||
-L force execution of fetch on local host even if
|
|
||||||
$XYLEM_RHOST is set. See "REMOTE EXECUTION" below
|
|
||||||
|
|
||||||
|
|
||||||
PIR_dataset
|
|
||||||
GenBank_dataset
|
|
||||||
This can be either a file of PIR entries, a file of GenBank entries,
|
|
||||||
or a XYLEM dataset created by splitdb. A file of PIR entries must
|
|
||||||
have the file extension ".pir". A file of GenBank entries must have
|
|
||||||
the file extension ".gen". A XYLEM dataset contains PIR or GenBank entries split
|
|
||||||
among three files by splitdb: annotation (.ano), sequence (.wrp)
|
|
||||||
and index (.ind). These file extensions must be used!
|
|
||||||
|
|
||||||
When specifying a split dataset, only the base name needs to be
|
|
||||||
used. For example given a XYLEM dataset consisting of the files
|
|
||||||
myset.ano, myset.wrp and myset.ind, the following two commands
|
|
||||||
are equivalent:
|
|
||||||
|
|
||||||
fetch -P myset something.nam something.pir
|
|
||||||
fetch -P myset.ano something.nam something.pir
|
|
||||||
|
|
||||||
If the original .pir file had been used, the command would have
|
|
||||||
been
|
|
||||||
|
|
||||||
fetch -P myset.pir something.nam something.pir
|
|
||||||
|
|
||||||
The ability to work directly with .gen or .pir files is quite
|
|
||||||
convenient. However, since FETCH needs to work with a split dataset,
|
|
||||||
FETCH automatically splits .pir or .gen files into .ano, .wrp
|
|
||||||
and .ind files, which are removed when finished. This requires
|
|
||||||
extra disk space and execution time, which could be significant
|
|
||||||
for large datasets.
|
|
||||||
|
|
||||||
EXAMPLES
|
|
||||||
Batch example:
|
|
||||||
fetch -f chitinase.nam
|
|
||||||
will retrieve annotation and sequence for sequences listed in
|
|
||||||
chitinase.nam from GenBank, writing each entry to a separate file
|
|
||||||
with the extension .gen.
|
|
||||||
|
|
||||||
fetch -s -v pbr.nam pbr.wrp
|
|
||||||
will retrieve sequence data only for the entries listed in pbr.nam,
|
|
||||||
from VecBase, and write all sequences to a Pearson format file
|
|
||||||
(ie. readable by fasta) with the name pbr.wrp.
|
|
||||||
|
|
||||||
fetch -G sample sample.nam new.gen
|
|
||||||
fetch -G sample.ano sample.nam new.gen
|
|
||||||
Assumes that a set of GenBank entries has been split by splitdb
|
|
||||||
into sample.ano sample.wrp and sample.ind. The entries listed in
|
|
||||||
sample.nam are written to new.gen.
|
|
||||||
|
|
||||||
|
|
||||||
FILES
|
|
||||||
Database files:
|
|
||||||
The directories for database files are specified by the environment
|
|
||||||
variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB
|
|
||||||
(LiMB).
|
|
||||||
|
|
||||||
Index files are $GB/gbacc.idx for GenBank (this file is supplied
|
|
||||||
with each GenBank release), while the other databases
|
|
||||||
use .ind files generated by splitdb. Split database files MUST
|
|
||||||
have the following file extensions: .ano {annotation}, .wrp
|
|
||||||
{sequence} and .ind {index}. Thus, when creating database files
|
|
||||||
for pir1.dat with splitdb, the output files should be pir1.ano,
|
|
||||||
pir1.wrp and pir1.ind.
|
|
||||||
|
|
||||||
Temporary files:
|
|
||||||
NAMEFILE.fetch
|
|
||||||
PRELIMINARY.fetch
|
|
||||||
TMP.fetch
|
|
||||||
FOUND.fetch
|
|
||||||
FETCHDIR {temporary directory}
|
|
||||||
|
|
||||||
REMOTE EXECUTION
|
|
||||||
Where the databases can not be stored locally, FETCH can call
|
|
||||||
FETCH on another system and retrieve the results. To run
|
|
||||||
FETCH remotely, your .cshrc file should contain the following
|
|
||||||
lines:
|
|
||||||
|
|
||||||
setenv XYLEM_RHOST remotehostname
|
|
||||||
setenv XYLEM_USERID remoteuserid
|
|
||||||
|
|
||||||
where remotehostname is the name of the host on which the
|
|
||||||
databases reside (in XYLEM split format) and remoteuserid
|
|
||||||
is your userid on the remote system. When run remotely,
|
|
||||||
your local copy of FETCH will generate the following
|
|
||||||
commands:
|
|
||||||
|
|
||||||
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
|
|
||||||
rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ...
|
|
||||||
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
|
|
||||||
rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files
|
|
||||||
|
|
||||||
Because FETCH uses rsh and rcp, your home directory on both
|
|
||||||
the local and remote systems must have a world-readable
|
|
||||||
file called .rhosts, containing the names of trusted remote
|
|
||||||
hosts and your userid on each host. Before trying to get
|
|
||||||
FETCH to work remotely, make sure that you can rcp and
|
|
||||||
rsh to the remote host.
|
|
||||||
|
|
||||||
Obviously, remote execution of FETCH implies that FETCH
|
|
||||||
must already be installed on the remote host. When FETCH
|
|
||||||
runs another copy of FETCH remotely, it uses the -L option
|
|
||||||
(fetch -L) to ensure that the remote FETCH job executes,
|
|
||||||
rather than calling yet another FETCH on another host.
|
|
||||||
|
|
||||||
|
|
||||||
---------- Remote execution on more than 1 host -----------
|
|
||||||
If more than 1 remote host is available for running FETCH
|
|
||||||
(say, in a clustered environment where many servers mount
|
|
||||||
a common filesystem) the choice of a host can be determined
|
|
||||||
by the csh script choosehost, such that execution of
|
|
||||||
choosehost returns the name of a remote server. To use this
|
|
||||||
approach, the following script, called 'choosehost' should
|
|
||||||
be in your bin directory:
|
|
||||||
|
|
||||||
#!/bin/csh
|
|
||||||
# choosehost - choose a host to use for a remote job.
|
|
||||||
# This script rotates among servers listed in .rexhosts,
|
|
||||||
# by choosing the host at the top of the list and moving
|
|
||||||
# it to the bottom.
|
|
||||||
|
|
||||||
#Rotate the list, putting the current host to the bottom.
|
|
||||||
set HOST = `head -1 $home/.rexhosts`
|
|
||||||
set JOBID = $$
|
|
||||||
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
|
|
||||||
echo $HOST >> /tmp/.rexhosts.$JOBID
|
|
||||||
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
|
|
||||||
|
|
||||||
# Write out the current host name
|
|
||||||
echo $HOST
|
|
||||||
|
|
||||||
You must also have a file in your home directory called
|
|
||||||
.rexhosts, listing remote hosts, such as
|
|
||||||
|
|
||||||
graucho.cc.umanitoba.ca
|
|
||||||
harpo.cc.umanitoba.ca
|
|
||||||
chico.cc.umanitoba.ca
|
|
||||||
zeppo.cc.umanitoba.ca
|
|
||||||
|
|
||||||
Each time choosehost is called, choosehost will rotate the
|
|
||||||
names in the file. For example, starting with the .rexhosts
|
|
||||||
as shown, it will move graucho.cc.umanitoba.ca to the bottom
|
|
||||||
of the file, and write the line 'graucho.cc.umanitoba.ca'
|
|
||||||
to the standard output. The next time choosehost is
|
|
||||||
run, it would write 'harpo.cc.umanitoba.ca', and so on.
|
|
||||||
|
|
||||||
Depending on your local configuration, you may wish to
|
|
||||||
rewrite choosehost. All that is really necessary is that
|
|
||||||
echo `choosehost` should return the name of a valid host.
|
|
||||||
|
|
||||||
Once you have installed choosehost and tested it, you can
|
|
||||||
get FETCH to use choosehost simply by setting
|
|
||||||
|
|
||||||
setenv XYLEM_RHOST choosehost
|
|
||||||
|
|
||||||
in your .cshrc file.
|
|
||||||
|
|
||||||
--------------- Remote filesystems -----------------------
|
|
||||||
Finally, an alternative to remote execution is to remotely mount
|
|
||||||
the file system containing the databases across the network.
|
|
||||||
This has the advantage of simplicity, and means that the
|
|
||||||
databases are available for ALL programs on your local
|
|
||||||
workstation. However, it may still be advantageous to run
|
|
||||||
FETCH remotely, since that will shift much of the computational
|
|
||||||
load to another host.
|
|
||||||
|
|
||||||
BUGS
|
|
||||||
When retrieving entries directly from GenBank, FETCH uses the
|
|
||||||
Accession Number index file gbacc.idx. In this case, FETCH
|
|
||||||
can retrieve all entries containing a given accession number.
|
|
||||||
This capability makes it possible to retrieve an entry using a
|
|
||||||
secondary accession number. However if more than one entry
|
|
||||||
share a secondary accession number, all of those entries will
|
|
||||||
be retrieved. While this behavior might be a bit of an
|
|
||||||
annoyance at times, it can also be useful because it alerts
|
|
||||||
the user to the presence of other, related entries that might
|
|
||||||
be of interest.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
getloc features
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,365 +0,0 @@
|
||||||
|
|
||||||
FINDKEY.DOC update 13 Mar 97
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
findkey - finds database entries containing one or more keywords
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
findkey
|
|
||||||
findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile]
|
|
||||||
findkey [-P PIR_dataset] keywordfile [namefile findfile]
|
|
||||||
findkey [-G GenBank_dataset] keywordfile [namefile findfile]
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
findkey uses the grep family of commands to find lines in database
|
|
||||||
annotation files containing one or more keywords. Next, identify
|
|
||||||
is called to create a .nam file, containing the names of entries
|
|
||||||
containing the keywords, and a .fnd file, containing the actual
|
|
||||||
lines from each entry containing hits. A PIR or GenBank dataset is
|
|
||||||
either a file containing one or more GenBank or PIR entries, or
|
|
||||||
the name of a XYLEM dataset created by splitdb. See FILES below
|
|
||||||
for a more detailed description.
|
|
||||||
|
|
||||||
INTERACTIVE USE
|
|
||||||
findkey prompts the user to set search parameters, using an interactive
|
|
||||||
menu:
|
|
||||||
|
|
||||||
___________________________________________________________________
|
|
||||||
FINDKEY - Version 12 Aug 94
|
|
||||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
|
||||||
___________________________________________________________________
|
|
||||||
Keyfile:
|
|
||||||
Dataset:
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Parameter Description Value
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
1) Keyword Keyword to find thionin
|
|
||||||
2) Keyfile Get list of keywords from Keyfile
|
|
||||||
3) WhereToLook p:PIR v:VecBase p
|
|
||||||
GenBank - b:bacterial i:invertebrate
|
|
||||||
m:mamalian e:expressed seq. tag
|
|
||||||
g:phage l:plant
|
|
||||||
r:primate n:rna
|
|
||||||
d:rodent s:synthetic
|
|
||||||
u:unannotated a:viral
|
|
||||||
t:vertebrate x:patented
|
|
||||||
z:STS
|
|
||||||
G: GenBank dataset P: PIR dataset
|
|
||||||
-------------------------------------------------------------
|
|
||||||
Type number of your choice or 0 to continue:
|
|
||||||
0
|
|
||||||
Searching /home/psgendb/PIR/pir1.ano...
|
|
||||||
Sequence names will be written to thionin~pir.nam
|
|
||||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
|
||||||
Searching /home/psgendb/PIR/pir2.ano...
|
|
||||||
Sequence names will be written to thionin~pir.nam
|
|
||||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
|
||||||
Searching /home/psgendb/PIR/pir3.ano...
|
|
||||||
Sequence names will be written to thionin~pir.nam
|
|
||||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
|
||||||
|
|
||||||
As shown in the example above, the keyword thionin was specified
|
|
||||||
as the keyword to search for. By default, option 3 is set to p,
|
|
||||||
and the PIR protein database is searched. Messages describe the
|
|
||||||
progress of the search. Since PIR is broken up into two divisions
|
|
||||||
(new and protein) both are searched, but all output is written to
|
|
||||||
thionin~pir.nam and thionin~pir.fnd
|
|
||||||
|
|
||||||
OPTIONS
|
|
||||||
(1,2) Which keywords to search for?
|
|
||||||
If you want to search for a single keyword, option 1 lets you type
|
|
||||||
the keyword, without having to create a file. To search for more
|
|
||||||
than one keyword, choose option 2, and specify the name of a
|
|
||||||
file containing the keywords. For example, entries containing
|
|
||||||
genes for antibiotic resistance might be found using the
|
|
||||||
following keyword file:
|
|
||||||
|
|
||||||
ampicillin
|
|
||||||
chloramphenicol
|
|
||||||
kanamycin
|
|
||||||
neomycin
|
|
||||||
tetracycline
|
|
||||||
|
|
||||||
Note: keyword searches are case insensitive.
|
|
||||||
|
|
||||||
As you might expect, it takes longer to search for multiple
|
|
||||||
keywords than a single keyword.
|
|
||||||
|
|
||||||
Options 1 & 2 are mutually exclusive. Setting one will negate the
|
|
||||||
other. If option 2 is chosen, the name of the keyword file will
|
|
||||||
appear at the top of the menu.
|
|
||||||
|
|
||||||
Finally, it is probably not a good idea to search GenBank
|
|
||||||
entries using very short keywords consisting only of letters.
|
|
||||||
This is because GenBank entries now include a /translation
|
|
||||||
field containing the amino acid sequence of each protein
|
|
||||||
coding sequence. Consequently, 3 or 4 letter keywords
|
|
||||||
consisting of legal amino acid symbols (eg. CAP, recA) will
|
|
||||||
turn up fairly often in protein translations.
|
|
||||||
|
|
||||||
(3) WhereToLook
|
|
||||||
Use this option to specify the database to be searched. In the
|
|
||||||
case of GenBank, only one division at a time may be searched.
|
|
||||||
User-created database subsets containing PIR (P) or GenBank (G)
|
|
||||||
entries may also be searched. User-created database subsets
|
|
||||||
must be in the .ano/.wrp/.ind form created by splitdb.
|
|
||||||
|
|
||||||
OUTPUT
|
|
||||||
The output filenames take the following form:
|
|
||||||
|
|
||||||
name~ex1.ex2
|
|
||||||
|
|
||||||
The 'name' part of the filename is either the keyword searched for,
|
|
||||||
if option 1 was chosen, or the name of the keyword file, if option 2
|
|
||||||
obtains. 'ex1' indicates the database division that was searched. For
|
|
||||||
PIR and VecBase, ex1 is 'pir' and 'vec', respectively. For GenBank,
|
|
||||||
ex1 is as follows:
|
|
||||||
|
|
||||||
bct - bacterial
|
|
||||||
inv - invertebrate
|
|
||||||
mam - other mammalian
|
|
||||||
est - expressed sequence tag
|
|
||||||
phg - phage
|
|
||||||
pln - plant (includes fungi)
|
|
||||||
pri - primate
|
|
||||||
rna - structural RNAs
|
|
||||||
rod - rodent
|
|
||||||
syn - synthetic sequences
|
|
||||||
sts - sequence tagged sites
|
|
||||||
una - unannotated (new) sequences
|
|
||||||
vrl - viral
|
|
||||||
vrt - other vertebrate
|
|
||||||
|
|
||||||
'ex2' distinguishes the files containing the names of entries
|
|
||||||
containing keywords (.nam) and the files containing the lines found
|
|
||||||
in each entry (.fnd).
|
|
||||||
|
|
||||||
The .nam file can be used directly as a namefile for fetch, getloc,
|
|
||||||
or getob.
|
|
||||||
|
|
||||||
COMMAND LINE USE
|
|
||||||
|
|
||||||
OPTIONS
|
|
||||||
p search PIR (default)
|
|
||||||
P PIR dataset search dbfile, containing PIR entries
|
|
||||||
v search VecBase
|
|
||||||
b search Genbank bacterial division
|
|
||||||
m search Genbank mammalian division
|
|
||||||
g search Genbank phage division
|
|
||||||
r search Genbank primate division
|
|
||||||
d search Genbank rodent division
|
|
||||||
u search Genbank unannotated division
|
|
||||||
t search Genbank vertebrate division
|
|
||||||
i search Genbank invertebrate division
|
|
||||||
l search Genbank plant division
|
|
||||||
n search Genbank rna division
|
|
||||||
s search Genbank synthetic division
|
|
||||||
a search Genbank viral division
|
|
||||||
x search Genbank patented division
|
|
||||||
e search Genbank exp.seq.tag division
|
|
||||||
z search GenBank STS division
|
|
||||||
S search GenBank Genom. Survey division
|
|
||||||
h search GenBank High Thrput. division
|
|
||||||
G GenBank dataset search dbfile, containing GenBank entries
|
|
||||||
|
|
||||||
L force execution of findkey on local host
|
|
||||||
even if $XYLEM_RHOST is set. See "REMOTE
|
|
||||||
EXECUTION" below
|
|
||||||
|
|
||||||
FILES
|
|
||||||
|
|
||||||
keywordfile - contains keywords to search for
|
|
||||||
|
|
||||||
namefile - LOCUS names of hits are written to this file
|
|
||||||
|
|
||||||
findfile - for each hit, a report listing the LOCUS name and the
|
|
||||||
lines matching the keyword is written to this file.
|
|
||||||
|
|
||||||
If namefile and findfile are not specified on the command line,
|
|
||||||
filenames will be created as described above for interactive
|
|
||||||
use.
|
|
||||||
|
|
||||||
PIR_dataset
|
|
||||||
GenBank_dataset
|
|
||||||
This can be either a file of PIR entries, a file of GenBank entries,
|
|
||||||
or a XYLEM dataset created by splitdb. A file of PIR entries must
|
|
||||||
have the file extension ".pir". A file of GenBank entries must have
|
|
||||||
the file extension ".gen". A XYLEM dataset contains PIR entries split
|
|
||||||
among three files by splitdb: annotation (.ano), sequence (.wrp)
|
|
||||||
and index (.ind). These file extensions must be used!
|
|
||||||
|
|
||||||
When specifying a split dataset, only the base name needs to be
|
|
||||||
used. For example given a XYLEM dataset consisting of the files
|
|
||||||
myset.ano, myset.wrp and myset.ind, the following two commands
|
|
||||||
are equivalent:
|
|
||||||
|
|
||||||
findkey -P myset something.kw
|
|
||||||
findkey -P myset.ano something.kw
|
|
||||||
|
|
||||||
If the original .pir file had been used, the command would have
|
|
||||||
been
|
|
||||||
|
|
||||||
findkey -P myset.pir something.kw
|
|
||||||
|
|
||||||
The ability to work directly with .gen or .pir files is quite
|
|
||||||
convenient. However, since FINDKEY needs to work with a split dataset,
|
|
||||||
FINDKEY automatically splits .pir or .gen files into .ano, .wrp
|
|
||||||
and .ind files, which are removed when finished. This requires
|
|
||||||
extra disk space and execution time, which could be significant
|
|
||||||
for large datasets.
|
|
||||||
|
|
||||||
EXAMPLES
|
|
||||||
If the list of antibiotics shown above was stored in the file
|
|
||||||
antibiotic.kw, and option 3 was set to 'b', then the annotation
|
|
||||||
portion of the GenBank bacterial division would be searched, and
|
|
||||||
all lines containing any of these keywords would be written to
|
|
||||||
antibiotic~bac.fnd. The corresponding GenBank entry names would
|
|
||||||
appear in antibiotic~bac.nam.
|
|
||||||
|
|
||||||
The same keyword file could be used to search other database files.
|
|
||||||
If VecBase was searched, the output files would be antibiotic~vec.fnd
|
|
||||||
and antibiotic~vec.nam. These filename conventions make it easy
|
|
||||||
to search different database divisions, and to keep track of where
|
|
||||||
data came from.
|
|
||||||
|
|
||||||
Command line examples:
|
|
||||||
|
|
||||||
findkey thionin.kw
|
|
||||||
|
|
||||||
would be equivalent to the interactive example shown above. In
|
|
||||||
this case, the file thionin.kw contains the word 'thionin'.
|
|
||||||
(Note that since PIR is the default, -p need not be supplied.)
|
|
||||||
|
|
||||||
findkey -b antibiotic.kw drugs.nam drugs.fnd
|
|
||||||
|
|
||||||
would search the GenBank bacterial division for the keywords
|
|
||||||
contained in antibiotic.kw, and write the output to drugs.nam
|
|
||||||
and drugs.fnd.
|
|
||||||
|
|
||||||
FILES
|
|
||||||
Database files:
|
|
||||||
The directories for database files are specified by the environment
|
|
||||||
variables $GB (GenBank), $PIR (PIR/NBRF) and $VEC (VecBase).
|
|
||||||
Annotation (.ano) and index (.ind) are those generated by splitdb.
|
|
||||||
|
|
||||||
Temporary files:
|
|
||||||
$jobid.fnd
|
|
||||||
$jobid.nam
|
|
||||||
$jobid.grep
|
|
||||||
|
|
||||||
where $jobid is a unique jobid generated by the shell
|
|
||||||
|
|
||||||
REMOTE EXECUTION
|
|
||||||
Where the databases can not be stored locally, FINDKEY can call
|
|
||||||
FINDKEY on another system and retrieve the results. To run
|
|
||||||
FINDKEY remotely, your .cshrc file should contain the following
|
|
||||||
lines:
|
|
||||||
|
|
||||||
setenv XYLEM_RHOST remotehostname
|
|
||||||
setenv XYLEM_USERID remoteuserid
|
|
||||||
|
|
||||||
where remotehostname is the name of the host on which the
|
|
||||||
databases reside (in XYLEM split format) and remoteuserid
|
|
||||||
is your userid on the remote system. When run remotely,
|
|
||||||
your local copy of FINDKEY will generate the following
|
|
||||||
commands:
|
|
||||||
|
|
||||||
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
|
|
||||||
rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ...
|
|
||||||
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
|
|
||||||
rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files
|
|
||||||
|
|
||||||
Because FINDKEY uses rsh and rcp, your home directory on both
|
|
||||||
the local and remote systems must have a world-readable
|
|
||||||
file called .rhosts, containing the names of trusted remote
|
|
||||||
hosts and your userid on each host. Before trying to get
|
|
||||||
FINDKEY to work remotely, make sure that you can rcp and
|
|
||||||
rsh to the remote host.
|
|
||||||
|
|
||||||
Obviously, remote execution of FINDKEY implies that FINDKEY
|
|
||||||
must already be installed on the remote host. When FINDKEY
|
|
||||||
runs another copy of FINDKEY remotely, it uses the -L option
|
|
||||||
(findkey -L) to ensure that the remote FINDKEY job executes,
|
|
||||||
rather than calling yet another FINDKEY on another host.
|
|
||||||
|
|
||||||
---------- Remote execution on more than 1 host -----------
|
|
||||||
If more than 1 remote host is available for running FINDKEY
|
|
||||||
(say, in a clustered environment where many servers mount
|
|
||||||
a common filesystem) the choice of a host can be determined
|
|
||||||
by the csh script choosehost, such that execution of
|
|
||||||
choosehost returns the name of a remote server. To use this
|
|
||||||
approach, the following script, called 'choosehost' should
|
|
||||||
be in your bin directory:
|
|
||||||
|
|
||||||
#!/bin/csh
|
|
||||||
# choosehost - choose a host to use for a remote job.
|
|
||||||
# This script rotates among servers listed in .rexhosts,
|
|
||||||
# by choosing the host at the top of the list and moving
|
|
||||||
# it to the bottom.
|
|
||||||
|
|
||||||
#Rotate the list, putting the current host to the bottom.
|
|
||||||
set HOST = `head -1 $home/.rexhosts`
|
|
||||||
set JOBID = $$
|
|
||||||
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
|
|
||||||
echo $HOST >> /tmp/.rexhosts.$JOBID
|
|
||||||
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
|
|
||||||
|
|
||||||
# Write out the current host name
|
|
||||||
echo $HOST
|
|
||||||
|
|
||||||
You must also have a file in your home directory called
|
|
||||||
.rexhosts, listing remote hosts, such as
|
|
||||||
|
|
||||||
graucho.cc.umanitoba.ca
|
|
||||||
harpo.cc.umanitoba.ca
|
|
||||||
chico.cc.umanitoba.ca
|
|
||||||
zeppo.cc.umanitoba.ca
|
|
||||||
|
|
||||||
Each time choosehost is called, choosehost will rotate the
|
|
||||||
names in the file. For example, starting with the .rexhosts
|
|
||||||
as shown, it will move graucho.cc.umanitoba.ca to the bottom
|
|
||||||
of the file, and write the line 'graucho.cc.umanitoba.ca'
|
|
||||||
to the standard output. The next time choosehost is
|
|
||||||
run, it would write 'harpo.cc.umanitoba.ca', and so on.
|
|
||||||
|
|
||||||
Depending on your local configuration, you may wish to
|
|
||||||
rewrite choosehost. All that is really necessary is that
|
|
||||||
echo `choosehost` should return the name of a valid host.
|
|
||||||
|
|
||||||
Once you have installed choosehost and tested it, you can
|
|
||||||
get FINDKEY to use choosehost simply by setting
|
|
||||||
|
|
||||||
setenv XYLEM_RHOST choosehost
|
|
||||||
|
|
||||||
in your .cshrc file.
|
|
||||||
|
|
||||||
--------------- Remote filesystems -----------------------
|
|
||||||
Finally, an alternative to remote execution is to remotely mount
|
|
||||||
the file system containing the databases across the network.
|
|
||||||
This has the advantage of simplicity, and means that the
|
|
||||||
databases are available for ALL programs on your local
|
|
||||||
workstation. However, it may still be advantageous to run
|
|
||||||
FINDKEY remotely, since that will shift much of the computational
|
|
||||||
load to another host.
|
|
||||||
|
|
||||||
|
|
||||||
BUGS
|
|
||||||
At present, regular expression characters cannot be used for
|
|
||||||
keyword searches.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
grep(1V) identify splitdb
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,65 +0,0 @@
|
||||||
|
|
||||||
GETLOC.DOC update 30 May 95
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
getloc - retrieve database entries listed in namefile to outfile.
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
getloc reads a list of names from namefile and recreates
|
|
||||||
entries by combining the annotation and sequence portions of each
|
|
||||||
entry from anofile and seqfile. getloc will work most quickly
|
|
||||||
when the namefile is in alphabetical order, but it will also
|
|
||||||
work on unsorted lists. The following options affect the output:
|
|
||||||
|
|
||||||
a Write annotation portions of entries only, terminated by '//'.
|
|
||||||
seqfile is not included on command line.
|
|
||||||
|
|
||||||
s Write sequence data only, in Pearson (.wrp) format.
|
|
||||||
anofile is not included on command line.
|
|
||||||
|
|
||||||
f Write each entry to a separate file. The filename will
|
|
||||||
consist of the LOCUS name, followed by .ano for annotation
|
|
||||||
only, .wrp for sequence only, or .gen for complete GenBank
|
|
||||||
format.
|
|
||||||
|
|
||||||
c namefile contains accession numbers, rather than names
|
|
||||||
|
|
||||||
The following options identify the type of database being read:
|
|
||||||
|
|
||||||
g GenBank (default)
|
|
||||||
e EMBL
|
|
||||||
p PIR (NBRF)
|
|
||||||
v Vecbase
|
|
||||||
l LiMB
|
|
||||||
|
|
||||||
namefile consists of an alphabetically ordered list of LOCUS names,
|
|
||||||
each on a separate line. Indfile could be used to create a
|
|
||||||
namefile by simply editing out some subset of names. (This can also
|
|
||||||
be done using the Unix comm command.) If the entire indfile was
|
|
||||||
used, the entire database would be recreated, minus the header
|
|
||||||
information that might have been present in the original, but
|
|
||||||
deleted by splitdb.
|
|
||||||
|
|
||||||
NOTE
|
|
||||||
Getloc automatically expands leading blanks that have been
|
|
||||||
compressed using splitdb -c. See splitdb.doc for more information.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
splitdb, comm(1).
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,327 +0,0 @@
|
||||||
|
|
||||||
GETOB 21 Dec 94
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
getob - Get an object from GenBank
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
getob [-frcn] infile namefile anofile seqfile indfile message
|
|
||||||
[outfile] expfile
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
getob extracts 'objects' (subsequences) from GenBank entries, using
|
|
||||||
the features table, and writes them to outfile (.out). A log
|
|
||||||
describing the construction of each object is written to message
|
|
||||||
(.msg). If -r is not set, a list of expressions that would recreate
|
|
||||||
the .out file if evaluated by getob -r, is written to expfile (.exp)
|
|
||||||
|
|
||||||
The following options are available:
|
|
||||||
|
|
||||||
f Write each entry to a separate file. The name will consist
|
|
||||||
of the entry name, and the extension '.obj'.
|
|
||||||
|
|
||||||
r Resolve expressions from namefile into objects.
|
|
||||||
Expressions take the form:
|
|
||||||
|
|
||||||
@[<database>::]<accession>:<location>
|
|
||||||
|
|
||||||
In effect, r makes it possible to use getob to resolve
|
|
||||||
features that span more than one entry, such as segmented
|
|
||||||
files. In the first run of the program, features that require
|
|
||||||
data from outside the entry in which they are defined will be
|
|
||||||
written to outfile with those externally-defined parts rep-
|
|
||||||
resented using the '@' notation described above. During a
|
|
||||||
subsequent run, the outfile from the previous run is used as
|
|
||||||
namefile. When r is set, all lines not beginning with '@' (ie.
|
|
||||||
name lines and sequence lines) are simply copied to the new
|
|
||||||
outfile. When an '@' is encountered, the expression is parsed
|
|
||||||
into accession number and location. The entry with the
|
|
||||||
specified accession number is located in indfile, and read from
|
|
||||||
anofile and seqfile. It is then evaluated, and the result
|
|
||||||
written to outfile in place of the '@' expression.
|
|
||||||
|
|
||||||
getob can also be used to get specific labeled objects from
|
|
||||||
a given entry. Examples:
|
|
||||||
|
|
||||||
@k30576:polyprotein
|
|
||||||
@k30576:/label=polyprotein
|
|
||||||
@x10345:/product="hsp70"
|
|
||||||
@j00879:group(1..2200,mutation_37)
|
|
||||||
|
|
||||||
The first two constructs given above are equivalent. Both
|
|
||||||
will extract the feature called polyprotein. The third
|
|
||||||
construct shows that any feature label can be specified. If
|
|
||||||
none is specified, as in the first example, then /label= is
|
|
||||||
assumed. One limitation, however, is that the label sought
|
|
||||||
must be unique within the entry in its first 15 characters
|
|
||||||
including double quotes ("). Otherwise, only the first
|
|
||||||
matching label expression will be evaluated. Finally, the
|
|
||||||
last example shows that a mutant sequence can be constructed
|
|
||||||
by first specifying an expression that evaluates to a
|
|
||||||
sequence (ie. 1..2200) and then a labeled expression that
|
|
||||||
upon evaluation, uses replace() to modify that sequence. The
|
|
||||||
usage shown in examples 3 & 4 above represent extensions to
|
|
||||||
the DDBJ/EMBL/GenBank Features Table Format.
|
|
||||||
|
|
||||||
As touched on briefly above, the r option makes it possible
|
|
||||||
to construct objects that include recursive references to
|
|
||||||
other entries (eg. segmented files) by iterative calls to
|
|
||||||
getob. The 'features' command automates this process. The basic
|
|
||||||
algorithm is as follows:
|
|
||||||
|
|
||||||
getob infile namefile anofile seqfile indfile ...
|
|
||||||
|
|
||||||
#Pull out all lines containing indirect references
|
|
||||||
grep '@' outfile > unresolved.grep
|
|
||||||
|
|
||||||
while (unresolved.grep is not empty)
|
|
||||||
|
|
||||||
#extract accession numbers to be retrieved
|
|
||||||
cut -c2-7 unresolved.grep > unresolved.nam
|
|
||||||
|
|
||||||
#retrieve the sequences into a new file, and create
|
|
||||||
#a database subset to be used by getob
|
|
||||||
fetch unresolved.nam new.gen
|
|
||||||
splitdb new.gen new.ano new.wrp new.ind
|
|
||||||
|
|
||||||
#run getob again to resolve indirect references
|
|
||||||
getob -r infile outfile new.ano new.wrp new.ind ...
|
|
||||||
|
|
||||||
#Pull out all lines containing indirect references
|
|
||||||
grep '@' outfile > unresolved.grep
|
|
||||||
end
|
|
||||||
|
|
||||||
c NAMEFILE contains accession numbers, rather than locus names
|
|
||||||
|
|
||||||
n By default, the qualifier 'codon_start' is used to determine
|
|
||||||
how many n's, if necessary, must be added to the 5' end of
|
|
||||||
CDS, mat_peptide, or sig_peptide, to preserve the reading
|
|
||||||
frame. To turn OFF this feature, -n must be set. -n must be set
|
|
||||||
for GenBank Releases 67.0 and earlier.
|
|
||||||
|
|
||||||
infile contains commands indicating what data is to be pulled from
|
|
||||||
each entry. Two types of output may be presented, GenBank or
|
|
||||||
OBJECTS. These are described below:
|
|
||||||
|
|
||||||
1) GenBank output - If the word 'GENBANK' is the first line in
|
|
||||||
infile, a pseudo-GenBank entry will be recreated. This option
|
|
||||||
is only intended for debugging purposes and will probably be
|
|
||||||
removed in later releases.
|
|
||||||
|
|
||||||
2) Object format - This option instructs getob to write part or
|
|
||||||
all of each sequence, along with site annotation, by specifying
|
|
||||||
feature key names. The syntax for infile is shown below:
|
|
||||||
|
|
||||||
Backus-Naur format: Example:
|
|
||||||
----------------------------------------------------------
|
|
||||||
OBJECTS OBJECTS
|
|
||||||
<feature key> tRNA
|
|
||||||
{<feature key> rRNA
|
|
||||||
. . . SITES
|
|
||||||
<feature key>} stem_loop
|
|
||||||
SITES
|
|
||||||
{<feature key>
|
|
||||||
. . .
|
|
||||||
<feature key>}
|
|
||||||
|
|
||||||
In the example above, getob is instructed to extract all tRNA or
|
|
||||||
rRNA sequences from each entry, and annotate the position of each
|
|
||||||
stem/loop structure. Note that the SITES coordinates written to the
|
|
||||||
file tell the positions of those SITES relative to the start of the
|
|
||||||
object, rather than the original location in the sequence. As above,
|
|
||||||
each word begins a separate line.
|
|
||||||
|
|
||||||
While the -r option does not use infile, at least a dummy infile
|
|
||||||
must be included in the command line. This dummy file need only
|
|
||||||
contain two lines:
|
|
||||||
|
|
||||||
OBJECTS
|
|
||||||
SITES
|
|
||||||
|
|
||||||
NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in
|
|
||||||
the input file will have no effect, the word SITES must still be
|
|
||||||
present after the last feature key.
|
|
||||||
|
|
||||||
|
|
||||||
namefile
|
|
||||||
namefile consists of a list of LOCUS names or accession numbers,
|
|
||||||
each on a separate line. Names or accession numbers should appear
|
|
||||||
in the order in which they appear in the database file. Unordered
|
|
||||||
namefiles will slow the progress of the search. Since only the
|
|
||||||
first non-blank field of each line in namefile is read, indfile
|
|
||||||
could be used to create a namefile. If the entire indfile was
|
|
||||||
used, the entire database file would be processed. A sample
|
|
||||||
namefile requesting four sequences by LOCUS name is shown below:
|
|
||||||
|
|
||||||
POTPR1A
|
|
||||||
POTPSTH2
|
|
||||||
POTPSTH21
|
|
||||||
POTSTHA
|
|
||||||
|
|
||||||
anofile, seqfile, and indfile
|
|
||||||
The database subset containing GenBank entries must be divided
|
|
||||||
among annotation, sequence and an index by splitdb.
|
|
||||||
|
|
||||||
message
|
|
||||||
message contains a log describing the parsing of each object.
|
|
||||||
For annotative purposes, qualifier lines from the object are
|
|
||||||
included along with the location expression being parsed.
|
|
||||||
The beginning of a typical message file is shown below:
|
|
||||||
|
|
||||||
GETOB Version 0.962 14 May 1992
|
|
||||||
|
|
||||||
POTPR1A:CDS1
|
|
||||||
join
|
|
||||||
(
|
|
||||||
295 603
|
|
||||||
|
|
||||||
1011 1355
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
/note="pathogenesis-related protein (prp1)"
|
|
||||||
/codon_start=1
|
|
||||||
/translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL
|
|
||||||
QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED
|
|
||||||
KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY
|
|
||||||
LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK"
|
|
||||||
//----------------------------------------------
|
|
||||||
|
|
||||||
In the example above, getob was instructed to retrieve all CDS
|
|
||||||
features from the database subset. The message for the entry
|
|
||||||
POTPR1A is shown, along with a reconstruction of the location
|
|
||||||
expression that was evaluated to create the object. In this
|
|
||||||
case, protein coding sequences from two exons had to be joined
|
|
||||||
to create the object.
|
|
||||||
|
|
||||||
outfile
|
|
||||||
outfile contains the actual objects constructed, consisting of
|
|
||||||
sites found and sequences. The beginning of a typical output file
|
|
||||||
is shown below:
|
|
||||||
|
|
||||||
>POTPR1A:CDS1
|
|
||||||
atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag
|
|
||||||
agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg
|
|
||||||
aagatttacaaaataagagccctttacttcttcaatctaatccaattcac
|
|
||||||
aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat
|
|
||||||
ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc
|
|
||||||
ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc
|
|
||||||
gaagataag
|
|
||||||
ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa
|
|
||||||
agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca
|
|
||||||
aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt
|
|
||||||
gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat
|
|
||||||
tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg
|
|
||||||
aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt
|
|
||||||
atccgttaccgagcctacattcagcctgttgatgcttcaaaatga
|
|
||||||
|
|
||||||
In the example, the CDS from entry POTPR1A has been written in
|
|
||||||
two chunks, corresponding to the two exon portions of the coding
|
|
||||||
sequence. Each location retrieved in constructing the object is
|
|
||||||
written as a separate block of sequence. By comparing message file
|
|
||||||
to outfile, it is possible to verify the correctness of the
|
|
||||||
operation.
|
|
||||||
|
|
||||||
Numbers are appended to the sequence names to indicate
|
|
||||||
which CDS in the entry has been retrieved. Thus, if two CDS
|
|
||||||
features were present, the second one would be named >POTPR1A:CDS2.
|
|
||||||
For compatibility with the FASTA programs of Pearson, the name line
|
|
||||||
begins with a '>'.
|
|
||||||
|
|
||||||
expfile
|
|
||||||
The expression evaluated to create this feature is written
|
|
||||||
to expfile:
|
|
||||||
|
|
||||||
>POTPR1A:CDS1
|
|
||||||
@J03679:join(295..603,1011..1355)
|
|
||||||
|
|
||||||
expfile is only created if -r is not set. It is intended as a way
|
|
||||||
of automating the creation of a feature expression file for use
|
|
||||||
in generating customized datasets. Expressions in expfile can be
|
|
||||||
deleted or modified, or new expressions added, to tailor the
|
|
||||||
dataset to individual needs. To generate a dataset from expfile:
|
|
||||||
|
|
||||||
getob -r infile expfile anofile seqfile indfile message outfile
|
|
||||||
|
|
||||||
EXTENSIONS TO THE FEATURE TABLE LANGUAGE
|
|
||||||
|
|
||||||
1) poly(<absolute_location>|<literal>|<feature_name>,x)
|
|
||||||
|
|
||||||
This operator evaluates an absolute location, literal, or
|
|
||||||
feature name (i.e., any location not containing functional
|
|
||||||
operators) and writes it x times. The most obvious
|
|
||||||
application of poly is to create spacers to represent regions
|
|
||||||
of unknown sequence between sequences that are known. For
|
|
||||||
example, the restriction map of a 4kb EcoR1 fragment with a
|
|
||||||
Hind3 site 1000 bp from one end could be represented as follows:
|
|
||||||
|
|
||||||
join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc")
|
|
||||||
|
|
||||||
2) The following feature keys are recognized by GETOB, although
|
|
||||||
not included in the language definition. While they will not
|
|
||||||
appear in GenBank entries, they could be used in user-created
|
|
||||||
GenBank-format files:
|
|
||||||
|
|
||||||
contig
|
|
||||||
This feature key is meant to be used to assemble large
|
|
||||||
sequence segments from smaller segments, possibly using the
|
|
||||||
poly() operator.
|
|
||||||
|
|
||||||
chromosome
|
|
||||||
Intended to annotate the complete sequence of a chromosome. This
|
|
||||||
feature may be constructed by a join of two or more contigs.
|
|
||||||
|
|
||||||
Use of these keywords is illustrated in the features table
|
|
||||||
shown below, which could be used to construct a model of part
|
|
||||||
of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb:
|
|
||||||
|
|
||||||
contig join(J01619:1..13063,poly("n",7140),
|
|
||||||
J03939:1..1363,poly("n",14380),
|
|
||||||
X02306:complement(1..1622),poly("n",14710),
|
|
||||||
J04423:1..5793,poly("n",22500),
|
|
||||||
X03722:1..2400,poly("n",123750),
|
|
||||||
one-of(X05017:complement(1..1854),X05017:1..1854))
|
|
||||||
/label=Eco_contig8
|
|
||||||
/map=763.4-950.6kb
|
|
||||||
contig join(V00352:1..2412,poly("n",28800),M15273:1..3409)
|
|
||||||
/label=Eco_contig9
|
|
||||||
/map=972.9-1001.7kb
|
|
||||||
contig join(X02826:1..1357,poly("n",13540),
|
|
||||||
J01654:complement(1..2270))
|
|
||||||
/label=Eco_contig10
|
|
||||||
/map=1016.5-1031.4kb
|
|
||||||
chromosome join(Eco_contig8,poly("n",22300),
|
|
||||||
Eco_contig9,poly("n",14800),
|
|
||||||
Eco_contig10)
|
|
||||||
/label=Ecoli_chromosome
|
|
||||||
|
|
||||||
NOTES
|
|
||||||
1) If the const DEBUG is set to true in the Pascal source code, getob
|
|
||||||
writes messages to the standard output, indicating the progress of
|
|
||||||
processing for each entry read in. By default, DEBUG=false.
|
|
||||||
This feature is solely for debugging purposes and will be removed in
|
|
||||||
later releases.
|
|
||||||
|
|
||||||
2) GETOB automatically expands leading blanks that have been
|
|
||||||
compressed using splitdb -c. See splitdb.doc for more information.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
features, splitdb, getloc
|
|
||||||
The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04
|
|
||||||
September 1, 1992
|
|
||||||
GenBank Release Notes for Release 79.0.
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,83 +0,0 @@
|
||||||
|
|
||||||
IDENTIFY update 3 Feb 94
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
identify - creates a file of locus names corresponding to lines
|
|
||||||
found by grep in a GenBank annotation file.
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
identify grepfile indfile namefile findfile
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
grepfile is created using the Unix grep command to search a .ano
|
|
||||||
file created by splitgb. For example, to find all lines containing
|
|
||||||
the word 'chlorophyll' in plant.ano, use
|
|
||||||
|
|
||||||
grep -n -i 'chlorophyll' plant.ano > plant.grep
|
|
||||||
|
|
||||||
In the example shown, the -n option causes each line written to
|
|
||||||
plant.grep to be preceded by the number of that line in plant.ano.
|
|
||||||
(The -i option causes grep to ignore case.) Identify can use the
|
|
||||||
indfile to determine which entry a given numbered line was found
|
|
||||||
in, and writes the corresponding LOCUS name to namefile. In
|
|
||||||
addition, all lines found in a given entry are re-written to
|
|
||||||
findfile without the line numbers, and preceded by the LOCUS name
|
|
||||||
for that entry.
|
|
||||||
|
|
||||||
EXAMPLES
|
|
||||||
Suppose you wanted to obtain a list of names for all plant
|
|
||||||
sequences which code for proteins. The task is complicated by the
|
|
||||||
fact that many fungal sequences are included in the GenBank plant
|
|
||||||
file. You could begin by searching plant.ano (containing all
|
|
||||||
GenBank plant entries) for the word 'Planta':
|
|
||||||
|
|
||||||
grep -n 'Planta' plant.ano > Planta.grep
|
|
||||||
|
|
||||||
However, we want to eliminate all fungal sequences, as well as all
|
|
||||||
sequences for RNAs other than mRNAs. If we create the file
|
|
||||||
bad.str containing the keywords
|
|
||||||
|
|
||||||
Mycophyta
|
|
||||||
tRNA
|
|
||||||
rRNA
|
|
||||||
uRNA
|
|
||||||
|
|
||||||
we can then type
|
|
||||||
|
|
||||||
grep -n -f bad.str plant.ano > bad.grep
|
|
||||||
|
|
||||||
bad.grep now contains all lines containing the offending keywords.
|
|
||||||
We next use identify to find the names of the entries found by
|
|
||||||
grep.
|
|
||||||
|
|
||||||
identify Planta.grep plant.ind Planta.nam Planta.fnd
|
|
||||||
identify bad.grep plant.ind bad.nam bad.fnd
|
|
||||||
|
|
||||||
Next, we can use the Unix comm command to compare the two .nam
|
|
||||||
files and produce an output file containing only names which are
|
|
||||||
present in Planta.nam but not bad.nam:
|
|
||||||
|
|
||||||
comm -23 Planta.nam bad.nam > plants.nam
|
|
||||||
|
|
||||||
The file plants.nam now contains names of either plant cDNA or
|
|
||||||
genomic sequences which do not code for structural RNAs.
|
|
||||||
At this point, getloc could be used to create a sub-database containing
|
|
||||||
only those entries listed in plants.nam. See documentation for
|
|
||||||
getloc for a more detailed discussion.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
grep, fgrep, egrep, ngrep, comm, splitgb, getloc
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,23 +0,0 @@
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
; FINDKEY/GDE Keyword File Instructions
|
|
||||||
;
|
|
||||||
; 1. Type in one or more keywords below,
|
|
||||||
; or
|
|
||||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
|
||||||
; menu to read in a file of keywords.
|
|
||||||
;
|
|
||||||
; 2. Choose 'Save Current File' in the File menu
|
|
||||||
; 3. Quit this window
|
|
||||||
;
|
|
||||||
; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT
|
|
||||||
; OUT THESE COMMENT LINES.
|
|
||||||
;
|
|
||||||
; NOTE: Put each keyword on a separate line
|
|
||||||
; SAMPLE KEYWORD FILE:
|
|
||||||
;
|
|
||||||
; maize
|
|
||||||
; corn
|
|
||||||
; Z.mays
|
|
||||||
; Zea
|
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
; FETCH/GDE Name/Accession File Instructions
|
|
||||||
;
|
|
||||||
; 1. Type in one or more LOCUS names or Accession #'s below,
|
|
||||||
; or
|
|
||||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
|
||||||
; menu to read in a file of names or accession #'s.
|
|
||||||
; or
|
|
||||||
; Copy names or accession #'s from another window and Paste into this window.
|
|
||||||
;
|
|
||||||
; 2. Choose 'Save Current File' in the File menu
|
|
||||||
; 3. Quit this window
|
|
||||||
;
|
|
||||||
; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT
|
|
||||||
; OUT THESE COMMENT LINES.
|
|
||||||
;
|
|
||||||
; NOTE: Put each name on a separate line
|
|
||||||
; SAMPLE NAME/ACCESSION FILE:
|
|
||||||
;
|
|
||||||
; X30412
|
|
||||||
; PSDRR1
|
|
||||||
; PEADRRG
|
|
||||||
;
|
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
; FEATURES/GDE Name File Instructions
|
|
||||||
;
|
|
||||||
; 1. Type in one or more GenBank LOCUS names below,
|
|
||||||
; or
|
|
||||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
|
||||||
; menu to read in a file of names.
|
|
||||||
;
|
|
||||||
; (NOTE: File can not contain accession numbers.)
|
|
||||||
;
|
|
||||||
; 2. Choose 'Save Current File' in the File menu
|
|
||||||
; 3. Quit this window
|
|
||||||
;
|
|
||||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
|
||||||
; OUT THESE COMMENT LINES.
|
|
||||||
;
|
|
||||||
; NOTE: Put each name on a separate line
|
|
||||||
; SAMPLE NAME FILE:
|
|
||||||
;
|
|
||||||
; PEADRRA
|
|
||||||
; PSDRR1
|
|
||||||
; PEADRRG
|
|
||||||
;
|
|
||||||
;---------------------------------------------------------------------------
|
|
||||||
|
|
|
@ -1,56 +0,0 @@
|
||||||
printdoc update 3 Feb 94
|
|
||||||
|
|
||||||
NAME
|
|
||||||
printdoc - prints documentation files
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
printdoc filename
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
printdoc uses the file extension to decide how to print a
|
|
||||||
documentation file. If necessary, a filter such as pr or nroff
|
|
||||||
is used to format the file before sending to the appropriate
|
|
||||||
printer. A list of file extensions recognized by printdoc is
|
|
||||||
given below. If no file extension is given, or the extension is
|
|
||||||
not in the list, printdoc assumes .doc.
|
|
||||||
|
|
||||||
.doc - (default) Uses pr to print the text, using the default
|
|
||||||
settings provided by pr (56 text lines per page plus a 5 line
|
|
||||||
header and footer). Printing is at 12 cpi, front only. This works
|
|
||||||
reasonably well for most unformatted documentation files,
|
|
||||||
provided that the line length doesn't exceed 80 char. This
|
|
||||||
option assumes that a half-inch left margin is automatically
|
|
||||||
provided by the printer.
|
|
||||||
|
|
||||||
.tex - Assumes that document is already pre-formatted. Thus,
|
|
||||||
no headers or footers are provided, and it is assumed that
|
|
||||||
the top and bottom of pages are padded with blanks or header/
|
|
||||||
footer lines as needed. Form-feed characters (^L) may be
|
|
||||||
included in the text to force page breaks.
|
|
||||||
|
|
||||||
.ps - Assumes file is in PostScript format. Sends it to the
|
|
||||||
PostScript printer.
|
|
||||||
|
|
||||||
.nroff - Assumes file is formatted for use by nroff, using the
|
|
||||||
standard macro set (nroff -ms).
|
|
||||||
|
|
||||||
.nroff.me - Assumes file is formatted for use by nroff, using the
|
|
||||||
e macro set (nroff -me).
|
|
||||||
|
|
||||||
TRANSPORTATION NOTES
|
|
||||||
For reasons which should be obvious, this script needs major
|
|
||||||
rewriting at each site, since the available printers will
|
|
||||||
be of different types and have different names.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
pr, pr(V), xlp, nroff
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
|
@ -1,123 +0,0 @@
|
||||||
prot2nuc update 10 Aug 94
|
|
||||||
|
|
||||||
NAME
|
|
||||||
prot2nuc - reverse translates protein into nucleic acid
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
prot2nuc [-ln -gn] < input > output
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
prot2nuc reads a file containing an amino acid sequence
|
|
||||||
and writes the corresponding reverse translated nucleic acid
|
|
||||||
sequence, using the standard IUPAC-IUB ambiguity codes to output.
|
|
||||||
The amino acid sequence may contain internal stop '*' characters.
|
|
||||||
That is, all legal amino acid characters will be processed.
|
|
||||||
|
|
||||||
-ln print n amino acids/codons per line. (default = 25)
|
|
||||||
|
|
||||||
-gn number the amino acid sequence every n amino acids/codons.
|
|
||||||
(default = 5)
|
|
||||||
|
|
||||||
If l is not evenly divisible by g, the defaults are used.
|
|
||||||
|
|
||||||
input - If the first line of the file begins with '>' or ';',
|
|
||||||
input will be read as the standard .wrp (Pearson) format,
|
|
||||||
such as that produced by getob:
|
|
||||||
|
|
||||||
>name
|
|
||||||
sequence lines
|
|
||||||
|
|
||||||
|
|
||||||
Otherwise, it will be assumed that the file ONLY contains
|
|
||||||
sequence, and all legal amino acid characters will be
|
|
||||||
read as sequence.
|
|
||||||
|
|
||||||
output - The output begins with a header, listing both the
|
|
||||||
1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559
|
|
||||||
(1968)], as well as the nucleic acid ambiguity codes [Cornish-
|
|
||||||
Bowden (1985) Nucl. Acids Res. 13:3021-3030.]. The amino acid
|
|
||||||
sequence, along with its reverse translation, are then printed on
|
|
||||||
lines of l amino acids/codons, numbering every g amino acids/codons.
|
|
||||||
Non-ambiguous nucleotides appear capitalized, while ambiguous
|
|
||||||
nucleotides are in lowercase. A sample output file appears below:
|
|
||||||
|
|
||||||
PROT2NUC Version 8/10/94
|
|
||||||
|
|
||||||
IUPAC-IUP AMINO ACID SYMBOLS
|
|
||||||
[J. Biol. Chem. 243, 3557-3559 (1968)]
|
|
||||||
|
|
||||||
Phe F Leu L Ile I
|
|
||||||
Met M Val V Ser S
|
|
||||||
Pro P Thr T Ala A
|
|
||||||
Tyr Y His H Gln Q
|
|
||||||
Asn N Lys K Asp D
|
|
||||||
Glu E Cys C Trp W
|
|
||||||
Arg R Gly G STOP *
|
|
||||||
Asx B Glx Z UNKNOWN X
|
|
||||||
|
|
||||||
|
|
||||||
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE
|
|
||||||
[Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.]
|
|
||||||
|
|
||||||
Symbol Meaning | Symbol Meaning
|
|
||||||
------------------------------------+---------------------------------
|
|
||||||
G Guanine | k G or T
|
|
||||||
A Adenine | s G or C
|
|
||||||
C Cytosine | w A or T
|
|
||||||
T Thymine | h A or C or T
|
|
||||||
U Uracil | b G or T or C
|
|
||||||
r Purine (A or G) | v G or C or A
|
|
||||||
y Pyrimidine (C or T) | d G or T or A
|
|
||||||
m A or C | n G or A or T or C
|
|
||||||
|
|
||||||
pI39
|
|
||||||
5 10 15 20
|
|
||||||
M E K K S L A A L S F L L L L V L F V A
|
|
||||||
ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn
|
|
||||||
AGyTTr TTrAGy TTrTTrTTrTTr TTr
|
|
||||||
|
|
||||||
25 30 35 40
|
|
||||||
Q E I V V T E A N T C E H L A D T Y R G
|
|
||||||
CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn
|
|
||||||
TTr AGr
|
|
||||||
|
|
||||||
45 50 55 60
|
|
||||||
V C F T N A S C D D H C K N K A H L I S
|
|
||||||
GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn
|
|
||||||
AGy TTr AGy
|
|
||||||
|
|
||||||
65 70
|
|
||||||
G T C H D W K C F C T Q N C
|
|
||||||
GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy
|
|
||||||
|
|
||||||
|
|
||||||
With the Universal Genetic code, ambiguity symbols make it possible
|
|
||||||
to represent all possible codons for an amino acid using two output
|
|
||||||
lines. It is important to realize that the ambiguities on each line
|
|
||||||
can not be combined. For example, CTn and TTr represent all codons for
|
|
||||||
Leucine. However, attempting to combine them into a single triplet,
|
|
||||||
yTn, would be incorrect. For example, TTT and TTC are codons for
|
|
||||||
Phenylalanine, not Leucine.
|
|
||||||
|
|
||||||
FUTURE PLANS
|
|
||||||
1. It wouldn't be hard to have the output printed as nucleic acid
|
|
||||||
sequences in Pearson format, so that the output could be read back
|
|
||||||
into GDE. I don't know why you would want to do this, but it could
|
|
||||||
be done.
|
|
||||||
2. Right now, only the Universal Genetic Code is used, but it should
|
|
||||||
be possible to read in alternative genetic codes, have prot2nuc
|
|
||||||
figure out the ambiguity rules (as is already done in ribosome) and
|
|
||||||
print out the appropriate ambiguous codons.
|
|
||||||
3. It might be useful to have each possible codon printed out, rather
|
|
||||||
than ambiguous codons. This would take up a lot more space and
|
|
||||||
wouldn't be as pretty. If there's a lot of demand I could do this.
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
|
@ -1,107 +0,0 @@
|
||||||
reform update 3 Feb 94
|
|
||||||
|
|
||||||
NAME
|
|
||||||
reform - reformats multiply-aligned sequences for printing.
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}]
|
|
||||||
or
|
|
||||||
ralign file parameters | reform [-gpcn] [-sn] [-ln] file
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
|
|
||||||
g Gaps are to be represented by dashes (-).
|
|
||||||
p Bases which agree with the consensus are
|
|
||||||
represented by periods (.).
|
|
||||||
c Positions at which all sequences agree are
|
|
||||||
capitalized in the consensus.
|
|
||||||
n Sequence data is nucleic acid. Protein default
|
|
||||||
fx Specify input file format, where x is
|
|
||||||
r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics)
|
|
||||||
m Input file contains multiline format sequences already aligned,
|
|
||||||
as opposed to ralign output. This option is obsolete, and is
|
|
||||||
equivalent to -fp.
|
|
||||||
ln The output linelength is set to n.
|
|
||||||
Default is 70.
|
|
||||||
sn numbering starts with n (default=0)
|
|
||||||
|
|
||||||
file Sequence file as described in ralign docu-
|
|
||||||
mentation. reform needs to re-read the
|
|
||||||
sequence file read by ralign to get the
|
|
||||||
names of the sequences, which ralign ignores.
|
|
||||||
This filename is only included for ralign output.
|
|
||||||
If -m is set, file is ignored, and sequence names
|
|
||||||
must be read from the input.
|
|
||||||
|
|
||||||
Note that positions in the consensus at which no nucleotide is in the
|
|
||||||
majority are represented by n's (for nucleic acids) or x's (for proteins),
|
|
||||||
rather than periods, as in ralign.
|
|
||||||
|
|
||||||
Gaps in the input sequences may be represented by either blanks or dashes.
|
|
||||||
|
|
||||||
INPUT FILE FORMATS
|
|
||||||
|
|
||||||
(a) ralign (default, -fr)
|
|
||||||
As described in ralign documentation, the input file (which is assumed to
|
|
||||||
be ralign output) must have each sequence on a single long line. All
|
|
||||||
characters on a given line will be included in the alignment. All lines
|
|
||||||
must be exactly the same length. For example, if ralign had read
|
|
||||||
sequence from a file called 'allcab.seq' and written output to 'allcab.ralign',
|
|
||||||
the following command might be used:
|
|
||||||
|
|
||||||
reform allcab.seq <allcab.ralign >allcab.ref
|
|
||||||
|
|
||||||
(b) Pearson (-fp, -m)
|
|
||||||
Compatible with sequence files used by Pearson's fasta programs as shown:
|
|
||||||
>name1
|
|
||||||
sequence1
|
|
||||||
>name2
|
|
||||||
sequence2
|
|
||||||
...
|
|
||||||
>namen
|
|
||||||
sequencen
|
|
||||||
|
|
||||||
Sequences may run over many lines and line length does not have to be
|
|
||||||
uniform. However, both dashes ('-') and blanks (' ') will be read in
|
|
||||||
as gaps in the alignment. A right arrow (>) at the beginning of a line
|
|
||||||
indicates the name line at the beginning of a new sequence.
|
|
||||||
|
|
||||||
Any line beginning with a semicolon (';') will be considered a comment,
|
|
||||||
and will be ignored.
|
|
||||||
|
|
||||||
(c) MBCRR-MASE (Intelligenetics) (-fi)
|
|
||||||
Compatible with .mase files produced by MBCRR's mase and pima programs,
|
|
||||||
which use the Intelligenetics format as shown:
|
|
||||||
|
|
||||||
;one or more comment lines
|
|
||||||
name1
|
|
||||||
sequence1
|
|
||||||
;one or more comment lines
|
|
||||||
name2
|
|
||||||
sequence2
|
|
||||||
...
|
|
||||||
;one or more comment lines
|
|
||||||
namen
|
|
||||||
sequencen
|
|
||||||
|
|
||||||
Sequences may run over many lines and line length does not have to be
|
|
||||||
uniform. However, both dashes ('-') and blanks (' ') will be read in
|
|
||||||
as gaps in the alignment. Each sequence MUST begin with at least one
|
|
||||||
comment line. When a comment line is encountered, that signals the
|
|
||||||
beginning of a new sequence. The first line after the comment is read
|
|
||||||
as the name, and the sequence begins on the next line after that.
|
|
||||||
|
|
||||||
SEE ALSO ralign, mase
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,84 +0,0 @@
|
||||||
ribosome update 3 Feb 94
|
|
||||||
|
|
||||||
NAME
|
|
||||||
ribosome - translates nucleic acid into protein
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
ribosome [-g gcfile] < input > output
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
ribosome reads a file of one or more nucleic acid sequences
|
|
||||||
and writes the corresponding amino acid sequence, in the standard
|
|
||||||
one letter code, to output. Ribosome begins translating at the
|
|
||||||
first nucleotide in each input sequence and continues to the end.
|
|
||||||
If the length of the translated sequence is not divisible by 3,
|
|
||||||
ribosome pads the final codon with N's and attempts to use ambi-
|
|
||||||
guity rules to translate the final codon. Based on the genetic
|
|
||||||
code used, ribosome derives a set of rules to resolve all ambi-
|
|
||||||
guities that can possibly be resolved.
|
|
||||||
|
|
||||||
-g read in an alternative genetic code from gcfile. If this
|
|
||||||
option is not specified, ribosome uses the universal
|
|
||||||
genetic code.
|
|
||||||
|
|
||||||
gcfile - This file specifies an alternative genetic code. An
|
|
||||||
example is shown below. ribosome reads the first 64 legal
|
|
||||||
capital letters as amino acids. Consequently, lowercase letters
|
|
||||||
can be used for annotation purposes, as shown in the example.
|
|
||||||
All non-amino acid characters are ignored.
|
|
||||||
|
|
||||||
sgc2 - yeast mitochondrial genetic code
|
|
||||||
|
|
||||||
second position
|
|
||||||
first position ------------------------------- third position
|
|
||||||
(5' end) u c a g (3' end)
|
|
||||||
-----------------------------------------------------------------
|
|
||||||
u F S Y C u
|
|
||||||
F S Y C c
|
|
||||||
L S * W a
|
|
||||||
L S * W g
|
|
||||||
-----------------------------------------------------------------
|
|
||||||
c T P H R u
|
|
||||||
T P H R c
|
|
||||||
T P Q R a
|
|
||||||
T P Q R g
|
|
||||||
-----------------------------------------------------------------
|
|
||||||
a I T N S u
|
|
||||||
I T N S c
|
|
||||||
M T K R a
|
|
||||||
M T K R g
|
|
||||||
-----------------------------------------------------------------
|
|
||||||
g V A D G u
|
|
||||||
V A D G c
|
|
||||||
V A E G a
|
|
||||||
V A E G g
|
|
||||||
|
|
||||||
|
|
||||||
input - If the first line of the file begins with '>' or ';',
|
|
||||||
input will be read as the standard .wrp (Pearson) format,
|
|
||||||
such as that produced by getob:
|
|
||||||
|
|
||||||
>name
|
|
||||||
; one or more comment lines (optional)
|
|
||||||
sequence lines
|
|
||||||
|
|
||||||
|
|
||||||
Otherwise, it will be assumed that the file ONLY contains
|
|
||||||
sequence, and all legal IUPAC/IUB DNA characters will be
|
|
||||||
read as sequence.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
getob
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,66 +0,0 @@
|
||||||
shuffle.doc update 3 Feb 94
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
shuffle -sn [-wn -on]
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF
|
|
||||||
and Waterman MS (1984) On the statistical significance of nucleic
|
|
||||||
acid similarities. Nucl. Acids Res. 12:215-226.
|
|
||||||
-sn n is a random integer between 0 and 32767. This number
|
|
||||||
must be provided for each run.
|
|
||||||
|
|
||||||
-wn n is an integer, indicating the width of the window for
|
|
||||||
random localization. If w exceeds the length of a sequence,
|
|
||||||
or is negative, the entire sequence is scrambled as a single
|
|
||||||
window. This is also the case if w is not specified.
|
|
||||||
|
|
||||||
-on n is an integer, indicating the number of nucleotides
|
|
||||||
overlap between adjacent windows. It should never exceed
|
|
||||||
the window size. o defaults to 0 if not specified.
|
|
||||||
|
|
||||||
If w and o are specified, overlapping windows of w nucleotides
|
|
||||||
are shuffled, thus preserving the local characteristic base
|
|
||||||
composition. Windows overlap by o nucleotides.
|
|
||||||
If w and o are not specified, each sequence is shuffled globally,
|
|
||||||
thus preserving the overall base composition, but not the local
|
|
||||||
variations in composition.
|
|
||||||
|
|
||||||
Any number of sequences may be processed from a single input
|
|
||||||
file. In Pearson-format files, each new sequence begins with a
|
|
||||||
'>' comment line, indicating the name and a short description of
|
|
||||||
the sequence.
|
|
||||||
|
|
||||||
No distinction is made between protein or nucleic acid sequences.
|
|
||||||
That is, shuffle will read any of the following characters as
|
|
||||||
sequence:
|
|
||||||
|
|
||||||
T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,-
|
|
||||||
|
|
||||||
where '*' is the result of translating a stop codon, and '-'
|
|
||||||
is a gap generated during sequence alignment. Lowercase is
|
|
||||||
also accepted.
|
|
||||||
|
|
||||||
EXAMPLE
|
|
||||||
A sample output file is shown below. Note that the first two
|
|
||||||
lines of output are comment lines, listing the version of the
|
|
||||||
program and the parameters used in the run.
|
|
||||||
|
|
||||||
>SHUFFLE VERSION 11/ 8/93
|
|
||||||
>RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3
|
|
||||||
>BAZFAZ - Borborigmus azerbi F-actin-zeta gene
|
|
||||||
ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt
|
|
||||||
cacccttgggcagtg.....(etc.)
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,141 +0,0 @@
|
||||||
|
|
||||||
SPLITDB update 28 Mar 98
|
|
||||||
|
|
||||||
|
|
||||||
NAME
|
|
||||||
splitdb - split GenBank files into annotation, sequence, and index
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
splitdb [-gepvlct] dbfile anofile seqfile indfile
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
Splitdb splits a database (dbfile) among three files: anofile, seqfile
|
|
||||||
and indfile. Splitdb ignores any header information that might be in the
|
|
||||||
file and begins processing at the first entry.
|
|
||||||
|
|
||||||
anofile contains the annotation portion of each entry. Entries are
|
|
||||||
terminated with '//' or '///' (PIR only). Trailing blanks present in
|
|
||||||
dbfile are omitted in anofile.
|
|
||||||
|
|
||||||
seqfile contains the sequence data for each entry. Each sequence
|
|
||||||
entry begins with a header line, followed by sequence data on
|
|
||||||
succeeding lines of 75 characters per line. The header line
|
|
||||||
includes the header flag character '>' in column 1, followed by the
|
|
||||||
name, followed by the first 50 characters of the 1st
|
|
||||||
DEFINITION line. An example is shown below:
|
|
||||||
|
|
||||||
>UNHOR1 - Unicorn horn protein 1, complete cDNA sequence
|
|
||||||
attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc
|
|
||||||
...
|
|
||||||
|
|
||||||
Removal of blanks and numbers from sequence lines makes split
|
|
||||||
datasets about 8-9% smaller than the original GenBank files.
|
|
||||||
|
|
||||||
indfile is an index which tells the line numbers for each entry in
|
|
||||||
anofile and seqfile. It is assumed to be in alphabetical order by
|
|
||||||
name. Each line contains a name and accession number, followed by the
|
|
||||||
line numbers on which the annotation and sequence data begin in anofile
|
|
||||||
and seqfile, respectively. Thus the file plants.ind might contain:
|
|
||||||
|
|
||||||
|
|
||||||
A15660 TA156608 1 1
|
|
||||||
A15671 A15671 33 11
|
|
||||||
A15673 A15673 65 25
|
|
||||||
A15675 AK156751 97 36
|
|
||||||
A15677 BA156770 128 46
|
|
||||||
A16780 BA167807 160 57
|
|
||||||
A16782 A16782 192 70
|
|
||||||
ATHRPRP1C GM905105 225 83
|
|
||||||
etc...
|
|
||||||
|
|
||||||
Note that indfile is a perfectly legitimate .nam file, for use with
|
|
||||||
programs such as getloc, getob, or comm.
|
|
||||||
|
|
||||||
|
|
||||||
The following options identify the type of database being read:
|
|
||||||
|
|
||||||
-g GenBank (default)
|
|
||||||
-e EMBL
|
|
||||||
-p PIR (NBRF)
|
|
||||||
-v Vecbase
|
|
||||||
-l LiMB
|
|
||||||
|
|
||||||
Other options:
|
|
||||||
-c Compress 3 or more leading blanks in annotation lines
|
|
||||||
to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG
|
|
||||||
is the ASCII character specified by the Pascal const
|
|
||||||
CRUNCHOFFSET, which is set to 33 ("!") in the current
|
|
||||||
implementation. For each annotation line read, if the
|
|
||||||
number of leading blanks is >=3, splitdb sets CRUNCHCHAR
|
|
||||||
to CRUNCHOFFSET+the number of blanks. Thus, for lines
|
|
||||||
with 3, 4, or 5 leading blanks, CRUNCHCHAR would be
|
|
||||||
'$', '%' and '&', respectively. GETLOC and GETOB
|
|
||||||
automatically expand crunched blanks when CRUNCHFLAG
|
|
||||||
is encountered on an input line. Empirical observations
|
|
||||||
indicate that the -c option decreases the size of
|
|
||||||
GenBank files by about 10%.
|
|
||||||
|
|
||||||
This compression method may fail when the number of
|
|
||||||
leading blanks exceeds 127-CRUNCHOFFSET. However,
|
|
||||||
none of the above mentioned databases currently
|
|
||||||
supports any datafield with anywhere near that number
|
|
||||||
of leading blanks.
|
|
||||||
|
|
||||||
-t (GenBank only) Append all information in the first
|
|
||||||
ORGANISM to the end of each line in indfile. For example,
|
|
||||||
the entry which begins:
|
|
||||||
|
|
||||||
LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996
|
|
||||||
DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon
|
|
||||||
Zoological Gardens) mitochondrial D-loop DNA.
|
|
||||||
ACCESSION L76759
|
|
||||||
NID g1222584
|
|
||||||
KEYWORDS D-loop.
|
|
||||||
SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom,
|
|
||||||
ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male
|
|
||||||
DNA.
|
|
||||||
ORGANISM Mitochondrion Gorilla gorilla gorilla
|
|
||||||
Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata;
|
|
||||||
Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla.
|
|
||||||
|
|
||||||
might be indexed as
|
|
||||||
|
|
||||||
GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla
|
|
||||||
|
|
||||||
This is useful for taxonomic studies, or as a way of making
|
|
||||||
it easy to create subsets from a single index. Thus,
|
|
||||||
'grep gorilla primates.ind' would print all lines in the
|
|
||||||
file that contained the word gorilla. The output from
|
|
||||||
this command could be used as a .nam file for extracting
|
|
||||||
just gorilla sequences from a larger dataset using
|
|
||||||
fetch.
|
|
||||||
|
|
||||||
|
|
||||||
NOTES
|
|
||||||
1. Header lines that aren't part of entries are automatically
|
|
||||||
stripped out during processing. For example, in a file containing
|
|
||||||
GenBank entries, all lines up to the first occurrence of 'LOCUS'
|
|
||||||
starting in column 1, are ignored. Similarly for PIR, processing
|
|
||||||
begins on the first line containing 'ENTRY' beginning in column 1.
|
|
||||||
2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996,
|
|
||||||
have accession numbers of 8 characters, rather than 6. Previously
|
|
||||||
assigned accession numbers will remain at 6 characters. Splitdb has
|
|
||||||
been updated to write all accession numbers to the .ind file, left
|
|
||||||
justified in a field of 8 characters, in columns 14-21 of the .ind
|
|
||||||
file.
|
|
||||||
|
|
||||||
SEE ALSO
|
|
||||||
getloc, getob, comm(1) (Unix command).
|
|
||||||
|
|
||||||
AUTHOR
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB Canada R3T 2N2
|
|
||||||
Phone: 204-474-6085
|
|
||||||
FAX: 204-261-5732
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
||||||
REFERENCE
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
|
@ -1,125 +0,0 @@
|
||||||
|
|
||||||
|
|
||||||
XYLEM.DOC update 10 Aug 1994
|
|
||||||
|
|
||||||
XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES
|
|
||||||
Brian Fristensky, University of Manitoba
|
|
||||||
|
|
||||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
|
||||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
||||||
|
|
||||||
SPLITDB - Splits files containing one or more GenBank entries into
|
|
||||||
annotation, sequence, and index files. Indexfiles can also serve as
|
|
||||||
namefiles for GETLOC. Sequence files are in the format required for
|
|
||||||
use with the Pearson programs (FASTA,LFASTA etc.).
|
|
||||||
|
|
||||||
GETLOC - Reads a file containing LOCUS names (namefile) and
|
|
||||||
retrieves either annotation, sequence, or both from a split
|
|
||||||
database or database subset created by SPLITDB.
|
|
||||||
|
|
||||||
FETCH - A c-shell script that provides a convenient menu-driven
|
|
||||||
front end for retrieval of database entries using GETLOC.
|
|
||||||
|
|
||||||
FINDKEY - A c-shell script that provides a convenient menu-driven
|
|
||||||
front end for keyword searches of database annotation files,
|
|
||||||
using IDENTIFY.
|
|
||||||
|
|
||||||
IDENTIFY- Given line-numbered output from grep, IDENTIFY uses the
|
|
||||||
index file to determine which entries contained the keywords
|
|
||||||
searched for by grep. It then produces a namefile for use by
|
|
||||||
GETLOC. Namefiles can serve as logical databases, and utilities
|
|
||||||
such as the Unix comm command can perform logical operations on
|
|
||||||
these namefiles to produce database subsets.
|
|
||||||
|
|
||||||
FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS
|
|
||||||
etc.) from each of the named entries, using the new
|
|
||||||
DDBJ/EMBL/GenBank International Features Table Format. A future
|
|
||||||
version will also allow the annotation of sites within objects that
|
|
||||||
are extracted.
|
|
||||||
|
|
||||||
DBSTAT - Calculates amino acid frequencies in a protein database.
|
|
||||||
|
|
||||||
RIBOSOME - Given a file of one or more nucleic acids (e.g. output
|
|
||||||
from GETOB), RIBOSOME translates them into protein, using either
|
|
||||||
the universal genetic code or an alternative genetic code supplied
|
|
||||||
by the user. All ambiguities that can be resolved are translated.
|
|
||||||
|
|
||||||
PROT2NUC - reverse translates a sequence from protein to nucleic
|
|
||||||
acid, using IUPAC-IUB ambiguity codes.
|
|
||||||
|
|
||||||
SHUFFLE - Given a random seed, shuffles each sequence in a Pearson-
|
|
||||||
format (.wrp) file. Shuffling is done locally in overlapping windows
|
|
||||||
across the length of a given sequence. The window size and overlap
|
|
||||||
length can be specified by the user.
|
|
||||||
|
|
||||||
REFORM - Reformats multiply aligned nucleic acid or protein
|
|
||||||
sequences for publication. Output for M. Waterman's RALIGN
|
|
||||||
program, or the MBCRR MASE editor, can be directly used as input.
|
|
||||||
A variety of options are available for representing gaps, consensus
|
|
||||||
sequences and other features.
|
|
||||||
|
|
||||||
Fristensky (Cornell) Sequence Analysis Package - General purpose
|
|
||||||
sequence analysis package written in Standard Pascal. Features
|
|
||||||
include: sequence numbering, formatting, & translation, restriction
|
|
||||||
site searches & mapping, matrix similarity searches, TESTCODE
|
|
||||||
analysis, base composition analysis. All programs are interactive
|
|
||||||
and read free-format, BIONET, and GenBank files.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
XYLEM DATABASE TOOLS
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
----------
|
|
||||||
| .gen | getloc
|
|
||||||
|----------|<--------------------------
|
|
||||||
| GenBank | |
|
|
||||||
---------- |
|
|
||||||
| |
|
|
||||||
| splitdb |
|
|
||||||
/|\ |
|
|
||||||
/ | \ |
|
|
||||||
/ | \ |
|
|
||||||
/ | \ |
|
|
||||||
/ | \ |
|
|
||||||
/ | \ |
|
|
||||||
v v v |
|
|
||||||
---------- ---------- ---------- |
|
|
||||||
| .ano | | .wrp | | .ind | |
|
|
||||||
|----------| |----------| |----------| |
|
|
||||||
|annotation| | sequence | | index | |
|
|
||||||
---------- ---------- ---------- |
|
|
||||||
| \ | / |
|
|
||||||
| \ | / |
|
|
||||||
| \ | / |
|
|
||||||
| \ | / |
|
|
||||||
grep -n | \ | / |
|
|
||||||
| \ | / |
|
|
||||||
| | |
|
|
||||||
| | -------------------------------+
|
|
||||||
| ^ |
|
|
||||||
v | getob |
|
|
||||||
---------- ---------- v
|
|
||||||
| .grep | identify | .nam | ----------
|
|
||||||
|----------| --------->|----------| | .wrp |
|
|
||||||
| numbered | | LOCUS | ----------
|
|
||||||
|file lines| ---------- | eg. mRNA |
|
|
||||||
---------- | ^ | tRNA |
|
|
||||||
| | | rRNA |
|
|
||||||
| | | CDS |
|
|
||||||
--comm-- ----------
|
|
||||||
(logical operations on
|
|
||||||
sets of names)
|
|
||||||
|
|
||||||
Dr. Brian Fristensky
|
|
||||||
Dept. of Plant Science
|
|
||||||
University of Manitoba
|
|
||||||
Winnipeg, MB R3T 2N2 CANADA
|
|
||||||
204-474-6085
|
|
||||||
frist@cc.umanitoba.ca
|
|
||||||
|
|
BIN
HGL_SRC/Alloc.o
BIN
HGL_SRC/Alloc.o
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
HGL_SRC/MakeCons
BIN
HGL_SRC/MakeCons
Binary file not shown.
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
CC = cc
|
CC = cc
|
||||||
#FLAGS = -g
|
FLAGS = -m32
|
||||||
OPENWINHOME = /usr/openwin
|
OPENWINHOME = ../usr
|
||||||
MFILE =
|
MFILE =
|
||||||
INCDIR = -I$(OPENWINHOME)/include
|
INCDIR = -I/usr/include/xview
|
||||||
LIBDIR = -L$(OPENWINHOME)/lib
|
LIBDIR = -L/usr/lib32
|
||||||
LIBS = -lxview -lolgx -lX11
|
LIBS = -lxview -lolgx -lX11
|
||||||
|
|
||||||
libs.o = Alloc.o HGLfuncs.o
|
libs.o = Alloc.o HGLfuncs.o
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,5 +0,0 @@
|
||||||
#/bin/csh
|
|
||||||
make all
|
|
||||||
cp Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool ../bin
|
|
||||||
rm Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool
|
|
||||||
rm *.o
|
|
BIN
HGL_SRC/mapview
BIN
HGL_SRC/mapview
Binary file not shown.
|
@ -1,33 +1,33 @@
|
||||||
implicit integer (a-z)
|
implicit integer (a-z)
|
||||||
parameter (maxn=1500,maxn2=3000)
|
|
||||||
parameter (fldmax=maxn2)
|
c parameter (maxn=625,fldmax=2*maxn)
|
||||||
|
parameter (maxn=1500,maxn2=3000)
|
||||||
|
parameter (fldmax=maxn2)
|
||||||
parameter (infinity=16000,sortmax=30000)
|
parameter (infinity=16000,sortmax=30000)
|
||||||
parameter (mxbits=(maxn*(maxn+1)+31)/32)
|
parameter (mxbits=(maxn*(maxn+1)+31)/32)
|
||||||
parameter (maxtloops=40)
|
parameter (maxtloops=40)
|
||||||
parameter (maxsiz=10000)
|
parameter (maxsiz=10000)
|
||||||
|
|
||||||
integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn)
|
integer*2 vst(maxn*maxn),wst(maxn*maxn)
|
||||||
integer poppen(4),maxpen
|
integer poppen(4),maxpen
|
||||||
real prelog
|
real prelog
|
||||||
|
dimension newnum(maxsiz),hstnum(fldmax),force(fldmax),
|
||||||
dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2),
|
. numseq(fldmax), work(fldmax,0:2),
|
||||||
. work1(maxn2,0:2),work2(maxn2),
|
|
||||||
. stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30)
|
. stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30)
|
||||||
dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2)
|
dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2)
|
||||||
c common /main/ newnum,hstnum,force,work1,work2,
|
common /main/ vst,wst,newnum,hstnum,force,numseq,work,stack,tstk,
|
||||||
common /main/ newnum,hstnum,force,work1,work2,
|
. dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog
|
||||||
. stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n,
|
|
||||||
. numseq,poppen,prelog,maxpen,vst,wst1,wst2
|
|
||||||
|
|
||||||
character*1 seq(maxsiz)
|
character*1 seq(maxsiz)
|
||||||
c character*5 inbuf
|
c character*5 inbuf
|
||||||
character*10 progtitle
|
character*10 progtitle
|
||||||
character*30 seqlab
|
character*30 seqlab
|
||||||
common /seq/ seq,seqlab
|
common /seq/ seq,seqlab
|
||||||
|
data progtitle/'crna'/
|
||||||
|
|
||||||
dimension list(100,4)
|
dimension list(100,4)
|
||||||
common /list/ list,listsz
|
common /list/ list,listsz
|
||||||
common /nm/ vmin
|
common /nm/ n,vmin
|
||||||
data progtitle/'lrna'/
|
|
||||||
|
|
||||||
dimension basepr(maxn)
|
dimension basepr(maxn)
|
||||||
common /traceback/ basepr
|
common /traceback/ basepr
|
||||||
|
@ -40,21 +40,3 @@ c character*5 inbuf
|
||||||
|
|
||||||
integer*2 tloop(maxtloops,2),numoftloops
|
integer*2 tloop(maxtloops,2),numoftloops
|
||||||
common/tloops/tloop,numoftloops
|
common/tloops/tloop,numoftloops
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue