clean: repo
This commit is contained in:
parent
a80e729e21
commit
cd056bb91b
63 changed files with 17 additions and 6147 deletions
|
@ -1,761 +0,0 @@
|
|||
menu:File
|
||||
|
||||
item:test cmask output
|
||||
itemmethod: kedit in1
|
||||
|
||||
in:in1
|
||||
informat:colormask
|
||||
|
||||
item:New sequence <meta N>
|
||||
itemmethod:echo "$Type$Name" > out1
|
||||
itemmeta:n
|
||||
itemhelp:new_sequence.help
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:New Sequence name?
|
||||
argtext:New
|
||||
|
||||
arg:Type
|
||||
argtype:choice_list
|
||||
arglabel:Type?
|
||||
argchoice:DNA/RNA:#
|
||||
argchoice:Amino Acid:%
|
||||
argchoice:Text:\"
|
||||
argchoice:Mask:@
|
||||
|
||||
out:out1
|
||||
outformat:flat
|
||||
|
||||
item:Import Foreign Format
|
||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:INPUTFILE
|
||||
argtype:text
|
||||
arglabel:Name of foreign file?
|
||||
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:Export Foreign Format
|
||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:FORMAT
|
||||
argtype:choice_list
|
||||
argchoice:FASTA:8
|
||||
argchoice:NEXUS:17
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:IG/Stanford:1
|
||||
argchoice:GenBank:2
|
||||
argchoice:NBRF:3
|
||||
argchoice:EMBL:4
|
||||
argchoice:GCG:5
|
||||
argchoice:DNA Strider:6
|
||||
argchoice:Fitch:7
|
||||
argchoice:Pearson:8
|
||||
argchoice:Zuker:9
|
||||
argchoice:Olsen:10
|
||||
argchoice:Phylip v3.2:11
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:Plain text:13
|
||||
|
||||
arg:OUTPUTFILE
|
||||
argtype:text
|
||||
arglabel:Save as?
|
||||
|
||||
in:INPUTFILE
|
||||
informat:genbank
|
||||
|
||||
|
||||
item:Save Selection
|
||||
itemmethod: cat $SAVE_FUNC > $Name
|
||||
itemhelp:save_selection.help
|
||||
|
||||
arg:SAVE_FUNC
|
||||
argtype:chooser
|
||||
arglabel:File format
|
||||
argchoice:Flat:in1
|
||||
argchoice:Genbank:in2
|
||||
argchoice:GDE/HGL:in3
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:File name?
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
in:in2
|
||||
informat:genbank
|
||||
|
||||
in:in3
|
||||
informat:gde
|
||||
|
||||
item:Print Selection
|
||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
||||
itemhelp:print_alignment.help
|
||||
|
||||
arg:SCALE
|
||||
argtype:slider
|
||||
arglabel:Reduce printout by?
|
||||
argmin:1
|
||||
argmax:20
|
||||
argvalue:1
|
||||
|
||||
arg:CMD
|
||||
argtype:chooser
|
||||
argchoice:Lpr:lpr
|
||||
argchoice:Enscript Gaudy:enscript -G -q
|
||||
argchoice:Enscript Two column:enscript -2rG
|
||||
|
||||
arg:PRINTER
|
||||
argtype:text
|
||||
arglabel:Which printer?
|
||||
argtext:lp
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
menu:Edit
|
||||
|
||||
item:Sort
|
||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
||||
itemhelp:heapsortHGL.help
|
||||
|
||||
arg:PRIM_KEY
|
||||
argtype:choice_list
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Primary sort field?
|
||||
|
||||
arg:SEC_KEY
|
||||
argtype:choice_list
|
||||
argchoice:None:
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Secondary sort field?
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:extract
|
||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
menu:DNA/RNA
|
||||
|
||||
item:Translate...
|
||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
||||
|
||||
arg:FRAME
|
||||
argtype:chooser
|
||||
arglabel:Which reading frame?
|
||||
argchoice:First:1
|
||||
argchoice:Second:2
|
||||
argchoice:Third:3
|
||||
argchoice:All six:6
|
||||
|
||||
arg:MNFRM
|
||||
arglabel:Minimum length of AA sequence to translate?
|
||||
argtype:slider
|
||||
argmin:0
|
||||
argmax:100
|
||||
argvalue:20
|
||||
|
||||
arg:LTRCODE
|
||||
argtype:chooser
|
||||
arglabel:Translate to:
|
||||
argchoice:Single letter codes:
|
||||
argchoice:Triple letter codes:-3
|
||||
|
||||
arg:TBL
|
||||
arglabel:Codon table?
|
||||
argtype:chooser
|
||||
argchoice:universal:1
|
||||
argchoice:mycoplasma:2
|
||||
argchoice:yeast:3
|
||||
argchoice:Vert. mito.:4
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
item:Dot plot
|
||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
||||
itemhelp:DotPlotTool.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:Clustal alignment
|
||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
||||
|
||||
itemhelp:clustal_help
|
||||
|
||||
arg:KTUP
|
||||
argtype:slider
|
||||
arglabel:K-tuple size for pairwise search
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:2
|
||||
|
||||
arg:WIN
|
||||
argtype:slider
|
||||
arglabel:Window size
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:4
|
||||
|
||||
arg:Trans
|
||||
argtype:chooser
|
||||
arglabel:Transitions weighted?
|
||||
argchoice:Yes:/TRANSIT
|
||||
argchoice:No:
|
||||
|
||||
arg:FIXED
|
||||
argtype:slider
|
||||
arglabel:Fixed gap penalty
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:FLOAT
|
||||
arglabel:Floating gap penalty
|
||||
argtype:slider
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:REPORT
|
||||
argtype:chooser
|
||||
arglabel:View assembly report?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit in1.rpt&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
item:Variable Positions
|
||||
itemmethod:varpos $REV < in1 > out1
|
||||
|
||||
arg:REV
|
||||
argtype:chooser
|
||||
arglabel:Highlight (darken)
|
||||
argchoice:Conserved positions:
|
||||
argchoice:variable positions:-rev
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Phrap
|
||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
|
||||
out:out1
|
||||
outformat:genbank
|
||||
|
||||
item:SNAP
|
||||
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
out:out1
|
||||
outformat:text
|
||||
|
||||
|
||||
|
||||
|
||||
item:Find all <meta-f>
|
||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
||||
itemhelp:findall.help
|
||||
itemmeta:f
|
||||
|
||||
arg:SEARCH
|
||||
argtype:text
|
||||
arglabel:Search String
|
||||
|
||||
arg:PRCNT
|
||||
argtype:slider
|
||||
arglabel:Percent mismatch
|
||||
argmin:0
|
||||
argmax:75
|
||||
argvalue:10
|
||||
|
||||
arg:CASE
|
||||
argtype:chooser
|
||||
arglabel:Case
|
||||
argchoice:Upper equals lower:
|
||||
argchoice:Upper not equal lower:-case
|
||||
|
||||
arg:UT
|
||||
argtype:chooser
|
||||
arglabel:U equal T?
|
||||
argchoice:Yes:-u=t
|
||||
argchoice:No:
|
||||
argvalue:0
|
||||
|
||||
arg:MAT
|
||||
arglabel:Match color
|
||||
argtype:choice_list
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:2
|
||||
|
||||
arg:MIS
|
||||
argtype:choice_list
|
||||
arglabel:Mismatch color
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:7
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Sequence Consensus
|
||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
||||
itemhelp:MakeCons.help
|
||||
|
||||
arg:METHOD
|
||||
arglabel:Method
|
||||
argtype:chooser
|
||||
argchoice:IUPAC:-iupac
|
||||
argchoice:Majority:-majority $PERCENT
|
||||
|
||||
arg:MASK
|
||||
argtype:chooser
|
||||
arglabel:Create a new:
|
||||
argchoice:Sequence:
|
||||
argchoice:Selection Mask: | Consto01mask
|
||||
|
||||
arg:PERCENT
|
||||
arglabel:Minimum Percentage for Majority
|
||||
argtype:slider
|
||||
argmin:50
|
||||
argmax:100
|
||||
argvalue:75
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
|
||||
#Menu for DNA/RNA
|
||||
|
||||
item:blastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -r $MATCH -q $MMSCORE > in1.tmp; kedit in1.tmp; rm in1*)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:MATCH
|
||||
argtype:slider
|
||||
arglabel:Match Score
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:5
|
||||
|
||||
arg:MMSCORE
|
||||
argtype:slider
|
||||
arglabel:Mismatch Score
|
||||
argmin:-10
|
||||
argmax:-1
|
||||
argvalue:-5
|
||||
|
||||
item:blastx
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
||||
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
item:------------------------
|
||||
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: enter the file name
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: enter the name of the DB
|
||||
|
||||
menu:seq. datasets
|
||||
|
||||
item:-------------
|
||||
item:add a new dataset
|
||||
itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the dataset name ?
|
||||
|
||||
arg:file
|
||||
argtype:text
|
||||
arglabel:Enter the dataset file (in FASTA) ?
|
||||
|
||||
|
||||
#Menu for Protein
|
||||
menu:protein
|
||||
item:blastp
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30 .; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
item:tblastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
|
||||
item:Map View
|
||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
||||
itemhelp:mapview.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
arg:PBL
|
||||
arglabel:Pixel Between Lines
|
||||
argtype:slider
|
||||
argvalue:10
|
||||
argmin:1
|
||||
argmax:15
|
||||
|
||||
arg:NPP
|
||||
arglabel:Nucleotides Per Pixel
|
||||
argtype:slider
|
||||
argvalue:1
|
||||
argmin:1
|
||||
argmax:20
|
||||
|
||||
arg:LWIDTH
|
||||
arglabel:Line Thickness
|
||||
argtype:slider
|
||||
argvalue:2
|
||||
argmin:1
|
||||
argmax:5
|
||||
|
||||
item:--------------------------
|
||||
item:Add a new protein blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: Enter the file (in FASTA)
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: Enter the name of the DB
|
||||
|
||||
menu:Phylogeny
|
||||
|
||||
|
||||
item:Phylip help
|
||||
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
|
||||
|
||||
arg:FILE
|
||||
argtype:choice_list
|
||||
arglabel:Which program?
|
||||
argchoice:clique:clique.html
|
||||
argchoice:consense:consense.html
|
||||
argchoice:contchar:contchar.html
|
||||
argchoice:contml:contml.html
|
||||
argchoice:contrast:contrast.html
|
||||
argchoice:discrete:discrete.html
|
||||
argchoice:distance:distance.html
|
||||
argchoice:dnaboot:dnaboot.html
|
||||
argchoice:dnacomp:dnacomp.html
|
||||
argchoice:dnadist:dnadist.html
|
||||
argchoice:dnainvar:dnainvar.html
|
||||
argchoice:dnaml:dnaml.html
|
||||
argchoice:dnamlk:dnamlk.html
|
||||
argchoice:dnamove:dnamove.html
|
||||
argchoice:dnapars:dnapars.html
|
||||
argchoice:dnapenny:dnapenny.html
|
||||
argchoice:dollop:dollop.html
|
||||
argchoice:dolmove:dolmove.html
|
||||
argchoice:dolpenny:dolpenny.html
|
||||
argchoice:draw:draw.html
|
||||
argchoice:drawgram:drawgram.html
|
||||
argchoice:drawtree:drawtree.html
|
||||
argchoice:factor:factor.html
|
||||
argchoice:fitch:fitch.html
|
||||
argchoice:gendist:gendist.html
|
||||
argchoice:kitsch:kitsch.html
|
||||
argchoice:main:main.html
|
||||
argchoice:mix:mix.html
|
||||
argchoice:move:move.html
|
||||
argchoice:neighbor:neighbor.html
|
||||
argchoice:penny:penny.html
|
||||
argchoice:protpars:protpars.html
|
||||
argchoice:read.me.general:read.me.general.html
|
||||
argchoice:restml:restml.html
|
||||
argchoice:seqboot:seqboot.html
|
||||
argchoice:sequence:sequence.html
|
||||
|
||||
|
||||
|
||||
item:Phylip 3.5
|
||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
||||
|
||||
arg:PROGRAM
|
||||
argtype:choice_list
|
||||
arglabel:Which program to run?
|
||||
argchoice:DNAPARS:dnapars
|
||||
argchoice:DNABOOT:dnaboot
|
||||
argchoice:DNAPENNY:dnapenny
|
||||
argchoice:DNAML:dnaml
|
||||
argchoice:DNAMLK:dnamlk
|
||||
argchoice:DNACOMP:dnacomp
|
||||
argchoice:DNAMOVE:dnamove
|
||||
argchoice:DNAINVAR:dnainvar
|
||||
argchoice:PROTPARS:protpars
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
item:Phylip DNA Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
||||
|
||||
arg:EXPLAIN
|
||||
argtype:text
|
||||
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHBOR+CONSENSE
|
||||
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:DNADIST+NEIGHBOR:
|
||||
argchoice:DNADIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Run ?
|
||||
argtype:chooser
|
||||
argchoice:Run without Bootstrap:
|
||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
|
||||
arg:DNA
|
||||
argtype:text
|
||||
arglabel:Name of DNADIST outfile?
|
||||
|
||||
arg:NEI
|
||||
argtype:text
|
||||
arglabel:Name of NEIGHBOR outfile?
|
||||
|
||||
arg:TREE
|
||||
argtype:text
|
||||
arglabel:Name of TREEFILE ?
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
item:Phylip PROTEIN Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:PROTDIST+NEIGHBOR:
|
||||
argchoice:PROTDIST+NEIGHBOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
argchoice:No Bootstrap:
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
menu:On-Line Res.
|
||||
|
||||
item:GDE for Linux resources at Bioafrica.net
|
||||
itemmethod:netscape http://www.bioafrica.net &
|
||||
|
||||
item:-------------------------
|
||||
item:add a new website
|
||||
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the site name
|
||||
|
||||
arg:url
|
||||
argtype:text
|
||||
arglabel:Enter the URL (including http://)
|
|
@ -1,761 +0,0 @@
|
|||
menu:File
|
||||
|
||||
item:test cmask output
|
||||
itemmethod: kedit in1
|
||||
|
||||
in:in1
|
||||
informat:colormask
|
||||
|
||||
item:New sequence <meta N>
|
||||
itemmethod:echo "$Type$Name" > out1
|
||||
itemmeta:n
|
||||
itemhelp:new_sequence.help
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:New Sequence name?
|
||||
argtext:New
|
||||
|
||||
arg:Type
|
||||
argtype:choice_list
|
||||
arglabel:Type?
|
||||
argchoice:DNA/RNA:#
|
||||
argchoice:Amino Acid:%
|
||||
argchoice:Text:\"
|
||||
argchoice:Mask:@
|
||||
|
||||
out:out1
|
||||
outformat:flat
|
||||
|
||||
item:Import Foreign Format
|
||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:INPUTFILE
|
||||
argtype:text
|
||||
arglabel:Name of foreign file?
|
||||
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:Export Foreign Format
|
||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:FORMAT
|
||||
argtype:choice_list
|
||||
argchoice:FASTA:8
|
||||
argchoice:NEXUS:17
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:IG/Stanford:1
|
||||
argchoice:GenBank:2
|
||||
argchoice:NBRF:3
|
||||
argchoice:EMBL:4
|
||||
argchoice:GCG:5
|
||||
argchoice:DNA Strider:6
|
||||
argchoice:Fitch:7
|
||||
argchoice:Pearson:8
|
||||
argchoice:Zuker:9
|
||||
argchoice:Olsen:10
|
||||
argchoice:Phylip v3.2:11
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:Plain text:13
|
||||
|
||||
arg:OUTPUTFILE
|
||||
argtype:text
|
||||
arglabel:Save as?
|
||||
|
||||
in:INPUTFILE
|
||||
informat:genbank
|
||||
|
||||
|
||||
item:Save Selection
|
||||
itemmethod: cat $SAVE_FUNC > $Name
|
||||
itemhelp:save_selection.help
|
||||
|
||||
arg:SAVE_FUNC
|
||||
argtype:chooser
|
||||
arglabel:File format
|
||||
argchoice:Flat:in1
|
||||
argchoice:Genbank:in2
|
||||
argchoice:GDE/HGL:in3
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:File name?
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
in:in2
|
||||
informat:genbank
|
||||
|
||||
in:in3
|
||||
informat:gde
|
||||
|
||||
item:Print Selection
|
||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
||||
itemhelp:print_alignment.help
|
||||
|
||||
arg:SCALE
|
||||
argtype:slider
|
||||
arglabel:Reduce printout by?
|
||||
argmin:1
|
||||
argmax:20
|
||||
argvalue:1
|
||||
|
||||
arg:CMD
|
||||
argtype:chooser
|
||||
argchoice:Lpr:lpr
|
||||
argchoice:Enscript Gaudy:enscript -G -q
|
||||
argchoice:Enscript Two column:enscript -2rG
|
||||
|
||||
arg:PRINTER
|
||||
argtype:text
|
||||
arglabel:Which printer?
|
||||
argtext:lp
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
menu:Edit
|
||||
|
||||
item:Sort
|
||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
||||
itemhelp:heapsortHGL.help
|
||||
|
||||
arg:PRIM_KEY
|
||||
argtype:choice_list
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Primary sort field?
|
||||
|
||||
arg:SEC_KEY
|
||||
argtype:choice_list
|
||||
argchoice:None:
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Secondary sort field?
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:extract
|
||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
menu:DNA/RNA
|
||||
|
||||
item:Translate...
|
||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
||||
|
||||
arg:FRAME
|
||||
argtype:chooser
|
||||
arglabel:Which reading frame?
|
||||
argchoice:First:1
|
||||
argchoice:Second:2
|
||||
argchoice:Third:3
|
||||
argchoice:All six:6
|
||||
|
||||
arg:MNFRM
|
||||
arglabel:Minimum length of AA sequence to translate?
|
||||
argtype:slider
|
||||
argmin:0
|
||||
argmax:100
|
||||
argvalue:20
|
||||
|
||||
arg:LTRCODE
|
||||
argtype:chooser
|
||||
arglabel:Translate to:
|
||||
argchoice:Single letter codes:
|
||||
argchoice:Triple letter codes:-3
|
||||
|
||||
arg:TBL
|
||||
arglabel:Codon table?
|
||||
argtype:chooser
|
||||
argchoice:universal:1
|
||||
argchoice:mycoplasma:2
|
||||
argchoice:yeast:3
|
||||
argchoice:Vert. mito.:4
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
item:Dot plot
|
||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
||||
itemhelp:DotPlotTool.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:Clustal alignment
|
||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
||||
|
||||
itemhelp:clustal_help
|
||||
|
||||
arg:KTUP
|
||||
argtype:slider
|
||||
arglabel:K-tuple size for pairwise search
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:2
|
||||
|
||||
arg:WIN
|
||||
argtype:slider
|
||||
arglabel:Window size
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:4
|
||||
|
||||
arg:Trans
|
||||
argtype:chooser
|
||||
arglabel:Transitions weighted?
|
||||
argchoice:Yes:/TRANSIT
|
||||
argchoice:No:
|
||||
|
||||
arg:FIXED
|
||||
argtype:slider
|
||||
arglabel:Fixed gap penalty
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:FLOAT
|
||||
arglabel:Floating gap penalty
|
||||
argtype:slider
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:REPORT
|
||||
argtype:chooser
|
||||
arglabel:View assembly report?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit in1.rpt&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
item:Variable Positions
|
||||
itemmethod:varpos $REV < in1 > out1
|
||||
|
||||
arg:REV
|
||||
argtype:chooser
|
||||
arglabel:Highlight (darken)
|
||||
argchoice:Conserved positions:
|
||||
argchoice:variable positions:-rev
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Phrap
|
||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
|
||||
out:out1
|
||||
outformat:genbank
|
||||
|
||||
item:SNAP
|
||||
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; shelltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
out:out1
|
||||
outformat:text
|
||||
|
||||
|
||||
|
||||
|
||||
item:Find all <meta-f>
|
||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
||||
itemhelp:findall.help
|
||||
itemmeta:f
|
||||
|
||||
arg:SEARCH
|
||||
argtype:text
|
||||
arglabel:Search String
|
||||
|
||||
arg:PRCNT
|
||||
argtype:slider
|
||||
arglabel:Percent mismatch
|
||||
argmin:0
|
||||
argmax:75
|
||||
argvalue:10
|
||||
|
||||
arg:CASE
|
||||
argtype:chooser
|
||||
arglabel:Case
|
||||
argchoice:Upper equals lower:
|
||||
argchoice:Upper not equal lower:-case
|
||||
|
||||
arg:UT
|
||||
argtype:chooser
|
||||
arglabel:U equal T?
|
||||
argchoice:Yes:-u=t
|
||||
argchoice:No:
|
||||
argvalue:0
|
||||
|
||||
arg:MAT
|
||||
arglabel:Match color
|
||||
argtype:choice_list
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:2
|
||||
|
||||
arg:MIS
|
||||
argtype:choice_list
|
||||
arglabel:Mismatch color
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:7
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Sequence Consensus
|
||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
||||
itemhelp:MakeCons.help
|
||||
|
||||
arg:METHOD
|
||||
arglabel:Method
|
||||
argtype:chooser
|
||||
argchoice:IUPAC:-iupac
|
||||
argchoice:Majority:-majority $PERCENT
|
||||
|
||||
arg:MASK
|
||||
argtype:chooser
|
||||
arglabel:Create a new:
|
||||
argchoice:Sequence:
|
||||
argchoice:Selection Mask: | Consto01mask
|
||||
|
||||
arg:PERCENT
|
||||
arglabel:Minimum Percentage for Majority
|
||||
argtype:slider
|
||||
argmin:50
|
||||
argmax:100
|
||||
argvalue:75
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
|
||||
#Menu for DNA/RNA
|
||||
|
||||
item:blastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -r $MATCH -q $MMSCORE > in1.tmp; kedit in1.tmp; rm in1*)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:MATCH
|
||||
argtype:slider
|
||||
arglabel:Match Score
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:5
|
||||
|
||||
arg:MMSCORE
|
||||
argtype:slider
|
||||
arglabel:Mismatch Score
|
||||
argmin:-10
|
||||
argmax:-1
|
||||
argvalue:-5
|
||||
|
||||
item:blastx
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
||||
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
item:------------------------
|
||||
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: enter the file name
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: enter the name of the DB
|
||||
|
||||
menu:seq. datasets
|
||||
|
||||
item:-------------
|
||||
item:add a new dataset
|
||||
itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the dataset name ?
|
||||
|
||||
arg:file
|
||||
argtype:text
|
||||
arglabel:Enter the dataset file (in FASTA) ?
|
||||
|
||||
|
||||
#Menu for Protein
|
||||
menu:protein
|
||||
item:blastp
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
|
||||
arg:Matrix
|
||||
barglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
item:tblastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
|
||||
item:Map View
|
||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
||||
itemhelp:mapview.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
arg:PBL
|
||||
arglabel:Pixel Between Lines
|
||||
argtype:slider
|
||||
argvalue:10
|
||||
argmin:1
|
||||
argmax:15
|
||||
|
||||
arg:NPP
|
||||
arglabel:Nucleotides Per Pixel
|
||||
argtype:slider
|
||||
argvalue:1
|
||||
argmin:1
|
||||
argmax:20
|
||||
|
||||
arg:LWIDTH
|
||||
arglabel:Line Thickness
|
||||
argtype:slider
|
||||
argvalue:2
|
||||
argmin:1
|
||||
argmax:5
|
||||
|
||||
item:--------------------------
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: Enter the file (in FASTA)
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: Enter the name of the DB
|
||||
|
||||
menu:Phylogeny
|
||||
|
||||
|
||||
item:Phylip help
|
||||
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
|
||||
|
||||
arg:FILE
|
||||
argtype:choice_list
|
||||
arglabel:Which program?
|
||||
argchoice:clique:clique.html
|
||||
argchoice:consense:consense.html
|
||||
argchoice:contchar:contchar.html
|
||||
argchoice:contml:contml.html
|
||||
argchoice:contrast:contrast.html
|
||||
argchoice:discrete:discrete.html
|
||||
argchoice:distance:distance.html
|
||||
argchoice:dnaboot:dnaboot.html
|
||||
argchoice:dnacomp:dnacomp.html
|
||||
argchoice:dnadist:dnadist.html
|
||||
argchoice:dnainvar:dnainvar.html
|
||||
argchoice:dnaml:dnaml.html
|
||||
argchoice:dnamlk:dnamlk.html
|
||||
argchoice:dnamove:dnamove.html
|
||||
argchoice:dnapars:dnapars.html
|
||||
argchoice:dnapenny:dnapenny.html
|
||||
argchoice:dollop:dollop.html
|
||||
argchoice:dolmove:dolmove.html
|
||||
argchoice:dolpenny:dolpenny.html
|
||||
argchoice:draw:draw.html
|
||||
argchoice:drawgram:drawgram.html
|
||||
argchoice:drawtree:drawtree.html
|
||||
argchoice:factor:factor.html
|
||||
argchoice:fitch:fitch.html
|
||||
argchoice:gendist:gendist.html
|
||||
argchoice:kitsch:kitsch.html
|
||||
argchoice:main:main.html
|
||||
argchoice:mix:mix.html
|
||||
argchoice:move:move.html
|
||||
argchoice:neighbor:neighbor.html
|
||||
argchoice:penny:penny.html
|
||||
argchoice:protpars:protpars.html
|
||||
argchoice:read.me.general:read.me.general.html
|
||||
argchoice:restml:restml.html
|
||||
argchoice:seqboot:seqboot.html
|
||||
argchoice:sequence:sequence.html
|
||||
|
||||
|
||||
|
||||
item:Phylip 3.5
|
||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
||||
|
||||
arg:PROGRAM
|
||||
argtype:choice_list
|
||||
arglabel:Which program to run?
|
||||
argchoice:DNAPARS:dnapars
|
||||
argchoice:DNABOOT:dnaboot
|
||||
argchoice:DNAPENNY:dnapenny
|
||||
argchoice:DNAML:dnaml
|
||||
argchoice:DNAMLK:dnamlk
|
||||
argchoice:DNACOMP:dnacomp
|
||||
argchoice:DNAMOVE:dnamove
|
||||
argchoice:DNAINVAR:dnainvar
|
||||
argchoice:PROTPARS:protpars
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
item:Phylip DNA Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
||||
|
||||
arg:EXPLAIN
|
||||
argtype:text
|
||||
arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE
|
||||
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:DNADIST+NEIGHBOR:
|
||||
argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Run ?
|
||||
argtype:chooser
|
||||
argchoice:Run without Bootstrap:
|
||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
|
||||
arg:DNA
|
||||
argtype:text
|
||||
arglabel:Name of DNADIST outfile?
|
||||
|
||||
arg:NEI
|
||||
argtype:text
|
||||
arglabel:Name of NEIGHBOR outfile?
|
||||
|
||||
arg:TREE
|
||||
argtype:text
|
||||
arglabel:Name of TREEFILE ?
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
item:Phylip PROTEIN Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:PROTDIST+NEIGHBOR:
|
||||
argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
argchoice:No Bootstrap:
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
menu:On-Line Res.
|
||||
|
||||
item:GDE for Linux resources at Bioafrica.net
|
||||
itemmethod:netscape http://www.bioafrica.net &
|
||||
|
||||
item:-------------------------
|
||||
item:add a new website
|
||||
itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the site name
|
||||
|
||||
arg:url
|
||||
argtype:text
|
||||
arglabel:Enter the URL (including http://)
|
791
CORE/.GDEmenus~
791
CORE/.GDEmenus~
|
@ -1,791 +0,0 @@
|
|||
1menu:File
|
||||
|
||||
item:test cmask output
|
||||
itemmethod: kedit in1
|
||||
|
||||
in:in1
|
||||
informat:colormask
|
||||
|
||||
item:New sequence <meta N>
|
||||
itemmethod:echo "$Type$Name" > out1
|
||||
itemmeta:n
|
||||
itemhelp:new_sequence.help
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:New Sequence name?
|
||||
argtext:New
|
||||
|
||||
arg:Type
|
||||
argtype:choice_list
|
||||
arglabel:Type?
|
||||
argchoice:DNA/RNA:#
|
||||
argchoice:Amino Acid:%
|
||||
argchoice:Text:\"
|
||||
argchoice:Mask:@
|
||||
|
||||
out:out1
|
||||
outformat:flat
|
||||
|
||||
item:Import Foreign Format
|
||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:INPUTFILE
|
||||
argtype:text
|
||||
arglabel:Name of foreign file?
|
||||
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:Export Foreign Format
|
||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:FORMAT
|
||||
argtype:choice_list
|
||||
argchoice:FASTA:8
|
||||
argchoice:NEXUS:17
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:IG/Stanford:1
|
||||
argchoice:GenBank:2
|
||||
argchoice:NBRF:3
|
||||
argchoice:EMBL:4
|
||||
argchoice:GCG:5
|
||||
argchoice:DNA Strider:6
|
||||
argchoice:Fitch:7
|
||||
argchoice:Pearson:8
|
||||
argchoice:Zuker:9
|
||||
argchoice:Olsen:10
|
||||
argchoice:Phylip v3.2:11
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:Plain text:13
|
||||
|
||||
arg:OUTPUTFILE
|
||||
argtype:text
|
||||
arglabel:Save as?
|
||||
|
||||
in:INPUTFILE
|
||||
informat:genbank
|
||||
|
||||
|
||||
item:Save Selection
|
||||
itemmethod: cat $SAVE_FUNC > $Name
|
||||
itemhelp:save_selection.help
|
||||
|
||||
arg:SAVE_FUNC
|
||||
argtype:chooser
|
||||
arglabel:File format
|
||||
argchoice:Flat:in1
|
||||
argchoice:Genbank:in2
|
||||
argchoice:GDE/HGL:in3
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:File name?
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
in:in2
|
||||
informat:genbank
|
||||
|
||||
in:in3
|
||||
informat:gde
|
||||
|
||||
item:Print Selection
|
||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
||||
itemhelp:print_alignment.help
|
||||
|
||||
arg:SCALE
|
||||
argtype:slider
|
||||
arglabel:Reduce printout by?
|
||||
argmin:1
|
||||
argmax:20
|
||||
argvalue:1
|
||||
|
||||
arg:CMD
|
||||
argtype:chooser
|
||||
argchoice:Lpr:lpr
|
||||
argchoice:Enscript Gaudy:enscript -G -q
|
||||
argchoice:Enscript Two column:enscript -2rG
|
||||
|
||||
arg:PRINTER
|
||||
argtype:text
|
||||
arglabel:Which printer?
|
||||
argtext:lp
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
menu:Edit
|
||||
|
||||
item:Sort
|
||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
||||
itemhelp:heapsortHGL.help
|
||||
|
||||
arg:PRIM_KEY
|
||||
argtype:choice_list
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Primary sort field?
|
||||
|
||||
arg:SEC_KEY
|
||||
argtype:choice_list
|
||||
argchoice:None:
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Secondary sort field?
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:extract
|
||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
menu:DNA/RNA
|
||||
|
||||
item:Translate...
|
||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
||||
|
||||
arg:FRAME
|
||||
argtype:chooser
|
||||
arglabel:Which reading frame?
|
||||
argchoice:First:1
|
||||
argchoice:Second:2
|
||||
argchoice:Third:3
|
||||
argchoice:All six:6
|
||||
|
||||
arg:MNFRM
|
||||
arglabel:Minimum length of AA sequence to translate?
|
||||
argtype:slider
|
||||
argmin:0
|
||||
argmax:100
|
||||
argvalue:20
|
||||
|
||||
arg:LTRCODE
|
||||
argtype:chooser
|
||||
arglabel:Translate to:
|
||||
argchoice:Single letter codes:
|
||||
argchoice:Triple letter codes:-3
|
||||
|
||||
arg:TBL
|
||||
arglabel:Codon table?
|
||||
argtype:chooser
|
||||
argchoice:universal:1
|
||||
argchoice:mycoplasma:2
|
||||
argchoice:yeast:3
|
||||
argchoice:Vert. mito.:4
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
item:Dot plot
|
||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
||||
itemhelp:DotPlotTool.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:Clustal alignment
|
||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
||||
|
||||
itemhelp:clustal_help
|
||||
|
||||
arg:KTUP
|
||||
argtype:slider
|
||||
arglabel:K-tuple size for pairwise search
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:2
|
||||
|
||||
arg:WIN
|
||||
argtype:slider
|
||||
arglabel:Window size
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:4
|
||||
|
||||
arg:Trans
|
||||
argtype:chooser
|
||||
arglabel:Transitions weighted?
|
||||
argchoice:Yes:/TRANSIT
|
||||
argchoice:No:
|
||||
|
||||
arg:FIXED
|
||||
argtype:slider
|
||||
arglabel:Fixed gap penalty
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:FLOAT
|
||||
arglabel:Floating gap penalty
|
||||
argtype:slider
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:REPORT
|
||||
argtype:chooser
|
||||
arglabel:View assembly report?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit in1.rpt&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
item:Variable Positions
|
||||
itemmethod:varpos $REV < in1 > out1
|
||||
|
||||
arg:REV
|
||||
argtype:chooser
|
||||
arglabel:Highlight (darken)
|
||||
argchoice:Conserved positions:
|
||||
argchoice:variable positions:-rev
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Phrap
|
||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
|
||||
out:out1
|
||||
outformat:genbank
|
||||
|
||||
item:SNAP
|
||||
itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
out:out1
|
||||
outformat:text
|
||||
|
||||
|
||||
|
||||
|
||||
item:Find all <meta-f>
|
||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
||||
itemhelp:findall.help
|
||||
itemmeta:f
|
||||
|
||||
arg:SEARCH
|
||||
argtype:text
|
||||
arglabel:Search String
|
||||
|
||||
arg:PRCNT
|
||||
argtype:slider
|
||||
arglabel:Percent mismatch
|
||||
argmin:0
|
||||
argmax:75
|
||||
argvalue:10
|
||||
|
||||
arg:CASE
|
||||
argtype:chooser
|
||||
arglabel:Case
|
||||
argchoice:Upper equals lower:
|
||||
argchoice:Upper not equal lower:-case
|
||||
|
||||
arg:UT
|
||||
argtype:chooser
|
||||
arglabel:U equal T?
|
||||
argchoice:Yes:-u=t
|
||||
argchoice:No:
|
||||
argvalue:0
|
||||
|
||||
arg:MAT
|
||||
arglabel:Match color
|
||||
argtype:choice_list
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:2
|
||||
|
||||
arg:MIS
|
||||
argtype:choice_list
|
||||
arglabel:Mismatch color
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:7
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Sequence Consensus
|
||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
||||
itemhelp:MakeCons.help
|
||||
|
||||
arg:METHOD
|
||||
arglabel:Method
|
||||
argtype:chooser
|
||||
argchoice:IUPAC:-iupac
|
||||
argchoice:Majority:-majority $PERCENT
|
||||
|
||||
arg:MASK
|
||||
argtype:chooser
|
||||
arglabel:Create a new:
|
||||
argchoice:Sequence:
|
||||
argchoice:Selection Mask: | Consto01mask
|
||||
|
||||
arg:PERCENT
|
||||
arglabel:Minimum Percentage for Majority
|
||||
argtype:slider
|
||||
argmin:50
|
||||
argmax:100
|
||||
argvalue:75
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
|
||||
#Menu for DNA/RNA
|
||||
|
||||
item:blastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV-1 Seq. Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:MATCH
|
||||
argtype:slider
|
||||
arglabel:Match Score
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:5
|
||||
|
||||
arg:MMSCORE
|
||||
argtype:slider
|
||||
arglabel:Mismatch Score
|
||||
argmin:-10
|
||||
argmax:-1
|
||||
argvalue:-5
|
||||
|
||||
item:blastx
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
||||
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
item:------------------------
|
||||
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: enter the file name
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: enter the name of the DB
|
||||
|
||||
menu:seq. datasets
|
||||
item:tttt
|
||||
itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:HIV1POLDNA.fasta
|
||||
itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:structure
|
||||
itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:-------------
|
||||
item:add a new dataset
|
||||
itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the dataset name ?
|
||||
|
||||
arg:file
|
||||
argtype:text
|
||||
arglabel:Enter the dataset file (in FASTA) ?
|
||||
|
||||
|
||||
#Menu for Protein
|
||||
menu:protein
|
||||
item:blastp
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBPROT
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:ttttt:/usr/local/biotools/db/tttt
|
||||
argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp
|
||||
argchoice:yyyy:/usr/local/biotools/db/test
|
||||
|
||||
arg:Matrix
|
||||
barglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
item:tblastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
|
||||
item:Map View
|
||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
||||
itemhelp:mapview.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
arg:PBL
|
||||
arglabel:Pixel Between Lines
|
||||
argtype:slider
|
||||
argvalue:10
|
||||
argmin:1
|
||||
argmax:15
|
||||
|
||||
arg:NPP
|
||||
arglabel:Nucleotides Per Pixel
|
||||
argtype:slider
|
||||
argvalue:1
|
||||
argmin:1
|
||||
argmax:20
|
||||
|
||||
arg:LWIDTH
|
||||
arglabel:Line Thickness
|
||||
argtype:slider
|
||||
argvalue:2
|
||||
argmin:1
|
||||
argmax:5
|
||||
|
||||
item:--------------------------
|
||||
item:Add a new Protein blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: Enter the file (in FASTA)
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: Enter the name of the DB
|
||||
|
||||
menu:Phylogeny
|
||||
|
||||
|
||||
item:Phylip help
|
||||
itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)&
|
||||
|
||||
arg:FILE
|
||||
argtype:choice_list
|
||||
arglabel:Which program?
|
||||
argchoice:clique:clique.html
|
||||
argchoice:consense:consense.html
|
||||
argchoice:contchar:contchar.html
|
||||
argchoice:contml:contml.html
|
||||
argchoice:contrast:contrast.html
|
||||
argchoice:discrete:discrete.html
|
||||
argchoice:distance:distance.html
|
||||
argchoice:dnaboot:dnaboot.html
|
||||
argchoice:dnacomp:dnacomp.html
|
||||
argchoice:dnadist:dnadist.html
|
||||
argchoice:dnainvar:dnainvar.html
|
||||
argchoice:dnaml:dnaml.html
|
||||
argchoice:dnamlk:dnamlk.html
|
||||
argchoice:dnamove:dnamove.html
|
||||
argchoice:dnapars:dnapars.html
|
||||
argchoice:dnapenny:dnapenny.html
|
||||
argchoice:dollop:dollop.html
|
||||
argchoice:dolmove:dolmove.html
|
||||
argchoice:dolpenny:dolpenny.html
|
||||
argchoice:draw:draw.html
|
||||
argchoice:drawgram:drawgram.html
|
||||
argchoice:drawtree:drawtree.html
|
||||
argchoice:factor:factor.html
|
||||
argchoice:fitch:fitch.html
|
||||
argchoice:gendist:gendist.html
|
||||
argchoice:kitsch:kitsch.html
|
||||
argchoice:main:main.html
|
||||
argchoice:mix:mix.html
|
||||
argchoice:move:move.html
|
||||
argchoice:neighbor:neighbor.html
|
||||
argchoice:penny:penny.html
|
||||
argchoice:protpars:protpars.html
|
||||
argchoice:read.me.general:read.me.general.html
|
||||
argchoice:restml:restml.html
|
||||
argchoice:seqboot:seqboot.html
|
||||
argchoice:sequence:sequence.html
|
||||
|
||||
|
||||
|
||||
item:Phylip 3.5
|
||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
||||
|
||||
arg:PROGRAM
|
||||
argtype:choice_list
|
||||
arglabel:Which program to run?
|
||||
argchoice:DNAPARS:dnapars
|
||||
argchoice:DNABOOT:dnaboot
|
||||
argchoice:DNAPENNY:dnapenny
|
||||
argchoice:DNAML:dnaml
|
||||
argchoice:DNAMLK:dnamlk
|
||||
argchoice:DNACOMP:dnacomp
|
||||
argchoice:DNAMOVE:dnamove
|
||||
argchoice:DNAINVAR:dnainvar
|
||||
argchoice:PROTPARS:protpars
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
item:Phylip DNA Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
||||
|
||||
arg:EXPLAIN
|
||||
argtype:text
|
||||
arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE
|
||||
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:DNADIST+NEIGHBOR:
|
||||
argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Run ?
|
||||
argtype:chooser
|
||||
argchoice:Run without Bootstrap:
|
||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
|
||||
arg:DNA
|
||||
argtype:text
|
||||
arglabel:Name of DNADIST outfile?
|
||||
|
||||
arg:NEI
|
||||
argtype:text
|
||||
arglabel:Name of NEIGHBOR outfile?
|
||||
|
||||
arg:TREE
|
||||
argtype:text
|
||||
arglabel:Name of TREEFILE ?
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
item:Phylip PROTEIN Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:PROTDIST+NEIGHBOR:
|
||||
argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
argchoice:No Bootstrap:
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
menu:On-Line Res.
|
||||
item:tytyt
|
||||
itemmethod:netscape hnu[phoph &
|
||||
item:SANBI
|
||||
itemmethod:netscape again &
|
||||
item:PlasmoDB
|
||||
itemmethod:netscape http://www.plasmodb.org &
|
||||
item:NCBI
|
||||
itemmethod:netscape http://www.ncbi.nlm.nih.gov &
|
||||
item:sanbi
|
||||
itemmethod:netscape http://www.sanbi.ac.za &
|
||||
item:SANBI
|
||||
itemmethod:netscape http://www.sanbi.ac.za &
|
||||
|
||||
item:GDE for Linux resources at Bioafrica.net
|
||||
itemmethod:netscape http://www.bioafrica.net &
|
||||
|
||||
item:-------------------------
|
||||
item:add a new website
|
||||
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the site name
|
||||
|
||||
arg:url
|
||||
argtype:text
|
||||
arglabel:Enter the URL (including http://)
|
Binary file not shown.
BIN
CORE/BuiltIn.o
BIN
CORE/BuiltIn.o
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
CORE/DrawNA.o
BIN
CORE/DrawNA.o
Binary file not shown.
BIN
CORE/Edit.o
BIN
CORE/Edit.o
Binary file not shown.
Binary file not shown.
1056
CORE/FileIO.c~
1056
CORE/FileIO.c~
File diff suppressed because it is too large
Load diff
BIN
CORE/FileIO.o
BIN
CORE/FileIO.o
Binary file not shown.
BIN
CORE/Free.o
BIN
CORE/Free.o
Binary file not shown.
BIN
CORE/Genbank.o
BIN
CORE/Genbank.o
Binary file not shown.
BIN
CORE/HGLfile.o
BIN
CORE/HGLfile.o
Binary file not shown.
|
@ -6,7 +6,7 @@ DrawNA.c Free.c BuiltIn.c Edit.c Genbank.c Scroll.c ChooseFile.c \
|
|||
CutCopyPaste.c HGLfile.c
|
||||
|
||||
LIBS= -lm -lxview -lolgx -lX11
|
||||
CFLAGS= -g -L/usr/openwin/lib -I/usr/openwin/include
|
||||
CFLAGS= -g -m32 -L/usr/lib32 -I/usr/include/xview
|
||||
CC = cc
|
||||
# Possible defines, SUN4 SGI DEC HGL
|
||||
DEFINES = -DLINUX
|
||||
|
|
BIN
CORE/ParseMenu.o
BIN
CORE/ParseMenu.o
Binary file not shown.
BIN
CORE/Scroll.o
BIN
CORE/Scroll.o
Binary file not shown.
|
@ -1,8 +0,0 @@
|
|||
|
||||
========================[ Feb 1, 2002 1:57 PM ]========================
|
||||
NOTE: CoreLib [002.003] FileOpen("HIV1POLDNA.fasta","r") failed
|
||||
Cannot open input database file. Formating failed...
|
||||
|
||||
========================[ Feb 1, 2002 7:27 PM ]========================
|
||||
NOTE: CoreLib [002.003] FileOpen("SIVPOLPRO.fasta","r") failed
|
||||
Cannot open input database file. Formating failed...
|
191
CORE/infile
191
CORE/infile
|
@ -1,191 +0,0 @@
|
|||
10 916
|
||||
contig GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
|
||||
W22140 AAAAANGCCC NNTTCNAAGN GGGGGGGGGG GGGGGGGATA TTTTGCNNAG
|
||||
R.C.W27436 GGGNNNNGNN NNNNNNNNNN NNNNNNAANN NNNNNNNNNN NNNNNNNNNN
|
||||
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
W28762 TCTTGACATT TGTCTCCATT TCAGCAAAAC GANACCTGTG GTGAAGGGAT
|
||||
#10005_2 2 GGnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
W28762 ---------- ---------- ---------- ---------- ----------
|
||||
W28762(165 GGGNNGGNGN GGNNNGNNGN NNNGGNNNNN NNNTNTGTNT GNNGGNAGGG
|
||||
#10005_2 2 GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
|
||||
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
|
||||
GGGGGCATGA TGNNGAGANC NAAAGAAAGN NCNGGGNGGG AAAAAAGAAG
|
||||
NNNANNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
TTGTGTGCTG GCACTG---- ---------- ---------- ----------
|
||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
NNTNTNANNN NNTTNTANAG TNAAAGNTTG GTNNNNGTNN NTTTGANGAA
|
||||
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
|
||||
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
|
||||
GAGGNCCCTG GNGGGAGGGG GGNNCGNNTT TNNTGCNCCG GATGGAGGGN
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
nnnnnnnnGn AAnnnnnnnn nnnnnnnnnn nnnnnnnnnT TGAAAACTGT
|
||||
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GNTCAANNTG GGGNNNANAN NNGNNNTTGA NTGAAAATGG GGNAANCCCC
|
||||
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
|
||||
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC TGAA-n--Tc TACT---CCG
|
||||
GGGGNTTTTN AAGNNTGTTT NTTTANAAGN AAGAGGGGGA NAAAATTTTT
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAACCGAAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TAnCCAAnTG GAATCCTAAG ACAATTTTCT -cCAwTTCA- sCAAC-CGAA
|
||||
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAAC-CGAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CNTTTTNCCA GTCANCTGGT AAGTCCAAGC TGAA-N--TC TACTC--C-G
|
||||
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC Tgaa----Tc TACTC--C-G
|
||||
CATGTAA-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
||||
TTNNTTCTNT NNCTNGNNNG GGGGGGGGGG GGGGCCCCCA ATAAGNNNTT
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
CCCTGTGGTG GAGGGAATTN CGTTCTTGGC NCTTCAGACT NCAGGGCAGG
|
||||
---------- ---------- ---------- ----CAGACT GCAGGGNAGG
|
||||
ACCCTGTGGT GrAGGGATTT GTGTGCT-GG CACTGCAGAC TGCAGGGCAG
|
||||
ACCCTGTGGT GGAGGGAATT NCGTTCTTGG CNCTTCAGAC TNCAGGGCAG
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CATGTAACCC C-NAAAGAGT TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
||||
CATGTAa-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
||||
GNGCNCAGAA NNAGGGGGGG GNGGGGGGGC CCCTTTNCTC CNAAAAATTT
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
AA-------- ---------- ---------- ---------- ----------
|
||||
AA-------- ---------- ---------- ---------- ----------
|
||||
GAAAGGGCTA GGGCCCAGGG GCTGGGAmAT GCATGAGGT- gCTCGGAGGA
|
||||
GAAAGGGCTA GGGCCCAGGG GCTGGGAAAT GCATGAGGTT GCTCGGAGGA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
||||
CTaAGCAGAT AGCAAAGaAG ATaATGGAGG AgCAATTGGT CATGGCCtTG
|
||||
CCCCCCNTTT TGGGNAAGGG TGGGGGAAAN NNTTTGGGCA AANAGGGGAA
|
||||
NNNNNNNNNN NNNAANNAGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
|
||||
---------- -------AGG GCTAGGGCCC AGGGGCTGGG AAATGCATGA
|
||||
---------- -------AGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
|
||||
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
|
||||
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CTAAGCAGAT AGCAAAGNAG ATNATGGAGG ANCAATTGGT CATGGCCNTG
|
||||
CTAAGCAGAT AGCAAAGAAG ATAATGGAGG AGCAATTGGT CATGGCCTTG
|
||||
GTTTCCCTCk AAACaACgCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
||||
AAAAAAAGNG GGGGGGGGCG GNTTCCANAA AANAANAAAG GGTNCACCCN
|
||||
GG-TTCTNGG NGGAGCCTGG CTAAANCCAA GCACCAGCAC CTGTGAGTCT
|
||||
GGTTGCTCGG AGGAGCCTGG CTAAATCCAA GCACCAGCAC CTGTGAGTCT
|
||||
GG-TGCTCGG AGGAGCCTGG NTAAATCCAA GCACCAGCAC CTGTGAGTCT
|
||||
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
|
||||
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GTTTCCCTCC AAACNACNCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
||||
GTTTCCCTCk AAACAACGCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
||||
tmGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
||||
TNGGGGGNCN CCCCCCCCNC NNGNAAATCN TCCCTTTTTT TGANGGGCNA
|
||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT NCCTCTTCTC
|
||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
|
||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
|
||||
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCAAAAG
|
||||
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCCNAAG
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
||||
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
||||
ACGAGAGCTG GGAGAAGAGG cAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
||||
ANNNCATTTN CTTGNCCTTG AAGATTGACC NTGACTGCTC TGGCAAGAAG
|
||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
||||
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTkGA GGGAAACCAA
|
||||
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTTGA GGGAAACCAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
||||
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGaTTTA gCCAGGCTCC
|
||||
AAGAGGTGTC CTTACAGAGA CCTCTTTACT GACCAACTGA AGNATAGACT
|
||||
CTTTCCCCCN AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
|
||||
CTTTCCCCCC NAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
|
||||
CTTTCCCCCA AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGNGTNGTT
|
||||
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
|
||||
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA NCCAGGCTCC
|
||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA GCCAGGCTCC
|
||||
tCCgAGkA-- CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
|
||||
TACTGCTGGA CAATCTGCAT GGGCATCACC CCTCCCCGCA TGTAACCC-A
|
||||
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
|
||||
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
|
||||
TGGAGGGAAA CCANGGCCAT GACCAATTGN TCCTCCATNA TCTNCTTTGC
|
||||
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
|
||||
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TCCGAGC--A CCTCATGCAT GTCCCAGCCC CTGGGCCCTA GCCCT-----
|
||||
TCCGAGc--A CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
AAAGAGGTGT CCAGAGCCAA GGCTTCTACC TTCATTGTCC CTCTCTGTGC
|
||||
TATCTGCTNA GAGNANNCAA NNNAANNNA- ---------- ----------
|
||||
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
|
||||
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
|
||||
TGAAGGTAGA AGCCTTGGCT CTGGACAmCT CTTTTGGG-t TACATGCG--
|
||||
TGAAGGTAGA AGCCTTGGCT CTGGACACCT CTTTTGGG-T TACATGCGGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- TTCCTgCCCT GcAGTCTGAA GnGCCAAG-A -ACGnAATTC
|
||||
TCAAGGAGTT CCATTCCAGG AGGAAGAGAT CTATACCCT- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC ACCTCTTTT-
|
||||
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC AACTCTTTNG
|
||||
GAGTAgA-tt cAGCTTGGAC TTACCAGnTG ACTGGnAAAA nGGGGGnTTn
|
||||
GAGTANA-NN NA-------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- TTCCTNCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
|
||||
---------- TTCCTGCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
|
||||
CCTCCACCAC AGGGTTTCG- GTTGGGTGGn TTGGAAGA-A AATTGTCTTA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GGGTTACATG CGGTGAGTAN ANNNA----- ---------- ----------
|
||||
GGGTTACATG CGG--AGTAG ANTTCAGCTT GGACTTACCA GNTGACTGGN
|
||||
CCCCATTTTC AnTCAAnnnC nnnTnTnnnC CCCAnnTTGA nCTTCnTCAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CCTTCACCAC A-GGTNTCGT TTTGC-TGAA ATGG-AGACA AAT-GTCa-a
|
||||
CCTrCACCAC AGGGTTTCG- GTTGs-TGAA wTGg-AGA-A AATTGTCTTA
|
||||
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
AAAANGGGGG NTTNCCCCAT TTTCANTCAA NNNCNNNTNT NNNCCCCANN
|
||||
AnnnACnnnn ACCAAnCTTT nACTnTAnAA nnnnnTnAnA nnCCCTnCCn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
g-a------- ---------- ---------- ---------- ----------
|
||||
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
|
||||
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TTGANCTTCN TCAAANNNAC NNNNACCAAN CTTTNACTNT ANAANNNNNT
|
||||
nCAnACAnAn nnnnnnnCCn nnnCnnCnnn CCnCnCCnnC CC--------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
NANANNCCCT NCCNNCANAC ANANNNNNNN NCCNNNNCNN CNNNCCNCNC
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
nnnnnnnnnn nnnnCC
|
||||
---------- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
CNNCCC---- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
nnnnnnnnnn nnnnCC
|
|
@ -1,2 +0,0 @@
|
|||
make
|
||||
cp gde ../bin
|
BIN
CORE/libxview.a
BIN
CORE/libxview.a
Binary file not shown.
BIN
CORE/main.o
BIN
CORE/main.o
Binary file not shown.
34
CORE/outfile
34
CORE/outfile
|
@ -1,34 +0,0 @@
|
|||
|
||||
DNA parsimony algorithm, version 3.51c
|
||||
|
||||
|
||||
One most parsimonious tree found:
|
||||
|
||||
|
||||
|
||||
|
||||
+-----------------------#10005_2 2
|
||||
!
|
||||
! +--------------------W28762(165
|
||||
+--9 !
|
||||
! ! ! +--R.C.W27652
|
||||
! ! ! +-----------6
|
||||
! ! ! ! +--#10005_2 2
|
||||
! +--8 !
|
||||
! ! +--5 +--W28762
|
||||
! ! ! ! +--7
|
||||
--1 ! ! ! +--4 +--W28762
|
||||
! ! ! ! ! !
|
||||
! +--2 +-----3 +-----R.C.W27652
|
||||
! ! !
|
||||
! ! +--------R.C.W27436
|
||||
! !
|
||||
! +-----------------W22140
|
||||
!
|
||||
+--------------------------contig
|
||||
|
||||
remember: this is an unrooted tree!
|
||||
|
||||
|
||||
requires a total of 2453.000
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
((#10005_2_2,(W28762(165,(((R.C.W27652,#10005_2_2),(((W28762,W28762),
|
||||
R.C.W27652),R.C.W27436)),W22140))),contig);
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,25 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Accession File Instructions
|
||||
;
|
||||
; 1. Type in one or more GenBank Accession #'s below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of numbers.
|
||||
;
|
||||
; (NOTE: File can not contain LOCUS names.)
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each accession # on a separate line
|
||||
; SAMPLE ACCESSION FILE:
|
||||
;
|
||||
; M18249
|
||||
; X13383
|
||||
; J03680
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
clu2ig update 3 Feb 94
|
||||
|
||||
NAME
|
||||
clu2ig
|
||||
|
||||
SYNOPSIS
|
||||
clu2ig clustalfile > igfile
|
||||
|
||||
DESCRIPTION
|
||||
Converts interleaved .aln output from Clustal V into
|
||||
sequential .ig (IntelliGenetics) format for use by MASE.
|
||||
|
||||
clustalfile:
|
||||
CLUSTAL V multiple sequence alignment
|
||||
|
||||
name1 AACTTTCG
|
||||
name2 ATCTTTCG
|
||||
* ******
|
||||
|
||||
name1 CCTGCT
|
||||
name2 CCCGCT
|
||||
** ***
|
||||
|
||||
igfile:
|
||||
;
|
||||
name1
|
||||
AACTTTCG
|
||||
CCTGCT
|
||||
:
|
||||
name2
|
||||
ATCTTTCG
|
||||
CCCGCT
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,36 +0,0 @@
|
|||
dbstat update 3 Feb 94
|
||||
|
||||
NAME
|
||||
dbstat - calculates amino acid frequencies in a protein
|
||||
database
|
||||
|
||||
SYNOPSIS
|
||||
dbstat
|
||||
|
||||
DESCRIPTION
|
||||
dbstat reads a file of one or more protein sequences
|
||||
and calculates the amino acid frequencies, both in terms of
|
||||
absolute numbers, and as a fraction of the total.
|
||||
|
||||
input - The input file is the standard .wrp (Pearson) format,
|
||||
such as that produced by getob:
|
||||
|
||||
>name
|
||||
; one or more comment lines (optional)
|
||||
sequence lines
|
||||
|
||||
Comments begin either with semicolon (;) or right arrow (>)
|
||||
characters.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,30 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Expression File Instructions 8/7/95
|
||||
;
|
||||
; 1. Type in one or more GenBank expressions below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of feature keys.
|
||||
; or
|
||||
; Copy expressions from another window and Paste into this window.
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; NOTES:
|
||||
; 1) FEATURES will then extract the appropriate sequences.
|
||||
; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES.
|
||||
; 2) All expressions referring to GenBank entries must begin with a '@'
|
||||
; Literals (ie. sequences to be embedded in the final output)
|
||||
; do NOT begin with a '@'.
|
||||
; 3) Put each expression on a separate line.
|
||||
;
|
||||
; SAMPLE EXPRESSION FILE:
|
||||
;
|
||||
; @J05635:83..1813
|
||||
; ; EcoRI/NotI adaptor {this is a comment line}
|
||||
; AATTGCGGCCGC
|
||||
; @J05635:/product="flagellin A"
|
||||
; @x17548:singed_trans
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Feature Key File Instructions
|
||||
;
|
||||
; 1. Type in one or more GenBank FEATURE Table feature keys below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of feature keys.
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each feature key on a separate line
|
||||
; SAMPLE FEATURE KEY FILE:
|
||||
;
|
||||
; mRNA
|
||||
; CDS
|
||||
; mat_peptide
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,407 +0,0 @@
|
|||
|
||||
FEATURES.DOC update 7 Feb 94
|
||||
|
||||
|
||||
NAME
|
||||
FEATURES - extracts features from GenBank entries
|
||||
|
||||
SYNOPSIS
|
||||
features
|
||||
features expression
|
||||
features [-f featurekey | -F keyfile]
|
||||
[-n name |-a accession | -e expression |
|
||||
-N namefile |-A accfile | -E expfile]
|
||||
[-u dbfile | -U dbfile | -g ]
|
||||
features -h
|
||||
|
||||
DESCRIPTION
|
||||
FEATURES extracts sequence objects from GenBank entries, using
|
||||
the Features Table language. Features can be retrieved either by
|
||||
specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by
|
||||
evaluating expressions. In practical terms, FEATURES is actually
|
||||
a user interface for GETOB, which actually performs the parsing
|
||||
and extraction of sequence objects. FEATURES can be run either as
|
||||
an interactive program or with command line arguments.
|
||||
|
||||
'features' with no arguments runs the program interactively.
|
||||
'features' followed by an expression retrieves the data directly
|
||||
from GenBank and evaluates the expression. The third form of
|
||||
features requires all arguments to be accompanied by their
|
||||
respective option flags. Finally, 'features -h' prints the
|
||||
SYNOPSIS.
|
||||
|
||||
|
||||
INTERACTIVE EXECUTION
|
||||
FEATURES executed with no arguments runs interactively. An example of the
|
||||
FEATURES menu is shown below:
|
||||
|
||||
___________________________________________________________________
|
||||
FEATURES - Version 7 FEB 94
|
||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
||||
___________________________________________________________________
|
||||
Features: tRNA
|
||||
Entries: EPFCPCG
|
||||
Dataset:
|
||||
___________________________________________________________________
|
||||
Parameter Description Value
|
||||
-------------------------------------------------------------------
|
||||
1).................... FEATURES TO EXTRACT ....................> f
|
||||
f:Type a feature at the keyboard
|
||||
F:Read a list of features from a file
|
||||
2)....................ENTRIES TO BE PROCESSED (choose one).....> n
|
||||
Keyboard input - n:name a:accession # e:expression
|
||||
File input - N:name(s) A:accession #(s) E:expression(s)
|
||||
3)....................WHERE TO GET IT .........................> g
|
||||
u:Genbank dataset g:complete GenBank database
|
||||
U: same as u, but all entries
|
||||
4)....................WHERE TO SEND IT ........................> a
|
||||
s:Each feature to a separate file a:All output to same file
|
||||
---------------------------------------------------------------
|
||||
Type number of your choice or 0 to continue:
|
||||
0
|
||||
Messages will be written to EPFCPCG.msg
|
||||
Final sequence output will be written to EPFCPCG.out
|
||||
Expressions will be written to EPFCPCG.exp
|
||||
Extracting features...
|
||||
|
||||
In the example, FEATURES was instructed to retrieve all tRNAs from
|
||||
the GenBank entry EPFCPCG, which contains the Epifagus plastid
|
||||
genome. By default, the GenBank database was the source of the
|
||||
sequence. Messages indicate the progress of the job. A log describing
|
||||
the extraction of each feature is written to EPFCPCG.msg, while the
|
||||
extracted features themselves are written to EPFCPCG.out. Feature
|
||||
expressions which could be used by FEATURES to reconstruct the .out
|
||||
file, are written to EPFCPCG.exp.
|
||||
|
||||
The first step is to retrieve the EPFCPCG entry from GenBank, which is
|
||||
accomplished by calling FETCH. Next, FEATURES extracts the specified
|
||||
features from the entry.
|
||||
|
||||
An excerpt from EPFCPCG.msg is shown below, describing the extraction
|
||||
of the fifth tRNA found in this entry. To create this tRNA, two exons
|
||||
had to be joined. The qualifier line associated with this feature
|
||||
indicates that it is a Histidine tRNA with a gtg anticodon.
|
||||
|
||||
|
||||
EPFCPCG:anticodon gtg
|
||||
complement
|
||||
(
|
||||
join
|
||||
(
|
||||
70023 70028
|
||||
|
||||
1 69
|
||||
|
||||
)
|
||||
|
||||
)
|
||||
|
||||
|
||||
/product="transfer RNA-His"
|
||||
/gene="His-tRNA"
|
||||
/label=anticodon gtg
|
||||
/note="anticodon gtg"
|
||||
//----------------------------------------------
|
||||
|
||||
|
||||
The actual sequence for this feature, as written to EPFCPCG.out, is
|
||||
written with each exon beginning a new line:
|
||||
|
||||
>EPFCPCG:anticodon gtg
|
||||
ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat
|
||||
gcgggttcaattcccgtcg
|
||||
ttcgcc
|
||||
|
||||
Finally, the expression that was evaluated to create this feature is
|
||||
written to EPFCPCG.exp:
|
||||
|
||||
>EPFCPCG:anticodon gtg
|
||||
@M81884:anticodon gtg
|
||||
|
||||
If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES,
|
||||
EPFCPCG.out would be recreated.
|
||||
|
||||
OPTIONS
|
||||
1) FEATURES - choosing f will cause FEATURES to prompt for
|
||||
a feature to extract. If you wish to extract several types of
|
||||
features simultaneously (ie. F), you must construct a file listing the
|
||||
feature keywords. The following example would retrieve both tRNA and
|
||||
rRNA sequences:
|
||||
|
||||
OBJECTS
|
||||
tRNA
|
||||
rRNA
|
||||
SITES
|
||||
|
||||
The words 'OBJECTS' and 'SITES' must enclose the feature keywords,
|
||||
and each keyword must be on a separate line. For a rigorous
|
||||
definition of the input file format, see the GETOB manual pages
|
||||
(getob.doc).
|
||||
|
||||
In the menu shown above, f was chosen, and the user entered tRNA at
|
||||
the prompt. Thus tRNA is now displayed on the Features: line. If
|
||||
features had been specified from a file (suboption F) then the
|
||||
filename containing the feature keywords would be displayed instead.
|
||||
A complete list of legal feature keywords can be found in the GenBank
|
||||
Release notes (gbrel.txt) under the subheading 'Feature Key Names'.
|
||||
|
||||
2) ENTRIES
|
||||
n User is prompted for the name of an entry from which the
|
||||
feature is to be extracted. The name of the entry will appear
|
||||
on the 'Entries' line of the menu.
|
||||
|
||||
N User is prompted for a filename containing one or more
|
||||
entry names. Each name must be on a separate line. The filename
|
||||
will be displayed on the 'Entries' menu line.
|
||||
|
||||
a User is prompted for an accession number, which will appear
|
||||
on the 'Entries' line of the menu.
|
||||
|
||||
A User is prompted for a filename for accession numbers. The filename
|
||||
will appear on the 'Entries:' line.
|
||||
|
||||
e User is prompted for a GenBank Features expression of the
|
||||
form accession:location. 'accession' refers to a GenBank
|
||||
accession number, while 'location' is any legal feature location.
|
||||
A brief description of location syntax can be found under the
|
||||
subheading "Feature Location" in the GenBank release notes
|
||||
(gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table:
|
||||
Definition" Version 1.04 for a complete definition.
|
||||
E User is prompted for a filename containing one or more Feature
|
||||
expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning
|
||||
with '@' are processed as expressions, and all other lines are
|
||||
copied to the output file unchanged.
|
||||
|
||||
Examples:
|
||||
|
||||
The tRNA shown above could have been extracted by choosing
|
||||
suboption e and entering either of the following expressions:
|
||||
|
||||
M81884:complement(join(70023..70028,1..69))
|
||||
M81884:anticodon gtg
|
||||
|
||||
In the first example, the feature line from the original entry
|
||||
is used as the location. In the second example, the feature is
|
||||
found by its qualifier line, which also appeared in the
|
||||
original entry. It must be noted that the qualifier line must
|
||||
be unique from others in the same entry in its first 15
|
||||
characters after the = .
|
||||
|
||||
The flaL protein coding region of B. licheniformis is described
|
||||
in GenBank entry BLIFALA, accession number M60287 in the
|
||||
following feature:
|
||||
|
||||
CDS 305..640
|
||||
/note="flaD (sin) homologue"
|
||||
/gene="flaL"
|
||||
/label=ORF2
|
||||
/codon_start=1
|
||||
|
||||
This feature could be retrieved using any of the following
|
||||
expressions:
|
||||
|
||||
M60287:305..640
|
||||
M60287:ORF2
|
||||
M60287:/label=ORF2
|
||||
M60287:/gene="flaL"
|
||||
M60287:/note="flaD (sin) homologue"
|
||||
|
||||
Note that the /label= qualifier is special, in that labels are
|
||||
specifically intended as unique tags on a feature. For labels,
|
||||
only the label itself need be specified. Thus, /label=ORF2 is
|
||||
equivalent to ORF2. For other qualifiers, the qualifier keyword
|
||||
(eg. /note=) must be included.
|
||||
|
||||
3) DATABASE (WHERE TO GET IT) - By default, all entries processed will
|
||||
be automatically retrieved from GenBank using FETCH. Specifying 'u'
|
||||
(User-defined database subset) makes it possible to extract features
|
||||
from GenBank subsets created by the user. Usually, retrieval of
|
||||
features is much faster with a User-defined subset, so if you
|
||||
frequently work with sets of genes, it is best to retrieve them
|
||||
en-masse using FETCH, and work with them directly. For example, if
|
||||
you had retrieved a set of Beta-globin sequences into a file called
|
||||
'globin.gen', you could directly extract features from these entries
|
||||
by specifying 'globin' or 'globin.gen' as your User-defined database.
|
||||
If the file extension is '.gen', FEATURES will automatically create
|
||||
temporary files called globin.ano, globin.wrp and globin.ind,
|
||||
containing annotation, sequence, and an index, respectively. These
|
||||
files will be read during feature extraction, and then discarded. If
|
||||
you have already created such files using SPLITDB, simply specify
|
||||
any of 'globin', 'globin.ano', etc. ie. anything, as long as it does
|
||||
not have the .gen file extension.
|
||||
|
||||
'U' rather than 'u' causes ALL entries in the user-defined
|
||||
database to be subset. This means that it is unnecessary to
|
||||
specify entry options (eg -n, -N etc.), as these will be
|
||||
ignored, if given.
|
||||
|
||||
One consequence of these conventions is that the individual GenBank
|
||||
divisions can be processed directly. For example, suppose you were only
|
||||
interested in rodent globins. You could directly access the rodent
|
||||
division of GenBank by specifying the base name of that file division
|
||||
(eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In
|
||||
this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already
|
||||
exist. Again, this approach is faster, since FEATURES would not have
|
||||
to find and retrieve the sequences, but can read directly from the
|
||||
database files. Finally, if you wanted to process all of the entries
|
||||
in the database division, simply use -U. The user is warned that a
|
||||
GenBank division is a huge amount of data, and processing every entry
|
||||
could take a long time.
|
||||
|
||||
4) WHERE TO SEND IT - By default (a), the output for all entries goes
|
||||
to a single set of files, whose names are chosen by FEATURES,
|
||||
depending on the setting of option 2, Entries. If a single name (n) or
|
||||
accession number (a) has been chosen, that will be used as
|
||||
the raw filename. For example, if you were processing the entry
|
||||
WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names
|
||||
(N), accession numbers (A) or expressions (E) were read from a file,
|
||||
the raw name of that file would be used eg. cellulase.nam would result
|
||||
in cellulase.msg and cellulase.out. Finally, if a single expression
|
||||
is processed (e), then the primary accession number in that
|
||||
expression will be used for the filenames. In all cases, FEATURES
|
||||
will tell you the names of the files being written.
|
||||
|
||||
Choosing suboption s, you can specify that the features created for
|
||||
each entry be sent to separate files. In this case, each file will
|
||||
have the name of that entry, with the extension .obj. However, all
|
||||
messages and expressions will still go to a single set of files. While this
|
||||
can be a convenient way of creating separate files when you need them,
|
||||
this option still has the limitation of writing all features for a
|
||||
given entry (if there are more than one) to the same file. Also,
|
||||
successive resolution of features (anything requiring 'getob -r')
|
||||
will not work with this option. This may be corrected in future
|
||||
versions.
|
||||
|
||||
|
||||
COMMAND LINE EXECUTION
|
||||
|
||||
There are two ways of running FEATURES from the command line. If only one
|
||||
argument is supplied, that argument is interpreted as an expression, and
|
||||
the result of that expression (ie. a sequence ) is written to the
|
||||
standard output. .msg, .out and .exp files are NOT created. For example,
|
||||
GenBank entry BACFLALA (M60287) contains the following feature:
|
||||
|
||||
CDS 95..271
|
||||
/label=LORF-
|
||||
/codon_start=1
|
||||
/translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK
|
||||
PSASIERSHSINPF"
|
||||
Any of
|
||||
|
||||
features M60287:LORF-
|
||||
features M60287:95..271
|
||||
features M60287:/label=LORF-
|
||||
|
||||
would write the open reading frame to the standard output:
|
||||
|
||||
atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga
|
||||
actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta
|
||||
tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa
|
||||
agaagtcattcaataaatcctttctga
|
||||
|
||||
This form of FEATURES is provided to make it easy to pipe output to
|
||||
other programs for further processing. For example
|
||||
|
||||
features M60287:LORF- |ribosome >LORF.protein
|
||||
|
||||
would write the translation of the open reading frame to a file called
|
||||
LORF.protein.
|
||||
|
||||
The full functionality of FEATURES can be accessed using arguments on
|
||||
the command line. In particular, when there are multiple entries to be
|
||||
processed, or multiple features within entries, it is much faster to
|
||||
supply FEATURES with lists of entries, feature keys or expressions.
|
||||
Command line options are similar to suboptions in menu items 1-3 above:
|
||||
|
||||
Feature keys:
|
||||
-f key {feature key}
|
||||
-F filename {file of feature keys}
|
||||
|
||||
Entries:
|
||||
-n name {GenBank LOCUS name}
|
||||
-N filename {file of GenBank LOCUS names}
|
||||
-a accession {GenBank ACCESSION number}
|
||||
-A filename {file of GenBank ACCESSION numbers}
|
||||
-e expression {Feature Table expression}
|
||||
-E filename {file of Feature Table expressions, each begin-
|
||||
ning with '@'}
|
||||
|
||||
Databases:
|
||||
-u filename {GenBank dataset}
|
||||
-U filename { " " " " " " ,
|
||||
process all entries ie. -nNaAeE options
|
||||
will be ignored}
|
||||
-g {GenBank}
|
||||
|
||||
Examples:
|
||||
|
||||
features -f tRNA -n EPFCPCG
|
||||
|
||||
retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out,
|
||||
and .exp files.
|
||||
|
||||
features -e M60287:LORF-
|
||||
|
||||
would retrieve the same open reading frame as in the earlier example.
|
||||
|
||||
|
||||
Since the most time-consuming operation in FEATURES is sequence retrieval,
|
||||
it is often best to retrieve frequently-used sequences as database
|
||||
subsets. For example, a set of GenBank entries for chlorophyll a/b binding
|
||||
protein genes might be stored in a file called CAB.gen.
|
||||
|
||||
features -f CDS -N CAB.nam -u CAB.gen
|
||||
|
||||
would generate the files CAB.msg, CAB.out and CAB.exp containing output
|
||||
for all CDS features in the entries listed in the file CAB.nam.
|
||||
|
||||
features -E CAB.exp -u CAB.gen
|
||||
|
||||
would re-create the output file CAB.out.
|
||||
|
||||
|
||||
|
||||
BUGS
|
||||
FEATURES does no preliminary error checking for syntax of
|
||||
GenBank expressions prior to their evaluation. Expressions that can
|
||||
not be evaluated will be flagged by GETOB in the .msg file.
|
||||
|
||||
At present, little checking is done to test for the presence or
|
||||
correctness of input files. Some errors may cause the program to
|
||||
crash.
|
||||
|
||||
For User-defined datasets, filename expansion is not performed.
|
||||
|
||||
FILES
|
||||
Temporary files:
|
||||
X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) }
|
||||
UNRESOLVED.fea UNRESOLVED.out
|
||||
FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out
|
||||
|
||||
SEE ALSO
|
||||
grep(1V) fetch getob splitdb
|
||||
|
||||
TRANSPORTATION NOTES
|
||||
It should be fairly easy to get FEATURES to work even on systems
|
||||
in which GenBank has not been formatted for the XYLEM package.
|
||||
This is because FEATURES does not work directly on the database, but
|
||||
rather retrieves all necessary sequences by calling FETCH. Thus,
|
||||
statements like 'fetch FEA.nam FEA.gen' could be replaced with any
|
||||
command that, given a file containing names or accession numbers,
|
||||
returns a file containing GenBank entries. In principle, you
|
||||
could even implement this sort of command to retrieve entries from
|
||||
the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although
|
||||
such a setup would undoubtedly be quite slow.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,320 +0,0 @@
|
|||
|
||||
FETCH.DOC update 24 Feb 96
|
||||
|
||||
|
||||
NAME
|
||||
fetch - retrieves database entries by name or accession number
|
||||
|
||||
SYNOPSIS
|
||||
fetch {interactive mode}
|
||||
fetch [options] namefile [output file] {batch mode}
|
||||
|
||||
DESCRIPTION
|
||||
fetch retrieves one or more entries from a database.
|
||||
|
||||
Interactive mode: fetch prompts the user to set search parameters,
|
||||
using an interactive menu:
|
||||
___________________________________________________________________
|
||||
FETCH - Version 7 Feb 94
|
||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
||||
___________________________________________________________________
|
||||
Namefile:
|
||||
Outfile:
|
||||
Database:
|
||||
-------------------------------------------------------------------
|
||||
Parameter Description Value
|
||||
|
||||
1) Name/Acc Name or Accession sequence to get
|
||||
2) Namefile Get list of sequences from Namefile
|
||||
3) WhatToGet a:annotation s:sequence b:both b
|
||||
4) Database g:GenBank p:PIR v:VecBase l:LiMB g
|
||||
G:GenBank dataset P:PIR dataset
|
||||
5) Outfile Send all output to a single file (Outfile)
|
||||
6) Files f:Send each entry to a separate file f
|
||||
-------------------------------------------------------------
|
||||
Type number of your choice or 0 to continue:
|
||||
|
||||
After all parameters have been set, type 0 to commence the search.
|
||||
Messages regarding the progress of the search will be printed.
|
||||
|
||||
(1,2) Which entries to get?
|
||||
If you want to get a single entry, option 1 lets you type in the
|
||||
name of that entry, without having to create a namefile. To get
|
||||
more than one entry, choose option 2, and specify the name of a
|
||||
file containing sequence names or accession numbers.
|
||||
|
||||
namefile is a file containing one or more sequence names or
|
||||
accession numbers, each on a separate line. Names and accession
|
||||
numbers can even be interspersed, in upper or lowercase, and in
|
||||
any order. For example, the namefile prp.nam might contain
|
||||
|
||||
; plant pathogenesis related proteins
|
||||
; (these are sample comment lines)
|
||||
; note that any line containing a semicolon is ignored
|
||||
x06362
|
||||
x05454
|
||||
TOBPR1A1
|
||||
; comments can be interspersed with names.
|
||||
PUMPR13
|
||||
tobpr1ar
|
||||
|
||||
Options 1 & 2 are mutually exclusive. Setting one will negate the
|
||||
other. If option 2 is chosen, the name of the namefile will appear
|
||||
at the top of the menu.
|
||||
|
||||
(3) WhatToGet
|
||||
Use this option to specify whether to get annotation, sequence,
|
||||
or both (default=both).
|
||||
|
||||
(4) Database
|
||||
Use this option to select the database. (default=GenBank).
|
||||
G and P select user-created database subsets containing GenBank
|
||||
or PIR entries, respectively. It is assumed that the database
|
||||
has been split into .ano, .wrp and .ind files using splitdb.
|
||||
For example, if you had created a database subset called PR1.pir,
|
||||
splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are
|
||||
the files actually read by FETCH. When prompted for the name
|
||||
of the database, simply type "PR1", without a file extension.
|
||||
(If you do type a file extension, it will be ignored).
|
||||
|
||||
(5, 6) Where to send output
|
||||
By default, option 6 is set to f, and each entry will be written to
|
||||
a separate file, where the name of the file is the name of the
|
||||
entry, followed by a file extension. If a complete entry is
|
||||
retrieved, the file extension will indicate the type of database
|
||||
(GenBank: .gen; PIR: .pir; Vecbase: .vec; LiMB: .LiMB). If only
|
||||
annotation or sequence are retrieved, the file extensions will be
|
||||
.ano or .wrp, respectively. Using the default, the namefile above
|
||||
would create the following files:
|
||||
|
||||
PUMPR13.gen
|
||||
TOBPR1A1.gen
|
||||
TOBPR1AR.gen
|
||||
TOBPR1CR.gen
|
||||
TOBPR1PS.gen
|
||||
|
||||
By choosing option 5, you can specify the name of an output file
|
||||
for all entries to go to. This filename will appear at the top
|
||||
of the menu. Obviously, options 5 & 6 are mutually exclusive.
|
||||
Note that entries retrieved are written in alphabetical order (sorting by
|
||||
ASCII values), not the order in which they appeared in namefile.
|
||||
|
||||
(Note for remote users only: -f will only work for a single
|
||||
name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES
|
||||
specified in 2).)
|
||||
|
||||
Batch mode:
|
||||
Although it is transparent to the user, all fetch really does
|
||||
is call getloc, saving the user the trouble of knowing which
|
||||
database files to retrieve sequences from, or of having to
|
||||
execute getloc multiple times to retrieve sequences from
|
||||
different database files. Thus, the options are identical to those
|
||||
for getloc:
|
||||
|
||||
-a Write annotation portions of entries only, terminated by '//'.
|
||||
-s Write sequence data only, in Pearson (.wrp) format.
|
||||
-f Write each entry to a separate file.
|
||||
-g GenBank (default)
|
||||
-e EMBL {not implemented}
|
||||
-p PIR (NBRF)
|
||||
-v Vecbase
|
||||
-l LiMB
|
||||
-G GenBank_dataset
|
||||
-P PIR_dataset
|
||||
|
||||
If -f is not specified, outfile must be specified.
|
||||
|
||||
-L force execution of fetch on local host even if
|
||||
$XYLEM_RHOST is set. See "REMOTE EXECUTION" below
|
||||
|
||||
|
||||
PIR_dataset
|
||||
GenBank_dataset
|
||||
This can be either a file of PIR entries, a file of GenBank entries,
|
||||
or a XYLEM dataset created by splitdb. A file of PIR entries must
|
||||
have the file extension ".pir". A file of GenBank entries must have
|
||||
the file extension ".gen". A XYLEM dataset contains PIR or GenBank entries split
|
||||
among three files by splitdb: annotation (.ano), sequence (.wrp)
|
||||
and index (.ind). These file extensions must be used!
|
||||
|
||||
When specifying a split dataset, only the base name needs to be
|
||||
used. For example given a XYLEM dataset consisting of the files
|
||||
myset.ano, myset.wrp and myset.ind, the following two commands
|
||||
are equivalent:
|
||||
|
||||
fetch -P myset something.nam something.pir
|
||||
fetch -P myset.ano something.nam something.pir
|
||||
|
||||
If the original .pir file had been used, the command would have
|
||||
been
|
||||
|
||||
fetch -P myset.pir something.nam something.pir
|
||||
|
||||
The ability to work directly with .gen or .pir files is quite
|
||||
convenient. However, since FETCH needs to work with a split dataset,
|
||||
FETCH automatically splits .pir or .gen files into .ano, .wrp
|
||||
and .ind files, which are removed when finished. This requires
|
||||
extra disk space and execution time, which could be significant
|
||||
for large datasets.
|
||||
|
||||
EXAMPLES
|
||||
Batch example:
|
||||
fetch -f chitinase.nam
|
||||
will retrieve annotation and sequence for sequences listed in
|
||||
chitinase.nam from GenBank, writing each entry to a separate file
|
||||
with the extension .gen.
|
||||
|
||||
fetch -s -v pbr.nam pbr.wrp
|
||||
will retrieve sequence data only for the entries listed in pbr.nam,
|
||||
from VecBase, and write all sequences to a Pearson format file
|
||||
(ie. readable by fasta) with the name pbr.wrp.
|
||||
|
||||
fetch -G sample sample.nam new.gen
|
||||
fetch -G sample.ano sample.nam new.gen
|
||||
Assumes that a set of GenBank entries has been split by splitdb
|
||||
into sample.ano sample.wrp and sample.ind. The entries listed in
|
||||
sample.nam are written to new.gen.
|
||||
|
||||
|
||||
FILES
|
||||
Database files:
|
||||
The directories for database files are specified by the environment
|
||||
variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB
|
||||
(LiMB).
|
||||
|
||||
Index files are $GB/gbacc.idx for GenBank (this file is supplied
|
||||
with each GenBank release), while the other databases
|
||||
use .ind files generated by splitdb. Split database files MUST
|
||||
have the following file extensions: .ano {annotation}, .wrp
|
||||
{sequence} and .ind {index}. Thus, when creating database files
|
||||
for pir1.dat with splitdb, the output files should be pir1.ano,
|
||||
pir1.wrp and pir1.ind.
|
||||
|
||||
Temporary files:
|
||||
NAMEFILE.fetch
|
||||
PRELIMINARY.fetch
|
||||
TMP.fetch
|
||||
FOUND.fetch
|
||||
FETCHDIR {temporary directory}
|
||||
|
||||
REMOTE EXECUTION
|
||||
Where the databases can not be stored locally, FETCH can call
|
||||
FETCH on another system and retrieve the results. To run
|
||||
FETCH remotely, your .cshrc file should contain the following
|
||||
lines:
|
||||
|
||||
setenv XYLEM_RHOST remotehostname
|
||||
setenv XYLEM_USERID remoteuserid
|
||||
|
||||
where remotehostname is the name of the host on which the
|
||||
databases reside (in XYLEM split format) and remoteuserid
|
||||
is your userid on the remote system. When run remotely,
|
||||
your local copy of FETCH will generate the following
|
||||
commands:
|
||||
|
||||
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ...
|
||||
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files
|
||||
|
||||
Because FETCH uses rsh and rcp, your home directory on both
|
||||
the local and remote systems must have a world-readable
|
||||
file called .rhosts, containing the names of trusted remote
|
||||
hosts and your userid on each host. Before trying to get
|
||||
FETCH to work remotely, make sure that you can rcp and
|
||||
rsh to the remote host.
|
||||
|
||||
Obviously, remote execution of FETCH implies that FETCH
|
||||
must already be installed on the remote host. When FETCH
|
||||
runs another copy of FETCH remotely, it uses the -L option
|
||||
(fetch -L) to ensure that the remote FETCH job executes,
|
||||
rather than calling yet another FETCH on another host.
|
||||
|
||||
|
||||
---------- Remote execution on more than 1 host -----------
|
||||
If more than 1 remote host is available for running FETCH
|
||||
(say, in a clustered environment where many servers mount
|
||||
a common filesystem) the choice of a host can be determined
|
||||
by the csh script choosehost, such that execution of
|
||||
choosehost returns the name of a remote server. To use this
|
||||
approach, the following script, called 'choosehost' should
|
||||
be in your bin directory:
|
||||
|
||||
#!/bin/csh
|
||||
# choosehost - choose a host to use for a remote job.
|
||||
# This script rotates among servers listed in .rexhosts,
|
||||
# by choosing the host at the top of the list and moving
|
||||
# it to the bottom.
|
||||
|
||||
#Rotate the list, putting the current host to the bottom.
|
||||
set HOST = `head -1 $home/.rexhosts`
|
||||
set JOBID = $$
|
||||
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
|
||||
echo $HOST >> /tmp/.rexhosts.$JOBID
|
||||
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
|
||||
|
||||
# Write out the current host name
|
||||
echo $HOST
|
||||
|
||||
You must also have a file in your home directory called
|
||||
.rexhosts, listing remote hosts, such as
|
||||
|
||||
graucho.cc.umanitoba.ca
|
||||
harpo.cc.umanitoba.ca
|
||||
chico.cc.umanitoba.ca
|
||||
zeppo.cc.umanitoba.ca
|
||||
|
||||
Each time choosehost is called, choosehost will rotate the
|
||||
names in the file. For example, starting with the .rexhosts
|
||||
as shown, it will move graucho.cc.umanitoba.ca to the bottom
|
||||
of the file, and write the line 'graucho.cc.umanitoba.ca'
|
||||
to the standard output. The next time choosehost is
|
||||
run, it would write 'harpo.cc.umanitoba.ca', and so on.
|
||||
|
||||
Depending on your local configuration, you may wish to
|
||||
rewrite choosehost. All that is really necessary is that
|
||||
echo `choosehost` should return the name of a valid host.
|
||||
|
||||
Once you have installed choosehost and tested it, you can
|
||||
get FETCH to use choosehost simply by setting
|
||||
|
||||
setenv XYLEM_RHOST choosehost
|
||||
|
||||
in your .cshrc file.
|
||||
|
||||
--------------- Remote filesystems -----------------------
|
||||
Finally, an alternative to remote execution is to remotely mount
|
||||
the file system containing the databases across the network.
|
||||
This has the advantage of simplicity, and means that the
|
||||
databases are available for ALL programs on your local
|
||||
workstation. However, it may still be advantageous to run
|
||||
FETCH remotely, since that will shift much of the computational
|
||||
load to another host.
|
||||
|
||||
BUGS
|
||||
When retrieving entries directly from GenBank, FETCH uses the
|
||||
Accession Number index file gbacc.idx. In this case, FETCH
|
||||
can retrieve all entries containing a given accession number.
|
||||
This capability makes it possible to retrieve an entry using a
|
||||
secondary accession number. However if more than one entry
|
||||
share a secondary accession number, all of those entries will
|
||||
be retrieved. While this behavior might be a bit of an
|
||||
annoyance at times, it can also be useful because it alerts
|
||||
the user to the presence of other, related entries that might
|
||||
be of interest.
|
||||
|
||||
SEE ALSO
|
||||
getloc features
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,365 +0,0 @@
|
|||
|
||||
FINDKEY.DOC update 13 Mar 97
|
||||
|
||||
|
||||
NAME
|
||||
findkey - finds database entries containing one or more keywords
|
||||
|
||||
SYNOPSIS
|
||||
findkey
|
||||
findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile]
|
||||
findkey [-P PIR_dataset] keywordfile [namefile findfile]
|
||||
findkey [-G GenBank_dataset] keywordfile [namefile findfile]
|
||||
|
||||
DESCRIPTION
|
||||
findkey uses the grep family of commands to find lines in database
|
||||
annotation files containing one or more keywords. Next, identify
|
||||
is called to create a .nam file, containing the names of entries
|
||||
containing the keywords, and a .fnd file, containing the actual
|
||||
lines from each entry containing hits. A PIR or GenBank dataset is
|
||||
either a file containing one or more GenBank or PIR entries, or
|
||||
the name of a XYLEM dataset created by splitdb. See FILES below
|
||||
for a more detailed description.
|
||||
|
||||
INTERACTIVE USE
|
||||
findkey prompts the user to set search parameters, using an interactive
|
||||
menu:
|
||||
|
||||
___________________________________________________________________
|
||||
FINDKEY - Version 12 Aug 94
|
||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
||||
___________________________________________________________________
|
||||
Keyfile:
|
||||
Dataset:
|
||||
-------------------------------------------------------------------
|
||||
Parameter Description Value
|
||||
-------------------------------------------------------------------
|
||||
1) Keyword Keyword to find thionin
|
||||
2) Keyfile Get list of keywords from Keyfile
|
||||
3) WhereToLook p:PIR v:VecBase p
|
||||
GenBank - b:bacterial i:invertebrate
|
||||
m:mamalian e:expressed seq. tag
|
||||
g:phage l:plant
|
||||
r:primate n:rna
|
||||
d:rodent s:synthetic
|
||||
u:unannotated a:viral
|
||||
t:vertebrate x:patented
|
||||
z:STS
|
||||
G: GenBank dataset P: PIR dataset
|
||||
-------------------------------------------------------------
|
||||
Type number of your choice or 0 to continue:
|
||||
0
|
||||
Searching /home/psgendb/PIR/pir1.ano...
|
||||
Sequence names will be written to thionin~pir.nam
|
||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
||||
Searching /home/psgendb/PIR/pir2.ano...
|
||||
Sequence names will be written to thionin~pir.nam
|
||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
||||
Searching /home/psgendb/PIR/pir3.ano...
|
||||
Sequence names will be written to thionin~pir.nam
|
||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
||||
|
||||
As shown in the example above, the keyword thionin was specified
|
||||
as the keyword to search for. By default, option 3 is set to p,
|
||||
and the PIR protein database is searched. Messages describe the
|
||||
progress of the search. Since PIR is broken up into two divisions
|
||||
(new and protein) both are searched, but all output is written to
|
||||
thionin~pir.nam and thionin~pir.fnd.
|
||||
|
||||
OPTIONS
|
||||
(1,2) Which keywords to search for?
|
||||
If you want to search for a single keyword, option 1 lets you type
|
||||
the keyword, without having to create a file. To search for more
|
||||
than one keyword, choose option 2, and specify the name of a
|
||||
file containing the keywords. For example, entries containing
|
||||
genes for antibiotic resistance might be found using the
|
||||
following keyword file:
|
||||
|
||||
ampicillin
|
||||
chloramphenicol
|
||||
kanamycin
|
||||
neomycin
|
||||
tetracycline
|
||||
|
||||
Note: keyword searches are case insensitive.
|
||||
|
||||
As you might expect, it takes longer to search for multiple
|
||||
keywords than a single keyword.
|
||||
|
||||
Options 1 & 2 are mutually exclusive. Setting one will negate the
|
||||
other. If option 2 is chosen, the name of the keyword file will
|
||||
appear at the top of the menu.
|
||||
|
||||
Finally, it is probably not a good idea to search GenBank
|
||||
entries using very short keywords consisting only of letters.
|
||||
This is because GenBank entries now include a /translation
|
||||
field containing the amino acid sequence of each protein
|
||||
coding sequence. Consequently, 3 or 4 letter keywords
|
||||
consisting of legal amino acid symbols (eg. CAP, recA) will
|
||||
turn up fairly often in protein translations.
|
||||
|
||||
(3) WhereToLook
|
||||
Use this option to specify the database to be searched. In the
|
||||
case of GenBank, only one division at a time may be searched.
|
||||
User-created database subsets containing PIR (P) or GenBank (G)
|
||||
entries may also be searched. User-created database subsets
|
||||
must be in the .ano/.wrp/.ind form created by splitdb.
|
||||
|
||||
OUTPUT
|
||||
The output filenames take the following form:
|
||||
|
||||
name~ex1.ex2
|
||||
|
||||
The 'name' part of the filename is either the keyword searched for,
|
||||
if option 1 was chosen, or the name of the keyword file, if option 2
|
||||
obtains. 'ex1' indicates the database division that was searched. For
|
||||
PIR and VecBase, ex1 is 'pir' and 'vec', respectively. For GenBank,
|
||||
ex1 is as follows:
|
||||
|
||||
bct - bacterial
|
||||
inv - invertebrate
|
||||
mam - other mammalian
|
||||
est - expressed sequence tag
|
||||
phg - phage
|
||||
pln - plant (includes fungi)
|
||||
pri - primate
|
||||
rna - structural RNAs
|
||||
rod - rodent
|
||||
syn - synthetic sequences
|
||||
sts - sequence tagged sites
|
||||
una - unannotated (new) sequences
|
||||
vrl - viral
|
||||
vrt - other vertebrate
|
||||
|
||||
'ex2' distinguishes the files containing the names of entries
|
||||
containing keywords (.nam) and the files containing the lines found
|
||||
in each entry (.fnd).
|
||||
|
||||
The .nam file can be used directly as a namefile for fetch, getloc,
|
||||
or getob.
|
||||
|
||||
COMMAND LINE USE
|
||||
|
||||
OPTIONS
|
||||
p search PIR (default)
|
||||
P PIR dataset search dbfile, containing PIR entries
|
||||
v search VecBase
|
||||
b search Genbank bacterial division
|
||||
m search Genbank mammalian division
|
||||
g search Genbank phage division
|
||||
r search Genbank primate division
|
||||
d search Genbank rodent division
|
||||
u search Genbank unannotated division
|
||||
t search Genbank vertebrate division
|
||||
i search Genbank invertebrate division
|
||||
l search Genbank plant division
|
||||
n search Genbank rna division
|
||||
s search Genbank synthetic division
|
||||
a search Genbank viral division
|
||||
x search Genbank patented division
|
||||
e search Genbank exp.seq.tag division
|
||||
z search GenBank STS division
|
||||
S search GenBank Genom. Survey division
|
||||
h search GenBank High Thrput. division
|
||||
G GenBank dataset search dbfile, containing GenBank entries
|
||||
|
||||
L force execution of findkey on local host
|
||||
even if $XYLEM_RHOST is set. See "REMOTE
|
||||
EXECUTION" below
|
||||
|
||||
FILES
|
||||
|
||||
keywordfile - contains keywords to search for
|
||||
|
||||
namefile - LOCUS names of hits are written to this file
|
||||
|
||||
findfile - for each hit, a report listing the LOCUS name and the
|
||||
lines matching the keyword is written to this file.
|
||||
|
||||
If namefile and findfile are not specified on the command line,
|
||||
filenames will be created as described above for interactive
|
||||
use.
|
||||
|
||||
PIR_dataset
|
||||
GenBank_dataset
|
||||
This can be either a file of PIR entries, a file of GenBank entries,
|
||||
or a XYLEM dataset created by splitdb. A file of PIR entries must
|
||||
have the file extension ".pir". A file of GenBank entries must have
|
||||
the file extension ".gen". A XYLEM dataset contains PIR or GenBank entries split
|
||||
among three files by splitdb: annotation (.ano), sequence (.wrp)
|
||||
and index (.ind). These file extensions must be used!
|
||||
|
||||
When specifying a split dataset, only the base name needs to be
|
||||
used. For example given a XYLEM dataset consisting of the files
|
||||
myset.ano, myset.wrp and myset.ind, the following two commands
|
||||
are equivalent:
|
||||
|
||||
findkey -P myset something.kw
|
||||
findkey -P myset.ano something.kw
|
||||
|
||||
If the original .pir file had been used, the command would have
|
||||
been
|
||||
|
||||
findkey -P myset.pir something.kw
|
||||
|
||||
The ability to work directly with .gen or .pir files is quite
|
||||
convenient. However, since FINDKEY needs to work with a split dataset,
|
||||
FINDKEY automatically splits .pir or .gen files into .ano, .wrp
|
||||
and .ind files, which are removed when finished. This requires
|
||||
extra disk space and execution time, which could be significant
|
||||
for large datasets.
|
||||
|
||||
EXAMPLES
|
||||
If the list of antibiotics shown above was stored in the file
|
||||
antibiotic.kw, and option 3 was set to 'b', then the annotation
|
||||
portion of the GenBank bacterial division would be searched, and
|
||||
all lines containing any of these keywords would be written to
|
||||
antibiotic~bct.fnd. The corresponding GenBank entry names would
|
||||
appear in antibiotic~bct.nam.
|
||||
|
||||
The same keyword file could be used to search other database files.
|
||||
If VecBase was searched, the output files would be antibiotic~vec.fnd
|
||||
and antibiotic~vec.nam. These filename conventions make it easy
|
||||
to search different database divisions, and to keep track of where
|
||||
data came from.
|
||||
|
||||
Command line examples:
|
||||
|
||||
findkey thionin.kw
|
||||
|
||||
would be equivalent to the interactive example shown above. In
|
||||
this case, the file thionin.kw contains the word 'thionin'.
|
||||
(Note that since PIR is the default, -p need not be supplied.)
|
||||
|
||||
findkey -b antibiotic.kw drugs.nam drugs.fnd
|
||||
|
||||
would search the GenBank bacterial division for the keywords
|
||||
contained in antibiotic.kw, and write the output to drugs.nam
|
||||
and drugs.fnd.
|
||||
|
||||
FILES
|
||||
Database files:
|
||||
The directories for database files are specified by the environment
|
||||
variables $GB (GenBank) $PIR (PIR/NBRF) and $VEC(Vecbase).
|
||||
Annotation (.ano) and index (.ind) are those generated by splitdb.
|
||||
|
||||
Temporary files:
|
||||
$jobid.fnd
|
||||
$jobid.nam
|
||||
$jobid.grep
|
||||
|
||||
where $jobid is a unique jobid generated by the shell
|
||||
|
||||
REMOTE EXECUTION
|
||||
Where the databases can not be stored locally, FINDKEY can call
|
||||
FINDKEY on another system and retrieve the results. To run
|
||||
FINDKEY remotely, your .cshrc file should contain the following
|
||||
lines:
|
||||
|
||||
setenv XYLEM_RHOST remotehostname
|
||||
setenv XYLEM_USERID remoteuserid
|
||||
|
||||
where remotehostname is the name of the host on which the
|
||||
databases reside (in XYLEM split format) and remoteuserid
|
||||
is your userid on the remote system. When run remotely,
|
||||
your local copy of FINDKEY will generate the following
|
||||
commands:
|
||||
|
||||
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ...
|
||||
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files
|
||||
|
||||
Because FINDKEY uses rsh and rcp, your home directory on both
|
||||
the local and remote systems must have a world-readable
|
||||
file called .rhosts, containing the names of trusted remote
|
||||
hosts and your userid on each host. Before trying to get
|
||||
FINDKEY to work remotely, make sure that you can rcp and
|
||||
rsh to the remote host.
|
||||
|
||||
Obviously, remote execution of FINDKEY implies that FINDKEY
|
||||
must already be installed on the remote host. When FINDKEY
|
||||
runs another copy of FINDKEY remotely, it uses the -L option
|
||||
(findkey -L) to ensure that the remote FINDKEY job executes,
|
||||
rather than calling yet another FINDKEY on another host.
|
||||
|
||||
---------- Remote execution on more than 1 host -----------
|
||||
If more than 1 remote host is available for running FINDKEY
|
||||
(say, in a clustered environment where many servers mount
|
||||
a common filesystem) the choice of a host can be determined
|
||||
by the csh script choosehost, such that execution of
|
||||
choosehost returns the name of a remote server. To use this
|
||||
approach, the following script, called 'choosehost' should
|
||||
be in your bin directory:
|
||||
|
||||
#!/bin/csh
|
||||
# choosehost - choose a host to use for a remote job.
|
||||
# This script rotates among servers listed in .rexhosts,
|
||||
# by choosing the host at the top of the list and moving
|
||||
# it to the bottom.
|
||||
|
||||
#Rotate the list, putting the current host to the bottom.
|
||||
set HOST = `head -1 $home/.rexhosts`
|
||||
set JOBID = $$
|
||||
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
|
||||
echo $HOST >> /tmp/.rexhosts.$JOBID
|
||||
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
|
||||
|
||||
# Write out the current host name
|
||||
echo $HOST
|
||||
|
||||
You must also have a file in your home directory called
|
||||
.rexhosts, listing remote hosts, such as
|
||||
|
||||
graucho.cc.umanitoba.ca
|
||||
harpo.cc.umanitoba.ca
|
||||
chico.cc.umanitoba.ca
|
||||
zeppo.cc.umanitoba.ca
|
||||
|
||||
Each time choosehost is called, choosehost will rotate the
|
||||
names in the file. For example, starting with the .rexhosts
|
||||
as shown, it will move graucho.cc.umanitoba.ca to the bottom
|
||||
of the file, and write the line 'graucho.cc.umanitoba.ca'
|
||||
to the standard output. The next time choosehost is
|
||||
run, it would write 'harpo.cc.umanitoba.ca', and so on.
|
||||
|
||||
Depending on your local configuration, you may wish to
|
||||
rewrite choosehost. All that is really necessary is that
|
||||
echo `choosehost` should return the name of a valid host.
|
||||
|
||||
Once you have installed choosehost and tested it, you can
|
||||
get FINDKEY to use choosehost simply by setting
|
||||
|
||||
setenv XYLEM_RHOST choosehost
|
||||
|
||||
in your .cshrc file.
|
||||
|
||||
--------------- Remote filesystems -----------------------
|
||||
Finally, an alternative to remote execution is to remotely mount
|
||||
the file system containing the databases across the network.
|
||||
This has the advantage of simplicity, and means that the
|
||||
databases are available for ALL programs on your local
|
||||
workstation. However, it may still be advantageous to run
|
||||
FINDKEY remotely, since that will shift much of the computational
|
||||
load to another host.
|
||||
|
||||
|
||||
BUGS
|
||||
At present, regular expression characters cannot be used for
|
||||
keyword searches.
|
||||
|
||||
SEE ALSO
|
||||
grep(1V) identify splitdb
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,65 +0,0 @@
|
|||
|
||||
GETLOC.DOC update 30 May 95
|
||||
|
||||
|
||||
NAME
|
||||
getloc - retrieve database entries listed in namefile to outfile.
|
||||
|
||||
SYNOPSIS
|
||||
getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile
|
||||
|
||||
DESCRIPTION
|
||||
getloc reads a list of names from namefile and recreates
|
||||
entries by combining the annotation and sequence portions of each
|
||||
entry from anofile and seqfile. getloc will work most quickly
|
||||
when the namefile is in alphabetical order, but it will also
|
||||
work on unsorted lists. The following options affect the output:
|
||||
|
||||
a Write annotation portions of entries only, terminated by '//'.
|
||||
seqfile is not included on command line.
|
||||
|
||||
s Write sequence data only, in Pearson (.wrp) format.
|
||||
anofile is not included on command line.
|
||||
|
||||
f Write each entry to a separate file. The filename will
|
||||
consist of the LOCUS name, followed by .ano for annotation
|
||||
only, .wrp for sequence only, or .gen for complete GenBank
|
||||
format.
|
||||
|
||||
c namefile contains accession numbers, rather than names
|
||||
|
||||
The following options identify the type of database being read:
|
||||
|
||||
g GenBank (default)
|
||||
e EMBL
|
||||
p PIR (NBRF)
|
||||
v Vecbase
|
||||
l LiMB
|
||||
|
||||
namefile consists of an alphabetically ordered list of LOCUS names,
|
||||
each on a separate line. Indfile could be used to create a
|
||||
namefile by simply editing out some subset of names. (This can also
|
||||
be done using the Unix comm command.) If the entire indfile was
|
||||
used, the entire database would be recreated, minus the header
|
||||
information that might have been present in the original, but
|
||||
deleted by splitdb.
|
||||
|
||||
NOTE
|
||||
Getloc automatically expands leading blanks that have been
|
||||
compressed using splitdb -c. See splitdb.doc for more information.
|
||||
|
||||
SEE ALSO
|
||||
splitdb, comm(1).
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,327 +0,0 @@
|
|||
|
||||
GETOB 21 Dec 94
|
||||
|
||||
|
||||
NAME
|
||||
getob - Get an object from GenBank
|
||||
|
||||
SYNOPSIS
|
||||
getob [-frcn] infile namefile anofile seqfile indfile message
|
||||
[outfile] expfile
|
||||
|
||||
DESCRIPTION
|
||||
getob extracts 'objects' (subsequences) from GenBank entries, using
|
||||
the features table, and writes them to outfile (.out). A log
|
||||
describing the construction of each object is written to message
|
||||
(.msg). If -r is not set, a list of expressions that would recreate
|
||||
the .out file if evaluated by getob -r, is written to expfile (.exp)
|
||||
|
||||
The following options are available:
|
||||
|
||||
f Write each entry to a separate file. The name will consist
|
||||
of the entry name, and the extension '.obj'.
|
||||
|
||||
r Resolve expressions from namefile into objects.
|
||||
Expressions take the form:
|
||||
|
||||
@[<database>::]<accession>:<location>
|
||||
|
||||
In effect, r makes it possible to use getob to resolve
|
||||
features that span more than one entry, such as segmented
|
||||
files. In the first run of the program, features that require
|
||||
data from outside the entry in which they are defined will be
|
||||
written to outfile with those externally-defined parts rep-
|
||||
resented using the '@' notation described above. During a
|
||||
subsequent run, the outfile from the previous run is used as
|
||||
namefile. When r is set, all lines not beginning with '@' (ie.
|
||||
name lines and sequence lines) are simply copied to the new
|
||||
outfile. When an '@' is encountered, the expression is parsed
|
||||
into accession number and location. The entry with the
|
||||
specified accession number is located in indfile, and read from
|
||||
anofile and seqfile. It is then evaluated, and the result
|
||||
written to outfile in place of the '@' expression.
|
||||
|
||||
getob can also be used to get specific labeled objects from
|
||||
a given entry. Examples:
|
||||
|
||||
@k30576:polyprotein
|
||||
@k30576:/label=polyprotein
|
||||
@x10345:/product="hsp70"
|
||||
@j00879:group(1..2200,mutation_37)
|
||||
|
||||
The first two constructs given above are equivalent. Both
|
||||
will extract the feature called polyprotein. The third
|
||||
construct shows that any feature label can be specified. If
|
||||
none is specified, as in the first example, then /label= is
|
||||
assumed. One limitation, however, is that the label sought
|
||||
must be unique within the entry in its first 15 characters
|
||||
including double quotes ("). Otherwise, only the first
|
||||
matching label expression will be evaluated. Finally, the
|
||||
last example shows that a mutant sequence can be constructed
|
||||
by first specifying an expression that evaluates to a
|
||||
sequence (ie. 1..2200) and then a labeled expression that
|
||||
upon evaluation, uses replace() to modify that sequence. The
|
||||
usage shown in examples 3 & 4 above represents extensions to
|
||||
the DDBJ/EMBL/GenBank Features Table Format.
|
||||
|
||||
As touched on briefly above, the r option makes it possible
|
||||
to construct objects that include recursive references to
|
||||
other entries (eg. segmented files) by iterative calls to
|
||||
getob. The 'features' command automates this process. The basic
|
||||
algorithm is as follows:
|
||||
|
||||
getob infile namefile anofile seqfile indfile ...
|
||||
|
||||
#Pull out all lines containing indirect references
|
||||
grep '@' outfile > unresolved.grep
|
||||
|
||||
while (unresolved.grep is not empty)
|
||||
|
||||
#extract accession numbers to be retrieved
|
||||
cut -c2-7 unresolved.grep > unresolved.nam
|
||||
|
||||
#retrieve the sequences into a new file, and create
|
||||
#a database subset to be used by getob
|
||||
fetch unresolved.nam new.gen
|
||||
splitdb new.gen new.ano new.wrp new.ind
|
||||
|
||||
#run getob again to resolve indirect references
|
||||
getob -r infile outfile new.ano new.wrp new.ind ...
|
||||
|
||||
#Pull out all lines containing indirect references
|
||||
grep '@' outfile > unresolved.grep
|
||||
end
|
||||
|
||||
c NAMEFILE contains accession numbers, rather than locus names
|
||||
|
||||
n By default, the qualifier 'codon_start' is used to determine
|
||||
how many n's, if necessary, must be added to the 5' end of
|
||||
CDS, mat_peptide, or sig_peptide, to preserve the reading
|
||||
frame. To turn OFF this feature, -n must be set. -n must be set
|
||||
for GenBank Releases 67.0 and earlier.
|
||||
|
||||
infile contains commands indicating what data is to be pulled from
|
||||
each entry. Two types of output may be presented, GenBank or
|
||||
OBJECTS. These are described below:
|
||||
|
||||
1) GenBank output - If the word 'GENBANK' is the first line in
|
||||
infile, a pseudo-GenBank entry will be recreated. This option
|
||||
is only intended for debugging purposes and will probably be
|
||||
removed in later releases.
|
||||
|
||||
2) Object format - This option instructs getob to write part or
|
||||
all of each sequence, along with site annotation, by specifying
|
||||
feature key names. The syntax for infile is shown below:
|
||||
|
||||
Backus-Naur format: Example:
|
||||
----------------------------------------------------------
|
||||
OBJECTS OBJECTS
|
||||
<feature key> tRNA
|
||||
{<feature key> rRNA
|
||||
. . . SITES
|
||||
<feature key>} stem_loop
|
||||
SITES
|
||||
{<feature key>
|
||||
. . .
|
||||
<feature key>}
|
||||
|
||||
In the example above, getob is instructed to extract all tRNA or
|
||||
rRNA sequences from each entry, and annotate the position of each
|
||||
stem/loop structure. Note that the SITES coordinates written to the
|
||||
file tell the positions of those SITES relative to the start of the
|
||||
object, rather than the original location in the sequence. As above,
|
||||
each word begins a separate line.
|
||||
|
||||
While the -r option does not use infile, at least a dummy infile
|
||||
must be included in the command line. This dummy file need only
|
||||
contain two lines:
|
||||
|
||||
OBJECTS
|
||||
SITES
|
||||
|
||||
NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in
|
||||
the input file will have no effect, the word SITES must still be
|
||||
present after the last feature key.
|
||||
|
||||
|
||||
namefile
|
||||
namefile consists of a list of LOCUS names or accession numbers,
|
||||
each on a separate line. Names or accession numbers should appear
|
||||
in the order in which they appear in the database file. Unordered
|
||||
namefiles will slow the progress of the search. Since only the
|
||||
first non-blank field of each line in namefile is read, indfile
|
||||
could be used to create a namefile. If the entire indfile was
|
||||
used, the entire database file would be processed. A sample
|
||||
namefile requesting four sequences by LOCUS name is shown below:
|
||||
|
||||
POTPR1A
|
||||
POTPSTH2
|
||||
POTPSTH21
|
||||
POTSTHA
|
||||
|
||||
anofile, seqfile, and indfile
|
||||
The database subset containing GenBank entries must be divided
|
||||
among annotation, sequence and an index by splitdb.
|
||||
|
||||
message
|
||||
message contains a log describing the parsing of each object.
|
||||
For annotative purposes, qualifier lines from the object are
|
||||
included along with the location expression being parsed.
|
||||
The beginning of a typical message file is shown below:
|
||||
|
||||
GETOB Version 0.962 14 May 1992
|
||||
|
||||
POTPR1A:CDS1
|
||||
join
|
||||
(
|
||||
295 603
|
||||
|
||||
1011 1355
|
||||
|
||||
)
|
||||
|
||||
|
||||
/note="pathogenesis-related protein (prp1)"
|
||||
/codon_start=1
|
||||
/translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL
|
||||
QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED
|
||||
KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY
|
||||
LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK"
|
||||
//----------------------------------------------
|
||||
|
||||
In the example above, getob was instructed to retrieve all CDS
|
||||
features from the database subset. The message for the entry
|
||||
POTPR1A is shown, along with a reconstruction of the location
|
||||
expression that was evaluated to create the object. In this
|
||||
case, protein coding sequences from two exons had to be joined
|
||||
to create the object.
|
||||
|
||||
outfile
|
||||
outfile contains the actual objects constructed, consisting of
|
||||
sites found and sequences. The beginning of a typical output file
|
||||
is shown below:
|
||||
|
||||
>POTPR1A:CDS1
|
||||
atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag
|
||||
agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg
|
||||
aagatttacaaaataagagccctttacttcttcaatctaatccaattcac
|
||||
aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat
|
||||
ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc
|
||||
ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc
|
||||
gaagataag
|
||||
ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa
|
||||
agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca
|
||||
aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt
|
||||
gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat
|
||||
tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg
|
||||
aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt
|
||||
atccgttaccgagcctacattcagcctgttgatgcttcaaaatga
|
||||
|
||||
In the example, the CDS from entry POTPR1A has been written in
|
||||
two chunks, corresponding to the two exon portions of the coding
|
||||
sequence. Each location retrieved in constructing the object is
|
||||
written as a separate block of sequence. By comparing message file
|
||||
to outfile, it is possible to verify the correctness of the
|
||||
operation.
|
||||
|
||||
Numbers are appended to the sequence names to indicate
|
||||
which CDS in the entry has been retrieved. Thus, if two CDS
|
||||
features were present, the second one would be named >POTPR1A:2.
|
||||
For compatibility with the FASTA programs of Pearson, the name line
|
||||
begins with a '>'.
|
||||
|
||||
expfile
|
||||
The expression evaluated to create this feature is written
|
||||
to expfile:
|
||||
|
||||
>POTPR1A:CDS1
|
||||
@J03679:join(295..603,1011..1355)
|
||||
|
||||
expfile is only created if -r is not set. It is intended as a way
|
||||
of automating the creation of a feature expression file for use
|
||||
in generating customized datasets. Expressions in expfile can be
|
||||
deleted or modified, or new expressions added, to tailor the
|
||||
dataset to individual needs. To generate a dataset from expfile:
|
||||
|
||||
getob -r infile expfile anofile seqfile indfile message outfile
|
||||
|
||||
EXTENSIONS TO THE FEATURE TABLE LANGUAGE
|
||||
|
||||
1) poly(<absolute_location>|<literal>|<feature_name>,x)
|
||||
|
||||
This operator evaluates an absolute location, literal, or
|
||||
feature name (ie. any location not containing functional
|
||||
operators) and writes it x times. The most obvious
|
||||
application of poly is to create spacers to represent regions
|
||||
of unknown sequence between sequences that are known. For
|
||||
example, the restriction map of a 4kb EcoR1 fragment with a
|
||||
Hind3 site 1000 bp from one end could be represented as follows:
|
||||
|
||||
join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc")
|
||||
|
||||
2) The following feature keys are recognized by GETOB, although
|
||||
not included in the language definition. While they will not
|
||||
appear in GenBank entries, they could be used in user-created
|
||||
GenBank-format files:
|
||||
|
||||
contig
|
||||
This feature key is meant to be used to assemble large
|
||||
sequence segments from smaller segments, possibly using the
|
||||
poly() operator.
|
||||
|
||||
chromosome
|
||||
Intended to annotate the complete sequence of a chromosome. This
|
||||
feature may be constructed by a join of two or more contigs.
|
||||
|
||||
Use of these keywords is illustrated in the features table
|
||||
shown below, which could be used to construct a model of part
|
||||
of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb:
|
||||
|
||||
contig join(J01619:1..13063,poly("n",7140),
|
||||
J03939:1..1363,poly("n",14380),
|
||||
X02306:complement(1..1622),poly("n",14710),
|
||||
J04423:1..5793,poly("n",22500),
|
||||
X03722:1..2400,poly("n",123750),
|
||||
one-of(X05017:complement(1..1854),X05017:1..1854))
|
||||
/label=Eco_contig8
|
||||
/map=763.4-950.6kb
|
||||
contig join(V00352:1..2412,poly("n",28800),M15273:1..3409)
|
||||
/label=Eco_contig9
|
||||
/map=972.9-1001.7kb
|
||||
contig join(X02826:1..1357,poly("n",13540),
|
||||
J01654:complement(1..2270))
|
||||
/label=Eco_contig10
|
||||
/map=1016.5-1031.4kb
|
||||
chromosome join(Eco_contig8,poly("n",22300),
|
||||
Eco_contig9,poly("n",14800),
|
||||
Eco_contig10)
|
||||
/label=Ecoli_chromosome
|
||||
|
||||
NOTES
|
||||
1) If the const DEBUG is set to true in the Pascal source code, getob
|
||||
writes messages to the standard output, indicating the progress of
|
||||
processing for each entry read in. By default, DEBUG=false.
|
||||
This feature is solely for debugging purposes and will be removed in
|
||||
later releases.
|
||||
|
||||
2) GETOB automatically expands leading blanks that have been
|
||||
compressed using splitdb -c. See splitdb.doc for more information.
|
||||
|
||||
SEE ALSO
|
||||
features, splitdb, getloc
|
||||
The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04
|
||||
September 1, 1992
|
||||
GenBank Release Notes for Release 79.0.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,83 +0,0 @@
|
|||
|
||||
IDENTIFY update 3 Feb 94
|
||||
|
||||
|
||||
NAME
|
||||
identify - creates a file of locus names corresponding to lines
|
||||
found by grep in a GenBank annotation file.
|
||||
|
||||
SYNOPSIS
|
||||
identify grepfile indfile namefile findfile
|
||||
|
||||
DESCRIPTION
|
||||
grepfile is created using the Unix grep command to search a .ano
|
||||
file created by splitgb. For example, to find all lines containing
|
||||
the word 'chlorophyll' in plant.ano, use
|
||||
|
||||
grep -n -i 'chlorophyll' plant.ano > plant.grep
|
||||
|
||||
In the example shown, the -n option causes each line written to
|
||||
plant.grep to be preceded by the number of that line in plant.ano.
|
||||
(The -i option causes grep to ignore case.) Identify can use the
|
||||
indfile to determine which entry a given numbered line was found
|
||||
in, and writes the corresponding LOCUS name to namefile. In
|
||||
addition, all lines found in a given entry are re-written to
|
||||
findfile without the line numbers, and preceded by the LOCUS name
|
||||
for that entry.
|
||||
|
||||
EXAMPLES
|
||||
Suppose you wanted to obtain a list of names for all plant
|
||||
sequences which code for proteins. The task is complicated by the
|
||||
fact that many fungal sequences are included in the GenBank plant
|
||||
file. You could begin by searching plant.ano (containing all
|
||||
GenBank plant entries) for the word 'Planta':
|
||||
|
||||
grep -n 'Planta' plant.ano > Planta.grep
|
||||
|
||||
However, we want to eliminate all fungal sequences, as well as all
|
||||
sequences for RNAs other than mRNAs. If we create the file
|
||||
bad.str containing the keywords
|
||||
|
||||
Mycophyta
|
||||
tRNA
|
||||
rRNA
|
||||
uRNA
|
||||
|
||||
we can then type
|
||||
|
||||
grep -n -f bad.str plant.ano > bad.grep
|
||||
|
||||
bad.grep now contains all lines containing the offending keywords.
|
||||
We next use identify to find the names of the entries found by
|
||||
grep.
|
||||
|
||||
identify Planta.grep plant.ind Planta.nam Planta.fnd
|
||||
identify bad.grep plant.ind bad.nam bad.fnd
|
||||
|
||||
Next, we can use the Unix comm command to compare the two .nam
|
||||
files and produce an output file containing only names which are
|
||||
present in Planta.nam but not bad.nam:
|
||||
|
||||
comm -23 Planta.nam bad.nam > plants.nam
|
||||
|
||||
The file plants.nam now contains names of either plant cDNA or
|
||||
genomic sequences which do not code for structural RNAs.
|
||||
At this point, getloc could be used to create a sub-database containing
|
||||
only those entries listed in plants.nam. See documentation for
|
||||
getloc for a more detailed discussion.
|
||||
|
||||
SEE ALSO
|
||||
grep, fgrep, egrep, ngrep, comm, splitgb, getloc
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,23 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FINDKEY/GDE Keyword File Instructions
|
||||
;
|
||||
; 1. Type in one or more keywords below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of keywords.
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each keyword on a separate line
|
||||
; SAMPLE KEYWORD FILE:
|
||||
;
|
||||
; maize
|
||||
; corn
|
||||
; Z.mays
|
||||
; Zea
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FETCH/GDE Name/Accession File Instructions
|
||||
;
|
||||
; 1. Type in one or more LOCUS names or Accession #'s below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of names or accession #'s.
|
||||
; or
|
||||
; Copy names or accession #'s from another window and Paste into this window.
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each name on a separate line
|
||||
; SAMPLE NAME/ACCESSION FILE:
|
||||
;
|
||||
; X30412
|
||||
; PSDRR1
|
||||
; PEADRRG
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Name File Instructions
|
||||
;
|
||||
; 1. Type in one or more GenBank LOCUS names below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of names.
|
||||
;
|
||||
; (NOTE: File can not contain accession numbers.)
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each name on a separate line
|
||||
; SAMPLE NAME FILE:
|
||||
;
|
||||
; PEADRRA
|
||||
; PSDRR1
|
||||
; PEADRRG
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
printdoc update 3 Feb 94
|
||||
|
||||
NAME
|
||||
printdoc - prints documentation files
|
||||
|
||||
SYNOPSIS
|
||||
printdoc filename
|
||||
|
||||
DESCRIPTION
|
||||
printdoc uses the file extension to decide how to print a
|
||||
documentation file. If necessary, a filter such as pr or nroff
|
||||
is used to format the file before sending to the appropriate
|
||||
printer. A list of file extensions recognized by printdoc is
|
||||
given below. If no file extension is given, or the extension is
|
||||
not in the list, printdoc assumes .doc.
|
||||
|
||||
.doc - (default) Uses pr to print the text, using the default
|
||||
settings provided by pr (56 text lines per page plus a 5 line
|
||||
header and footer). Printing is at 12 cpi, front only. This works
|
||||
reasonably well for most unformatted documentation files,
|
||||
provided that the line length doesn't exceed 80 char. This
|
||||
option assumes that a half-inch left margin is automatically
|
||||
provided by the printer.
|
||||
|
||||
.tex - Assumes that document is already pre-formatted. Thus,
|
||||
no headers or footers are provided, and it is assumed that
|
||||
the top and bottom of pages are padded with blanks or header/
|
||||
footer lines as needed. Form-feed characters (^L) may be
|
||||
included in the text to force page breaks.
|
||||
|
||||
.ps - Assumes file is in PostScript format. Sends it to the
|
||||
PostScript printer.
|
||||
|
||||
.nroff - Assumes file is formatted for use by nroff, using the
|
||||
standard macro set (nroff -ms).
|
||||
|
||||
.nroff.me - Assumes file is formatted for use by nroff, using the
|
||||
e macro set (nroff -me).
|
||||
|
||||
TRANSPORTATION NOTES
|
||||
For reasons which should be obvious, this script needs major
|
||||
rewriting at each site, since the available printers will
|
||||
be of different types and have different names.
|
||||
|
||||
SEE ALSO
|
||||
pr, pr(V), xlp, nroff
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
prot2nuc update 10 Aug 94
|
||||
|
||||
NAME
|
||||
prot2nuc - reverse translates protein into nucleic acid
|
||||
|
||||
SYNOPSIS
|
||||
prot2nuc [-ln -gn] < input > output
|
||||
|
||||
DESCRIPTION
|
||||
prot2nuc reads a file containing an amino acid sequence
|
||||
and writes the corresponding reverse translated nucleic acid
|
||||
sequence, using the standard IUPAC-IUB ambiguity codes to output.
|
||||
The amino acid sequence may contain internal stop '*' characters.
|
||||
That is, all legal amino acid characters will be processed.
|
||||
|
||||
-ln print n amino acids/codons per line. (default = 25)
|
||||
|
||||
-gn number the amino acid sequence every n amino acids/codons.
|
||||
(default = 5)
|
||||
|
||||
If l is not evenly divisible by g, the defaults are used.
|
||||
|
||||
input - If the first line of the file begins with '>' or ';',
|
||||
input will be read as the standard .wrp (Pearson) format,
|
||||
such as that produced by getob:
|
||||
|
||||
>name
|
||||
sequence lines
|
||||
|
||||
|
||||
Otherwise, it will be assumed that the file ONLY contains
|
||||
sequence, and all legal amino acid characters will be
|
||||
read as sequence.
|
||||
|
||||
output - The output begins with a header, listing both the
|
||||
1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559
|
||||
(1968)], as well as the nucleic acid ambiguity codes [Cornish-
|
||||
Bowden (1985) Nucl. Acids Res. 13:3021-3030.]. The amino acid
|
||||
sequence, along with its reverse translation, are then printed on
|
||||
lines of l amino acids/codons, numbering every g amino acids/codons.
|
||||
Non-ambiguous nucleotides appear capitalized, while ambiguous
|
||||
nucleotides are in lowercase. A sample output file appears below:
|
||||
|
||||
PROT2NUC Version 8/10/94
|
||||
|
||||
IUPAC-IUP AMINO ACID SYMBOLS
|
||||
[J. Biol. Chem. 243, 3557-3559 (1968)]
|
||||
|
||||
Phe F Leu L Ile I
|
||||
Met M Val V Ser S
|
||||
Pro P Thr T Ala A
|
||||
Tyr Y His H Gln Q
|
||||
Asn N Lys K Asp D
|
||||
Glu E Cys C Trp W
|
||||
Arg R Gly G STOP *
|
||||
Asx B Glx Z UNKNOWN X
|
||||
|
||||
|
||||
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE
|
||||
[Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.]
|
||||
|
||||
Symbol Meaning | Symbol Meaning
|
||||
------------------------------------+---------------------------------
|
||||
G Guanine | k G or T
|
||||
A Adenine | s G or C
|
||||
C Cytosine | w A or T
|
||||
T Thymine | h A or C or T
|
||||
U Uracil | b G or T or C
|
||||
r Purine (A or G) | v G or C or A
|
||||
y Pyrimidine (C or T) | d G or T or A
|
||||
m A or C | n G or A or T or C
|
||||
|
||||
pI39
|
||||
5 10 15 20
|
||||
M E K K S L A A L S F L L L L V L F V A
|
||||
ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn
|
||||
AGyTTr TTrAGy TTrTTrTTrTTr TTr
|
||||
|
||||
25 30 35 40
|
||||
Q E I V V T E A N T C E H L A D T Y R G
|
||||
CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn
|
||||
TTr AGr
|
||||
|
||||
45 50 55 60
|
||||
V C F T N A S C D D H C K N K A H L I S
|
||||
GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn
|
||||
AGy TTr AGy
|
||||
|
||||
65 70
|
||||
G T C H D W K C F C T Q N C
|
||||
GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy
|
||||
|
||||
|
||||
With the Universal Genetic code, ambiguity symbols make it possible
|
||||
to represent all possible codons for an amino acid using two output
|
||||
lines. It is important to realize that the ambiguities on each line
|
||||
can not be combined. For example, CTn and TTr represent all codons for
|
||||
Leucine. However, attempting to combine them into a single triplet,
|
||||
yTn, would be incorrect. For example, TTT and TTC are codons for
|
||||
Phenylalanine, not Leucine.
|
||||
|
||||
FUTURE PLANS
|
||||
1. It wouldn't be hard to have the output printed as nucleic acid
|
||||
sequences in Pearson format, so that the output could be read back
|
||||
into GDE. I don't know why you would want to do this, but it could
|
||||
be done.
|
||||
2. Right now, only the Universal Genetic Code is used, but it should
|
||||
be possible to read in alternative genetic codes, have prot2nuc
|
||||
figure out the ambiguity rules (as is already done in ribosome) and
|
||||
print out the appropriate ambiguous codons.
|
||||
3. It might be useful to have each possible codon printed out, rather
|
||||
than ambiguous codons. This would take up a lot more space and
|
||||
wouldn't be as pretty. If there's a lot of demand I could do this.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
|
@ -1,107 +0,0 @@
|
|||
reform update 3 Feb 94
|
||||
|
||||
NAME
|
||||
reform - reformats multiply-aligned sequences for printing.
|
||||
|
||||
SYNOPSIS
|
||||
reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}]
|
||||
or
|
||||
ralign file parameters | reform [-gpcn] [-sn] [-ln] file
|
||||
|
||||
DESCRIPTION
|
||||
|
||||
g Gaps are to be represented by dashes (-).
|
||||
p Bases which agree with the consensus are
|
||||
represented by periods (.).
|
||||
c Positions at which all sequences agree are
|
||||
capitalized in the consensus.
|
||||
n Sequence data is nucleic acid. Protein default
|
||||
fx Specify input file format, where x is
|
||||
r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics)
|
||||
m Input file contains multiline format sequences already aligned,
|
||||
as opposed to ralign output. This option is obsolete, and is
|
||||
equivalent to -fp.
|
||||
ln The output linelength is set to n.
|
||||
Default is 70.
|
||||
sn numbering starts with n (default=0)
|
||||
|
||||
file Sequence file as described in ralign docu-
|
||||
mentation. reform needs to re-read the
|
||||
sequence file read by ralign to get the
|
||||
names of the sequences, which ralign ignores.
|
||||
This filename is only included for ralign output.
|
||||
If -m is set, file is ignored, and sequence names
|
||||
must be read from the input.
|
||||
|
||||
Note that positions in the consensus at which no nucleotide is in the
|
||||
majority are represented by n's (for nucleic acids) or x's (for proteins),
|
||||
rather than periods, as in ralign.
|
||||
|
||||
Gaps in the input sequences may be represented by either blanks or dashes.
|
||||
|
||||
INPUT FILE FORMATS
|
||||
|
||||
(a) ralign (default, -fr)
|
||||
As described in ralign documentation, the input file (which is assumed to
|
||||
be ralign output) must have each sequence on a single long line. All
|
||||
characters on a given line will be included in the alignment. All lines
|
||||
must be exactly the same length. For example, if ralign had read
|
||||
sequence from a file called 'allcab.seq' and written output to 'allcab.ralign',
|
||||
the following command might be used:
|
||||
|
||||
reform allcab.seq <allcab.ralign >allcab.ref
|
||||
|
||||
(b) Pearson (-fp, -m)
|
||||
Compatible with sequence files used by Pearson's fasta programs as shown:
|
||||
>name1
|
||||
sequence1
|
||||
>name2
|
||||
sequence2
|
||||
...
|
||||
>namen
|
||||
sequencen
|
||||
|
||||
Sequences may run over many lines and line length does not have to be
|
||||
uniform. However, both dashes ('-') and blanks (' ') will be read in
|
||||
as gaps in the alignment. A right arrow (>) at the beginning of a line
|
||||
indicates the name line at the beginning of a new sequence.
|
||||
|
||||
Any line beginning with a semicolon (';') will be considered a comment,
|
||||
and will be ignored.
|
||||
|
||||
(c) MBCRR-MASE (Intelligenetics) (-fi)
|
||||
Compatible with .mase files produced by MBCRR's mase and pima programs,
|
||||
which use the Intelligenetics format as shown:
|
||||
|
||||
;one or more comment lines
|
||||
name1
|
||||
sequence1
|
||||
;one or more comment lines
|
||||
name2
|
||||
sequence2
|
||||
...
|
||||
;one or more comment lines
|
||||
namen
|
||||
sequencen
|
||||
|
||||
Sequences may run over many lines and line length does not have to be
|
||||
uniform. However, both dashes ('-') and blanks (' ') will be read in
|
||||
as gaps in the alignment. Each sequence MUST begin with at least one
|
||||
comment line. When a comment line is encountered, that signals the
|
||||
beginning of a new sequence. The first line after the comment is read
|
||||
as the name, and the sequence begins on the next line after that.
|
||||
|
||||
SEE ALSO ralign, mase
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,84 +0,0 @@
|
|||
ribosome update 3 Feb 94
|
||||
|
||||
NAME
|
||||
ribosome - translates nucleic acid into protein
|
||||
|
||||
SYNOPSIS
|
||||
ribosome [-g gcfile] < input > output
|
||||
|
||||
DESCRIPTION
|
||||
ribosome reads a file of one or more nucleic acid sequences
|
||||
and writes the corresponding amino acid sequence, in the standard
|
||||
one letter code, to output. Ribosome begins translating at the
|
||||
first nucleotide in each input sequence and continues to the end.
|
||||
If the length of the translated sequence is not divisible by 3,
|
||||
ribosome pads the final codon with N's and attempts to use ambi-
|
||||
guity rules to translate the final codon. Based on the genetic
|
||||
code used, ribosome derives a set of rules to resolve all ambi-
|
||||
guities that can possibly be resolved.
|
||||
|
||||
-g read in an alternative genetic code from gcfile. If this
|
||||
option is not specified, ribosome uses the universal
|
||||
genetic code.
|
||||
|
||||
gcfile - This file specifies an alternative genetic code. An
|
||||
example is shown below. ribosome reads the first 64 legal
|
||||
capital letters as amino acids. Consequently, lowercase letters
|
||||
can be used for annotation purposes, as shown in the example.
|
||||
All non-amino acid characters are ignored.
|
||||
|
||||
sgc2 - yeast mitochondrial genetic code
|
||||
|
||||
second position
|
||||
first position ------------------------------- third position
|
||||
(5' end) u c a g (3' end)
|
||||
-----------------------------------------------------------------
|
||||
u F S Y C u
|
||||
F S Y C c
|
||||
L S * W a
|
||||
L S * W g
|
||||
-----------------------------------------------------------------
|
||||
c T P H R u
|
||||
T P H R c
|
||||
T P Q R a
|
||||
T P Q R g
|
||||
-----------------------------------------------------------------
|
||||
a I T N S u
|
||||
I T N S c
|
||||
M T K R a
|
||||
M T K R g
|
||||
-----------------------------------------------------------------
|
||||
g V A D G u
|
||||
V A D G c
|
||||
V A E G a
|
||||
V A E G g
|
||||
|
||||
|
||||
input - If the first line of the file begins with '>' or ';',
|
||||
input will be read as the standard .wrp (Pearson) format,
|
||||
such as that produced by getob:
|
||||
|
||||
>name
|
||||
; one or more comment lines (optional)
|
||||
sequence lines
|
||||
|
||||
|
||||
Otherwise, it will be assumed that the file ONLY contains
|
||||
sequence, and all legal IUPAC/IUB DNA characters will be
|
||||
read as sequence.
|
||||
|
||||
SEE ALSO
|
||||
getob
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,66 +0,0 @@
|
|||
shuffle.doc update 3 Feb 94
|
||||
|
||||
SYNOPSIS
|
||||
shuffle -sn [-wn -on]
|
||||
|
||||
DESCRIPTION
|
||||
Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF
|
||||
and Waterman MS (1984) On the statistical significance of nucleic
|
||||
acid similarities. Nucl. Acids Res. 12:215-226.
|
||||
-sn n is a random integer between 0 and 32767. This number
|
||||
must be provided for each run.
|
||||
|
||||
-wn n is an integer, indicating the width of the window for
|
||||
random localization. If w exceeds the length of a sequence,
|
||||
or is negative, the entire sequence is scrambled as a single
|
||||
window. This is also the case if w is not specified.
|
||||
|
||||
-on n is an integer, indicating the number of nucleotides
|
||||
overlap between adjacent windows. It should never exceed
|
||||
the window size. o defaults to 0 if not specified.
|
||||
|
||||
If w and o are specified, overlapping windows of w nucleotides
|
||||
are shuffled, thus preserving the local characteristic base
|
||||
composition. Windows overlap by o nucleotides.
|
||||
If w and o are not specified, each sequence is shuffled globally,
|
||||
thus preserving the overall base composition, but not the local
|
||||
variations in composition.
|
||||
|
||||
Any number of sequences may be processed from a single input
|
||||
file. In Pearson-format files, each new sequence begins with a
|
||||
'>' comment line, indicating the name and a short description of
|
||||
the sequence.
|
||||
|
||||
No distinction is made between protein or nucleic acid sequences.
|
||||
That is, shuffle will read any of the following characters as
|
||||
sequence:
|
||||
|
||||
T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,-
|
||||
|
||||
where '*' is the result of translating a stop codon, and '-'
|
||||
is a gap generated during sequence alignment. Lowercase is
|
||||
also accepted.
|
||||
|
||||
EXAMPLE
|
||||
A sample output file is shown below. Note that the first two
|
||||
lines of output are comment lines, listing the version of the
|
||||
program and the parameters used in the run.
|
||||
|
||||
>SHUFFLE VERSION 11/ 8/93
|
||||
>RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3
|
||||
>BAZFAZ - Borborigmus azerbi F-actin-zeta gene
|
||||
ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt
|
||||
cacccttgggcagtg.....(etc.)
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,141 +0,0 @@
|
|||
|
||||
SPLITDB update 28 Mar 98
|
||||
|
||||
|
||||
NAME
|
||||
splitdb - split GenBank files into annotation, sequence, and index
|
||||
|
||||
SYNOPSIS
|
||||
splitdb [-gepvlct] dbfile anofile seqfile indfile
|
||||
|
||||
DESCRIPTION
|
||||
Splitdb splits a database (dbfile) among three files: anofile, seqfile
|
||||
and indfile. Splitdb ignores any header information that might be in the
|
||||
file and begins processing at the first entry.
|
||||
|
||||
anofile contains the annotation portion of each entry. Entries are
|
||||
terminated with '//' or '///' (PIR only). Trailing blanks present in
|
||||
dbfile are omitted in anofile.
|
||||
|
||||
seqfile contains the sequence data for each entry. Each sequence
|
||||
entry begins with a header line, followed by sequence data on
|
||||
succeeding lines of 75 characters per line. The header line
|
||||
includes the header flag character '>' in column 1, followed by the
|
||||
name, followed by the first 50 characters of the 1st
|
||||
DEFINITION line. An example is shown below:
|
||||
|
||||
>UNHOR1 - Unicorn horn protein 1, complete cDNA sequence
|
||||
attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc
|
||||
...
|
||||
|
||||
Removal of blanks and numbers from sequence lines makes split
|
||||
datasets about 8-9% smaller than the original GenBank files.
|
||||
|
||||
indfile is an index which tells the line numbers for each entry in
|
||||
anofile and seqfile. It is assumed to be in alphabetical order by
|
||||
name. Each line contains a name and accession number, followed by the
|
||||
line numbers on which the annotation and sequence data begin in anofile
|
||||
and seqfile, respectively. Thus the file plants.ind might contain:
|
||||
|
||||
|
||||
A15660 TA156608 1 1
|
||||
A15671 A15671 33 11
|
||||
A15673 A15673 65 25
|
||||
A15675 AK156751 97 36
|
||||
A15677 BA156770 128 46
|
||||
A16780 BA167807 160 57
|
||||
A16782 A16782 192 70
|
||||
ATHRPRP1C GM905105 225 83
|
||||
etc...
|
||||
|
||||
Note that indfile is a perfectly legitimate .nam file, for use with
|
||||
programs such as getloc, getob, or comm.
|
||||
|
||||
|
||||
The following options identify the type of database being read:
|
||||
|
||||
-g GenBank (default)
|
||||
-e EMBL
|
||||
-p PIR (NBRF)
|
||||
-v Vecbase
|
||||
-l LiMB
|
||||
|
||||
Other options:
|
||||
-c Compress 3 or more leading blanks in annotation lines
|
||||
to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG
|
||||
is the ASCII character specified by the Pascal const
|
||||
CRUNCHOFFSET, which is set to 33 ("!") in the current
|
||||
implementation. For each annotation line read, if the
|
||||
number of leading blanks is >=3, splitdb sets CRUNCHCHAR
|
||||
to CRUNCHOFFSET+the number of blanks. Thus, for lines
|
||||
with 3, 4, or 5 leading blanks, CRUNCHCHAR would be
|
||||
'$', '%' and '&', respectively. GETLOC and GETOB
|
||||
automatically expand crunched blanks when CRUNCHFLAG
|
||||
is encountered on an input line. Empirical observations
|
||||
indicate that the -c option decreases the size of
|
||||
GenBank files by about 10%.
|
||||
|
||||
This compression method may fail when the number of
|
||||
leading blanks exceeds 127-CRUNCHOFFSET. However,
|
||||
none of the above mentioned databases currently
|
||||
supports any datafield with anywhere near that number
|
||||
of leading blanks.
|
||||
|
||||
-t (GenBank only) Append all information in the first
|
||||
ORGANISM to the end of each line in indfile. For example,
|
||||
the entry which begins:
|
||||
|
||||
LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996
|
||||
DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon
|
||||
Zoological Gardens) mitochondrial D-loop DNA.
|
||||
ACCESSION L76759
|
||||
NID g1222584
|
||||
KEYWORDS D-loop.
|
||||
SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom,
|
||||
ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male
|
||||
DNA.
|
||||
ORGANISM Mitochondrion Gorilla gorilla gorilla
|
||||
Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata;
|
||||
Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla.
|
||||
|
||||
might be indexed as
|
||||
|
||||
GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla
|
||||
|
||||
This is useful for taxonomic studies, or as a way of making
|
||||
it easy to create subsets from a single index. Thus,
|
||||
'grep gorilla primates.ind' would print all lines in the
|
||||
file that contained the word gorilla. The output from
|
||||
this command could be used as a .nam file for extracting
|
||||
just gorilla sequences from a larger dataset using
|
||||
fetch.
|
||||
|
||||
|
||||
NOTES
|
||||
1. Header lines that aren't part of entries are automatically
|
||||
stripped out during processing. For example, in a file containing
|
||||
GenBank entries, all lines up to the first occurrence of 'LOCUS'
|
||||
starting in column 1, are ignored. Similarly for PIR, processing
|
||||
begins on the first line containing 'ENTRY' beginning in column 1.
|
||||
2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996,
|
||||
have accession numbers of 8 characters, rather than 6. Previously
|
||||
assigned accession numbers will remain at 6 characters. Splitdb has
|
||||
been updated to write all accession numbers to the .ind file, left
|
||||
justified in a field of 8 characters, in columns 14-21 of the .ind
|
||||
file.
|
||||
|
||||
SEE ALSO
|
||||
getloc, getob, comm(1) (Unix command).
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,125 +0,0 @@
|
|||
|
||||
|
||||
XYLEM.DOC update 10 Aug 1994
|
||||
|
||||
XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES
|
||||
Brian Fristensky, University of Manitoba
|
||||
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
||||
|
||||
SPLITDB - Splits files containing one or more GenBank entries into
|
||||
annotation, sequence, and index files. Indexfiles can also serve as
|
||||
namefiles for GETLOC. Sequence files are in the format required for
|
||||
use with the Pearson programs (FASTA,LFASTA etc.).
|
||||
|
||||
GETLOC - Reads a file containing LOCUS names (namefile) and
|
||||
retrieves either annotation, sequence, or both from a split
|
||||
database or database subset created by SPLITDB.
|
||||
|
||||
FETCH - A c-shell script that provides a convenient menu-driven
|
||||
front end for retrieval of database entries using GETLOC.
|
||||
|
||||
FINDKEY - A c-shell script that provides a convenient menu-driven
|
||||
front end for keyword searches of database annotation files,
|
||||
using IDENTIFY.
|
||||
|
||||
IDENTIFY - Given line-numbered output from grep, IDENTIFY uses the
|
||||
index file to determine which entries contained the keywords
|
||||
searched for by grep. It then produces a namefile for use by
|
||||
GETLOC. Namefiles can serve as logical databases, and utilities
|
||||
such as the Unix comm command can perform logical operations on
|
||||
these namefiles to produce database subsets.
|
||||
|
||||
FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS
|
||||
etc.) from each of the named entries, using the new
|
||||
DDBJ/EMBL/GenBank International Features Table Format. A future
|
||||
version will also allow the annotation of sites within objects that
|
||||
are extracted.
|
||||
|
||||
DBSTAT - Calculates amino acid frequencies in a protein database.
|
||||
|
||||
RIBOSOME - Given a file of one or more nucleic acids (eg. output
|
||||
from GETOB), RIBOSOME translates them into protein, using either
|
||||
the universal genetic code or an alternative genetic code supplied
|
||||
by the user. All ambiguities that can be resolved are translated.
|
||||
|
||||
PROT2NUC - reverse translates a sequence from protein to nucleic
|
||||
acid, using IUPAC-IUB ambiguity codes.
|
||||
|
||||
SHUFFLE - Given a random seed, shuffles each sequence in a Pearson-
|
||||
format (.wrp) file. Shuffling is done locally in overlapping windows
|
||||
across the length of a given sequence. The window size and overlap
|
||||
length can be specified by the user.
|
||||
|
||||
REFORM - Reformats multiply aligned nucleic acid or protein
|
||||
sequences for publication. Output for M. Waterman's RALIGN
|
||||
program, or the MBCRR MASE editor, can be directly used as input.
|
||||
A variety of options are available for representing gaps, consensus
|
||||
sequences and other features.
|
||||
|
||||
Fristensky (Cornell) Sequence Analysis Package - General purpose
|
||||
sequence analysis package written in Standard Pascal. Features
|
||||
include: sequence numbering, formatting, & translation, restriction
|
||||
site searches & mapping, matrix similarity searches, TESTCODE
|
||||
analysis, base composition analysis. All programs are interactive
|
||||
and read free-format, BIONET, and GenBank files.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
XYLEM DATABASE TOOLS
|
||||
|
||||
|
||||
|
||||
----------
|
||||
| .gen | getloc
|
||||
|----------|<--------------------------
|
||||
| GenBank | |
|
||||
---------- |
|
||||
| |
|
||||
| splitgb |
|
||||
/|\ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
v v v |
|
||||
---------- ---------- ---------- |
|
||||
| .ano | | .wrp | | .ind | |
|
||||
|----------| |----------| |----------| |
|
||||
|annotation| | sequence | | index | |
|
||||
---------- ---------- ---------- |
|
||||
| \ | / |
|
||||
| \ | / |
|
||||
| \ | / |
|
||||
| \ | / |
|
||||
grep -n | \ | / |
|
||||
| \ | / |
|
||||
| | |
|
||||
| | -------------------------------+
|
||||
| ^ |
|
||||
v | getob |
|
||||
---------- ---------- v
|
||||
| .grep | identify | .nam | ----------
|
||||
|----------| --------->|----------| | .wrp |
|
||||
| numbered | | LOCUS | ----------
|
||||
|file lines| ---------- | eg. mRNA |
|
||||
---------- | ^ | tRNA |
|
||||
| | | rRNA |
|
||||
| | | CDS |
|
||||
--comm-- ----------
|
||||
(logical operations on
|
||||
sets of names)
|
||||
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB R3T 2N2 CANADA
|
||||
204-474-6085
|
||||
frist@cc.umanitoba.ca
|
||||
|
BIN
HGL_SRC/Alloc.o
BIN
HGL_SRC/Alloc.o
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
HGL_SRC/MakeCons
BIN
HGL_SRC/MakeCons
Binary file not shown.
|
@ -1,10 +1,10 @@
|
|||
|
||||
CC = cc
|
||||
#FLAGS = -g
|
||||
OPENWINHOME = /usr/openwin
|
||||
FLAGS = -m32
|
||||
OPENWINHOME = ../usr
|
||||
MFILE =
|
||||
INCDIR = -I$(OPENWINHOME)/include
|
||||
LIBDIR = -L$(OPENWINHOME)/lib
|
||||
INCDIR = -I/usr/include/xview
|
||||
LIBDIR = -L/usr/lib32
|
||||
LIBS = -lxview -lolgx -lX11
|
||||
|
||||
libs.o = Alloc.o HGLfuncs.o
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,5 +0,0 @@
|
|||
#/bin/csh
|
||||
make all
|
||||
cp Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool ../bin
|
||||
rm Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool
|
||||
rm *.o
|
BIN
HGL_SRC/mapview
BIN
HGL_SRC/mapview
Binary file not shown.
|
@ -1,4 +1,6 @@
|
|||
implicit integer (a-z)
|
||||
|
||||
c parameter (maxn=625,fldmax=2*maxn)
|
||||
parameter (maxn=1500,maxn2=3000)
|
||||
parameter (fldmax=maxn2)
|
||||
parameter (infinity=16000,sortmax=30000)
|
||||
|
@ -6,28 +8,26 @@
|
|||
parameter (maxtloops=40)
|
||||
parameter (maxsiz=10000)
|
||||
|
||||
integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn)
|
||||
integer*2 vst(maxn*maxn),wst(maxn*maxn)
|
||||
integer poppen(4),maxpen
|
||||
real prelog
|
||||
|
||||
dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2),
|
||||
. work1(maxn2,0:2),work2(maxn2),
|
||||
dimension newnum(maxsiz),hstnum(fldmax),force(fldmax),
|
||||
. numseq(fldmax), work(fldmax,0:2),
|
||||
. stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30)
|
||||
dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2)
|
||||
c common /main/ newnum,hstnum,force,work1,work2,
|
||||
common /main/ newnum,hstnum,force,work1,work2,
|
||||
. stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n,
|
||||
. numseq,poppen,prelog,maxpen,vst,wst1,wst2
|
||||
common /main/ vst,wst,newnum,hstnum,force,numseq,work,stack,tstk,
|
||||
. dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog
|
||||
|
||||
character*1 seq(maxsiz)
|
||||
c character*5 inbuf
|
||||
character*10 progtitle
|
||||
character*30 seqlab
|
||||
common /seq/ seq,seqlab
|
||||
data progtitle/'crna'/
|
||||
|
||||
dimension list(100,4)
|
||||
common /list/ list,listsz
|
||||
common /nm/ vmin
|
||||
data progtitle/'lrna'/
|
||||
common /nm/ n,vmin
|
||||
|
||||
dimension basepr(maxn)
|
||||
common /traceback/ basepr
|
||||
|
@ -40,21 +40,3 @@ c character*5 inbuf
|
|||
|
||||
integer*2 tloop(maxtloops,2),numoftloops
|
||||
common/tloops/tloop,numoftloops
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue