diff --git a/CORE/.GDEmenus.bak b/CORE/.GDEmenus.bak
deleted file mode 100644
index 5a3f56f..0000000
--- a/CORE/.GDEmenus.bak
+++ /dev/null
@@ -1,761 +0,0 @@
-1menu:File
-
-item:test cmask output
-itemmethod: kedit in1
-
-in:in1
-informat:colormask
-
-item:New sequence
-itemmethod:echo "$Type$Name" > out1
-itemmeta:n
-itemhelp:new_sequence.help
-
-arg:Name
-argtype:text
-arglabel:New Sequence name?
-argtext:New
-
-arg:Type
-argtype:choice_list
-arglabel:Type?
-argchoice:DNA/RNA:#
-argchoice:Amino Acid:%
-argchoice:Text:\"
-argchoice:Mask:@
-
-out:out1
-outformat:flat
-
-item:Import Foreign Format
-itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
-itemhelp:readseq.help
-
-arg:INPUTFILE
-argtype:text
-arglabel:Name of foreign file?
-
-out:OUTPUTFILE
-outformat:genbank
-
-item:Export Foreign Format
-itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
-itemhelp:readseq.help
-
-arg:FORMAT
-argtype:choice_list
-argchoice:FASTA:8
-argchoice:NEXUS:17
-argchoice:Phylip v3.3:12
-argchoice:IG/Stanford:1
-argchoice:GenBank:2
-argchoice:NBRF:3
-argchoice:EMBL:4
-argchoice:GCG:5
-argchoice:DNA Strider:6
-argchoice:Fitch:7
-argchoice:Pearson:8
-argchoice:Zuker:9
-argchoice:Olsen:10
-argchoice:Phylip v3.2:11
-argchoice:Phylip v3.3:12
-argchoice:Plain text:13
-
-arg:OUTPUTFILE
-argtype:text
-arglabel:Save as?
-
-in:INPUTFILE
-informat:genbank
-
-
-item:Save Selection
-itemmethod: cat $SAVE_FUNC > $Name
-itemhelp:save_selection.help
-
-arg:SAVE_FUNC
-argtype:chooser
-arglabel:File format
-argchoice:Flat:in1
-argchoice:Genbank:in2
-argchoice:GDE/HGL:in3
-
-arg:Name
-argtype:text
-arglabel:File name?
-
-in:in1
-informat:flat
-
-in:in2
-informat:genbank
-
-in:in3
-informat:gde
-
-item:Print Selection
-itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
-itemhelp:print_alignment.help
-
-arg:SCALE
-argtype:slider
-arglabel:Reduce printout by?
-argmin:1
-argmax:20
-argvalue:1
-
-arg:CMD
-argtype:chooser
-argchoice:Lpr:lpr
-argchoice:Enscript Gaudy:enscript -G -q
-argchoice:Enscript Two column:enscript -2rG
-
-arg:PRINTER
-argtype:text
-arglabel:Which printer?
-argtext:lp
-
-in:in1
-informat:gde
-insave:
-
-menu:Edit
-
-item:Sort
-itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
-itemhelp:heapsortHGL.help
-
-arg:PRIM_KEY
-argtype:choice_list
-argchoice:Group:group-ID
-argchoice:type:type
-argchoice:name:name
-argchoice:Sequence ID:sequence-ID
-argchoice:creator:creator
-argchoice:offset:offset
-arglabel:Primary sort field?
-
-arg:SEC_KEY
-argtype:choice_list
-argchoice:None:
-argchoice:Group:group-ID
-argchoice:type:type
-argchoice:name:name
-argchoice:Sequence ID:sequence-ID
-argchoice:creator:creator
-argchoice:offset:offset
-arglabel:Secondary sort field?
-
-in:in1
-informat:gde
-insave:
-
-item:extract
-itemmethod:(gde in1;/bin/rm -f in1)&
-
-in:in1
-informat:gde
-inmask:
-insave:
-
-menu:DNA/RNA
-
-item:Translate...
-itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
-
-arg:FRAME
-argtype:chooser
-arglabel:Which reading frame?
-argchoice:First:1
-argchoice:Second:2
-argchoice:Third:3
-argchoice:All six:6
-
-arg:MNFRM
-arglabel:Minimum length of AA sequence to translate?
-argtype:slider
-argmin:0
-argmax:100
-argvalue:20
-
-arg:LTRCODE
-argtype:chooser
-arglabel:Translate to:
-argchoice:Single letter codes:
-argchoice:Triple letter codes:-3
-
-arg:TBL
-arglabel:Codon table?
-argtype:chooser
-argchoice:universal:1
-argchoice:mycoplasma:2
-argchoice:yeast:3
-argchoice:Vert. mito.:4
-in:in1
-informat:gde
-
-out:out1
-outformat:gde
-
-item:Dot plot
-itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
-itemhelp:DotPlotTool.help
-
-in:in1
-informat:gde
-insave:
-
-item:Clustal alignment
-itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
-
-itemhelp:clustal_help
-
-arg:KTUP
-argtype:slider
-arglabel:K-tuple size for pairwise search
-argmin:1
-argmax:10
-argvalue:2
-
-arg:WIN
-argtype:slider
-arglabel:Window size
-argmin:1
-argmax:10
-argvalue:4
-
-arg:Trans
-argtype:chooser
-arglabel:Transitions weighted?
-argchoice:Yes:/TRANSIT
-argchoice:No:
-
-arg:FIXED
-argtype:slider
-arglabel:Fixed gap penalty
-argmin:1
-argmax:100
-argvalue:10
-
-arg:FLOAT
-arglabel:Floating gap penalty
-argtype:slider
-argmin:1
-argmax:100
-argvalue:10
-
-arg:REPORT
-argtype:chooser
-arglabel:View assembly report?
-argchoice:No:
-argchoice:Yes:kedit in1.rpt&
-
-
-in:in1
-informat:flat
-insave:
-
-item:Variable Positions
-itemmethod:varpos $REV < in1 > out1
-
-arg:REV
-argtype:chooser
-arglabel:Highlight (darken)
-argchoice:Conserved positions:
-argchoice:variable positions:-rev
-
-in:in1
-informat:flat
-
-out:out1
-outformat:colormask
-
-item:Phrap
-itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
-
-in:in1
-informat:genbank
-
-out:out1
-outformat:genbank
-
-item:SNAP
-itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
-
-in:in1
-informat:flat
-out:out1
-outformat:text
-
-
-
-
-item:Find all
-itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
-itemhelp:findall.help
-itemmeta:f
-
-arg:SEARCH
-argtype:text
-arglabel:Search String
-
-arg:PRCNT
-argtype:slider
-arglabel:Percent mismatch
-argmin:0
-argmax:75
-argvalue:10
-
-arg:CASE
-argtype:chooser
-arglabel:Case
-argchoice:Upper equals lower:
-argchoice:Upper not equal lower:-case
-
-arg:UT
-argtype:chooser
-arglabel:U equal T?
-argchoice:Yes:-u=t
-argchoice:No:
-argvalue:0
-
-arg:MAT
-arglabel:Match color
-argtype:choice_list
-argchoice:yellow:1
-argchoice:violet:2
-argchoice:red:3
-argchoice:aqua:4
-argchoice:green:5
-argchoice:blue:6
-argchoice:grey:11
-argchoice:black:8
-argvalue:2
-
-arg:MIS
-argtype:choice_list
-arglabel:Mismatch color
-argchoice:yellow:1
-argchoice:violet:2
-argchoice:red:3
-argchoice:aqua:4
-argchoice:green:5
-argchoice:blue:6
-argchoice:grey:11
-argchoice:black:8
-argvalue:7
-
-in:in1
-informat:flat
-
-out:out1
-outformat:colormask
-
-item:Sequence Consensus
-itemmethod:(MakeCons in1 $METHOD $MASK > out1)
-itemhelp:MakeCons.help
-
-arg:METHOD
-arglabel:Method
-argtype:chooser
-argchoice:IUPAC:-iupac
-argchoice:Majority:-majority $PERCENT
-
-arg:MASK
-argtype:chooser
-arglabel:Create a new:
-argchoice:Sequence:
-argchoice:Selection Mask: | Consto01mask
-
-arg:PERCENT
-arglabel:Minimum Percentage for Majority
-argtype:slider
-argmin:50
-argmax:100
-argvalue:75
-
-in:in1
-informat:gde
-
-out:out1
-outformat:gde
-
-
-#Menu for DNA/RNA
-
-item:blastn
-itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDBDNA
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:4
-argmax:18
-argvalue:12
-
-arg:MATCH
-argtype:slider
-arglabel:Match Score
-argmin:1
-argmax:10
-argvalue:5
-
-arg:MMSCORE
-argtype:slider
-arglabel:Mismatch Score
-argmin:-10
-argmax:-1
-argvalue:-5
-
-item:blastx
-itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
-
-
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
-argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:1
-argmax:5
-argvalue:3
-
-arg:Matrix
-arglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:CODE
-argtype:choice_list
-arglabel:Genetic Code
-argchoice:Standard or Universal:0
-argchoice:Vertebrate Mitochondrial:1
-argchoice:Yeast Mitochondrial:2
-argchoice:Mold Mitochondrial and Mycoplasma:3
-argchoice:Invertebrate Mitochondrial:4
-argchoice:Ciliate Macronuclear:5
-argchoice:Protozoan Mitochondrial:6
-argchoice:Plant Mitochondrial:7
-argchoice:Echinodermate Mitochondrial:8
-
-item:------------------------
-
-item:Add a new DNA blast db
-itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
-
-arg:sourcefile
-argtype:text
-arglabel: enter the file name
-
-arg:menuname
-argtype:text
-arglabel: enter the name of the DB
-
-menu:seq. datasets
-
-item:-------------
-item:add a new dataset
-itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
-
-arg:name
-argtype:text
-arglabel:Enter the dataset name ?
-
-arg:file
-argtype:text
-arglabel:Enter the dataset file (in FASTA) ?
-
-
-#Menu for Protein
-menu:protein
-item:blastp
-itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
-
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
-
-arg:Matrix
-barglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:1
-argmax:5
-argvalue:3
-
-item:tblastn
-itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
-argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
-
-arg:Matrix
-arglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:4
-argmax:18
-argvalue:12
-
-arg:CODE
-argtype:choice_list
-arglabel:Genetic Code
-argchoice:Standard or Universal:0
-argchoice:Vertebrate Mitochondrial:1
-argchoice:Yeast Mitochondrial:2
-argchoice:Mold Mitochondrial and Mycoplasma:3
-argchoice:Invertebrate Mitochondrial:4
-argchoice:Ciliate Macronuclear:5
-argchoice:Protozoan Mitochondrial:6
-argchoice:Plant Mitochondrial:7
-argchoice:Echinodermate Mitochondrial:8
-
-
-item:Map View
-itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
-itemhelp:mapview.help
-
-in:in1
-informat:gde
-insave:
-
-arg:PBL
-arglabel:Pixel Between Lines
-argtype:slider
-argvalue:10
-argmin:1
-argmax:15
-
-arg:NPP
-arglabel:Nucleotides Per Pixel
-argtype:slider
-argvalue:1
-argmin:1
-argmax:20
-
-arg:LWIDTH
-arglabel:Line Thickness
-argtype:slider
-argvalue:2
-argmin:1
-argmax:5
-
-item:--------------------------
-item:Add a new DNA blast db
-itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
-
-arg:sourcefile
-argtype:text
-arglabel: Enter the file (in FASTA)
-
-arg:menuname
-argtype:text
-arglabel: Enter the name of the DB
-
-menu:Phylogeny
-
-
-item:Phylip help
-itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
-
-arg:FILE
-argtype:choice_list
-arglabel:Which program?
-argchoice:clique:clique.html
-argchoice:consense:consense.html
-argchoice:contchar:contchar.html
-argchoice:contml:contml.html
-argchoice:contrast:contrast.html
-argchoice:discrete:discrete.html
-argchoice:distance:distance.html
-argchoice:dnaboot:dnaboot.html
-argchoice:dnacomp:dnacomp.html
-argchoice:dnadist:dnadist.html
-argchoice:dnainvar:dnainvar.html
-argchoice:dnaml:dnaml.html
-argchoice:dnamlk:dnamlk.html
-argchoice:dnamove:dnamove.html
-argchoice:dnapars:dnapars.html
-argchoice:dnapenny:dnapenny.html
-argchoice:dollop:dollop.html
-argchoice:dolmove:dolmove.html
-argchoice:dolpenny:dolpenny.html
-argchoice:draw:draw.html
-argchoice:drawgram:drawgram.html
-argchoice:drawtree:drawtree.html
-argchoice:factor:factor.html
-argchoice:fitch:fitch.html
-argchoice:gendist:gendist.html
-argchoice:kitsch:kitsch.html
-argchoice:main:main.html
-argchoice:mix:mix.html
-argchoice:move:move.html
-argchoice:neighbor:neighbor.html
-argchoice:penny:penny.html
-argchoice:protpars:protpars.html
-argchoice:read.me.general:read.me.general.html
-argchoice:restml:restml.html
-argchoice:seqboot:seqboot.html
-argchoice:sequence:sequence.html
-
-
-
-item:Phylip 3.5
-itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
-
-arg:PROGRAM
-argtype:choice_list
-arglabel:Which program to run?
-argchoice:DNAPARS:dnapars
-argchoice:DNABOOT:dnaboot
-argchoice:DNAPENNY:dnapenny
-argchoice:DNAML:dnaml
-argchoice:DNAMLK:dnamlk
-argchoice:DNACOMP:dnacomp
-argchoice:DNAMOVE:dnamove
-argchoice:DNAINVAR:dnainvar
-argchoice:PROTPARS:protpars
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-
-
-item:Phylip DNA Distance methods
-itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
-
-arg:EXPLAIN
-argtype:text
-arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE
-
-
-arg:PROGRAM
-arglabel:Which method?
-argtype:chooser
-argchoice:DNADIST+NEIGHBOR:
-argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
-
-arg:PROG
-arglabel:Run ?
-argtype:chooser
-argchoice:Run without Bootstrap:
-argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
-
-arg:DNA
-argtype:text
-arglabel:Name of DNADIST outfile?
-
-arg:NEI
-argtype:text
-arglabel:Name of NEIGHBOR outfile?
-
-arg:TREE
-argtype:text
-arglabel:Name of TREEFILE ?
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-item:Phylip PROTEIN Distance methods
-itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
-
-arg:PROGRAM
-arglabel:Which method?
-argtype:chooser
-argchoice:PROTDIST+NEIGHBOR:
-argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
-
-arg:PROG
-arglabel:Which method?
-argtype:chooser
-argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
-argchoice:No Bootstrap:
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-
-
-
-
-menu:On-Line Res.
-
-item:GDE for Linux resources at Bioafrica.net
-itemmethod:netscape http://www.bioafrica.net &
-
-item:-------------------------
-item:add a new website
-itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
-
-arg:name
-argtype:text
-arglabel:Enter the site name
-
-arg:url
-argtype:text
-arglabel:Enter the URL (including http://)
diff --git a/CORE/.GDEmenusthat~ b/CORE/.GDEmenusthat~
deleted file mode 100644
index ca925b9..0000000
--- a/CORE/.GDEmenusthat~
+++ /dev/null
@@ -1,761 +0,0 @@
-1menu:File
-
-item:test cmask output
-itemmethod: kedit in1
-
-in:in1
-informat:colormask
-
-item:New sequence
-itemmethod:echo "$Type$Name" > out1
-itemmeta:n
-itemhelp:new_sequence.help
-
-arg:Name
-argtype:text
-arglabel:New Sequence name?
-argtext:New
-
-arg:Type
-argtype:choice_list
-arglabel:Type?
-argchoice:DNA/RNA:#
-argchoice:Amino Acid:%
-argchoice:Text:\"
-argchoice:Mask:@
-
-out:out1
-outformat:flat
-
-item:Import Foreign Format
-itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
-itemhelp:readseq.help
-
-arg:INPUTFILE
-argtype:text
-arglabel:Name of foreign file?
-
-out:OUTPUTFILE
-outformat:genbank
-
-item:Export Foreign Format
-itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
-itemhelp:readseq.help
-
-arg:FORMAT
-argtype:choice_list
-argchoice:FASTA:8
-argchoice:NEXUS:17
-argchoice:Phylip v3.3:12
-argchoice:IG/Stanford:1
-argchoice:GenBank:2
-argchoice:NBRF:3
-argchoice:EMBL:4
-argchoice:GCG:5
-argchoice:DNA Strider:6
-argchoice:Fitch:7
-argchoice:Pearson:8
-argchoice:Zuker:9
-argchoice:Olsen:10
-argchoice:Phylip v3.2:11
-argchoice:Phylip v3.3:12
-argchoice:Plain text:13
-
-arg:OUTPUTFILE
-argtype:text
-arglabel:Save as?
-
-in:INPUTFILE
-informat:genbank
-
-
-item:Save Selection
-itemmethod: cat $SAVE_FUNC > $Name
-itemhelp:save_selection.help
-
-arg:SAVE_FUNC
-argtype:chooser
-arglabel:File format
-argchoice:Flat:in1
-argchoice:Genbank:in2
-argchoice:GDE/HGL:in3
-
-arg:Name
-argtype:text
-arglabel:File name?
-
-in:in1
-informat:flat
-
-in:in2
-informat:genbank
-
-in:in3
-informat:gde
-
-item:Print Selection
-itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
-itemhelp:print_alignment.help
-
-arg:SCALE
-argtype:slider
-arglabel:Reduce printout by?
-argmin:1
-argmax:20
-argvalue:1
-
-arg:CMD
-argtype:chooser
-argchoice:Lpr:lpr
-argchoice:Enscript Gaudy:enscript -G -q
-argchoice:Enscript Two column:enscript -2rG
-
-arg:PRINTER
-argtype:text
-arglabel:Which printer?
-argtext:lp
-
-in:in1
-informat:gde
-insave:
-
-menu:Edit
-
-item:Sort
-itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
-itemhelp:heapsortHGL.help
-
-arg:PRIM_KEY
-argtype:choice_list
-argchoice:Group:group-ID
-argchoice:type:type
-argchoice:name:name
-argchoice:Sequence ID:sequence-ID
-argchoice:creator:creator
-argchoice:offset:offset
-arglabel:Primary sort field?
-
-arg:SEC_KEY
-argtype:choice_list
-argchoice:None:
-argchoice:Group:group-ID
-argchoice:type:type
-argchoice:name:name
-argchoice:Sequence ID:sequence-ID
-argchoice:creator:creator
-argchoice:offset:offset
-arglabel:Secondary sort field?
-
-in:in1
-informat:gde
-insave:
-
-item:extract
-itemmethod:(gde in1;/bin/rm -f in1)&
-
-in:in1
-informat:gde
-inmask:
-insave:
-
-menu:DNA/RNA
-
-item:Translate...
-itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
-
-arg:FRAME
-argtype:chooser
-arglabel:Which reading frame?
-argchoice:First:1
-argchoice:Second:2
-argchoice:Third:3
-argchoice:All six:6
-
-arg:MNFRM
-arglabel:Minimum length of AA sequence to translate?
-argtype:slider
-argmin:0
-argmax:100
-argvalue:20
-
-arg:LTRCODE
-argtype:chooser
-arglabel:Translate to:
-argchoice:Single letter codes:
-argchoice:Triple letter codes:-3
-
-arg:TBL
-arglabel:Codon table?
-argtype:chooser
-argchoice:universal:1
-argchoice:mycoplasma:2
-argchoice:yeast:3
-argchoice:Vert. mito.:4
-in:in1
-informat:gde
-
-out:out1
-outformat:gde
-
-item:Dot plot
-itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
-itemhelp:DotPlotTool.help
-
-in:in1
-informat:gde
-insave:
-
-item:Clustal alignment
-itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
-
-itemhelp:clustal_help
-
-arg:KTUP
-argtype:slider
-arglabel:K-tuple size for pairwise search
-argmin:1
-argmax:10
-argvalue:2
-
-arg:WIN
-argtype:slider
-arglabel:Window size
-argmin:1
-argmax:10
-argvalue:4
-
-arg:Trans
-argtype:chooser
-arglabel:Transitions weighted?
-argchoice:Yes:/TRANSIT
-argchoice:No:
-
-arg:FIXED
-argtype:slider
-arglabel:Fixed gap penalty
-argmin:1
-argmax:100
-argvalue:10
-
-arg:FLOAT
-arglabel:Floating gap penalty
-argtype:slider
-argmin:1
-argmax:100
-argvalue:10
-
-arg:REPORT
-argtype:chooser
-arglabel:View assembly report?
-argchoice:No:
-argchoice:Yes:kedit in1.rpt&
-
-
-in:in1
-informat:flat
-insave:
-
-item:Variable Positions
-itemmethod:varpos $REV < in1 > out1
-
-arg:REV
-argtype:chooser
-arglabel:Highlight (darken)
-argchoice:Conserved positions:
-argchoice:variable positions:-rev
-
-in:in1
-informat:flat
-
-out:out1
-outformat:colormask
-
-item:Phrap
-itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
-
-in:in1
-informat:genbank
-
-out:out1
-outformat:genbank
-
-item:SNAP
-itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
-
-in:in1
-informat:flat
-out:out1
-outformat:text
-
-
-
-
-item:Find all
-itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
-itemhelp:findall.help
-itemmeta:f
-
-arg:SEARCH
-argtype:text
-arglabel:Search String
-
-arg:PRCNT
-argtype:slider
-arglabel:Percent mismatch
-argmin:0
-argmax:75
-argvalue:10
-
-arg:CASE
-argtype:chooser
-arglabel:Case
-argchoice:Upper equals lower:
-argchoice:Upper not equal lower:-case
-
-arg:UT
-argtype:chooser
-arglabel:U equal T?
-argchoice:Yes:-u=t
-argchoice:No:
-argvalue:0
-
-arg:MAT
-arglabel:Match color
-argtype:choice_list
-argchoice:yellow:1
-argchoice:violet:2
-argchoice:red:3
-argchoice:aqua:4
-argchoice:green:5
-argchoice:blue:6
-argchoice:grey:11
-argchoice:black:8
-argvalue:2
-
-arg:MIS
-argtype:choice_list
-arglabel:Mismatch color
-argchoice:yellow:1
-argchoice:violet:2
-argchoice:red:3
-argchoice:aqua:4
-argchoice:green:5
-argchoice:blue:6
-argchoice:grey:11
-argchoice:black:8
-argvalue:7
-
-in:in1
-informat:flat
-
-out:out1
-outformat:colormask
-
-item:Sequence Consensus
-itemmethod:(MakeCons in1 $METHOD $MASK > out1)
-itemhelp:MakeCons.help
-
-arg:METHOD
-arglabel:Method
-argtype:chooser
-argchoice:IUPAC:-iupac
-argchoice:Majority:-majority $PERCENT
-
-arg:MASK
-argtype:chooser
-arglabel:Create a new:
-argchoice:Sequence:
-argchoice:Selection Mask: | Consto01mask
-
-arg:PERCENT
-arglabel:Minimum Percentage for Majority
-argtype:slider
-argmin:50
-argmax:100
-argvalue:75
-
-in:in1
-informat:gde
-
-out:out1
-outformat:gde
-
-
-#Menu for DNA/RNA
-
-item:blastn
-itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDBDNA
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:4
-argmax:18
-argvalue:12
-
-arg:MATCH
-argtype:slider
-arglabel:Match Score
-argmin:1
-argmax:10
-argvalue:5
-
-arg:MMSCORE
-argtype:slider
-arglabel:Mismatch Score
-argmin:-10
-argmax:-1
-argvalue:-5
-
-item:blastx
-itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
-
-
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
-argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:1
-argmax:5
-argvalue:3
-
-arg:Matrix
-arglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:CODE
-argtype:choice_list
-arglabel:Genetic Code
-argchoice:Standard or Universal:0
-argchoice:Vertebrate Mitochondrial:1
-argchoice:Yeast Mitochondrial:2
-argchoice:Mold Mitochondrial and Mycoplasma:3
-argchoice:Invertebrate Mitochondrial:4
-argchoice:Ciliate Macronuclear:5
-argchoice:Protozoan Mitochondrial:6
-argchoice:Plant Mitochondrial:7
-argchoice:Echinodermate Mitochondrial:8
-
-item:------------------------
-
-item:Add a new DNA blast db
-itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
-
-arg:sourcefile
-argtype:text
-arglabel: enter the file name
-
-arg:menuname
-argtype:text
-arglabel: enter the name of the DB
-
-menu:seq. datasets
-
-item:-------------
-item:add a new dataset
-itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file
-
-arg:name
-argtype:text
-arglabel:Enter the dataset name ?
-
-arg:file
-argtype:text
-arglabel:Enter the dataset file (in FASTA) ?
-
-
-#Menu for Protein
-menu:protein
-item:blastp
-itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
-
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
-
-arg:Matrix
-barglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:1
-argmax:5
-argvalue:3
-
-item:tblastn
-itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
-argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
-
-arg:Matrix
-arglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:4
-argmax:18
-argvalue:12
-
-arg:CODE
-argtype:choice_list
-arglabel:Genetic Code
-argchoice:Standard or Universal:0
-argchoice:Vertebrate Mitochondrial:1
-argchoice:Yeast Mitochondrial:2
-argchoice:Mold Mitochondrial and Mycoplasma:3
-argchoice:Invertebrate Mitochondrial:4
-argchoice:Ciliate Macronuclear:5
-argchoice:Protozoan Mitochondrial:6
-argchoice:Plant Mitochondrial:7
-argchoice:Echinodermate Mitochondrial:8
-
-
-item:Map View
-itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
-itemhelp:mapview.help
-
-in:in1
-informat:gde
-insave:
-
-arg:PBL
-arglabel:Pixel Between Lines
-argtype:slider
-argvalue:10
-argmin:1
-argmax:15
-
-arg:NPP
-arglabel:Nucleotides Per Pixel
-argtype:slider
-argvalue:1
-argmin:1
-argmax:20
-
-arg:LWIDTH
-arglabel:Line Thickness
-argtype:slider
-argvalue:2
-argmin:1
-argmax:5
-
-item:--------------------------
-item:Add a new DNA blast db
-itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
-
-arg:sourcefile
-argtype:text
-arglabel: Enter the file (in FASTA)
-
-arg:menuname
-argtype:text
-arglabel: Enter the name of the DB
-
-menu:Phylogeny
-
-
-item:Phylip help
-itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
-
-arg:FILE
-argtype:choice_list
-arglabel:Which program?
-argchoice:clique:clique.html
-argchoice:consense:consense.html
-argchoice:contchar:contchar.html
-argchoice:contml:contml.html
-argchoice:contrast:contrast.html
-argchoice:discrete:discrete.html
-argchoice:distance:distance.html
-argchoice:dnaboot:dnaboot.html
-argchoice:dnacomp:dnacomp.html
-argchoice:dnadist:dnadist.html
-argchoice:dnainvar:dnainvar.html
-argchoice:dnaml:dnaml.html
-argchoice:dnamlk:dnamlk.html
-argchoice:dnamove:dnamove.html
-argchoice:dnapars:dnapars.html
-argchoice:dnapenny:dnapenny.html
-argchoice:dollop:dollop.html
-argchoice:dolmove:dolmove.html
-argchoice:dolpenny:dolpenny.html
-argchoice:draw:draw.html
-argchoice:drawgram:drawgram.html
-argchoice:drawtree:drawtree.html
-argchoice:factor:factor.html
-argchoice:fitch:fitch.html
-argchoice:gendist:gendist.html
-argchoice:kitsch:kitsch.html
-argchoice:main:main.html
-argchoice:mix:mix.html
-argchoice:move:move.html
-argchoice:neighbor:neighbor.html
-argchoice:penny:penny.html
-argchoice:protpars:protpars.html
-argchoice:read.me.general:read.me.general.html
-argchoice:restml:restml.html
-argchoice:seqboot:seqboot.html
-argchoice:sequence:sequence.html
-
-
-
-item:Phylip 3.5
-itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
-
-arg:PROGRAM
-argtype:choice_list
-arglabel:Which program to run?
-argchoice:DNAPARS:dnapars
-argchoice:DNABOOT:dnaboot
-argchoice:DNAPENNY:dnapenny
-argchoice:DNAML:dnaml
-argchoice:DNAMLK:dnamlk
-argchoice:DNACOMP:dnacomp
-argchoice:DNAMOVE:dnamove
-argchoice:DNAINVAR:dnainvar
-argchoice:PROTPARS:protpars
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-
-
-item:Phylip DNA Distance methods
-itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
-
-arg:EXPLAIN
-argtype:text
-arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE
-
-
-arg:PROGRAM
-arglabel:Which method?
-argtype:chooser
-argchoice:DNADIST+NEIGHBOR:
-argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
-
-arg:PROG
-arglabel:Run ?
-argtype:chooser
-argchoice:Run without Bootstrap:
-argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
-
-arg:DNA
-argtype:text
-arglabel:Name of DNADIST outfile?
-
-arg:NEI
-argtype:text
-arglabel:Name of NEIGHBOR outfile?
-
-arg:TREE
-argtype:text
-arglabel:Name of TREEFILE ?
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-item:Phylip PROTEIN Distance methods
-itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
-
-arg:PROGRAM
-arglabel:Which method?
-argtype:chooser
-argchoice:PROTDIST+NEIGHBOR:
-argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
-
-arg:PROG
-arglabel:Which method?
-argtype:chooser
-argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
-argchoice:No Bootstrap:
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-
-
-
-
-menu:On-Line Res.
-
-item:GDE for Linux resources at Bioafrica.net
-itemmethod:netscape http://www.bioafrica.net &
-
-item:-------------------------
-item:add a new website
-itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url
-
-arg:name
-argtype:text
-arglabel:Enter the site name
-
-arg:url
-argtype:text
-arglabel:Enter the URL (including http://)
diff --git a/CORE/.GDEmenus~ b/CORE/.GDEmenus~
deleted file mode 100644
index fa1cff0..0000000
--- a/CORE/.GDEmenus~
+++ /dev/null
@@ -1,791 +0,0 @@
-1menu:File
-
-item:test cmask output
-itemmethod: kedit in1
-
-in:in1
-informat:colormask
-
-item:New sequence
-itemmethod:echo "$Type$Name" > out1
-itemmeta:n
-itemhelp:new_sequence.help
-
-arg:Name
-argtype:text
-arglabel:New Sequence name?
-argtext:New
-
-arg:Type
-argtype:choice_list
-arglabel:Type?
-argchoice:DNA/RNA:#
-argchoice:Amino Acid:%
-argchoice:Text:\"
-argchoice:Mask:@
-
-out:out1
-outformat:flat
-
-item:Import Foreign Format
-itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
-itemhelp:readseq.help
-
-arg:INPUTFILE
-argtype:text
-arglabel:Name of foreign file?
-
-out:OUTPUTFILE
-outformat:genbank
-
-item:Export Foreign Format
-itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
-itemhelp:readseq.help
-
-arg:FORMAT
-argtype:choice_list
-argchoice:FASTA:8
-argchoice:NEXUS:17
-argchoice:Phylip v3.3:12
-argchoice:IG/Stanford:1
-argchoice:GenBank:2
-argchoice:NBRF:3
-argchoice:EMBL:4
-argchoice:GCG:5
-argchoice:DNA Strider:6
-argchoice:Fitch:7
-argchoice:Pearson:8
-argchoice:Zuker:9
-argchoice:Olsen:10
-argchoice:Phylip v3.2:11
-argchoice:Phylip v3.3:12
-argchoice:Plain text:13
-
-arg:OUTPUTFILE
-argtype:text
-arglabel:Save as?
-
-in:INPUTFILE
-informat:genbank
-
-
-item:Save Selection
-itemmethod: cat $SAVE_FUNC > $Name
-itemhelp:save_selection.help
-
-arg:SAVE_FUNC
-argtype:chooser
-arglabel:File format
-argchoice:Flat:in1
-argchoice:Genbank:in2
-argchoice:GDE/HGL:in3
-
-arg:Name
-argtype:text
-arglabel:File name?
-
-in:in1
-informat:flat
-
-in:in2
-informat:genbank
-
-in:in3
-informat:gde
-
-item:Print Selection
-itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
-itemhelp:print_alignment.help
-
-arg:SCALE
-argtype:slider
-arglabel:Reduce printout by?
-argmin:1
-argmax:20
-argvalue:1
-
-arg:CMD
-argtype:chooser
-argchoice:Lpr:lpr
-argchoice:Enscript Gaudy:enscript -G -q
-argchoice:Enscript Two column:enscript -2rG
-
-arg:PRINTER
-argtype:text
-arglabel:Which printer?
-argtext:lp
-
-in:in1
-informat:gde
-insave:
-
-menu:Edit
-
-item:Sort
-itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
-itemhelp:heapsortHGL.help
-
-arg:PRIM_KEY
-argtype:choice_list
-argchoice:Group:group-ID
-argchoice:type:type
-argchoice:name:name
-argchoice:Sequence ID:sequence-ID
-argchoice:creator:creator
-argchoice:offset:offset
-arglabel:Primary sort field?
-
-arg:SEC_KEY
-argtype:choice_list
-argchoice:None:
-argchoice:Group:group-ID
-argchoice:type:type
-argchoice:name:name
-argchoice:Sequence ID:sequence-ID
-argchoice:creator:creator
-argchoice:offset:offset
-arglabel:Secondary sort field?
-
-in:in1
-informat:gde
-insave:
-
-item:extract
-itemmethod:(gde in1;/bin/rm -f in1)&
-
-in:in1
-informat:gde
-inmask:
-insave:
-
-menu:DNA/RNA
-
-item:Translate...
-itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
-
-arg:FRAME
-argtype:chooser
-arglabel:Which reading frame?
-argchoice:First:1
-argchoice:Second:2
-argchoice:Third:3
-argchoice:All six:6
-
-arg:MNFRM
-arglabel:Minimum length of AA sequence to translate?
-argtype:slider
-argmin:0
-argmax:100
-argvalue:20
-
-arg:LTRCODE
-argtype:chooser
-arglabel:Translate to:
-argchoice:Single letter codes:
-argchoice:Triple letter codes:-3
-
-arg:TBL
-arglabel:Codon table?
-argtype:chooser
-argchoice:universal:1
-argchoice:mycoplasma:2
-argchoice:yeast:3
-argchoice:Vert. mito.:4
-in:in1
-informat:gde
-
-out:out1
-outformat:gde
-
-item:Dot plot
-itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
-itemhelp:DotPlotTool.help
-
-in:in1
-informat:gde
-insave:
-
-item:Clustal alignment
-itemmethod:(tr '%#' '>'clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
-
-itemhelp:clustal_help
-
-arg:KTUP
-argtype:slider
-arglabel:K-tuple size for pairwise search
-argmin:1
-argmax:10
-argvalue:2
-
-arg:WIN
-argtype:slider
-arglabel:Window size
-argmin:1
-argmax:10
-argvalue:4
-
-arg:Trans
-argtype:chooser
-arglabel:Transitions weighted?
-argchoice:Yes:/TRANSIT
-argchoice:No:
-
-arg:FIXED
-argtype:slider
-arglabel:Fixed gap penalty
-argmin:1
-argmax:100
-argvalue:10
-
-arg:FLOAT
-arglabel:Floating gap penalty
-argtype:slider
-argmin:1
-argmax:100
-argvalue:10
-
-arg:REPORT
-argtype:chooser
-arglabel:View assembly report?
-argchoice:No:
-argchoice:Yes:kedit in1.rpt&
-
-
-in:in1
-informat:flat
-insave:
-
-item:Variable Positions
-itemmethod:varpos $REV < in1 > out1
-
-arg:REV
-argtype:chooser
-arglabel:Highlight (darken)
-argchoice:Conserved positions:
-argchoice:variable positions:-rev
-
-in:in1
-informat:flat
-
-out:out1
-outformat:colormask
-
-item:Phrap
-itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
-
-in:in1
-informat:genbank
-
-out:out1
-outformat:genbank
-
-item:SNAP
-itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
-
-in:in1
-informat:flat
-out:out1
-outformat:text
-
-
-
-
-item:Find all
-itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
-itemhelp:findall.help
-itemmeta:f
-
-arg:SEARCH
-argtype:text
-arglabel:Search String
-
-arg:PRCNT
-argtype:slider
-arglabel:Percent mismatch
-argmin:0
-argmax:75
-argvalue:10
-
-arg:CASE
-argtype:chooser
-arglabel:Case
-argchoice:Upper equals lower:
-argchoice:Upper not equal lower:-case
-
-arg:UT
-argtype:chooser
-arglabel:U equal T?
-argchoice:Yes:-u=t
-argchoice:No:
-argvalue:0
-
-arg:MAT
-arglabel:Match color
-argtype:choice_list
-argchoice:yellow:1
-argchoice:violet:2
-argchoice:red:3
-argchoice:aqua:4
-argchoice:green:5
-argchoice:blue:6
-argchoice:grey:11
-argchoice:black:8
-argvalue:2
-
-arg:MIS
-argtype:choice_list
-arglabel:Mismatch color
-argchoice:yellow:1
-argchoice:violet:2
-argchoice:red:3
-argchoice:aqua:4
-argchoice:green:5
-argchoice:blue:6
-argchoice:grey:11
-argchoice:black:8
-argvalue:7
-
-in:in1
-informat:flat
-
-out:out1
-outformat:colormask
-
-item:Sequence Consensus
-itemmethod:(MakeCons in1 $METHOD $MASK > out1)
-itemhelp:MakeCons.help
-
-arg:METHOD
-arglabel:Method
-argtype:chooser
-argchoice:IUPAC:-iupac
-argchoice:Majority:-majority $PERCENT
-
-arg:MASK
-argtype:chooser
-arglabel:Create a new:
-argchoice:Sequence:
-argchoice:Selection Mask: | Consto01mask
-
-arg:PERCENT
-arglabel:Minimum Percentage for Majority
-argtype:slider
-argmin:50
-argmax:100
-argvalue:75
-
-in:in1
-informat:gde
-
-out:out1
-outformat:gde
-
-
-#Menu for DNA/RNA
-
-item:blastn
-itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDBDNA
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV-1 Seq. Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:4
-argmax:18
-argvalue:12
-
-arg:MATCH
-argtype:slider
-arglabel:Match Score
-argmin:1
-argmax:10
-argvalue:5
-
-arg:MMSCORE
-argtype:slider
-arglabel:Mismatch Score
-argmin:-10
-argmax:-1
-argvalue:-5
-
-item:blastx
-itemmethod:(sed "s/[#%]/>/" in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
-
-
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDBDNA
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
-argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:1
-argmax:5
-argvalue:3
-
-arg:Matrix
-arglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:CODE
-argtype:choice_list
-arglabel:Genetic Code
-
-argchoice:Standard or Universal:0
-argchoice:Vertebrate Mitochondrial:1
-argchoice:Yeast Mitochondrial:2
-argchoice:Mold Mitochondrial and Mycoplasma:3
-argchoice:Invertebrate Mitochondrial:4
-argchoice:Ciliate Macronuclear:5
-argchoice:Protozoan Mitochondrial:6
-argchoice:Plant Mitochondrial:7
-argchoice:Echinodermate Mitochondrial:8
-
-item:------------------------
-
-item:Add a new DNA blast db
-itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname;
-
-arg:sourcefile
-argtype:text
-arglabel: enter the file name
-
-arg:menuname
-argtype:text
-arglabel: enter the name of the DB
-
-menu:seq. datasets
-item:tttt
-itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
-out:OUTPUTFILE
-outformat:genbank
-
-item:HIV1POLDNA.fasta
-itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
-out:OUTPUTFILE
-outformat:genbank
-
-item:structure
-itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
-out:OUTPUTFILE
-outformat:genbank
-
-item:-------------
-item:add a new dataset
-itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
-
-arg:name
-argtype:text
-arglabel:Enter the dataset name ?
-
-arg:file
-argtype:text
-arglabel:Enter the dataset file (in FASTA) ?
-
-
-#Menu for Protein
-menu:protein
-item:blastp
-itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM30; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
-
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDBPROT
-argtype:choice_list
-arglabel:Which Database
-argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
-argchoice:ttttt:/usr/local/biotools/db/tttt
-argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp
-argchoice:yyyy:/usr/local/biotools/db/test
-
-arg:Matrix
-barglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:1
-argmax:5
-argvalue:3
-
-item:tblastn
-itemmethod:(sed "s/[#%]/>/" in1.f; cp /usr/local/biotools/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
-
-in:in1
-informat:flat
-insave:
-
-arg:BLASTDB
-argtype:choice_list
-arglabel:Which Database
-argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
-argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
-
-arg:Matrix
-arglabel:Substitution Matrix:
-argtype:choice_list
-argchoice:PAM30:PAM30
-argchoice:PAM70:PAM70
-
-arg:WORDLEN
-argtype:slider
-arglabel:Word Size
-argmin:4
-argmax:18
-argvalue:12
-
-arg:CODE
-argtype:choice_list
-arglabel:Genetic Code
-argchoice:Standard or Universal:0
-argchoice:Vertebrate Mitochondrial:1
-argchoice:Yeast Mitochondrial:2
-argchoice:Mold Mitochondrial and Mycoplasma:3
-argchoice:Invertebrate Mitochondrial:4
-argchoice:Ciliate Macronuclear:5
-argchoice:Protozoan Mitochondrial:6
-argchoice:Plant Mitochondrial:7
-argchoice:Echinodermate Mitochondrial:8
-
-
-item:Map View
-itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
-itemhelp:mapview.help
-
-in:in1
-informat:gde
-insave:
-
-arg:PBL
-arglabel:Pixel Between Lines
-argtype:slider
-argvalue:10
-argmin:1
-argmax:15
-
-arg:NPP
-arglabel:Nucleotides Per Pixel
-argtype:slider
-argvalue:1
-argmin:1
-argmax:20
-
-arg:LWIDTH
-arglabel:Line Thickness
-argtype:slider
-argvalue:2
-argmin:1
-argmax:5
-
-item:--------------------------
-item:Add a new Protein blast db
-itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname;
-
-arg:sourcefile
-argtype:text
-arglabel: Enter the file (in FASTA)
-
-arg:menuname
-argtype:text
-arglabel: Enter the name of the DB
-
-menu:Phylogeny
-
-
-item:Phylip help
-itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)&
-
-arg:FILE
-argtype:choice_list
-arglabel:Which program?
-argchoice:clique:clique.html
-argchoice:consense:consense.html
-argchoice:contchar:contchar.html
-argchoice:contml:contml.html
-argchoice:contrast:contrast.html
-argchoice:discrete:discrete.html
-argchoice:distance:distance.html
-argchoice:dnaboot:dnaboot.html
-argchoice:dnacomp:dnacomp.html
-argchoice:dnadist:dnadist.html
-argchoice:dnainvar:dnainvar.html
-argchoice:dnaml:dnaml.html
-argchoice:dnamlk:dnamlk.html
-argchoice:dnamove:dnamove.html
-argchoice:dnapars:dnapars.html
-argchoice:dnapenny:dnapenny.html
-argchoice:dollop:dollop.html
-argchoice:dolmove:dolmove.html
-argchoice:dolpenny:dolpenny.html
-argchoice:draw:draw.html
-argchoice:drawgram:drawgram.html
-argchoice:drawtree:drawtree.html
-argchoice:factor:factor.html
-argchoice:fitch:fitch.html
-argchoice:gendist:gendist.html
-argchoice:kitsch:kitsch.html
-argchoice:main:main.html
-argchoice:mix:mix.html
-argchoice:move:move.html
-argchoice:neighbor:neighbor.html
-argchoice:penny:penny.html
-argchoice:protpars:protpars.html
-argchoice:read.me.general:read.me.general.html
-argchoice:restml:restml.html
-argchoice:seqboot:seqboot.html
-argchoice:sequence:sequence.html
-
-
-
-item:Phylip 3.5
-itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
-
-arg:PROGRAM
-argtype:choice_list
-arglabel:Which program to run?
-argchoice:DNAPARS:dnapars
-argchoice:DNABOOT:dnaboot
-argchoice:DNAPENNY:dnapenny
-argchoice:DNAML:dnaml
-argchoice:DNAMLK:dnamlk
-argchoice:DNACOMP:dnacomp
-argchoice:DNAMOVE:dnamove
-argchoice:DNAINVAR:dnainvar
-argchoice:PROTPARS:protpars
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-
-
-item:Phylip DNA Distance methods
-itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
-
-arg:EXPLAIN
-argtype:text
-arglabel:To produce a bootstraped tree choose DNADIST+NEIGHOR+CONSENSE
-
-
-arg:PROGRAM
-arglabel:Which method?
-argtype:chooser
-argchoice:DNADIST+NEIGHBOR:
-argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
-
-arg:PROG
-arglabel:Run ?
-argtype:chooser
-argchoice:Run without Bootstrap:
-argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
-
-arg:DNA
-argtype:text
-arglabel:Name of DNADIST outfile?
-
-arg:NEI
-argtype:text
-arglabel:Name of NEIGHBOR outfile?
-
-arg:TREE
-argtype:text
-arglabel:Name of TREEFILE ?
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-item:Phylip PROTEIN Distance methods
-itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
-
-arg:PROGRAM
-arglabel:Which method?
-argtype:chooser
-argchoice:PROTDIST+NEIGHBOR:
-argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
-
-arg:PROG
-arglabel:Which method?
-argtype:chooser
-argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
-argchoice:No Bootstrap:
-
-arg:PREEDIT
-argtype:chooser
-arglabel:Edit input before running?
-argchoice:No:
-argchoice:Yes:kedit infile;
-
-in:in1
-informat:genbank
-inmask:
-insave:
-
-
-
-
-
-menu:On-Line Res.
-item:tytyt
-itemmethod:netscape hnu[phoph &
-item:SANBI
-itemmethod:netscape again &
-item:PlasmoDB
-itemmethod:netscape http://www.plasmodb.org &
-item:NCBI
-itemmethod:netscape http://www.ncbi.nlm.nih.gov &
-item:sanbi
-itemmethod:netscape http://www.sanbi.ac.za &
-item:SANBI
-itemmethod:netscape http://www.sanbi.ac.za &
-
-item:GDE for Linux resources at Bioafrica.net
-itemmethod:netscape http://www.bioafrica.net &
-
-item:-------------------------
-item:add a new website
-itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
-
-arg:name
-argtype:text
-arglabel:Enter the site name
-
-arg:url
-argtype:text
-arglabel:Enter the URL (including http://)
diff --git a/CORE/BasicDisplay.o b/CORE/BasicDisplay.o
deleted file mode 100644
index 5ac3fee..0000000
Binary files a/CORE/BasicDisplay.o and /dev/null differ
diff --git a/CORE/BuiltIn.o b/CORE/BuiltIn.o
deleted file mode 100644
index 8714e2a..0000000
Binary files a/CORE/BuiltIn.o and /dev/null differ
diff --git a/CORE/ChooseFile.o b/CORE/ChooseFile.o
deleted file mode 100644
index c21acc0..0000000
Binary files a/CORE/ChooseFile.o and /dev/null differ
diff --git a/CORE/CutCopyPaste.o b/CORE/CutCopyPaste.o
deleted file mode 100644
index 37e24c2..0000000
Binary files a/CORE/CutCopyPaste.o and /dev/null differ
diff --git a/CORE/DrawNA.o b/CORE/DrawNA.o
deleted file mode 100644
index 269a7df..0000000
Binary files a/CORE/DrawNA.o and /dev/null differ
diff --git a/CORE/Edit.o b/CORE/Edit.o
deleted file mode 100644
index 838edab..0000000
Binary files a/CORE/Edit.o and /dev/null differ
diff --git a/CORE/EventHandler.o b/CORE/EventHandler.o
deleted file mode 100644
index b6a4ed0..0000000
Binary files a/CORE/EventHandler.o and /dev/null differ
diff --git a/CORE/FileIO.c~ b/CORE/FileIO.c~
deleted file mode 100755
index 3087c50..0000000
--- a/CORE/FileIO.c~
+++ /dev/null
@@ -1,1056 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include "menudefs.h"
-#include "defines.h"
-
-/*
-LoadData():
- Load a data set from the command line argument.
-
-Copyright (c) 1989, University of Illinois board of trustees. All rights
-reserved. Written by Steven Smith at the Center for Prokaryote Genome
-Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr.
-Carl Woese.
-
-Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
-All rights reserved.
-
-*/
-
-LoadData(filename)
-char *filename;
-{
- extern NA_Alignment *DataSet;
- extern int DataType,FileFormat,Default_DNA_Trans[],Default_RNA_Trans[];
- extern int Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[];
-
- extern Frame frame;
- extern Canvas EditCan,EditNameCan;
- extern char FileName[];
- FILE *file;
- NA_Alignment *DataNaAln;
- char temp[1024];
-/*
-* Get file name, determine the file type, and away we go..
-*/
- if(Find2(filename,"gde")!=0)
- strcpy(FileName,filename);
- if( (file=fopen(filename,"r"))!=0 )
- {
- FindType(filename,&DataType,&FileFormat);
- switch(DataType)
- {
- case NASEQ_ALIGN:
- if(DataSet == NULL)
- {
- DataSet = (NA_Alignment*)Calloc(1,
- sizeof(NA_Alignment));
- DataNaAln =(NA_Alignment*)DataSet;
- DataSet->rel_offset = 0;
- }
- else
- DataNaAln = (NA_Alignment*)DataSet;
-
- LoadFile(filename,DataNaAln,
- DataType,FileFormat);
-
- break;
- default:
- break;
- }
- }
- fclose(file);
- sprintf(temp,"Genetic Data Environment 2.2 (%s)",FileName);
- xv_set(frame,
- FRAME_LABEL, temp,
- 0);
- return;
-}
-
-
-/*
-LoadFile():
- Load the given filename into the given dataset. Handle any
-type conversion needed to get the data into the specified data type.
-This routine is used in situations where the format and datatype is known.
-
-Copyright (c) 1989-1990, University of Illinois board of trustees. All
-rights reserved. Written by Steven Smith at the Center for Prokaryote Genome
-Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr.
-Carl Woese.
-
-Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
-All rights reserved.
-*/
-
-LoadFile(filename,dataset,type,format)
-char *filename;
-char *dataset;
-int type,format;
-{
- extern int DataType;
-
- if (DataType != type)
- fprintf(stderr,"Warning, datatypes do not match.\n");
-/*
-Handle the overwrite/create/merge dialog here.
-*/
- switch(format)
- {
- case NA_FLAT:
- ReadNA_Flat(filename,dataset,type);
- ((NA_Alignment*)dataset)->format = GDE;
- break;
-
- case GENBANK:
- ReadGen(filename,dataset,type);
- ((NA_Alignment*)dataset)->format = GENBANK;
- break;
-
- case GDE:
- ReadGDE(filename,dataset,type);
- ((NA_Alignment*)dataset)->format = GDE;
- break;
- case COLORMASK:
- ReadCMask(filename);
-
- default:
- break;
- }
- return;
-}
-
-
-
-/*
-* Print error message, and die
-*/
-ErrorOut(code,string)
-int code;
-char *string;
-{
- if (code == 0)
- {
- fprintf(stderr,"Error:%s\n",string);
- exit(1);
- }
- return;
-}
-
-
-/*
-* More robust memory management routines
-*/
-char *Calloc(count,size)
-int count,size;
-{
- char *temp;
-#ifdef SeeAlloc
- extern int TotalCalloc;
- TotalCalloc += count*size;
- fprintf(stderr,"Calloc %d %d\n",count*size,TotalCalloc);
-#endif
- temp = calloc(count,size);
- ErrorOut(temp,"Cannot allocate memory");
- return(temp);
-}
-
-char *Realloc(block,size)
-char *block;
-int size;
-{
- char *temp;
-#ifdef SeeAlloc
- extern int TotalRealloc;
- TotalRealloc += size;
- fprintf(stderr,"Realloc %d\n",TotalRealloc);
-#endif
- temp=realloc(block,size);
- ErrorOut(temp,"Cannot change memory size");
- return(temp);
-}
-
-Cfree(block)
-char* block;
-{
- if (block)
- {
- /* rtm 18.III.98
- FileIO.c: In function `Cfree':
- FileIO.c:181: void value not ignored as it ought to be
-
- if(cfree(block) == 0)
- Warning("Error in Cfree...");
- */
- cfree(block);
- }
- else
- Warning("Error in Cfree, NULL block");
- return;
-}
-
-
-
-/*
-* same as strdup
-*/
-char *String(string)
-char *string;
-{
- char *temp;
-
- temp = Calloc(strlen(string)+1,sizeof(char));
- strcpy(temp,string);
- return(temp);
-}
-
-
-FindType(name,dtype,ftype)
-char *name;
-int *dtype,*ftype;
-{
- FILE *file;
- char Inline[GBUFSIZ];
-
- file = fopen(name,"r");
- *dtype=0;
- *ftype=0;
-
- if (file == NULL)
- return(1);
-
- /*
-* Is this a flat file?
-* Get the first non blank line, see if a type marker shows up.
-*/
- fgets(Inline,GBUFSIZ,file);
- for(;strlen(Inline)<2 && fgets(Inline,GBUFSIZ,file) != NULL;);
- if(Inline[0] == '#' || Inline[0] == '%' ||
- Inline[0] == '"' || Inline[0] == '@' )
- {
- *dtype=NASEQ_ALIGN;
- *ftype=NA_FLAT;
- }
-
- /*
-* Else, try genbank
-*/
- else
- {
- fclose(file);
- file = fopen(name,"r");
- *dtype=0;
- *ftype=0;
-
- if (file == NULL)
- return(1);
-
- for(;fgets(Inline,GBUFSIZ,file) != NULL;)
- if(Find(Inline,"LOCUS"))
- {
- *dtype=NASEQ_ALIGN;
- *ftype=GENBANK;
- fclose(file);
- return(0);
- }
- /*
-* and last, try GDE
-*/
- else if(Find(Inline,"sequence"))
- {
- *dtype = NASEQ_ALIGN;
- *ftype = GDE;
- fclose(file);
- return(0);
- }
- else if(Find(Inline,"start:"))
- {
- *dtype = NASEQ_ALIGN;
- *ftype = COLORMASK;
- fclose(file);
- return(0);
- }
- }
-
- fclose(file);
- return(0);
-}
-
-AppendNA(buffer,len,seq)
-NA_Base *buffer;
-int len;
-NA_Sequence *seq;
-{
- int curlen=0,j;
- NA_Base *temp;
-
- if(seq->seqlen+len >= seq->seqmaxlen)
- {
- if(seq->seqlen>0)
- seq->sequence = (NA_Base*)Realloc(seq->sequence,
- (seq->seqlen + len+GBUFSIZ) * sizeof(NA_Base));
- else
- seq->sequence = (NA_Base*)Calloc(1,(seq->seqlen +
- len+GBUFSIZ) * sizeof(NA_Base));
- seq->seqmaxlen = seq->seqlen + len+GBUFSIZ;
- }
- /*
-* seqlen is the length, and the index of the next free
-* base
-*/
- curlen = seq->seqlen + seq->offset;
- for(j=0;jseqlen += len;
- return;
-}
-
-Ascii2NA(buffer,len,matrix)
-char *buffer;
-int len;
-int matrix[16];
-{
- /*
-* if the translation matrix exists, use it to
-* encode the buffer.
-*/
- register i;
- if(matrix != NULL)
- for(i=0;inumelements == (int) NULL)
- return;
- seqs = aln->element;
-
- file = fopen(filename,"w");
- if(file == NULL)
- {
- Warning("Cannot open file for output");
- return(1);
- }
- if(maskable && (method != SELECT_REGION))
- {
- for(j=0;jnumelements;j++)
- if(seqs[j].elementtype == MASK &&
- seqs[j].selected)
- mask = j;
- }
- for(j=0;jnumelements;j++)
- {
- SeqNorm(&(seqs[j]));
- }
-
- for(j=0;jnumelements;j++)
- {
- if(method != SELECT_REGION)
- offset = seqs[j].offset;
- else
- for(offset=seqs[j].offset;
- aln->selection_mask[offset] == '0';
- offset++);
-
- if(offset+aln->rel_offset != 0)
- sprintf(offset_str,"(%d)",offset+aln->rel_offset);
- else
- offset_str[0] = '\0';
-
- if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION)
- || (method == SELECT_REGION && seqs[j].subselected)
- || method == ALL)
- {
- fprintf(file,"%c%s%s\n",
- seqs[j].elementtype == DNA?'#':
- seqs[j].elementtype == RNA?'#':
- seqs[j].elementtype == PROTEIN?'%':
- seqs[j].elementtype == TEXT?'"':
- seqs[j].elementtype == MASK?'@':'"',
- seqs[j].short_name,
- (offset+aln->rel_offset == 0)? "":offset_str);
- if(seqs[j].tmatrix)
- {
- if(mask == -1)
- for(k=0,kk=0;kk0)
- {
- buf[60] = '\0';
- fputs(buf,file);
- putc('\n',file);
- }
- if(method == SELECT_REGION)
- {
- if(aln->selection_mask[kk+offset]=='1')
- {
- buf[k%60] =((char)seqs[j].tmatrix[
- (int)getelem( &(seqs[j]),kk+offset) ]);
- k++;
- }
- }
- else
- {
- buf[k%60] =((char)seqs[j].tmatrix[
- (int)getelem( &(seqs[j]),kk+offset) ]);
- k++;
- }
- }
- else
- for(k=0,kk=0;kk1)
- {
- buf[60] = '\0';
- fputs(buf,file);
- putc('\n',file);
- }
- buf[k%60] = ((char)seqs[j].tmatrix
- [getelem(&(seqs[j]),kk+offset)]);
- }
- }
- }
- else
- {
- if(mask == -1)
- for(k=0,kk=0;kk0)
- {
- buf[60] = '\0';
- fputs(buf,file);
- putc('\n',file);
- }
- if(method == SELECT_REGION)
- {
- if(aln->selection_mask[kk+offset]=='1')
- {
- buf[k%60] =(getelem( &(seqs[j]),kk+offset));
- k++;
- }
- }
- else
- {
- buf[k%60] =( getelem( &(seqs[j]),kk+offset) );
- k++;
- }
- }
- else
- for(k=0,kk=0;kk1)
- {
- buf[60] = '\0';
- fputs(buf,file);
- putc('\n',file);
- }
- buf[k%60] =((char)getelem(&(seqs[j]),
- kk+offset));
- }
- }
- }
- buf[(k%60)>0 ? (k%60):60] = '\0';
- fputs(buf,file);
- putc('\n',file);
- }
- }
- fclose(file);
- return(0);
-}
-
-
-Warning(s)
-char *s;
-{
- extern Frame frame;
- extern Panel_item left_foot,right_foot;
- Beep();
- xv_set(frame,FRAME_RIGHT_FOOTER,s,0);
- xv_set(right_foot,PANEL_LABEL_STRING,s,0);
-}
-
-
-InitNASeq(seq,type)
-NA_Sequence *seq;
-int type;
-{
- extern int Default_RNA_Trans[]; /* rtm 18.III.98 */
- extern int Default_DNA_Trans[],Default_NA_RTrans[];
- extern int
- Default_NA_RTrans[],Default_PROColor_LKUP[],Default_NAColor_LKUP[];
-
- SetTime(&(seq->t_stamp.origin));
- SetTime(&(seq->t_stamp.modify));
- strncpy(seq->id,uniqueID(),79);
- seq->seq_name[0] = '\0';
- seq->barcode[0] = '\0';
- seq->contig[0] = '\0';
- seq->membrane[0] = '\0';
- seq->authority[0] = '\0';
- seq->short_name[0] = '\0';
- seq->sequence = NULL;
- seq->offset = 0;
- seq->baggage = NULL;
- seq->baggage_len = 0;
- seq->baggage_maxlen = 0;
- seq->comments = NULL;
- seq->comments_len = 0;
- seq->comments_maxlen = 0;
- seq->description[0] = '\0';
- seq->mask = NULL;
- seq->seqlen = 0;
- seq->seqmaxlen = 0;
- seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION;
-#ifdef HGL
- seq->attr = 0;
-#else
- seq->attr = IS_5_TO_3 + IS_PRIMARY;
-#endif
- seq->elementtype = type;
- seq->groupid = 0;
- seq->groupb = NULL;
- seq->groupf = NULL;
- seq->cmask = NULL;
- seq->selected = 0;
- seq->subselected = 0;
-
- switch (type)
- {
- case DNA:
- seq->tmatrix = Default_DNA_Trans;
- seq->rmatrix = Default_NA_RTrans;
- seq->col_lut = Default_NAColor_LKUP;
- break;
- case RNA:
- seq->tmatrix = Default_RNA_Trans;
- seq->rmatrix = Default_NA_RTrans;
- seq->col_lut = Default_NAColor_LKUP;
- break;
- case PROTEIN:
- seq->tmatrix = NULL;
- seq->rmatrix = NULL;
- seq->col_lut = Default_PROColor_LKUP;
- break;
- case MASK:
- case TEXT:
- default:
- seq->tmatrix = NULL;
- seq->rmatrix = NULL;
- seq->col_lut = NULL;
- break;
- }
- return;
-}
-
-
-ReadCMask(filename)
-char *filename;
-{
- extern Frame frame;
- extern NA_Alignment *DataSet;
-
- char Inline[GBUFSIZ],head[GBUFSIZ],curname[GBUFSIZ],
- temp[GBUFSIZ];
- int IGNORE_DASH = FALSE,offset;
- NA_DisplayData *NAdd;
- NA_Alignment *aln;
-
- int i,j,k,curlen = 0,*colors,orig_ctype,jj,indx = 0;
- FILE *file;
-
- if(DataSet == NULL) return;
-
- NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata;
-
- if(NAdd == NULL)
- return;
-
- aln = (NA_Alignment*)DataSet;
-
- curname[0] = '\0';
- orig_ctype = NAdd->color_type;
- file = fopen(filename,"r");
- if(file == NULL)
- {
- Warning("File not found");
- Warning(filename);
- return;
- }
-
- NAdd->color_type = COLOR_ALN_MASK;
- for(;fgets(Inline,GBUFSIZ,file) !=0;)
- {
- if(Find(Inline,"offset:"))
- {
- crop(Inline,head,temp);
- sscanf(temp,"%d",&(aln->cmask_offset));
- }
- else if(Find(Inline,"nodash:"))
- IGNORE_DASH = TRUE;
- else if(Find(Inline,"dash:"))
- IGNORE_DASH = TRUE;
- else if(Find(Inline,"name:"))
- {
- crop(Inline,head,curname);
- curname[strlen(curname)-1] = '\0';
- for(j=0;jcolor_type = orig_ctype;
- return;
- }
- if(strlen(curname) != 0)
- {
- indx = -1;
- for(j=0;jnumelements;j++)
- if(Find(aln->element[j].short_name,curname)
- || Find(aln->element[j].id,curname))
- {
- if(aln->element[j].cmask != NULL)
- Cfree(aln -> element[j].cmask);
- colors=(int*)Calloc(aln->element[j]
- .seqmaxlen+1+aln->element[j].offset
- ,sizeof(int));
- aln->element[j].cmask = colors;
- NAdd->color_type = COLOR_SEQ_MASK;
- indx = j;
- j = aln->numelements;
- }
- if(indx == -1)
- colors=NULL;
- }
- else
- {
- if(aln->cmask != NULL) Cfree(aln->cmask);
- colors=(int*)Calloc(curlen,sizeof(int));
- aln->cmask = colors;
- aln->cmask_len = curlen;
- NAdd->color_type = COLOR_ALN_MASK;
- for(j=0;jelement[indx].seqlen);j++,jj++)
- {
- offset = aln->element[indx].offset;
- if(fgets(Inline,GBUFSIZ,file)==NULL)
- {
- Warning
- ("illegal format in colormask");
- NAdd->color_type = orig_ctype;
- return;
- }
-/*
-* Fixed so that the keyword nodash causes the colormask to be mapped
-* to the sequence, not the alignment.
-*
-* The allocated space is equal the seqlen of the matched sequence.
-*
-*/
- if(aln->element[indx].tmatrix)
- for(;(getelem(&(aln->element[indx]),jj
- +offset)
- ==(aln->element[indx].tmatrix['-'])
- || (getelem(&(aln->element[indx]),jj
- +offset)
- ==aln->element[indx].tmatrix['~']))
- && jj < aln->element[indx].seqlen;)
- colors[jj++] = 12;
- else
- for(;getelem(&(aln->element[indx]),jj
- +offset)
- =='-' && jj < aln->element[indx].seqlen;)
- colors[jj++] = 12;
-
- sscanf(Inline,"%d",&(colors[jj]));
- }
- }
- else if((indx == -1) && (strlen(curname) != 0))
- for(j=0;jcolor_type = orig_ctype;
- return;
- }
- sscanf(Inline,"%d",&(colors[j]));
- }
- IGNORE_DASH = FALSE;
- curname[0] = '\0';
- }
-
- }
- RepaintAll(TRUE);
- return;
-}
-
-
-ReadNA_Flat(filename,dataset,type) /* Read the flat-format sequence file `filename` and add its records to the alignment passed in `dataset`. */
-char *filename; /* path opened read-only with fopen */
-char *dataset; /* really an NA_Alignment*; cast below */
-int type; /* not referenced in the visible body */
-{
- int i, j, jj, c, curelem,offset; /* i and c are not used in the visible text */
- char name[GBUFSIZ]; /* not used in the visible text */
- char buffer[GBUFSIZ]; /* staging buffer handed to Ascii2NA / AppendNA */
- char origin[GBUFSIZ],ref[GBUFSIZ]; /* not used in the visible text */
- char Inline[GBUFSIZ],head[GBUFSIZ],tail[GBUFSIZ],temp[GBUFSIZ]; /* Inline holds the current input line */
- char curname[GBUFSIZ]; /* only cleared below; otherwise unused in the visible text */
-
- NA_Sequence *this_elem;
- NA_Alignment *data;
- extern int Default_DNA_Trans[],Default_RNA_Trans[],Default_NA_RTrans[];
-
- FILE *file;
-
- curname[0] = '\0';
- data = (NA_Alignment*)dataset;
-
- file = fopen(filename,"r"); /* NOTE(review): no fclose(file) is visible anywhere below - possible handle leak, confirm */
- if(file == NULL)
- {
- fprintf(stderr,"Cannot open %s.\n",filename);
- return;
- }
- for(;fgets(Inline,GBUFSIZ,file) !=0;) /* one input line per iteration until EOF */
- {
- if( /* a leading type marker starts a new sequence record */
- Inline[0] == '#' ||
- Inline[0] == '%' ||
- Inline[0] == '"' ||
- Inline[0] == '@'
- )
- {
- offset = 0;
- for(j=0;jnumelements++; /* NOTE(review): this line looks truncated (loop condition, loop body and the assignment to curelem are missing) - restore from the upstream source */
- if( curelem == 0 ) /* first record: allocate room for 5 sequences */
- {
- data->element=(NA_Sequence*)
- Calloc(5,sizeof(NA_Sequence));
- data->maxnumelements = 5;
- }
- else if (curelem==data->maxnumelements) /* array full: double its capacity */
- {
- (data->maxnumelements) *= 2;
- data->element=
- (NA_Sequence*)Realloc(data->element
- ,data->maxnumelements*sizeof(NA_Sequence));
- }
-
- InitNASeq(&(data->element[curelem]), /* element type chosen from the marker; anything unexpected falls back to TEXT */
- Inline[0] == '#'?DNA:
- Inline[0] == '%'?PROTEIN:
- Inline[0] == '"'?TEXT:
- Inline[0] == '@'?MASK:TEXT);
- this_elem= &(data->element[curelem]);
- if(Inline[strlen(Inline)-1] == '\n') /* strip the trailing newline before copying the name */
- Inline[strlen(Inline)-1] = '\0';
- strncpy(this_elem->short_name,(char*)&(Inline[1]),31); /* name is the text after the marker; NOTE(review): strncpy does not terminate when 31 or more chars are copied - confirm short_name is pre-zeroed */
- this_elem->offset = offset;
- }
- else if(Inline[0] != '\n') /* any other non-blank line is sequence data for the current record */
- {
- for(j=0,jj=0;jelement[curelem].rmatrix) /* NOTE(review): this line looks truncated (the copy into buffer and the start of the rmatrix test are missing) - restore from the upstream source */
- Ascii2NA(buffer,jj,data->element[curelem]
- .rmatrix);
- AppendNA(buffer,jj,&(data->element[curelem]));
- }
- }
-
- for(j=0;jnumelements;j++) /* NOTE(review): loop condition looks truncated; presumably iterates over all elements - confirm */
- data->maxlen = MAX(data->maxlen,data->element[j].seqlen + /* maxlen = largest seqlen + offset */
- data->element[j].offset);
-
- for(j=0;jnumelements;j++) /* NOTE(review): same truncation as the loop above */
- if(data->element[j].seqlen==0) /* empty sequences get all four protection flags set */
- data->element[j].protect =
- PROT_BASE_CHANGES+ PROT_GREY_SPACE+
- PROT_WHITE_SPACE+ PROT_TRANSLATION;
-
- NormalizeOffset(data);
- Regroup(data);
- return;
-}
-
-
-WriteStatus(aln,filename,method) /* Write a small status file describing the sequence on the cursor row: file format, sequence ID, cursor column/position, a typed name line, then the sequence characters. */
-NA_Alignment *aln; /* alignment the cursor row (NAdd->cursor_y) is looked up in */
-char *filename; /* status file; opened with mode "w", so existing content is replaced */
-int method; /* not referenced in this function */
-{
- extern int EditMode,FileFormat; /* EditMode is only used by the disabled fprintf below */
- extern NA_Alignment *DataSet;
- NA_DisplayData *NAdd;
- NA_Sequence *this_seq;
- int j;
- FILE *file;
-
- if(DataSet == NULL) /* nothing loaded: no file is created */
- return;
-
- NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; /* cursor state comes from the global DataSet, not from aln */
- if(NAdd == NULL)
- return;
-
- file = fopen(filename,"w");
- if (file == NULL)
- {
- Warning("Cannot open status file.");
- return(1); /* only path that returns a value; all other exits use a bare return */
- }
- fprintf(file,"File_format: %s\n",FileFormat==GENBANK?"genbank":"flat");
- /*
- fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert":
- "check");
-*/
-
- this_seq = &(aln->element[NAdd->cursor_y]); /* NOTE(review): cursor_y is not range-checked against aln here - confirm callers guarantee it */
- if(this_seq->id != NULL)
- fprintf(file,"sequence-ID %s\n",this_seq->id);
- fprintf(file,"Column: %d\nPos:%d\n",NAdd->cursor_x,NAdd->position);
- switch(this_seq->elementtype) /* name line uses the flat-file type markers: # % @ and double quote */
- {
- case DNA:
- case RNA:
- fprintf(file,"#%s\n",
- this_seq->short_name);
- break;
- case PROTEIN:
- fprintf(file,"%%%s\n",
- this_seq->short_name);
- break;
- case MASK:
- fprintf(file,"@%s\n",
- this_seq->short_name);
- break;
- case TEXT:
- fprintf(file,"%c%s\n",'"',
- this_seq->short_name);
- break;
- default: /* unknown element types get no name line */
- break;
- }
- if(this_seq->tmatrix) /* with a translation table, each stored element is mapped through it before output */
- for(j=0;j < this_seq->seqlen;j++) /* restored loop bound (was the undeclared identifier jseqlen) */
- putc(this_seq->tmatrix[getelem(this_seq,j)],file);
- else
- for(j=0;j < this_seq->seqlen;j++) /* restored loop bound (was the undeclared identifier jseqlen) */
- putc(getelem(this_seq,j),file);
-
- fclose(file);
- return;
-}
-
-ReadStatus(filename) /* Currently a no-op: the whole body below is commented out. */
-char *filename; /* unused while the body is disabled */
-{
- /* Disabled draft of a status-file reader, kept for reference:
- int i,j;
- FILE *file;
- char Inline[GBUFSIZ],head[GBUFSIZ];
- file = fopen(filename,"r");
- for(;!DONE;)
- {
- fgets(Inline,GBUFSIZ,file);
- if(strlen(Inline) == 0)
- DONE = TRUE;
- else
- {
- sscanf(Inline,"%s",head);
- if(strncmp(head,"Col",3) != 0)
- {
- sscanf(Inline,"%*s %d",head,&(DataSet->nadd->
- cursor_x),&(DataSet->nadd->cursory);
- }
- else if(strncmp(head,"Pos",3) != 0)
- {
- }
- }
- }
-
-*/
-}
-
-
-NormalizeOffset(aln) /* Shift every sequence offset so the smallest becomes 0, fold the shift into aln->rel_offset, and recompute aln->maxlen. */
-NA_Alignment *aln; /* alignment updated in place */
-{
- int j,offset = 99999999; /* offset starts as a large sentinel and ends as the minimum element offset */
-
- for(j=0;j < aln->numelements;j++) /* pass 1: find the smallest offset */
- offset = MIN(offset,aln->element[j].offset);
-
- for(j=0;j < aln->numelements;j++) /* pass 2: rebase every element on that minimum */
- aln->element[j].offset -= offset;
-
- aln->maxlen = -999999999; /* sentinel; stays at this value when there are no elements */
- for(j=0;j < aln->numelements;j++) /* pass 3: maxlen = largest seqlen + offset */
- aln->maxlen = MAX(aln->element[j].seqlen+aln->element[j].offset,
- aln->maxlen);
-
- aln->rel_offset += offset; /* remember how far the alignment was shifted */
-
- if(aln->numelements == 0) /* empty alignment: discard the sentinel that was just added */
- aln->rel_offset = 0;
-
- return;
-}
-
-WriteCMask(aln,filename,method,maskable) /* Writes a header line per chosen sequence to `filename` and, in the part visible here, gathers getcmask() values into a buffer. NOTE(review): the text of this function is cut off after the last line of this block. */
-NA_Alignment *aln; /* alignment whose elements are written */
-char *filename; /* output file, opened with mode "w" */
-int method,maskable; /* method is compared with SELECT_REGION and ALL; maskable enables the search for a selected MASK sequence */
-{
- int j,kk,mask = -1,k,offset,min_offset= -999999; /* mask = index of the selected MASK sequence, -1 if none; min_offset is not referenced in the visible part */
- char offset_str[100];
- int *buf;
- NA_Sequence *seqs;
- FILE *file;
- if(aln == NULL)
- return;
- if(aln->numelements == (int) NULL) /* i.e. numelements == 0: nothing to write */
- return;
- seqs = aln->element;
-
- file = fopen(filename,"w");
- if(file == NULL)
- {
- Warning("Cannot open file for output");
- return(1); /* only path that returns a value; the early exits above use a bare return */
- }
- if(maskable && (method != SELECT_REGION))
- {
- for(j=0;jnumelements;j++) /* NOTE(review): loop condition looks truncated; presumably iterates over all elements - confirm */
- if(seqs[j].elementtype == MASK && /* the last selected MASK element wins */
- seqs[j].selected)
- mask = j;
- }
- for(j=0;jnumelements;j++) /* NOTE(review): same truncation as above */
- {
- SeqNorm(&(seqs[j]));
- }
-
- for(j=0;jnumelements;j++) /* NOTE(review): same truncation as above */
- {
- if(method != SELECT_REGION)
- offset = seqs[j].offset;
- else
- for(offset=seqs[j].offset; /* region mode: advance offset while selection_mask holds '0' */
- aln->selection_mask[offset] == '0'; /* NOTE(review): no upper bound on offset is checked here - confirm the mask always contains a non-'0' entry */
- offset++);
-
- if(offset+aln->rel_offset != 0) /* a non-zero absolute offset is rendered as "(N)" */
- sprintf(offset_str,"(%d)",offset+aln->rel_offset);
- else
- offset_str[0] = '\0';
-
- if(((j!=mask) && (seqs[j].selected) && method != SELECT_REGION) /* written when: selected and not the mask itself, or subselected in region mode, or method is ALL */
- || (method == SELECT_REGION && seqs[j].subselected)
- || method == ALL)
- {
- fprintf(file,"%c%s%s\n", /* header line: type marker, short name, optional offset string */
- seqs[j].elementtype == DNA?'#':
- seqs[j].elementtype == RNA?'#':
- seqs[j].elementtype == PROTEIN?'%':
- seqs[j].elementtype == TEXT?'"':
- seqs[j].elementtype == MASK?'@':'"',
- seqs[j].short_name,
- (offset+aln->rel_offset == 0)? "":offset_str); /* offset_str is already empty in this case, so the test is redundant */
-
- if(seqs[j].cmask != NULL)
- {
-
- buf =(int*) Calloc(seqs[j].seqlen,sizeof(int) ); /* one int per sequence position; the matching free is not visible in this text */
-
- if(mask == -1)
- {
- for(k=0,kk=0;kkselection_mask[kk+offset]=='1') /* NOTE(review): this line looks truncated (loop condition and the start of the selection test are missing) - restore from the upstream source */
- buf[k++] = (getcmask( &(seqs[j]),kk+offset));
- }
-
- else
- buf[k++] =( getcmask( &(seqs[j]),kk+offset) );
- }
- }
- else
- {
- for(k=0,kk=0;kk igfile
-
- DESCRIPTION
- Converts interleaved .aln output from Clustal V into
- sequential .ig (IntelliGenetics) format for use by MASE.
-
- clustalfile:
- CLUSTAL V multiple sequence alignment
-
- name1 AACTTTCG
- name2 ATCTTTCG
- * ******
-
- name1 CCTGCT
- name2 CCCGCT
- ** ***
-
- igfile:
- ;
- name1
- AACTTTCG
- CCTGCT
- :
- name2
- ATCTTTCG
- CCCGCT
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/dbstat.doc b/CORE/xylem/dbstat.doc
deleted file mode 100644
index fa922c9..0000000
--- a/CORE/xylem/dbstat.doc
+++ /dev/null
@@ -1,36 +0,0 @@
- dbstat update 3 Feb 94
-
- NAME
- dbstat - calculates amino acid frequencies in a protein
- database
-
- SYNOPSIS
- dbstat
-
- DESCRIPTION
- dbstat reads a file of one or more protein sequences
- and calculates the amino acid frequencies, both in terms of
- absolute numbers, and as a fraction of the total.
-
- input - The input file is the standard .wrp (Pearson) format,
- such as that produced by getob:
-
- >name
- ; one or more comment lines (optional)
- sequence lines
-
- Comments begin either with semicolon (;) or right arrow (>)
- characters.
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/expfile.template b/CORE/xylem/expfile.template
deleted file mode 100644
index 9c82cb8..0000000
--- a/CORE/xylem/expfile.template
+++ /dev/null
@@ -1,30 +0,0 @@
-;---------------------------------------------------------------------------
-; FEATURES/GDE Expression File Instructions 8/7/95
-;
-; 1. Type in one or more GenBank expressions below,
-; or
-; Place cursor at end of this file and choose 'Include File' in the FILE
-; menu to read in a file of expressions.
-; or
-; Copy expressions from another window and Paste into this window.
-; 2. Choose 'Save Current File' in the File menu
-; 3. Quit this window
-;
-; NOTES:
-; 1) FEATURES will then extract the appropriate sequences.
-; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES.
-; 2) All expressions referring to GenBank entries must begin with a '@'
-; Literals (ie. sequences to be embedded in the final output)
-; do NOT begin with a '@'.
-; 3) Put each expression on a separate line.
-;
-; SAMPLE EXPRESSION FILE:
-;
-; @J05635:83..1813
-; ; EcoRI/NotI adaptor {this is a comment line}
-; AATTGCGGCCGC
-; @J05635:/product="flagellin A"
-; @x17548:singed_trans
-;
-;---------------------------------------------------------------------------
-
diff --git a/CORE/xylem/feafile.template b/CORE/xylem/feafile.template
deleted file mode 100644
index 12e8dd9..0000000
--- a/CORE/xylem/feafile.template
+++ /dev/null
@@ -1,23 +0,0 @@
-;---------------------------------------------------------------------------
-; FEATURES/GDE Feature Key File Instructions
-;
-; 1. Type in one or more GenBank FEATURE Table feature keys below,
-; or
-; Place cursor at end of this file and choose 'Include File' in the FILE
-; menu to read in a file of feature keys.
-;
-; 2. Choose 'Save Current File' in the File menu
-; 3. Quit this window
-;
-; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
-; OUT THESE COMMENT LINES.
-;
-; NOTE: Put each feature key on a separate line
-; SAMPLE FEATURE KEY FILE:
-;
-; mRNA
-; CDS
-; mat_peptide
-;
-;---------------------------------------------------------------------------
-
diff --git a/CORE/xylem/features.doc b/CORE/xylem/features.doc
deleted file mode 100644
index 8e1321c..0000000
--- a/CORE/xylem/features.doc
+++ /dev/null
@@ -1,407 +0,0 @@
-
- FEATURES.DOC update 7 Feb 94
-
-
- NAME
- FEATURES - extracts features from GenBank entries
-
- SYNOPSIS
- features
- features expression
- features [-f featurekey | -F keyfile]
- [-n name |-a accession | -e expression |
- -N namefile |-A accfile | -E expfile]
- [-u dbfile | -U dbfile | -g ]
- features -h
-
- DESCRIPTION
- FEATURES extracts sequence objects from GenBank entries, using
- the Features Table language. Features can be retrieved either by
- specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by
- evaluating expressions. In practical terms, FEATURES is actually
- a user interface for GETOB, which actually performs the parsing
- and extraction of sequence objects. FEATURES can be run either as
- an interactive program or with command line arguments.
-
- 'features' with no arguments runs the program interactively.
- 'features' followed by an expression retrieves the data directly
- from GenBank and evaluates the expression. The third form of
- features requires all arguments to be accompanied by their
- respective option flags. Finally, 'features -h' prints the
- SYNOPSIS.
-
-
- INTERACTIVE EXECUTION
- FEATURES executed with no arguments runs interactively. An example of the
- FEATURES menu is shown below:
-
- ___________________________________________________________________
- FEATURES - Version 7 FEB 94
- Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
- ___________________________________________________________________
- Features: tRNA
- Entries: EPFCPCG
- Dataset:
- ___________________________________________________________________
- Parameter Description Value
- -------------------------------------------------------------------
- 1).................... FEATURES TO EXTRACT ....................> f
- f:Type a feature at the keyboard
- F:Read a list of features from a file
- 2)....................ENTRIES TO BE PROCESSED (choose one).....> n
- Keyboard input - n:name a:accession # e:expression
- File input - N:name(s) A:accession #(s) E:expression(s)
- 3)....................WHERE TO GET IT .........................> g
- u:Genbank dataset g:complete GenBank database
- U: same as u, but all entries
- 4)....................WHERE TO SEND IT ........................> a
- s:Each feature to a separate file a:All output to same file
- ---------------------------------------------------------------
- Type number of your choice or 0 to continue:
- 0
- Messages will be written to EPFCPCG.msg
- Final sequence output will be written to EPFCPCG.out
- Expressions will be written to EPFCPCG.exp
- Extracting features...
-
- In the example, FEATURES was instructed to retrieve all tRNAs from
- the GenBank entry EPFCPCG, which contains the Epifagus plastid
- genome. By default, the GenBank database was the source of the
- sequence. Messages indicate the progress of the job. A log describing
- the extraction of each feature is written to EPFCPCG.msg, while the
- extracted features themselves are written to EPFCPCG.out. Feature
- expressions which could be used by FEATURES to reconstruct the .out
- file, are written to EPFCPCG.exp.
-
- The first step is to retrieve the EPFCPCG entry from GenBank, which is
- accomplished by calling FETCH. Next, FEATURES extracts the specified
- features from the entry.
-
- An excerpt from EPFCPCG.msg is shown below, describing the extraction
- of the fifth tRNA found in this entry. To create this tRNA, two exons
- had to be joined. The qualifier line associated with this feature
- indicates that it is a Histidine tRNA with a gtg anticodon.
-
-
- EPFCPCG:anticodon gtg
- complement
- (
- join
- (
- 70023 70028
-
- 1 69
-
- )
-
- )
-
-
- /product="transfer RNA-His"
- /gene="His-tRNA"
- /label=anticodon gtg
- /note="anticodon gtg"
- //----------------------------------------------
-
-
- The actual sequence for this feature, as written to EPFCPCG.out, is
- written with each exon beginning a new line:
-
- >EPFCPCG:anticodon gtg
- ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat
- gcgggttcaattcccgtcg
- ttcgcc
-
- Finally, the expression that was evaluated to create this feature is
- written to EPFCPCG.exp:
-
- >EPFCPCG:anticodon gtg
- @M81884:anticodon gtg
-
- If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES,
- EPFCPCG.out would be recreated.
-
- OPTIONS
- 1) FEATURES - choosing f will cause FEATURES to prompt for
- a feature to extract. If you wish to extract several types of
- features simultaneously (ie. F), you must construct a file listing the
- feature keywords. The following example would retrieve both tRNA and
- rRNA sequences:
-
- OBJECTS
- tRNA
- rRNA
- SITES
-
- The words 'OBJECTS' and 'SITES' must enclose the feature keywords,
- and each keyword must be on a separate line. For a rigorous
- definition of the input file format, see the GETOB manual pages
- (getob.doc).
-
- In the menu shown above, f was chosen, and the user entered tRNA at
- the prompt. Thus tRNA is now displayed on the Features: line. If
- features had been specified from a file (suboption F) then the
- filename containing the feature keywords would be displayed instead.
- A complete list of legal feature keywords can be found in the GenBank
- Release notes (gbrel.txt) under the subheading 'Feature Key Names'.
-
- 2) ENTRIES
- n User is prompted for the name of an entry from which the
- feature is to be extracted. The name of the entry will appear
- on the 'Entries' line of the menu.
-
- N User is prompted for a filename containing one or more
- entry names. Each name must be on a separate line. The filename
- will be displayed on the 'Entries' menu line.
-
- a User is prompted for an accession number, which will appear
- on the 'Entries' line of the menu.
-
- A User is prompted for a filename for accession numbers. The filename
- will appear on the 'Entries:' line.
-
- e User is prompted for a GenBank Features expression of the
- form accession:location. 'accession' refers to a GenBank
- accession number, while 'location' is any legal feature location.
- A brief description of location syntax can be found under the
- subheading "Feature Location" in the GenBank release notes
- (gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table:
- Definition" Version 1.04 for a complete definition.
- E User is prompted for a filename containing one or more Feature
- expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning
- with '@' are processed as expressions, and all other lines are
- copied to the output file unchanged.
-
- Examples:
-
- The tRNA shown above could have been extracted by choosing
- suboption e and entering either of the following expressions:
-
- M81884:complement(join(70023..70028,1..69))
- M81884:anticodon gtg
-
- In the first example, the feature line from the original entry
- is used as the location. In the second example, the feature is
- found by its qualifier line, which also appeared in the
- original entry. It must be noted that the qualifier line must
- be unique from others in the same entry in its first 15
- characters after the = .
-
- The flaL protein coding region of B. licheniformis is described
- in GenBank entry BLIFALA, accession number M60287 in the
- following feature:
-
- CDS 305..640
- /note="flaD (sin) homologue"
- /gene="flaL"
- /label=ORF2
- /codon_start=1
-
- This feature could be retrieved using any of the following
- expressions:
-
- M60287:305..640
- M60287:ORF2
- M60287:/label=ORF2
- M60287:/gene="flaL"
- M60287:/note="flaD (sin) homologue"
-
- Note that the /label= qualifier is special, in that labels are
- specifically intended as unique tags on a feature. For labels,
- only the label itself need be specified. Thus, /label=ORF2 is
- equivalent to ORF2. For other qualifiers, the qualifier keyword
- (eg. /note=) must be included.
-
- 3) DATABASE (WHERE TO GET IT) - By default, all entries processed will
- be automatically retrieved from GenBank using FETCH. Specifying 'u'
- (User-defined database subset) makes it possible to extract features
- from GenBank subsets created by the user. Usually, retrieval of
- features is much faster with a User-defined subset, so if you
- frequently work with sets of genes, it is best to retrieve them
- en-masse using FETCH, and work with them directly. For example, if
- you had retrieved a set of Beta-globin sequences into a file called
- 'globin.gen', you could directly extract features from these entries
- by specifying 'globin' or 'globin.gen' as your User-defined database.
- If the file extension is '.gen', FEATURES will automatically create
- temporary files called globin.ano, globin.wrp and globin.ind,
- containing annotation, sequence, and an index, respectively. These
- files will be read during feature extraction, and then discarded. If
- you have already created such files using SPLITDB, simply specify
- any of 'globin', 'globin.ano', etc. ie. anything, as long as it does
- not have the .gen file extension.
-
- 'U' rather than 'u' causes ALL entries in the user-defined
- database to be subset. This means that it is unnecessary to
- specify entry options (eg -n, -N etc.), as these will be
- ignored, if given.
-
- One consequence of these conventions is that the individual GenBank
- divisions can be processed directly. For example, suppose you were only
- interested in rodent globins. You could directly access the rodent
- division of GenBank by specifying the base name of that file division
- (eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In
- this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already
- exist. Again, this approach is faster, since FEATURES would not have
- to find and retrieve the sequences, but can read directly from the
- database files. Finally, if you wanted to process all of the entries
- in the database division, simply use -U. The user is warned that a
- GenBank division is a huge amount of data, and processing every entry
- could take a long time.
-
- 4) WHERE TO SEND IT - By default (a), the output for all entries goes
- to a single set of files, whose names are chosen by FEATURES,
- depending on the setting of option 2, Entries. If a single name (n) or
- accession number (a) has been chosen, that will be used as
- the raw filename. For example, if you were processing the entry
- WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names
- (N), accession numbers (A) or expressions (E) were read from a file,
- the raw name of that file would be used eg. cellulase.nam would result
- in cellulase.msg and cellulase.out. Finally, if a single expression
- is processed (e), then the primary accession number in that
- expression will be used for the filenames. In all cases, FEATURES
- will tell you the names of the files being written.
-
- Choosing suboption s, you can specify that the features created for
- each entry be sent to separate files. In this case, each file will
- have the name of that entry, with the extension .obj. However, all
- messages and expressions will still go to a single set of files. While this
- can be a convenient way of creating separate files when you need them,
- this option still has the limitation of writing all features for a
- given entry (if there are more than one) to the same file. Also,
- successive resolution of features (anything requiring 'getob -r')
- will not work with this option. This may be corrected in future
- versions.
-
-
- COMMAND LINE EXECUTION
-
- There are two ways of running FEATURES from the command line. If only one
- argument is supplied, that argument is interpreted as an expression, and
- the result of that expression (ie. a sequence ) is written to the
- standard output. .msg, .out and .exp files are NOT created. For example,
- GenBank entry BACFLALA (M60287) contains the following feature:
-
- CDS 95..271
- /label=LORF-
- /codon_start=1
- /translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK
- PSASIERSHSINPF"
- Any of
-
- features M60287:LORF-
- features M60287:95..271
- features M60287:/label=LORF-
-
- would write the open reading frame to the standard output:
-
- atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga
- actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta
- tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa
- agaagtcattcaataaatcctttctga
-
- This form of FEATURES is provided to make it easy to pipe output to
- other programs for further processing. For example
-
- features M60287:LORF- |ribosome >LORF.protein
-
- would write the translation of the open reading frame to a file called
- LORF.protein.
-
- The full functionality of FEATURES can be accessed using arguments on
- the command line. In particular, when there are multiple entries to be
- processed, or multiple features within entries, it is much faster to
- supply FEATURES with lists of entries, feature keys or expressions.
- Command line options are similar to suboptions in menu items 1-3 above:
-
- Feature keys:
- -f key {feature key}
- -F filename {file of feature keys}
-
- Entries:
- -n name {GenBank LOCUS name}
- -N filename {file of GenBank LOCUS names}
- -a accession {GenBank ACCESSION number}
- -A filename {file of GenBank ACCESSION numbers}
- -e expression {Feature Table expression}
- -E filename {file of Feature Table expressions, each begin-
- ning with '@'}
-
- Databases:
- -u filename {GenBank dataset}
- -U filename { " " " " " " ,
- process all entries ie. -nNaAeE options
- will be ignored}
- -g {GenBank}
-
- Examples:
-
- features -f tRNA -n EPFCPCG
-
- retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out,
- and .exp files.
-
- features -e M60287:LORF-
-
- would retrieve the same open reading frame as in the earlier example.
-
-
- Since the most time-consuming operation in FEATURES is sequence retrieval,
- it is often best to retrieve frequently-used sequences as database
- subsets. For example, a set of GenBank entries for chlorophyll a/b binding
- protein genes might be stored in a file called CAB.gen.
-
- features -f CDS -N CAB.nam -u CAB.gen
-
- would generate the files CAB.msg, CAB.out and CAB.exp containing output
- for all CDS features in the entries listed in the file CAB.nam.
-
- features -E CAB.exp -u CAB.gen
-
- would re-create the output file CAB.out.
-
-
-
- BUGS
- FEATURES does no preliminary error checking for syntax of
- GenBank expressions prior to their evaluation. Expressions that can
- not be evaluated will be flagged by GETOB in the .msg file.
-
- At present, little checking is done to test for the presence or
- correctness of input files. Some errors may cause the program to
- crash.
-
- For User-defined datasets, filename expansion is not performed.
-
- FILES
- Temporary files:
- X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) }
- UNRESOLVED.fea UNRESOLVED.out
- FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out
-
- SEE ALSO
- grep(1V) fetch getob splitdb
-
- TRANSPORTATION NOTES
- It should be fairly easy to get FEATURES to work even on systems
- in which GenBank has not been formatted for the XYLEM package.
- This is because FEATURES does not work directly on the database, but
- rather retrieves all necessary sequences by calling FETCH. Thus,
- statements like 'fetch FEA.nam FEA.gen' could be replaced with any
- command that, given a file containing names or accession numbers,
- returns a file containing GenBank entries. In principle, you
- could even implement this sort of command to retrieve entries from
- the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although
- such a setup would undoubtedly be quite slow.
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/fetch.doc b/CORE/xylem/fetch.doc
deleted file mode 100644
index 9b4b1a6..0000000
--- a/CORE/xylem/fetch.doc
+++ /dev/null
@@ -1,320 +0,0 @@
-
- FETCH.DOC update 24 Feb 96
-
-
- NAME
- fetch - retrieves database entries by name or accession number
-
- SYNOPSIS
- fetch {interactive mode}
- fetch [options] namefile [output file] {batch mode}
-
- DESCRIPTION
- fetch retrieves one or more entries from a database.
-
- Interactive mode: fetch prompts the user to set search parameters,
- using an interactive menu:
- ___________________________________________________________________
- FETCH - Version 7 Feb 94
- Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
- ___________________________________________________________________
- Namefile:
- Outfile:
- Database:
- -------------------------------------------------------------------
- Parameter Description Value
-
- 1) Name/Acc Name or Accession sequence to get
- 2) Namefile Get list of sequences from Namefile
- 3) WhatToGet a:annotation s:sequence b:both b
- 4) Database g:GenBank p:PIR v:VecBase l:LiMB g
- G:GenBank dataset P:PIR dataset
- 5) Outfile Send all output to a single file (Outfile)
- 6) Files f:Send each entry to a separate file f
- -------------------------------------------------------------
- Type number of your choice or 0 to continue:
-
- After all parameters have been set, type 0 to commence the search.
- Messages regarding the progress of the search will be printed.
-
- (1,2) Which entries to get?
- If you want to get a single entry, option 1 lets you type in the
- name of that entry, without having to create a namefile. To get
- more than one entry, choose option 2, and specify the name of a
- file containing sequence names or accession numbers.
-
- namefile is a file containing one or more sequence names or
- accession numbers, each on a separate line. Names and accession
- numbers can even be interspersed, in upper or lowercase, and in
- any order. For example, the namefile prp.nam might contain
-
- ; plant pathogenesis related proteins
- ; (these are sample comment lines)
- ; note that any line containing a semicolon is ignored
- x06362
- x05454
- TOBPR1A1
- ; comments can be interspersed with names.
- PUMPR13
- tobpr1ar
-
- Options 1 & 2 are mutually exclusive. Setting one will negate the
- other. If option 2 is chosen, the name of the namefile will appear
- at the top of the menu.
-
- (3) WhatToGet
- Use this option to specify whether to get annotation, sequence,
- or both (default=both).
-
- (4) Database
- Use this option to select the database. (default=GenBank).
- G and P select user-created database subsets containing GenBank
- or PIR entries, respectively. It is assumed that the database
- has been split into .ano, .wrp and .ind files using splitdb.
- For example, if you had created a database subset called PR1.pir,
- splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are
- the files actually read by FETCH. When prompted for the name
- of the database, simply type "PR1", without a file extension.
- (If you do type a file extension, it will be ignored).
-
- (5, 6) Where to send output
- By default, option 6 is set to f, and each entry will be written to
- a separate file, where the name of the file is the name of the
- entry, followed by a file extension. If a complete entry is
- retrieved, the file extension will indicate the type of database
- (GenBank: .gen; PIR: .pir, Vecbase: .vec; LiMB: .LiMB). If only
- annotation or sequence are retrieved, the file extensions will be
- .ano or .wrp, respectively. Using the default, the namefile above
- would create the following files:
-
- PUMPR13.gen
- TOBPR1A1.gen
- TOBPR1AR.gen
- TOBPR1CR.gen
- TOBPR1PS.gen
-
- By choosing option 5, you can specify the name of an output file
- for all entries to go to. This filename will appear at the top
- of the menu. Obviously, options 5 & 6 are mutually exclusive.
- Note entries retrieved are written in alphabetical order (sorting by
- ASCII values), not the order in which they appeared in namefile.
-
- (Note for remote users only: -f will only work for a single
- name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES
- specified in 2).)
-
- Batch mode:
- Although it is transparent to the user, all fetch really does
- is call getloc, saving the user the trouble of knowing which
- database files to retrieve sequences from, or of having to
- execute getloc multiple times to retrieve sequences from
- different database files. Thus, the options are identical to those
- for getloc:
-
- -a Write annotation portions of entries only, terminated by '//'.
- -s Write sequence data only, in Pearson (.wrp) format.
- -f Write each entry to a separate file.
- -g GenBank (default)
- -e EMBL {not implemented}
- -p PIR (NBRF)
- -v Vecbase
- -l LiMB
- -G GenBank_dataset
- -P PIR_dataset
-
- If -f is not specified, outfile must be specified.
-
- -L force execution of fetch on local host even if
- $XYLEM_RHOST is set. See "REMOTE EXECUTION" below
-
-
- PIR_dataset
- GenBank_dataset
- This can be either a file of PIR entries, a file of GenBank entries,
- or a XYLEM dataset created by splitdb. A file of PIR entries must
- have the file extension ".pir". A file of GenBank entries must have
- the file extension ".gen". A XYLEM dataset contains PIR entries split
- among three files by splitdb: annotation (.ano), sequence (.wrp)
- and index (.ind). These file extensions must be used!
-
- When specifying a split dataset, only the base name needs to be
- used. For example given a XYLEM dataset consisting of the files
- myset.ano, myset.wrp and myset.ind, the following two commands
- are equivalent:
-
- fetch -P myset something.nam something.pir
- fetch -P myset.ano something.nam something.pir
-
- If the original .pir file had been used, the command would have
- been
-
- fetch -P myset.pir something.nam something.pir
-
- The ability to work directly with .gen or .pir files is quite
- convenient. However, since FETCH needs to work with a split
- dataset, it automatically splits .pir or .gen files into .ano, .wrp
- and .ind files, which are removed when finished. This requires
- extra disk space and execution time, which could be significant
- for large datasets.
-
- EXAMPLES
- Batch example:
- fetch -f chitinase.nam
- will retrieve annotation and sequence for sequences listed in
- chitinase.nam from GenBank, writing each entry to a separate file
- with the extension .gen.
-
- fetch -s -v pbr.nam pbr.wrp
- will retrieve sequence data only for the entries listed in pbr.nam,
- from VecBase, and write all sequences to a Pearson format file
- (ie. readable by fasta) with the name pbr.wrp.
-
- fetch -G sample sample.nam new.gen
- fetch -G sample.ano sample.nam new.gen
- Assumes that a set of GenBank entries has been split by splitdb
- into sample.ano sample.wrp and sample.ind. The entries listed in
- sample.nam are written to new.gen.
-
-
- FILES
- Database files:
- The directories for database files are specified by the environment
- variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB
- (LiMB).
-
- Index files are $GB/gbacc.idx for GenBank (this file is supplied
- with each GenBank release), while the other databases
- use .ind files generated by splitdb. Split database files MUST
- have the following file extensions: .ano {annotation}, .wrp
- {sequence} and .ind {index}. Thus, when creating database files
- for pir1.dat with splitdb, the output files should be pir1.ano,
- pir1.wrp and pir1.ind.
-
- Temporary files:
- NAMEFILE.fetch
- PRELIMINARY.fetch
- TMP.fetch
- FOUND.fetch
- FETCHDIR {temporary directory}
-
- REMOTE EXECUTION
- Where the databases can not be stored locally, FETCH can call
- FETCH on another system and retrieve the results. To run
- FETCH remotely, your .cshrc file should contain the following
- lines:
-
- setenv XYLEM_RHOST remotehostname
- setenv XYLEM_USERID remoteuserid
-
- where remotehostname is the name of the host on which the
- databases reside (in XYLEM split format) and remoteuserid
- is your userid on the remote system. When run remotely,
- your local copy of FETCH will generate the following
- commands:
-
- rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
- rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ...
- rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
- rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files
-
- Because FETCH uses rsh and rcp, your home directory on both
- the local and remote systems must have a world-readable
- file called .rhosts, containing the names of trusted remote
- hosts and your userid on each host. Before trying to get
- FETCH to work remotely, make sure that you can rcp and
- rsh to the remote host.
-
- Obviously, remote execution of FETCH implies that FETCH
- must already be installed on the remote host. When FETCH
- runs another copy of FETCH remotely, it uses the -L option
- (fetch -L) to ensure that the remote FETCH job executes,
- rather than calling yet another FETCH on another host.
-
-
- ---------- Remote execution on more than 1 host -----------
- If more than 1 remote host is available for running FETCH
- (say, in a clustered environment where many servers mount
- a common filesystem) the choice of a host can be determined
- by the csh script choosehost, such that execution of
- choosehost returns the name of a remote server. To use this
- approach, the following script, called 'choosehost' should
- be in your bin directory:
-
- #!/bin/csh
- # choosehost - choose a host to use for a remote job.
- # This script rotates among servers listed in .rexhosts,
- # by choosing the host at the top of the list and moving
- # it to the bottom.
-
- #Rotate the list, putting the current host to the bottom.
- set HOST = `head -1 $home/.rexhosts`
- set JOBID = $$
- tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
- echo $HOST >> /tmp/.rexhosts.$JOBID
- /usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
-
- # Write out the current host name
- echo $HOST
-
- You must also have a file in your home directory called
- .rexhosts, listing remote hosts, such as
-
- graucho.cc.umanitoba.ca
- harpo.cc.umanitoba.ca
- chico.cc.umanitoba.ca
- zeppo.cc.umanitoba.ca
-
- Each time choosehost is called, choosehost will rotate the
- names in the file. For example, starting with the .rexhosts
- as shown, it will move graucho.cc.umanitoba.ca to the bottom
- of the file, and write the line 'graucho.cc.umanitoba.ca'
- to the standard output. The next time choosehost is
- run, it would write 'harpo.cc.umanitoba.ca', and so on.
-
- Depending on your local configuration, you may wish to
- rewrite choosehost. All that is really necessary is that
- echo `choosehost` should return the name of a valid host.
-
- Once you have installed choosehost and tested it, you can
- get FETCH to use choosehost simply by setting
-
- setenv XYLEM_RHOST choosehost
-
- in your .cshrc file.
-
- --------------- Remote filesystems -----------------------
- Finally, an alternative to remote execution is to remotely mount
- the file system containing the databases across the network.
- This has the advantage of simplicity, and means that the
- databases are available for ALL programs on your local
- workstation. However, it may still be advantageous to run
- FETCH remotely, since that will shift much of the computational
- load to another host.
-
- BUGS
- When retrieving entries directly from GenBank, FETCH uses the
- Accession Number index file gbacc.idx. In this case, FETCH
- can retrieve all entries containing a given accession number.
- This capability makes it possible to retrieve an entry using a
- secondary accession number. However if more than one entry
- share a secondary accession number, all of those entries will
- be retrieved. While this behavior might be a bit of an
- annoyance at times, it can also be useful because it alerts
- the user to the presence of other, related entries that might
- be of interest.
-
- SEE ALSO
- getloc features
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/findkey.doc b/CORE/xylem/findkey.doc
deleted file mode 100644
index c3197c7..0000000
--- a/CORE/xylem/findkey.doc
+++ /dev/null
@@ -1,365 +0,0 @@
-
- FINDKEY.DOC update 13 Mar 97
-
-
- NAME
- findkey - finds database entries containing one or more keywords
-
- SYNOPSIS
- findkey
- findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile]
- findkey [-P PIR_dataset] keywordfile [namefile findfile]
- findkey [-G GenBank_dataset] keywordfile [namefile findfile]
-
- DESCRIPTION
- findkey uses the grep family of commands to find lines in database
- annotation files containing one or more keywords. Next, identify
- is called to create a .nam file, containing the names of entries
- containing the keywords, and a .fnd file, containing the actual
- lines from each entry containing hits. A PIR or GenBank dataset is
- either a file containing one or more GenBank or PIR entries, or
- the name of a XYLEM dataset created by splitdb. See FILES below
- for a more detailed description.
-
- INTERACTIVE USE
- findkey prompts the user to set search parameters, using an interactive
- menu:
-
- ___________________________________________________________________
- FINDKEY - Version 12 Aug 94
- Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
- ___________________________________________________________________
- Keyfile:
- Dataset:
- -------------------------------------------------------------------
- Parameter Description Value
- -------------------------------------------------------------------
- 1) Keyword Keyword to find thionin
- 2) Keyfile Get list of keywords from Keyfile
- 3) WhereToLook p:PIR v:VecBase p
- GenBank - b:bacterial i:invertebrate
- m:mamalian e:expressed seq. tag
- g:phage l:plant
- r:primate n:rna
- d:rodent s:synthetic
- u:unannotated a:viral
- t:vertebrate x:patented
- z:STS
- G: GenBank dataset P: PIR dataset
- -------------------------------------------------------------
- Type number of your choice or 0 to continue:
- 0
- Searching /home/psgendb/PIR/pir1.ano...
- Sequence names will be written to thionin~pir.nam
- Lines containing keyword(s) will be written to thionin~pir.fnd
- Searching /home/psgendb/PIR/pir2.ano...
- Sequence names will be written to thionin~pir.nam
- Lines containing keyword(s) will be written to thionin~pir.fnd
- Searching /home/psgendb/PIR/pir3.ano...
- Sequence names will be written to thionin~pir.nam
- Lines containing keyword(s) will be written to thionin~pir.fnd
-
- As shown in the example above, the keyword thionin was specified
- as the keyword to search for. By default, option 3 is set to p,
- and the PIR protein database is searched. Messages describe the
- progress of the search. Since PIR is broken up into two divisions
- (new and protein) both are searched, but all output is written to
- thionin~pir.nam and thionin~pir.fnd.
-
- OPTIONS
- (1,2) Which keywords to search for?
- If you want to search for a single keyword, option 1 lets you type
- the keyword, without having to create a file. To search for more
- than one keyword, choose option 2, and specify the name of a
- file containing the keywords. For example, entries containing
- genes for antibiotic resistance might be found using the
- following keyword file:
-
- ampicillin
- chloramphenicol
- kanamycin
- neomycin
- tetracycline
-
- Note: keyword searches are case insensitive.
-
- As you might expect, it takes longer to search for multiple
- keywords than a single keyword.
-
- Options 1 & 2 are mutually exclusive. Setting one will negate the
- other. If option 2 is chosen, the name of the keyword file will
- appear at the top of the menu.
-
- Finally, it is probably not a good idea to search GenBank
- entries using very short keywords consisting only of letters.
- This is because GenBank entries now include a /translation
- field containing the amino acid sequence of each protein
- coding sequence. Consequently, 3 or 4 letter keywords
- consisting of legal amino acid symbols (eg. CAP, recA) will
- turn up fairly often in protein translations.
-
- (3) WhereToLook
- Use this option to specify the database to be searched. In the
- case of GenBank, only one division at a time may be searched.
- User-created database subsets containing PIR (P) or GenBank (G)
- entries may also be searched. User-created database subsets
- must be in the .ano/.wrp/.ind form created by splitdb.
-
- OUTPUT
- The output filenames take the following form:
-
- name~ex1.ex2
-
- The 'name' part of the filename is either the keyword searched for,
- if option 1 was chosen, or the name of the keyword file, if option 2
- was chosen. 'ex1' indicates the database division that was searched. For
- PIR and VecBase, ex1 is 'pir' and 'vec', respectively. For GenBank,
- ex1 is as follows:
-
- bct - bacterial
- inv - invertebrate
- mam - other mammalian
- est - expressed sequence tag
- phg - phage
- pln - plant (includes fungi)
- pri - primate
- rna - structural RNAs
- rod - rodent
- syn - synthetic sequences
- sts - sequence tagged sites
- una - unannotated (new) sequences
- vrl - viral
- vrt - other vertebrate
-
- 'ex2' distinguishes the files containing the names of entries
- containing keywords (.nam) and the files containing the lines found
- in each entry (.fnd).
-
- The .nam file can be used directly as a namefile for fetch, getloc,
- or getob.
-
- COMMAND LINE USE
-
- OPTIONS
- p search PIR (default)
- P PIR dataset search dbfile, containing PIR entries
- v search VecBase
- b search Genbank bacterial division
- m search Genbank mammalian division
- g search Genbank phage division
- r search Genbank primate division
- d search Genbank rodent division
- u search Genbank unannotated division
- t search Genbank vertebrate division
- i search Genbank invertebrate division
- l search Genbank plant division
- n search Genbank rna division
- s search Genbank synthetic division
- a search Genbank viral division
- x search Genbank patented division
- e search Genbank exp.seq.tag division
- z search GenBank STS division
- S search GenBank Genom. Survey division
- h search GenBank High Thrput. division
- G GenBank dataset search dbfile, containing GenBank entries
-
- L force execution of findkey on local host
- even if $XYLEM_RHOST is set. See "REMOTE
- EXECUTION" below
-
- FILES
-
- keywordfile - contains keywords to search for
-
- namefile - LOCUS names of hits are written to this file
-
- findfile - for each hit, a report listing the LOCUS name and the
- lines matching the keyword is written to this file.
-
- If namefile and findfile are not specified on the command line,
- filenames will be created as described above for interactive
- use.
-
- PIR_dataset
- GenBank_dataset
- This can be either a file of PIR entries, a file of GenBank entries,
- or a XYLEM dataset created by splitdb. A file of PIR entries must
- have the file extension ".pir". A file of GenBank entries must have
- the file extension ".gen". A XYLEM dataset contains PIR or GenBank entries split
- among three files by splitdb: annotation (.ano), sequence (.wrp)
- and index (.ind). These file extensions must be used!
-
- When specifying a split dataset, only the base name needs to be
- used. For example given a XYLEM dataset consisting of the files
- myset.ano, myset.wrp and myset.ind, the following two commands
- are equivalent:
-
- findkey -P myset something.kw
- findkey -P myset.ano something.kw
-
- If the original .pir file had been used, the command would have
- been
-
- findkey -P myset.pir something.kw
-
- The ability to work directly with .gen or .pir files is quite
- convenient. However, since FINDKEY needs to work with a split
- dataset, FINDKEY automatically splits .pir or .gen files into .ano,
- .wrp and .ind files, which are removed when finished. This requires
- extra disk space and execution time, which could be significant
- for large datasets.
-
- EXAMPLES
- If the list of antibiotics shown above was stored in the file
- antibiotic.kw, and option 3 was set to 'b', then the annotation
- portion of the GenBank bacterial division would be searched, and
- all lines containing any of these keywords would be written to
- antibiotic~bct.fnd. The corresponding GenBank entry names would
- appear in antibiotic~bct.nam.
-
- The same keyword file could be used to search other database files.
- If VecBase was searched, the output files would be antibiotic~vec.fnd
- and antibiotic~vec.nam. These filename conventions make it easy
- to search different database divisions, and to keep track of where
- data came from.
-
- Command line examples:
-
- findkey thionin.kw
-
- would be equivalent to the interactive example shown above. In
- this case, the file thionin.kw contains the word 'thionin'.
- (Note that since PIR is the default, -p need not be supplied.)
-
- findkey -b antibiotic.kw drugs.nam drugs.fnd
-
- would search the GenBank bacterial division for the keywords
- contained in antibiotic.kw, and write the output to drugs.nam
- and drugs.fnd.
-
- FILES
- Database files:
- The directories for database files are specified by the environment
- variables $GB (GenBank) $PIR (PIR/NBRF) and $VEC(Vecbase).
- Annotation (.ano) and index (.ind) are those generated by splitdb.
-
- Temporary files:
- $jobid.fnd
- $jobid.nam
- $jobid.grep
-
- where $jobid is a unique jobid generated by the shell
-
- REMOTE EXECUTION
- Where the databases can not be stored locally, FINDKEY can call
- FINDKEY on another system and retrieve the results. To run
- FINDKEY remotely, your .cshrc file should contain the following
- lines:
-
- setenv XYLEM_RHOST remotehostname
- setenv XYLEM_USERID remoteuserid
-
- where remotehostname is the name of the host on which the
- databases reside (in XYLEM split format) and remoteuserid
- is your userid on the remote system. When run remotely,
- your local copy of FINDKEY will generate the following
- commands:
-
- rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
- rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ...
- rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
- rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files
-
- Because FINDKEY uses rsh and rcp, your home directory on both
- the local and remote systems must have a world-readable
- file called .rhosts, containing the names of trusted remote
- hosts and your userid on each host. Before trying to get
- FINDKEY to work remotely, make sure that you can rcp and
- rsh to the remote host.
-
- Obviously, remote execution of FINDKEY implies that FINDKEY
- must already be installed on the remote host. When FINDKEY
- runs another copy of FINDKEY remotely, it uses the -L option
- (findkey -L) to ensure that the remote FINDKEY job executes,
- rather than calling yet another FINDKEY on another host.
-
- ---------- Remote execution on more than 1 host -----------
- If more than 1 remote host is available for running FINDKEY
- (say, in a clustered environment where many servers mount
- a common filesystem) the choice of a host can be determined
- by the csh script choosehost, such that execution of
- choosehost returns the name of a remote server. To use this
- approach, the following script, called 'choosehost' should
- be in your bin directory:
-
- #!/bin/csh
- # choosehost - choose a host to use for a remote job.
- # This script rotates among servers listed in .rexhosts,
- # by choosing the host at the top of the list and moving
- # it to the bottom.
-
- #Rotate the list, putting the current host to the bottom.
- set HOST = `head -1 $home/.rexhosts`
- set JOBID = $$
- tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
- echo $HOST >> /tmp/.rexhosts.$JOBID
- /usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
-
- # Write out the current host name
- echo $HOST
-
- You must also have a file in your home directory called
- .rexhosts, listing remote hosts, such as
-
- graucho.cc.umanitoba.ca
- harpo.cc.umanitoba.ca
- chico.cc.umanitoba.ca
- zeppo.cc.umanitoba.ca
-
- Each time choosehost is called, choosehost will rotate the
- names in the file. For example, starting with the .rexhosts
- as shown, it will move graucho.cc.umanitoba.ca to the bottom
- of the file, and write the line 'graucho.cc.umanitoba.ca'
- to the standard output. The next time choosehost is
- run, it would write 'harpo.cc.umanitoba.ca', and so on.
-
- Depending on your local configuration, you may wish to
- rewrite choosehost. All that is really necessary is that
- echo `choosehost` should return the name of a valid host.
-
- Once you have installed choosehost and tested it, you can
- get FINDKEY to use choosehost simply by setting
-
- setenv XYLEM_RHOST choosehost
-
- in your .cshrc file.
-
- --------------- Remote filesystems -----------------------
- Finally, an alternative to remote execution is to remotely mount
- the file system containing the databases across the network.
- This has the advantage of simplicity, and means that the
- databases are available for ALL programs on your local
- workstation. However, it may still be advantageous to run
- FINDKEY remotely, since that will shift much of the computational
- load to another host.
-
-
- BUGS
- At present, regular expression characters cannot be used for
- keyword searches.
-
- SEE ALSO
- grep(1V) identify splitdb
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/getloc.doc b/CORE/xylem/getloc.doc
deleted file mode 100644
index f1c1bc1..0000000
--- a/CORE/xylem/getloc.doc
+++ /dev/null
@@ -1,65 +0,0 @@
-
- GETLOC.DOC update 30 May 95
-
-
- NAME
- getloc - retrieve database entries listed in namefile to outfile.
-
- SYNOPSIS
- getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile
-
- DESCRIPTION
- getloc reads a list of names from namefile and recreates
- entries by combining the annotation and sequence portions of each
- entry from anofile and seqfile. getloc will work most quickly
- when the namefile is in alphabetical order, but it will also
- work on unsorted lists. The following options affect the output:
-
- a Write annotation portions of entries only, terminated by '//'.
- seqfile is not included on command line.
-
- s Write sequence data only, in Pearson (.wrp) format.
- anofile is not included on command line.
-
- f Write each entry to a separate file. The filename will
- consist of the LOCUS name, followed by .ano for annotation
- only, .wrp for sequence only, or .gen for complete GenBank
- format.
-
- c namefile contains accession numbers, rather than names
-
- The following options identify the type of database being read:
-
- g GenBank (default)
- e EMBL
- p PIR (NBRF)
- v Vecbase
- l LiMB
-
- namefile consists of an alphabetically ordered list of LOCUS names,
- each on a separate line. Indfile could be used to create a
- namefile by simply editing out some subset of names. (This can also
- be done using the Unix comm command.) If the entire indfile was
- used, the entire database would be recreated, minus the header
- information that might have been present in the original, but
- deleted by splitdb.
-
- NOTE
- Getloc automatically expands leading blanks that have been
- compressed using splitdb -c. See splitdb.doc for more information.
-
- SEE ALSO
- splitdb, comm(1).
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/getob.doc b/CORE/xylem/getob.doc
deleted file mode 100644
index 895bd17..0000000
--- a/CORE/xylem/getob.doc
+++ /dev/null
@@ -1,327 +0,0 @@
-
- GETOB 21 Dec 94
-
-
- NAME
- getob - Get an object from GenBank
-
- SYNOPSIS
- getob [-frcn] infile namefile anofile seqfile indfile message
- [outfile] expfile
-
- DESCRIPTION
- getob extracts 'objects' (subsequences) from GenBank entries, using
- the features table, and writes them to outfile (.out). A log
- describing the construction of each object is written to message
- (.msg). If -r is not set, a list of expressions that would recreate
- the .out file if evaluated by getob -r, is written to expfile (.exp)
-
- The following options are available:
-
- f Write each entry to a separate file. The name will consist
- of the entry name, and the extension '.obj'.
-
- r Resolve expressions from namefile into objects.
- Expressions take the form:
-
- @[<database>::]<accession>:<location>
-
- In effect, r makes it possible to use getob to resolve
- features that span more than one entry, such as segmented
- files. In the first run of the program, features that require
- data from outside the entry in which they are defined will be
- written to outfile with those externally-defined parts rep-
- resented using the '@' notation described above. During a
- subsequent run, the outfile from the previous run is used as
- namefile. When r is set, all lines not beginning with '@' (ie.
- name lines and sequence lines) are simply copied to the new
- outfile. When an '@' is encountered, the expression is parsed
- into accession number and location. The entry with the
- specified accession number is located in indfile, and read from
- anofile and seqfile. It is then evaluated, and the result
- written to outfile in place of the '@' expression.
-
- getob can also be used to get specific labeled objects from
- a given entry. Examples:
-
- @k30576:polyprotein
- @k30576:/label=polyprotein
- @x10345:/product="hsp70"
- @j00879:group(1..2200,mutation_37)
-
- The first two constructs given above are equivalent. Both
- will extract the feature called polyprotein. The third
- construct shows that any feature label can be specified. If
- none is specified, as in the first example, then /label= is
- assumed. One limitation, however, is that the label sought
- must be unique within the entry in its first 15 characters
- including double quotes ("). Otherwise, only the first
- matching label expression will be evaluated. Finally, the
- last example shows that a mutant sequence can be constructed
- by first specifying an expression that evaluates to a
- sequence (ie. 1..2200) and then a labeled expression that
- upon evaluation, uses replace() to modify that sequence. The
- usage shown in examples 3 & 4 above represent extensions to
- the DDBJ/EMBL/GenBank Features Table Format.
-
- As touched on briefly above, the r option makes it possible
- to construct objects that include recursive references to
- other entries (eg. segmented files) by iterative calls to
- getob. The 'features' command automates this process. The basic
- algorithm is as follows:
-
- getob infile namefile anofile seqfile indfile ...
-
- #Pull out all lines containing indirect references
- grep '@' outfile > unresolved.grep
-
- while (unresolved.grep is not empty)
-
- #extract accession numbers to be retrieved
- cut -c2-7 unresolved.grep > unresolved.nam
-
- #retrieve the sequences into a new file, and create
- #a database subset to be used by getob
- fetch unresolved.nam new.gen
- splitdb new.gen new.ano new.wrp new.ind
-
- #run getob again to resolve indirect references
- getob -r infile outfile new.ano new.wrp new.ind ...
-
- #Pull out all lines containing indirect references
- grep '@' outfile > unresolved.grep
- end
-
- c NAMEFILE contains accession numbers, rather than locus names
-
- n By default, the qualifier 'codon_start' is used to determine
- how many n's, if necessary, must be added to the 5' end of
- CDS, mat_peptide, or sig_peptide, to preserve the reading
- frame. To turn OFF this feature, -n must be set. -n must be set
- for GenBank Releases 67.0 and earlier.
-
- infile contains commands indicating what data is to be pulled from
- each entry. Two types of output may be presented, GenBank or
- OBJECTS. These are described below:
-
- 1) GenBank output - If the word 'GENBANK' is the first line in
- infile, a pseudo-GenBank entry will be recreated. This option
- is only intended for debugging purposes and will probably be
- removed in later releases.
-
- 2) Object format - This option instructs getob to write part or
- all of each sequence, along with site annotation, by specifying
- feature key names. The syntax for infile is shown below:
-
- Backus-Naur format: Example:
- ----------------------------------------------------------
- OBJECTS OBJECTS
- <feature key> tRNA
- {<feature key> rRNA
- . . . SITES
- } stem_loop
- SITES
- {<feature key>
- . . .
- }
-
- In the example above, getob is instructed to extract all tRNA or
- rRNA sequences from each entry, and annotate the position of each
- stem/loop structure. Note that the SITES coordinates written to the
- file tell the positions of those SITES relative to the start of the
- object, rather than the original location in the sequence. As above,
- each word begins a separate line.
-
- While the -r option does not use infile, at least a dummy infile
- must be included in the command line. This dummy file need only
- contain two lines:
-
- OBJECTS
- SITES
-
- NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in
- the input file will have no effect, the word SITES must still be
- present after the last feature key.
-
-
- namefile
- namefile consists of a list of LOCUS names or accession numbers,
- each on a separate line. Names or accession numbers should appear
- in the order in which they appear in the database file. Unordered
- namefiles will slow the progress of the search. Since only the
- first non-blank field of each line in namefile is read, indfile
- could be used to create a namefile. If the entire indfile was
- used, the entire database file would be processed. A sample
- namefile requesting four sequences by LOCUS name is shown below:
-
- POTPR1A
- POTPSTH2
- POTPSTH21
- POTSTHA
-
- anofile, seqfile, and indfile
- The database subset containing GenBank entries must be divided
- among annotation, sequence and an index by splitdb.
-
- message
- message contains a log describing the parsing of each object.
- For annotative purposes, qualifier lines from the object are
- included along with the location expression being parsed.
- The beginning of a typical message file is shown below:
-
- GETOB Version 0.962 14 May 1992
-
- POTPR1A:CDS1
- join
- (
- 295 603
-
- 1011 1355
-
- )
-
-
- /note="pathogenesis-related protein (prp1)"
- /codon_start=1
- /translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL
- QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED
- KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY
- LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK"
- //----------------------------------------------
-
- In the example above, getob was instructed to retrieve all CDS
- features from the database subset. The message for the entry
- POTPR1A is shown, along with a reconstruction of the location
- expression that was evaluated to create the object. In this
- case, protein coding sequences from two exons had to be joined
- to create the object.
-
- outfile
- outfile contains the actual objects constructed, consisting of
- sites found and sequences. The beginning of a typical output file
- is shown below:
-
- >POTPR1A:CDS1
- atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag
- agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg
- aagatttacaaaataagagccctttacttcttcaatctaatccaattcac
- aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat
- ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc
- ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc
- gaagataag
- ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa
- agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca
- aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt
- gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat
- tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg
- aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt
- atccgttaccgagcctacattcagcctgttgatgcttcaaaatga
-
- In the example, the CDS from entry POTPR1A has been written in
- two chunks, corresponding to the two exon portions of the coding
- sequence. Each location retrieved in constructing the object is
- written as a separate block of sequence. By comparing message file
- to outfile, it is possible to verify the correctness of the
- operation.
-
- Numbers are appended to the sequence names to indicate
- which CDS in the entry has been retrieved. Thus, if two CDS
- features were present, the second one would be named >POTPR1A:CDS2.
- For compatibility with the FASTA programs of Pearson, the name line
- begins with a '>'.
-
- expfile
- The expression evaluated to create this feature is written
- to expfile:
-
- >POTPR1A:CDS1
- @J03679:join(295..603,1011..1355)
-
- expfile is only created if -r is not set. It is intended as a way
- of automating the creation of a feature expression file for use
- in generating customized datasets. Expressions in expfile can be
- deleted or modified, or new expressions added, to tailor the
- dataset to individual needs. To generate a dataset from expfile:
-
- getob -r infile expfile anofile seqfile indfile message outfile
-
- EXTENSIONS TO THE FEATURE TABLE LANGUAGE
-
- 1) poly(<absolute location>|<literal>|<feature name>,x)
-
- This operator evaluates an absolute location, literal, or
- feature name (ie. any location not containing functional
- operators) and writes it x times. The most obvious
- application of poly is to create spacers to represent regions
- of unknown sequence between sequences that are known. For
- example, the restriction map of a 4kb EcoR1 fragment with a
- Hind3 site 1000 bp from one end could be represented as follows:
-
- join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc")
-
- 2) The following feature keys are recognized by GETOB, although
- not included in the language definition. While they will not
- appear in GenBank entries, they could be used in user-created
- GenBank-format files:
-
- contig
- This feature key is meant to be used to assemble large
- sequence segments from smaller segments, possibly using the
- poly() operator.
-
- chromosome
- Intended to annotate the complete sequence of a chromosome. This
- feature may be constructed by a join of two or more contigs.
-
- Use of these keywords is illustrated in the features table
- shown below, which could be used to construct a model of part
- of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb:
-
- contig join(J01619:1..13063,poly("n",7140),
- J03939:1..1363,poly("n",14380),
- X02306:complement(1..1622),poly("n",14710),
- J04423:1..5793,poly("n",22500),
- X03722:1..2400,poly("n",123750),
- one-of(X05017:complement(1..1854),X05017:1..1854))
- /label=Eco_contig8
- /map=763.4-950.6kb
- contig join(V00352:1..2412,poly("n",28800),M15273:1..3409)
- /label=Eco_contig9
- /map=972.9-1001.7kb
- contig join(X02826:1..1357,poly("n",13540),
- J01654:complement(1..2270))
- /label=Eco_contig10
- /map=1016.5-1031.4kb
- chromosome join(Eco_contig8,poly("n",22300),
- Eco_contig9,poly("n",14800),
- Eco_contig10)
- /label=Ecoli_chromosome
-
- NOTES
- 1) If the const DEBUG is set to true in the Pascal source code, getob
- writes messages to the standard output, indicating the progress of
- processing for each entry read in. By default, DEBUG=false.
- This feature is solely for debugging purposes and will be removed in
- later releases.
-
- 2) GETOB automatically expands leading blanks that have been
- compressed using splitdb -c. See splitdb.doc for more information.
-
- SEE ALSO
- features, splitdb, getloc
- The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04
- September 1, 1992
- GenBank Release Notes for Release 79.0.
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/identify.doc b/CORE/xylem/identify.doc
deleted file mode 100644
index 56ced71..0000000
--- a/CORE/xylem/identify.doc
+++ /dev/null
@@ -1,83 +0,0 @@
-
- IDENTIFY update 3 Feb 94
-
-
- NAME
- identify - creates a file of locus names corresponding to lines
- found by grep in a GenBank annotation file.
-
- SYNOPSIS
- identify grepfile indfile namefile findfile
-
- DESCRIPTION
- grepfile is created using the Unix grep command to search a .ano
- file created by splitgb. For example, to find all lines containing
- the word 'chlorophyll' in plant.ano, use
-
- grep -n -i 'chlorophyll' plant.ano > plant.grep
-
- In the example shown, the -n option causes each line written to
- plant.grep to be preceded by the number of that line in plant.ano.
- (The -i option causes grep to ignore case.) Identify can use the
- indfile to determine which entry a given numbered line was found
- in, and writes the corresponding LOCUS name to namefile. In
- addition, all lines found in a given entry are re-written to
- findfile without the line numbers, and preceded by the LOCUS name
- for that entry.
-
- EXAMPLES
- Suppose you wanted to obtain a list of names for all plant
- sequences which code for proteins. The task is complicated by the
- fact that many fungal sequences are included in the GenBank plant
- file. You could begin by searching plant.ano (containing all
- GenBank plant entries) for the word 'Planta':
-
- grep -n 'Planta' plant.ano > Planta.grep
-
- However, we want to eliminate all fungal sequences, as well as all
- sequences for RNAs other than mRNAs. If we create the file
- bad.str containing the keywords
-
- Mycophyta
- tRNA
- rRNA
- uRNA
-
- we can then type
-
- grep -n -f bad.str plant.ano > bad.grep
-
- bad.grep now contains all lines containing the offending keywords.
- We next use identify to find the names of the entries found by
- grep.
-
- identify Planta.grep plant.ind Planta.nam Planta.fnd
- identify bad.grep plant.ind bad.nam bad.fnd
-
- Next, we can use the Unix comm command to compare the two .nam
- files and produce an output file containing only names which are
- present in Planta.nam but not bad.nam:
-
- comm -23 Planta.nam bad.nam > plants.nam
-
- The file plants.nam now contains names of either plant cDNA or
- genomic sequences which do not code for structural RNAs.
- At this point, getloc could be used to create a sub-database containing
- only those entries listed in plants.nam. See documentation for
- getloc for a more detailed discussion.
-
- SEE ALSO
- grep, fgrep, egrep, ngrep, comm, splitgb, getloc
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/keyfile.template b/CORE/xylem/keyfile.template
deleted file mode 100644
index 66ac651..0000000
--- a/CORE/xylem/keyfile.template
+++ /dev/null
@@ -1,23 +0,0 @@
-;---------------------------------------------------------------------------
-; FINDKEY/GDE Keyword File Instructions
-;
-; 1. Type in one or more keywords below,
-; or
-; Place cursor at end of this file and choose 'Include File' in the FILE
-; menu to read in a file of keywords.
-;
-; 2. Choose 'Save Current File' in the File menu
-; 3. Quit this window
-;
-; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT
-; OUT THESE COMMENT LINES.
-;
-; NOTE: Put each keyword on a separate line
-; SAMPLE KEYWORD FILE:
-;
-; maize
-; corn
-; Z.mays
-; Zea
-;---------------------------------------------------------------------------
-
diff --git a/CORE/xylem/namefile.template b/CORE/xylem/namefile.template
deleted file mode 100644
index cd63482..0000000
--- a/CORE/xylem/namefile.template
+++ /dev/null
@@ -1,25 +0,0 @@
-;---------------------------------------------------------------------------
-; FETCH/GDE Name/Accession File Instructions
-;
-; 1. Type in one or more LOCUS names or Accession #'s below,
-; or
-; Place cursor at end of this file and choose 'Include File' in the FILE
-; menu to read in a file of names or accession #'s.
-; or
-; Copy names or accession #'s from another window and Paste into this window.
-;
-; 2. Choose 'Save Current File' in the File menu
-; 3. Quit this window
-;
-; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT
-; OUT THESE COMMENT LINES.
-;
-; NOTE: Put each name on a separate line
-; SAMPLE NAME/ACCESSION FILE:
-;
-; X30412
-; PSDRR1
-; PEADRRG
-;
-;---------------------------------------------------------------------------
-
diff --git a/CORE/xylem/names.template b/CORE/xylem/names.template
deleted file mode 100644
index e2e4f23..0000000
--- a/CORE/xylem/names.template
+++ /dev/null
@@ -1,25 +0,0 @@
-;---------------------------------------------------------------------------
-; FEATURES/GDE Name File Instructions
-;
-; 1. Type in one or more GenBank LOCUS names below,
-; or
-; Place cursor at end of this file and choose 'Include File' in the FILE
-; menu to read in a file of names.
-;
-; (NOTE: File can not contain accession numbers.)
-;
-; 2. Choose 'Save Current File' in the File menu
-; 3. Quit this window
-;
-; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
-; OUT THESE COMMENT LINES.
-;
-; NOTE: Put each name on a separate line
-; SAMPLE NAME FILE:
-;
-; PEADRRA
-; PSDRR1
-; PEADRRG
-;
-;---------------------------------------------------------------------------
-
diff --git a/CORE/xylem/printdoc.doc b/CORE/xylem/printdoc.doc
deleted file mode 100644
index 8ca092d..0000000
--- a/CORE/xylem/printdoc.doc
+++ /dev/null
@@ -1,56 +0,0 @@
- printdoc update 3 Feb 94
-
- NAME
- printdoc - prints documentation files
-
- SYNOPSIS
- printdoc filename
-
- DESCRIPTION
- printdoc uses the file extension to decide how to print a
- documentation file. If necessary, a filter such as pr or nroff
- is used to format the file before sending to the appropriate
- printer. A list of file extensions recognized by printdoc is
- given below. If no file extension is given, or the extension is
- not in the list, printdoc assumes .doc.
-
- .doc - (default) Uses pr to print the text, using the default
- settings provided by pr (56 text lines per page plus a 5 line
- header and footer). Printing is at 12 cpi, front only. This works
- reasonably well for most unformatted documentation files,
- provided that the line length doesn't exceed 80 char. This
- option assumes that a half-inch left margin is automatically
- provided by the printer.
-
- .tex - Assumes that document is already pre-formatted. Thus,
- no headers or footers are provided, and it is assumed that
- the top and bottom of pages are padded with blanks or header/
- footer lines as needed. Form-feed characters (^L) may be
- included in the text to force page breaks.
-
- .ps - Assumes file is in PostScript format. Sends it to the
- PostScript printer.
-
- .nroff - Assumes file is formatted for use by nroff, using the
- standard macro set (nroff -ms).
-
- .nroff.me - Assumes file is formatted for use by nroff, using the
- e macro set (nroff -me).
-
- TRANSPORTATION NOTES
- For reasons which should be obvious, this script needs major
- rewriting at each site, since the available printers will
- be of different types and have different names.
-
- SEE ALSO
- pr, pr(V), xlp, nroff
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
diff --git a/CORE/xylem/prot2nuc.doc b/CORE/xylem/prot2nuc.doc
deleted file mode 100644
index 0212a58..0000000
--- a/CORE/xylem/prot2nuc.doc
+++ /dev/null
@@ -1,123 +0,0 @@
- prot2nuc update 10 Aug 94
-
- NAME
- prot2nuc - reverse translates protein into nucleic acid
-
- SYNOPSIS
- prot2nuc [-ln -gn] < input > output
-
- DESCRIPTION
- prot2nuc reads a file containing an amino acid sequence
- and writes the corresponding reverse translated nucleic acid
- sequence, using the standard IUPAC-IUB ambiguity codes to output.
- The amino acid sequence may contain internal stop '*' characters.
- That is, all legal amino acid characters will be processed.
-
- -ln print n amino acids/codons per line. (default = 25)
-
- -gn number the amino acid sequence every n amino acids/codons.
- (default = 5)
-
- If l is not evenly divisible by g, the defaults are used.
-
- input - If the first line of the file begins with '>' or ';',
- input will be read as the standard .wrp (Pearson) format,
- such as that produced by getob:
-
- >name
- sequence lines
-
-
- Otherwise, it will be assumed that the file ONLY contains
- sequence, and all legal IUPAC/IUB DNA characters will be
- read as sequence.
-
- output - The output begins with a header, listing both the
- 1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559
- (1968)], as well as the nucleic acid ambiguity codes [Cornish-
- Bowden (1985) Nucl. Acids Res. 13:3021-3030.]. The amino acid
- sequence, along with its reverse translation, are then printed on
- lines of l amino acids/codons, numbering every g amino acids/codons.
- Non-ambiguous nucleotides appear capitalized, while ambiguous
- nucleotides are in lowercase. A sample output file appears below:
-
- PROT2NUC Version 8/10/94
-
- IUPAC-IUP AMINO ACID SYMBOLS
- [J. Biol. Chem. 243, 3557-3559 (1968)]
-
- Phe F Leu L Ile I
- Met M Val V Ser S
- Pro P Thr T Ala A
- Tyr Y His H Gln Q
- Asn N Lys K Asp D
- Glu E Cys C Trp W
- Arg R Gly G STOP *
- Asx B Glx Z UNKNOWN X
-
-
- IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE
- [Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.]
-
- Symbol Meaning | Symbol Meaning
- ------------------------------------+---------------------------------
- G Guanine | k G or T
- A Adenine | s G or C
- C Cytosine | w A or T
- T Thymine | h A or C or T
- U Uracil | b G or T or C
- r Purine (A or G) | v G or C or A
- y Pyrimidine (C or T) | d G or T or A
- m A or C | n G or A or T or C
-
- pI39
- 5 10 15 20
- M E K K S L A A L S F L L L L V L F V A
- ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn
- AGyTTr TTrAGy TTrTTrTTrTTr TTr
-
- 25 30 35 40
- Q E I V V T E A N T C E H L A D T Y R G
- CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn
- TTr AGr
-
- 45 50 55 60
- V C F T N A S C D D H C K N K A H L I S
- GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn
- AGy TTr AGy
-
- 65 70
- G T C H D W K C F C T Q N C
- GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy
-
-
- With the Universal Genetic code, ambiguity symbols make it possible
- to represent all possible codons for an amino acid using two output
- lines. It is important to realize that the ambiguities on each line
- can not be combined. For example, CTn and TTr represent all codons for
- Leucine. However, attempting to combine them into a single triplet,
- yTn, would be incorrect. For example, TTT and TTC are codons for
- Phenylalanine, not Leucine.
-
- FUTURE PLANS
- 1. It wouldn't be hard to have the output printed as nucleic acid
- sequences in Pearson format, so that the output could be read back
- into GDE. I don't know why you would want to do this, but it could
- be done.
- 2. Right now, only the Universal Genetic Code is used, but it should
- be possible to read in alternative genetic codes, have prot2nuc
- figure out the ambiguity rules (as is already done in ribosome) and
- print out the appropriate ambiguous codons.
- 3. It might be useful to have each possible codon printed out, rather
- than ambiguous codons. This would take up a lot more space and
- wouldn't be as pretty. If there's a lot of demand I could do this.
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
diff --git a/CORE/xylem/reform.doc b/CORE/xylem/reform.doc
deleted file mode 100644
index add7a38..0000000
--- a/CORE/xylem/reform.doc
+++ /dev/null
@@ -1,107 +0,0 @@
- reform update 3 Feb 94
-
- NAME
- reform - reformats multiply-aligned sequences for printing.
-
- SYNOPSIS
- reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}]
- or
- ralign file parameters | reform [-gpcn] [-sn] [-ln] file
-
- DESCRIPTION
-
- g Gaps are to be represented by dashes (-).
- p Bases which agree with the consensus are
- represented by periods (.).
- c Positions at which all sequences agree are
- capitalized in the consensus.
- n Sequence data is nucleic acid. Protein default
- fx Specify input file format, where x is
- r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics)
- m Input file contains multiline format sequences already aligned,
- as opposed to ralign output. This option is obsolete, and is
- equivalent to -fp.
- ln The output linelength is set to n.
- Default is 70.
- sn numbering starts with n (default=0)
-
- file Sequence file as described in ralign docu-
- mentation. reform needs to re-read the
- sequence file read by ralign to get the
- names of the sequences, which ralign ignores.
- This filename is only included for ralign output.
- If -m is set, file is ignored, and sequence names
- must be read from the input.
-
- Note that positions in the consensus at which no nucleotide is in the
- majority are represented by n's (for nucleic acids) or x's (for proteins),
- rather than periods, as in ralign.
-
- Gaps in the input sequences may be represented by either blanks or dashes.
-
- INPUT FILE FORMATS
-
- (a) ralign (default, -fr)
- As described in ralign documentation, the input file (which is assumed to
- be ralign output) must have each sequence on a single long line. All
- characters on a given line will be included in the alignment. All lines
- must be exactly the same length. For example, if ralign had read
- sequences from a file called 'allcab.seq' and written output to 'allcab.ral',
- the following command might be used:
-
- reform allcab.seq allcab.ref
-
- (b) Pearson (-fp, -m)
- Compatible with sequence files used by Pearson's fasta programs as shown:
- >name1
- sequence1
- >name2
- sequence2
- ...
- >namen
- sequencen
-
- Sequences may run over many lines and line length does not have to be
- uniform. However, both dashes ('-') and blanks (' ') will be read in
- as gaps in the alignment. A right arrow (>) at the beginning of a line
- indicates the name line at the beginning of a new sequence.
-
- Any line beginning with a semicolon (';') will be considered a comment,
- and will be ignored.
-
- (c) MBCRR-MASE (Intelligenetics) (-fi)
- Compatible with .mase files produced by MBCRR's mase and pima programs,
- which use the Intelligenetics format as shown:
-
- ;one or more comment lines
- name1
- sequence1
- ;one or more comment lines
- name2
- sequence2
- ...
- ;one or more comment lines
- namen
- sequencen
-
- Sequences may run over many lines and line length does not have to be
- uniform. However, both dashes ('-') and blanks (' ') will be read in
- as gaps in the alignment. Each sequence MUST begin with at least one
- comment line. When a comment line is encountered, that signals the
- beginning of a new sequence. The first line after the comment is read
- as the name, and the sequence begins on the next line after that.
-
- SEE ALSO ralign, mase
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/ribosome.doc b/CORE/xylem/ribosome.doc
deleted file mode 100644
index df13855..0000000
--- a/CORE/xylem/ribosome.doc
+++ /dev/null
@@ -1,84 +0,0 @@
- ribosome update 3 Feb 94
-
- NAME
- ribosome - translates nucleic acid into protein
-
- SYNOPSIS
- ribosome [-g gcfile] < input > output
-
- DESCRIPTION
- ribosome reads a file of one or more nucleic acid sequences
- and writes the corresponding amino acid sequence, in the standard
- one letter code, to output. Ribosome begins translating at the
- first nucleotide in each input sequence and continues to the end.
- If the length of the translated sequence is not divisible by 3,
- ribosome pads the final codon with N's and attempts to use ambi-
- guity rules to translate the final codon. Based on the genetic
- code used, ribosome derives a set of rules to resolve all ambi-
- guities that can possibly be resolved.
-
- -g read in an alternative genetic code from gcfile. If this
- option is not specified, ribosome uses the universal
- genetic code.
-
- gcfile - This file specifies an alternative genetic code. An
- example is shown below. ribosome reads the first 64 legal
- capital letters as amino acids. Consequently, lowercase letters
- can be used for annotation purposes, as shown in the example.
- All non-amino acid characters are ignored.
-
- sgc2 - yeast mitochondrial genetic code
-
- second position
- first position ------------------------------- third position
- (5' end) u c a g (3' end)
- -----------------------------------------------------------------
- u F S Y C u
- F S Y C c
- L S * W a
- L S * W g
- -----------------------------------------------------------------
- c T P H R u
- T P H R c
- T P Q R a
- T P Q R g
- -----------------------------------------------------------------
- a I T N S u
- I T N S c
- M T K R a
- M T K R g
- -----------------------------------------------------------------
- g V A D G u
- V A D G c
- V A E G a
- V A E G g
-
-
- input - If the first line of the file begins with '>' or ';',
- input will be read as the standard .wrp (Pearson) format,
- such as that produced by getob:
-
- >name
- ; one or more comment lines (optional)
- sequence lines
-
-
- Otherwise, it will be assumed that the file ONLY contains
- sequence, and all legal IUPAC/IUB DNA characters will be
- read as sequence.
-
- SEE ALSO
- getob
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/shuffle.doc b/CORE/xylem/shuffle.doc
deleted file mode 100644
index 77c69e8..0000000
--- a/CORE/xylem/shuffle.doc
+++ /dev/null
@@ -1,66 +0,0 @@
- shuffle.doc update 3 Feb 94
-
- SYNOPSIS
- shuffle -sn [-wn -on]
-
- DESCRIPTION
- Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF
- and Waterman MS (1984) On the statistical significance of nucleic
- acid similarities. Nucl. Acids Res. 12:215-226.
- -sn n is a random integer between 0 and 32767. This number
- must be provided for each run.
-
- -wn n is an integer, indicating the width of the window for
- random localization. If w exceeds the length of a sequence,
- or is negative, the entire sequence is scrambled as a single
- window. This is also the case if w is not specified.
-
- -on n is an integer, indicating the number of nucleotides
- overlap between adjacent windows. It should never exceed
- the window size. o defaults to 0 if not specified.
-
- If w and o are specified, overlapping windows of w nucleotides
- are shuffled, thus preserving the local characteristic base
- composition. Windows overlap by o nucleotides.
- If w and o are not specified, each sequence is shuffled globally,
- thus preserving the overall base composition, but not the local
- variations in comp.
-
- Any number of sequences may be processed from a single input
- file. In Pearson-format files, each new sequence begins with a
- '>' comment line, indicating the name and a short description of
- the sequence.
-
- No distinction is made between protein or nucleic acid sequences.
- That is, shuffle will read any of the following characters as
- sequence:
-
- T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,-
-
- where '*' is the result of translating a stop codon, and '-'
- is a gap generated during sequence alignment. Lowercase is
- also accepted.
-
- EXAMPLE
- A sample output file is shown below. Note that the first two
- lines of output are comment lines, listing the version of the
- program and the parameters used in the run.
-
- >SHUFFLE VERSION 11/ 8/93
- >RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3
- >BAZFAZ - Borborigmus azerbi F-actin-zeta gene
- ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt
- cacccttgggcagtg.....(etc.)
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/splitdb.doc b/CORE/xylem/splitdb.doc
deleted file mode 100644
index 49e97c2..0000000
--- a/CORE/xylem/splitdb.doc
+++ /dev/null
@@ -1,141 +0,0 @@
-
- SPLITDB update 28 Mar 98
-
-
- NAME
- splitdb - split GenBank files into annotation, sequence, and index
-
- SYNOPSIS
- splitdb [-gepvlct] dbfile anofile seqfile indfile
-
- DESCRIPTION
- Splitdb splits a database (dbfile) among three files: anofile, seqfile
- and indfile. Splitdb ignores any header information that might be in the
- file and begins processing at the first entry.
-
- anofile contains the annotation portion of each entry. Entries are
- terminated with '//' or '///' (PIR only). Trailing blanks present in
- dbfile are omitted in anofile.
-
- seqfile contains the sequence data for each entry. Each sequence
- entry begins with a header line, followed by sequence data on
- succeeding lines of 75 characters per line. The header line
- includes the header flag character '>' in column 1, followed by the
- name, followed by the first 50 characters of the 1st
- DEFINITION line. An example is shown below:
-
- >UNHOR1 - Unicorn horn protein 1, complete cDNA sequence
- attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc
- ...
-
- Removal of blanks and numbers from sequence lines makes split
- datasets about 8-9% smaller than the original GenBank files.
-
- indfile is an index which tells the line numbers for each entry in
- anofile and seqfile. It is assumed to be in alphabetical order by
- name. Each line contains a name and accession number, followed by the
- line numbers on which the annotation and sequence data begin in anofile
- and seqfile, respectively. Thus the file plants.ind might contain:
-
-
- A15660 TA156608 1 1
- A15671 A15671 33 11
- A15673 A15673 65 25
- A15675 AK156751 97 36
- A15677 BA156770 128 46
- A16780 BA167807 160 57
- A16782 A16782 192 70
- ATHRPRP1C GM905105 225 83
- etc...
-
- Note that indfile is a perfectly legitimate .nam file, for use with
- programs such as getloc, getob, or comm.
-
-
- The following options identify the type of database being read:
-
- -g GenBank (default)
- -e EMBL
- -p PIR (NBRF)
- -v Vecbase
- -l LiMB
-
- Other options:
- -c Compress 3 or more leading blanks in annotation lines
- to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG
- is the ASCII character specified by the Pascal const
- CRUNCHOFFSET, which is set to 33 ("!") in the current
- implementation. For each annotation line read, if the
- number of leading blanks is >=3, splitdb sets CRUNCHCHAR
- to CRUNCHOFFSET+the number of blanks. Thus, for lines
- with 3, 4, or 5 leading blanks, CRUNCHCHAR would be
- '$', '%' and '&', respectively. GETLOC and GETOB
- automatically expand crunched blanks when CRUNCHFLAG
- is encountered on an input line. Empirical observations
- indicate that the -c option decreases the size of
- GenBank files by about 10%.
-
- This compression method may fail when the number of
- leading blanks exceeds 127-CRUNCHOFFSET. However,
- none of the above mentioned databases currently
- supports any datafield with anywhere near that number
- of leading blanks.
-
- -t (GenBank only) Append all information in the first
- ORGANISM to the end of each line in indfile. For example,
- the entry which begins:
-
- LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996
- DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon
- Zoological Gardens) mitochondrial D-loop DNA.
- ACCESSION L76759
- NID g1222584
- KEYWORDS D-loop.
- SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom,
- ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male
- DNA.
- ORGANISM Mitochondrion Gorilla gorilla gorilla
- Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata;
- Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla.
-
- might be indexed as
-
- GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla
-
- This is useful for taxonomic studies, or as a way of making
- it easy to create subsets from a single index. Thus,
- 'grep gorilla primates.ind' would print all lines in the
- file that contained the word gorilla. The output from
- this command could be used as a .nam file for extracting
- just gorilla sequences from a larger dataset using
- fetch.
-
-
- NOTES
- 1. Header lines that aren't part of entries are automatically
- stripped out during processing. For example, in a file containing
- GenBank entries, all lines up to the first occurrence of 'LOCUS'
- starting in column 1, are ignored. Similarly for PIR, processing
- begins on the first line containing 'ENTRY' beginning in column 1.
- 2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996,
- have accession numbers of 8 characters, rather than 6. Previously
- assigned accession numbers will remain at 6 characters. Splitdb has
- been updated to write all accession numbers to the .ind file, left
- justified in a field of 8 characters, in columns 14-21 of the .ind
- file.
-
- SEE ALSO
- getloc, getob, comm(1) (Unix command).
-
- AUTHOR
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB Canada R3T 2N2
- Phone: 204-474-6085
- FAX: 204-261-5732
- frist@cc.umanitoba.ca
-
- REFERENCE
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
diff --git a/CORE/xylem/xylem.doc b/CORE/xylem/xylem.doc
deleted file mode 100644
index e8bf2cd..0000000
--- a/CORE/xylem/xylem.doc
+++ /dev/null
@@ -1,125 +0,0 @@
-
-
- XYLEM.DOC update 10 Aug 1994
-
- XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES
- Brian Fristensky, University of Manitoba
-
- Fristensky, B. (1993) Feature expressions: creating and manipulating
- sequence datasets. Nucleic Acids Research 21:5997-6003.
-
- SPLITDB - Splits files containing one or more GenBank entries into
- annotation, sequence, and index files. Indexfiles can also serve as
- namefiles for GETLOC. Sequence files are in the format required for
- use with the Pearson programs (FASTA,LFASTA etc.).
-
- GETLOC - Reads a file containing LOCUS names (namefile) and
- retrieves either annotation, sequence, or both from a split
- database or database subset created by SPLITDB.
-
- FETCH - A c-shell script that provides a convenient menu-driven
- front end for retrieval of database entries using GETLOC.
-
- FINDKEY - A c-shell script that provides a convenient menu-driven
- front end for keyword searches of database annotation files,
- using IDENTIFY.
-
- IDENTIFY- Given line-numbered output from grep, IDENTIFY uses the
- index file to determine which entries contained the keywords
- searched for by grep. It then produces a namefile for use by
- GETLOC. Namefiles can serve as logical databases, and utilities
- such as the Unix comm command can perform logical operations on
- these namefiles to produce database subsets.
-
- FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS
- etc.) from each of the named entries, using the new
- DDBJ/EMBL/GenBank International Features Table Format. A future
- version will also allow the annotation of sites within objects that
- are extracted.
-
- DBSTAT - Calculates amino acid frequencies in a protein database.
-
- RIBOSOME - Given a file of one or more nucleic acids (eg. output
- from GETOB) , RIBOSOME translates them into protein, using either
- the universal genetic code or an alternative genetic code supplied
- by the user. All ambiguities that can be resolved are translated.
-
- PROT2NUC - reverse translates a sequence from protein to nucleic
- acid, using IUPAC-IUB ambiguity codes.
-
- SHUFFLE - Given a random seed, shuffles each sequence in a Pearson-
- format (.wrp) file. Shuffling is done locally in overlapping windows
- across the length of a given sequence. The window size and overlap
- length can be specified by the user.
-
- REFORM - Reformats multiply aligned nucleic acid or protein
- sequences for publication. Output for M. Waterman's RALIGN
- program, or the MBCRR MASE editor, can be directly used as input.
- A variety of options are available for representing gaps, consensus
- sequences and other features.
-
- Fristensky (Cornell) Sequence Analysis Package - General purpose
- sequence analysis package written in Standard Pascal. Features
- include: sequence numbering, formatting, & translation, restriction
- site searches & mapping, matrix similarity searches, TESTCODE
- analysis, base composition analysis. All programs are interactive
- and read free-format, BIONET, and GenBank files.
-
-
-
-
-
-
-
- XYLEM DATABASE TOOLS
-
-
-
- ----------
- | .gen | getloc
- |----------|<--------------------------
- | GenBank | |
- ---------- |
- | |
- | splitgb |
- /|\ |
- / | \ |
- / | \ |
- / | \ |
- / | \ |
- / | \ |
- v v v |
- ---------- ---------- ---------- |
- | .ano | | .wrp | | .ind | |
- |----------| |----------| |----------| |
- |annotation| | sequence | | index | |
- ---------- ---------- ---------- |
- | \ | / |
- | \ | / |
- | \ | / |
- | \ | / |
- grep -n | \ | / |
- | \ | / |
- | | |
- | | -------------------------------+
- | ^ |
- v | getob |
- ---------- ---------- v
- | .grep | identify | .nam | ----------
- |----------| --------->|----------| | .wrp |
- | numbered | | LOCUS | ----------
- |file lines| ---------- | eg. mRNA |
- ---------- | ^ | tRNA |
- | | | rRNA |
- | | | CDS |
- --comm-- ----------
- (logical operations on
- sets of names)
-
- Dr. Brian Fristensky
- Dept. of Plant Science
- University of Manitoba
- Winnipeg, MB R3T 2N2 CANADA
- 204-474-6085
- frist@cc.umanitoba.ca
-
diff --git a/HGL_SRC/Alloc.o b/HGL_SRC/Alloc.o
deleted file mode 100755
index 0269c43..0000000
Binary files a/HGL_SRC/Alloc.o and /dev/null differ
diff --git a/HGL_SRC/Consto01mask b/HGL_SRC/Consto01mask
deleted file mode 100755
index 16b4c92..0000000
Binary files a/HGL_SRC/Consto01mask and /dev/null differ
diff --git a/HGL_SRC/DotPlotTool b/HGL_SRC/DotPlotTool
deleted file mode 100755
index b0dfb05..0000000
Binary files a/HGL_SRC/DotPlotTool and /dev/null differ
diff --git a/HGL_SRC/HGLfuncs.o b/HGL_SRC/HGLfuncs.o
deleted file mode 100755
index 05f85b1..0000000
Binary files a/HGL_SRC/HGLfuncs.o and /dev/null differ
diff --git a/HGL_SRC/MAP_ChooseFile.o b/HGL_SRC/MAP_ChooseFile.o
deleted file mode 100755
index 2bbac68..0000000
Binary files a/HGL_SRC/MAP_ChooseFile.o and /dev/null differ
diff --git a/HGL_SRC/MakeCons b/HGL_SRC/MakeCons
deleted file mode 100755
index a1777f5..0000000
Binary files a/HGL_SRC/MakeCons and /dev/null differ
diff --git a/HGL_SRC/Makefile b/HGL_SRC/Makefile
index 95a04e1..9632d10 100755
--- a/HGL_SRC/Makefile
+++ b/HGL_SRC/Makefile
@@ -1,10 +1,10 @@
CC = cc
-#FLAGS = -g
-OPENWINHOME = /usr/openwin
+FLAGS = -m32
+OPENWINHOME = ../usr
MFILE =
-INCDIR = -I$(OPENWINHOME)/include
-LIBDIR = -L$(OPENWINHOME)/lib
+INCDIR = -I/usr/include/xview
+LIBDIR = -L/usr/lib32
LIBS = -lxview -lolgx -lX11
libs.o = Alloc.o HGLfuncs.o
diff --git a/HGL_SRC/PrintStrat b/HGL_SRC/PrintStrat
deleted file mode 100755
index 0087fa7..0000000
Binary files a/HGL_SRC/PrintStrat and /dev/null differ
diff --git a/HGL_SRC/Translate b/HGL_SRC/Translate
deleted file mode 100755
index 91e68fc..0000000
Binary files a/HGL_SRC/Translate and /dev/null differ
diff --git a/HGL_SRC/heapsortHGL b/HGL_SRC/heapsortHGL
deleted file mode 100755
index b692971..0000000
Binary files a/HGL_SRC/heapsortHGL and /dev/null differ
diff --git a/HGL_SRC/install.csh b/HGL_SRC/install.csh
deleted file mode 100755
index eee9c3d..0000000
--- a/HGL_SRC/install.csh
+++ /dev/null
@@ -1,5 +0,0 @@
-#/bin/csh
-make all
-cp Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool ../bin
-rm Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool
-rm *.o
diff --git a/HGL_SRC/mapview b/HGL_SRC/mapview
deleted file mode 100755
index 52453f6..0000000
Binary files a/HGL_SRC/mapview and /dev/null differ
diff --git a/ZUKER/rfd.inc b/ZUKER/rfd.inc
index bab54f7..691f11f 100755
--- a/ZUKER/rfd.inc
+++ b/ZUKER/rfd.inc
@@ -1,33 +1,33 @@
implicit integer (a-z)
- parameter (maxn=1500,maxn2=3000)
- parameter (fldmax=maxn2)
+
+c parameter (maxn=625,fldmax=2*maxn)
+ parameter (maxn=1500,maxn2=3000)
+ parameter (fldmax=maxn2)
parameter (infinity=16000,sortmax=30000)
parameter (mxbits=(maxn*(maxn+1)+31)/32)
parameter (maxtloops=40)
parameter (maxsiz=10000)
- integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn)
+ integer*2 vst(maxn*maxn),wst(maxn*maxn)
integer poppen(4),maxpen
real prelog
-
- dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2),
- . work1(maxn2,0:2),work2(maxn2),
+ dimension newnum(maxsiz),hstnum(fldmax),force(fldmax),
+ . numseq(fldmax), work(fldmax,0:2),
. stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30)
dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2)
-c common /main/ newnum,hstnum,force,work1,work2,
- common /main/ newnum,hstnum,force,work1,work2,
- . stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n,
- . numseq,poppen,prelog,maxpen,vst,wst1,wst2
+ common /main/ vst,wst,newnum,hstnum,force,numseq,work,stack,tstk,
+ . dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog
character*1 seq(maxsiz)
c character*5 inbuf
character*10 progtitle
character*30 seqlab
common /seq/ seq,seqlab
+ data progtitle/'crna'/
+
dimension list(100,4)
common /list/ list,listsz
- common /nm/ vmin
- data progtitle/'lrna'/
+ common /nm/ n,vmin
dimension basepr(maxn)
common /traceback/ basepr
@@ -40,21 +40,3 @@ c character*5 inbuf
integer*2 tloop(maxtloops,2),numoftloops
common/tloops/tloop,numoftloops
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-