Compare commits
18 Commits
Author | SHA1 | Date |
---|---|---|
kuoi | d16e0470a1 | |
kuoi | 416ce85007 | |
kuoi | 6e65224051 | |
kuoi | 4cb1529d40 | |
kuoi | ba8f936bc2 | |
kuoi | 26cf4b741f | |
kuoi | 8a501c58ed | |
kuoi | f20ecc9d03 | |
kuoi | de8480df9a | |
kuoi | 339d05a4d8 | |
kuoi | 30b16b2a1e | |
kuoi | b307d12174 | |
kuoi | 8092ada581 | |
kuoi | cd056bb91b | |
kuoi | a80e729e21 | |
kuoi | b2951105ae | |
kuoi | 525c9eb122 | |
kuoi | 15feb1c6c9 |
836
CORE/.GDEmenus
836
CORE/.GDEmenus
File diff suppressed because it is too large
Load Diff
|
@ -1,761 +0,0 @@
|
|||
menu:File
|
||||
|
||||
item:test cmask output
|
||||
itemmethod: kedit in1
|
||||
|
||||
in:in1
|
||||
informat:colormask
|
||||
|
||||
item:New sequence <meta N>
|
||||
itemmethod:echo "$Type$Name" > out1
|
||||
itemmeta:n
|
||||
itemhelp:new_sequence.help
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:New Sequence name?
|
||||
argtext:New
|
||||
|
||||
arg:Type
|
||||
argtype:choice_list
|
||||
arglabel:Type?
|
||||
argchoice:DNA/RNA:#
|
||||
argchoice:Amino Acid:%
|
||||
argchoice:Text:\"
|
||||
argchoice:Mask:@
|
||||
|
||||
out:out1
|
||||
outformat:flat
|
||||
|
||||
item:Import Foreign Format
|
||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:INPUTFILE
|
||||
argtype:text
|
||||
arglabel:Name of foreign file?
|
||||
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:Export Foreign Format
|
||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:FORMAT
|
||||
argtype:choice_list
|
||||
argchoice:FASTA:8
|
||||
argchoice:NEXUS:17
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:IG/Stanford:1
|
||||
argchoice:GenBank:2
|
||||
argchoice:NBRF:3
|
||||
argchoice:EMBL:4
|
||||
argchoice:GCG:5
|
||||
argchoice:DNA Strider:6
|
||||
argchoice:Fitch:7
|
||||
argchoice:Pearson:8
|
||||
argchoice:Zuker:9
|
||||
argchoice:Olsen:10
|
||||
argchoice:Phylip v3.2:11
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:Plain text:13
|
||||
|
||||
arg:OUTPUTFILE
|
||||
argtype:text
|
||||
arglabel:Save as?
|
||||
|
||||
in:INPUTFILE
|
||||
informat:genbank
|
||||
|
||||
|
||||
item:Save Selection
|
||||
itemmethod: cat $SAVE_FUNC > $Name
|
||||
itemhelp:save_selection.help
|
||||
|
||||
arg:SAVE_FUNC
|
||||
argtype:chooser
|
||||
arglabel:File format
|
||||
argchoice:Flat:in1
|
||||
argchoice:Genbank:in2
|
||||
argchoice:GDE/HGL:in3
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:File name?
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
in:in2
|
||||
informat:genbank
|
||||
|
||||
in:in3
|
||||
informat:gde
|
||||
|
||||
item:Print Selection
|
||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
||||
itemhelp:print_alignment.help
|
||||
|
||||
arg:SCALE
|
||||
argtype:slider
|
||||
arglabel:Reduce printout by?
|
||||
argmin:1
|
||||
argmax:20
|
||||
argvalue:1
|
||||
|
||||
arg:CMD
|
||||
argtype:chooser
|
||||
argchoice:Lpr:lpr
|
||||
argchoice:Enscript Gaudy:enscript -G -q
|
||||
argchoice:Enscript Two column:enscript -2rG
|
||||
|
||||
arg:PRINTER
|
||||
argtype:text
|
||||
arglabel:Which printer?
|
||||
argtext:lp
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
menu:Edit
|
||||
|
||||
item:Sort
|
||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
||||
itemhelp:heapsortHGL.help
|
||||
|
||||
arg:PRIM_KEY
|
||||
argtype:choice_list
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Primary sort field?
|
||||
|
||||
arg:SEC_KEY
|
||||
argtype:choice_list
|
||||
argchoice:None:
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Secondary sort field?
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:extract
|
||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
menu:DNA/RNA
|
||||
|
||||
item:Translate...
|
||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
||||
|
||||
arg:FRAME
|
||||
argtype:chooser
|
||||
arglabel:Which reading frame?
|
||||
argchoice:First:1
|
||||
argchoice:Second:2
|
||||
argchoice:Third:3
|
||||
argchoice:All six:6
|
||||
|
||||
arg:MNFRM
|
||||
arglabel:Minimum length of AA sequence to translate?
|
||||
argtype:slider
|
||||
argmin:0
|
||||
argmax:100
|
||||
argvalue:20
|
||||
|
||||
arg:LTRCODE
|
||||
argtype:chooser
|
||||
arglabel:Translate to:
|
||||
argchoice:Single letter codes:
|
||||
argchoice:Triple letter codes:-3
|
||||
|
||||
arg:TBL
|
||||
arglabel:Codon table?
|
||||
argtype:chooser
|
||||
argchoice:universal:1
|
||||
argchoice:mycoplasma:2
|
||||
argchoice:yeast:3
|
||||
argchoice:Vert. mito.:4
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
item:Dot plot
|
||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
||||
itemhelp:DotPlotTool.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:Clustal alignment
|
||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
||||
|
||||
itemhelp:clustal_help
|
||||
|
||||
arg:KTUP
|
||||
argtype:slider
|
||||
arglabel:K-tuple size for pairwise search
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:2
|
||||
|
||||
arg:WIN
|
||||
argtype:slider
|
||||
arglabel:Window size
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:4
|
||||
|
||||
arg:Trans
|
||||
argtype:chooser
|
||||
arglabel:Transitions weighted?
|
||||
argchoice:Yes:/TRANSIT
|
||||
argchoice:No:
|
||||
|
||||
arg:FIXED
|
||||
argtype:slider
|
||||
arglabel:Fixed gap penalty
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:FLOAT
|
||||
arglabel:Floating gap penalty
|
||||
argtype:slider
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:REPORT
|
||||
argtype:chooser
|
||||
arglabel:View assembly report?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit in1.rpt&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
item:Variable Positions
|
||||
itemmethod:varpos $REV < in1 > out1
|
||||
|
||||
arg:REV
|
||||
argtype:chooser
|
||||
arglabel:Highlight (darken)
|
||||
argchoice:Conserved positions:
|
||||
argchoice:variable positions:-rev
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Phrap
|
||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
|
||||
out:out1
|
||||
outformat:genbank
|
||||
|
||||
item:SNAP
|
||||
itemmethod: cat in1 > infile;/usr/local/bio/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/bio/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/bio/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
out:out1
|
||||
outformat:text
|
||||
|
||||
|
||||
|
||||
|
||||
item:Find all <meta-f>
|
||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
||||
itemhelp:findall.help
|
||||
itemmeta:f
|
||||
|
||||
arg:SEARCH
|
||||
argtype:text
|
||||
arglabel:Search String
|
||||
|
||||
arg:PRCNT
|
||||
argtype:slider
|
||||
arglabel:Percent mismatch
|
||||
argmin:0
|
||||
argmax:75
|
||||
argvalue:10
|
||||
|
||||
arg:CASE
|
||||
argtype:chooser
|
||||
arglabel:Case
|
||||
argchoice:Upper equals lower:
|
||||
argchoice:Upper not equal lower:-case
|
||||
|
||||
arg:UT
|
||||
argtype:chooser
|
||||
arglabel:U equal T?
|
||||
argchoice:Yes:-u=t
|
||||
argchoice:No:
|
||||
argvalue:0
|
||||
|
||||
arg:MAT
|
||||
arglabel:Match color
|
||||
argtype:choice_list
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:2
|
||||
|
||||
arg:MIS
|
||||
argtype:choice_list
|
||||
arglabel:Mismatch color
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:7
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Sequence Consensus
|
||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
||||
itemhelp:MakeCons.help
|
||||
|
||||
arg:METHOD
|
||||
arglabel:Method
|
||||
argtype:chooser
|
||||
argchoice:IUPAC:-iupac
|
||||
argchoice:Majority:-majority $PERCENT
|
||||
|
||||
arg:MASK
|
||||
argtype:chooser
|
||||
arglabel:Create a new:
|
||||
argchoice:Sequence:
|
||||
argchoice:Selection Mask: | Consto01mask
|
||||
|
||||
arg:PERCENT
|
||||
arglabel:Minimum Percentage for Majority
|
||||
argtype:slider
|
||||
argmin:50
|
||||
argmax:100
|
||||
argvalue:75
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
|
||||
#Menu for DNA/RNA
|
||||
|
||||
item:blastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -M $MATCH > in1.tmp; kedit in1.tmp; rm in1*)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV-1 Seq. Db.:/usr/local/bio/db/DNA/hiv17-08-01.fasta2
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:MATCH
|
||||
argtype:slider
|
||||
arglabel:Match Score
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:5
|
||||
|
||||
arg:MMSCORE
|
||||
argtype:slider
|
||||
arglabel:Mismatch Score
|
||||
argmin:-10
|
||||
argmax:-1
|
||||
argvalue:-5
|
||||
|
||||
item:blastx
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/bio/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M PAM30 > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
||||
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
item:------------------------
|
||||
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: enter the file name
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: enter the name of the DB
|
||||
|
||||
menu:seq. datasets
|
||||
|
||||
item:-------------
|
||||
item:add a new dataset
|
||||
itemmethod:cp $file /usr/local/biotools/GDE/db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the dataset name ?
|
||||
|
||||
arg:file
|
||||
argtype:text
|
||||
arglabel:Enter the dataset file (in FASTA) ?
|
||||
|
||||
|
||||
#Menu for Protein
|
||||
menu:protein
|
||||
item:blastp
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
item:tblastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
|
||||
item:Map View
|
||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
||||
itemhelp:mapview.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
arg:PBL
|
||||
arglabel:Pixel Between Lines
|
||||
argtype:slider
|
||||
argvalue:10
|
||||
argmin:1
|
||||
argmax:15
|
||||
|
||||
arg:NPP
|
||||
arglabel:Nucleotides Per Pixel
|
||||
argtype:slider
|
||||
argvalue:1
|
||||
argmin:1
|
||||
argmax:20
|
||||
|
||||
arg:LWIDTH
|
||||
arglabel:Line Thickness
|
||||
argtype:slider
|
||||
argvalue:2
|
||||
argmin:1
|
||||
argmax:5
|
||||
|
||||
item:--------------------------
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: Enter the file (in FASTA)
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: Enter the name of the DB
|
||||
|
||||
menu:Phylogeny
|
||||
|
||||
|
||||
item:Phylip help
|
||||
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
|
||||
|
||||
arg:FILE
|
||||
argtype:choice_list
|
||||
arglabel:Which program?
|
||||
argchoice:clique:clique.html
|
||||
argchoice:consense:consense.html
|
||||
argchoice:contchar:contchar.html
|
||||
argchoice:contml:contml.html
|
||||
argchoice:contrast:contrast.html
|
||||
argchoice:discrete:discrete.html
|
||||
argchoice:distance:distance.html
|
||||
argchoice:dnaboot:dnaboot.html
|
||||
argchoice:dnacomp:dnacomp.html
|
||||
argchoice:dnadist:dnadist.html
|
||||
argchoice:dnainvar:dnainvar.html
|
||||
argchoice:dnaml:dnaml.html
|
||||
argchoice:dnamlk:dnamlk.html
|
||||
argchoice:dnamove:dnamove.html
|
||||
argchoice:dnapars:dnapars.html
|
||||
argchoice:dnapenny:dnapenny.html
|
||||
argchoice:dollop:dollop.html
|
||||
argchoice:dolmove:dolmove.html
|
||||
argchoice:dolpenny:dolpenny.html
|
||||
argchoice:draw:draw.html
|
||||
argchoice:drawgram:drawgram.html
|
||||
argchoice:drawtree:drawtree.html
|
||||
argchoice:factor:factor.html
|
||||
argchoice:fitch:fitch.html
|
||||
argchoice:gendist:gendist.html
|
||||
argchoice:kitsch:kitsch.html
|
||||
argchoice:main:main.html
|
||||
argchoice:mix:mix.html
|
||||
argchoice:move:move.html
|
||||
argchoice:neighbor:neighbor.html
|
||||
argchoice:penny:penny.html
|
||||
argchoice:protpars:protpars.html
|
||||
argchoice:read.me.general:read.me.general.html
|
||||
argchoice:restml:restml.html
|
||||
argchoice:seqboot:seqboot.html
|
||||
argchoice:sequence:sequence.html
|
||||
|
||||
|
||||
|
||||
item:Phylip 3.5
|
||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
||||
|
||||
arg:PROGRAM
|
||||
argtype:choice_list
|
||||
arglabel:Which program to run?
|
||||
argchoice:DNAPARS:dnapars
|
||||
argchoice:DNABOOT:dnaboot
|
||||
argchoice:DNAPENNY:dnapenny
|
||||
argchoice:DNAML:dnaml
|
||||
argchoice:DNAMLK:dnamlk
|
||||
argchoice:DNACOMP:dnacomp
|
||||
argchoice:DNAMOVE:dnamove
|
||||
argchoice:DNAINVAR:dnainvar
|
||||
argchoice:PROTPARS:protpars
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
item:Phylip DNA Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
||||
|
||||
arg:EXPLAIN
|
||||
argtype:text
|
||||
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHOR+CONSENSE
|
||||
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:DNADIST+NEIGHBOR:
|
||||
argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Run ?
|
||||
argtype:chooser
|
||||
argchoice:Run without Bootstrap:
|
||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
|
||||
arg:DNA
|
||||
argtype:text
|
||||
arglabel:Name of DNADIST outfile?
|
||||
|
||||
arg:NEI
|
||||
argtype:text
|
||||
arglabel:Name of NEIGHBOR outfile?
|
||||
|
||||
arg:TREE
|
||||
argtype:text
|
||||
arglabel:Name of TREEFILE ?
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
item:Phylip PROTEIN Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:PROTDIST+NEIGHBOR:
|
||||
argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
argchoice:No Bootstrap:
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
menu:On-Line Res.
|
||||
|
||||
item:GDE for Linux resources at Bioafrica.net
|
||||
itemmethod:netscape http://www.bioafrica.net &
|
||||
|
||||
item:-------------------------
|
||||
item:add a new website
|
||||
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the site name
|
||||
|
||||
arg:url
|
||||
argtype:text
|
||||
arglabel:Enter the URL (including http://)
|
|
@ -424,7 +424,7 @@ in:in1
|
|||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
|
@ -446,6 +446,7 @@ argchoice:PAM70:PAM70
|
|||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
|
@ -459,7 +460,7 @@ argchoice:Echinodermate Mitochondrial:8
|
|||
item:------------------------
|
||||
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/installBLASTDB.pl $sourcefile $menuname;
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/bio/GDE/bin/installBLASTDB.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
|
@ -469,11 +470,14 @@ arg:menuname
|
|||
argtype:text
|
||||
arglabel: enter the name of the DB
|
||||
|
||||
|
||||
#Sequence dataset
|
||||
menu:seq. datasets
|
||||
|
||||
|
||||
item:-------------
|
||||
item:add a new dataset
|
||||
itemmethod:cp $file /usr/local/bio/GDE/db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file
|
||||
itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/bio/GDE/newDATASET.pl $name $file
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
|
@ -487,14 +491,14 @@ arglabel:Enter the dataset file (in FASTA) ?
|
|||
#Menu for Protein
|
||||
menu:protein
|
||||
item:blastp
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/bio/db/PAM30; /usr/local/bio/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
arg:BLASTDBPROT
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/bio/db/hiv17-08-01.PROT.fasta
|
||||
|
@ -582,8 +586,8 @@ argmin:1
|
|||
argmax:5
|
||||
|
||||
item:--------------------------
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
item:Add a new Protein blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/bio/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
|
@ -593,8 +597,9 @@ arg:menuname
|
|||
argtype:text
|
||||
arglabel: Enter the name of the DB
|
||||
|
||||
menu:Phylogeny
|
||||
#Phylogenetic Menu
|
||||
|
||||
menu:Phylogeny
|
||||
|
||||
item:Phylip help
|
||||
itemmethod:(netscape /usr/local/bio/phylip/doc/$FILE)&
|
||||
|
@ -739,23 +744,3 @@ informat:genbank
|
|||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
menu:On-Line Res.
|
||||
|
||||
item:GDE for Linux resources at Bioafrica.net
|
||||
itemmethod:netscape http://www.bioafrica.net &
|
||||
|
||||
item:-------------------------
|
||||
item:add a new website
|
||||
itemmethod:xterm -e /usr/local/bio/GDE/newURL.pl $name $url
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the site name
|
||||
|
||||
arg:url
|
||||
argtype:text
|
||||
arglabel:Enter the URL (including http://)
|
File diff suppressed because it is too large
Load Diff
791
CORE/.GDEmenus~
791
CORE/.GDEmenus~
|
@ -1,791 +0,0 @@
|
|||
menu:File
|
||||
|
||||
item:test cmask output
|
||||
itemmethod: kedit in1
|
||||
|
||||
in:in1
|
||||
informat:colormask
|
||||
|
||||
item:New sequence <meta N>
|
||||
itemmethod:echo "$Type$Name" > out1
|
||||
itemmeta:n
|
||||
itemhelp:new_sequence.help
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:New Sequence name?
|
||||
argtext:New
|
||||
|
||||
arg:Type
|
||||
argtype:choice_list
|
||||
arglabel:Type?
|
||||
argchoice:DNA/RNA:#
|
||||
argchoice:Amino Acid:%
|
||||
argchoice:Text:\"
|
||||
argchoice:Mask:@
|
||||
|
||||
out:out1
|
||||
outformat:flat
|
||||
|
||||
item:Import Foreign Format
|
||||
itemmethod:cp $INPUTFILE OUTFILE.tmp;readseq OUTFILE.tmp -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:INPUTFILE
|
||||
argtype:text
|
||||
arglabel:Name of foreign file?
|
||||
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:Export Foreign Format
|
||||
itemmethod:readseq INPUTFILE -a -f$FORMAT > $OUTPUTFILE
|
||||
itemhelp:readseq.help
|
||||
|
||||
arg:FORMAT
|
||||
argtype:choice_list
|
||||
argchoice:FASTA:8
|
||||
argchoice:NEXUS:17
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:IG/Stanford:1
|
||||
argchoice:GenBank:2
|
||||
argchoice:NBRF:3
|
||||
argchoice:EMBL:4
|
||||
argchoice:GCG:5
|
||||
argchoice:DNA Strider:6
|
||||
argchoice:Fitch:7
|
||||
argchoice:Pearson:8
|
||||
argchoice:Zuker:9
|
||||
argchoice:Olsen:10
|
||||
argchoice:Phylip v3.2:11
|
||||
argchoice:Phylip v3.3:12
|
||||
argchoice:Plain text:13
|
||||
|
||||
arg:OUTPUTFILE
|
||||
argtype:text
|
||||
arglabel:Save as?
|
||||
|
||||
in:INPUTFILE
|
||||
informat:genbank
|
||||
|
||||
|
||||
item:Save Selection
|
||||
itemmethod: cat $SAVE_FUNC > $Name
|
||||
itemhelp:save_selection.help
|
||||
|
||||
arg:SAVE_FUNC
|
||||
argtype:chooser
|
||||
arglabel:File format
|
||||
argchoice:Flat:in1
|
||||
argchoice:Genbank:in2
|
||||
argchoice:GDE/HGL:in3
|
||||
|
||||
arg:Name
|
||||
argtype:text
|
||||
arglabel:File name?
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
in:in2
|
||||
informat:genbank
|
||||
|
||||
in:in3
|
||||
informat:gde
|
||||
|
||||
item:Print Selection
|
||||
itemmethod:(PrintStrat in1 $SCALE > in1.tmp; $CMD -P$PRINTER in1.tmp; /bin/rm -f in1 in1.tmp)&
|
||||
itemhelp:print_alignment.help
|
||||
|
||||
arg:SCALE
|
||||
argtype:slider
|
||||
arglabel:Reduce printout by?
|
||||
argmin:1
|
||||
argmax:20
|
||||
argvalue:1
|
||||
|
||||
arg:CMD
|
||||
argtype:chooser
|
||||
argchoice:Lpr:lpr
|
||||
argchoice:Enscript Gaudy:enscript -G -q
|
||||
argchoice:Enscript Two column:enscript -2rG
|
||||
|
||||
arg:PRINTER
|
||||
argtype:text
|
||||
arglabel:Which printer?
|
||||
argtext:lp
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
menu:Edit
|
||||
|
||||
item:Sort
|
||||
itemmethod:(heapsortHGL in1 $PRIM_KEY $SEC_KEY > in1.tmp ; gde in1.tmp;/bin/rm -f in1*)&
|
||||
itemhelp:heapsortHGL.help
|
||||
|
||||
arg:PRIM_KEY
|
||||
argtype:choice_list
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Primary sort field?
|
||||
|
||||
arg:SEC_KEY
|
||||
argtype:choice_list
|
||||
argchoice:None:
|
||||
argchoice:Group:group-ID
|
||||
argchoice:type:type
|
||||
argchoice:name:name
|
||||
argchoice:Sequence ID:sequence-ID
|
||||
argchoice:creator:creator
|
||||
argchoice:offset:offset
|
||||
arglabel:Secondary sort field?
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:extract
|
||||
itemmethod:(gde in1;/bin/rm -f in1)&
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
menu:DNA/RNA
|
||||
|
||||
item:Translate...
|
||||
itemmethod:Translate -tbl $TBL -frame $FRAME -min_frame $MNFRM $LTRCODE in1 > out1
|
||||
|
||||
arg:FRAME
|
||||
argtype:chooser
|
||||
arglabel:Which reading frame?
|
||||
argchoice:First:1
|
||||
argchoice:Second:2
|
||||
argchoice:Third:3
|
||||
argchoice:All six:6
|
||||
|
||||
arg:MNFRM
|
||||
arglabel:Minimum length of AA sequence to translate?
|
||||
argtype:slider
|
||||
argmin:0
|
||||
argmax:100
|
||||
argvalue:20
|
||||
|
||||
arg:LTRCODE
|
||||
argtype:chooser
|
||||
arglabel:Translate to:
|
||||
argchoice:Single letter codes:
|
||||
argchoice:Triple letter codes:-3
|
||||
|
||||
arg:TBL
|
||||
arglabel:Codon table?
|
||||
argtype:chooser
|
||||
argchoice:universal:1
|
||||
argchoice:mycoplasma:2
|
||||
argchoice:yeast:3
|
||||
argchoice:Vert. mito.:4
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
item:Dot plot
|
||||
itemmethod:(DotPlotTool in1 ; /bin/rm -f in1)&
|
||||
itemhelp:DotPlotTool.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
item:Clustal alignment
|
||||
itemmethod:(tr '%#' '>'<in1>clus_in;clustalw -quicktree -output=PIR -infile=clus_in -align > in1.rpt;sed "s/>DL;/#/g" < clus_in.pir> in1;$REPORT gde in1;/bin/rm -f clus_in* in1* )&
|
||||
|
||||
itemhelp:clustal_help
|
||||
|
||||
arg:KTUP
|
||||
argtype:slider
|
||||
arglabel:K-tuple size for pairwise search
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:2
|
||||
|
||||
arg:WIN
|
||||
argtype:slider
|
||||
arglabel:Window size
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:4
|
||||
|
||||
arg:Trans
|
||||
argtype:chooser
|
||||
arglabel:Transitions weighted?
|
||||
argchoice:Yes:/TRANSIT
|
||||
argchoice:No:
|
||||
|
||||
arg:FIXED
|
||||
argtype:slider
|
||||
arglabel:Fixed gap penalty
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:FLOAT
|
||||
arglabel:Floating gap penalty
|
||||
argtype:slider
|
||||
argmin:1
|
||||
argmax:100
|
||||
argvalue:10
|
||||
|
||||
arg:REPORT
|
||||
argtype:chooser
|
||||
arglabel:View assembly report?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit in1.rpt&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
item:Variable Positions
|
||||
itemmethod:varpos $REV < in1 > out1
|
||||
|
||||
arg:REV
|
||||
argtype:chooser
|
||||
arglabel:Highlight (darken)
|
||||
argchoice:Conserved positions:
|
||||
argchoice:variable positions:-rev
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Phrap
|
||||
itemmethod:readseq in1 -a -f8 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp; phrap OUTPUTFILE; readseq -a -f2 OUTPUTFILE.contigs > out1;/bin/rm -rf OUTPUT*;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
|
||||
out:out1
|
||||
outformat:genbank
|
||||
|
||||
item:SNAP
|
||||
itemmethod: cat in1 > infile;/usr/local/biotools/GDE/bin/fasta2snap.pl > outfile; /usr/bin/X11/xterm -e /home/tulio/biotools/SNAP/SNAP.pl outfile; kedit backg*; kedit summ*; sheeltool /home/tulio/biotools/codons-xyplot.pl codons.*; kedit codon.data; /bin/rm -rf back* codon* summ*;
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
out:out1
|
||||
outformat:text
|
||||
|
||||
|
||||
|
||||
|
||||
item:Find all <meta-f>
|
||||
itemmethod:findall $SEARCH $PRCNT $CASE $UT -match $MAT -mismatch $MIS < in1 > out1;
|
||||
itemhelp:findall.help
|
||||
itemmeta:f
|
||||
|
||||
arg:SEARCH
|
||||
argtype:text
|
||||
arglabel:Search String
|
||||
|
||||
arg:PRCNT
|
||||
argtype:slider
|
||||
arglabel:Percent mismatch
|
||||
argmin:0
|
||||
argmax:75
|
||||
argvalue:10
|
||||
|
||||
arg:CASE
|
||||
argtype:chooser
|
||||
arglabel:Case
|
||||
argchoice:Upper equals lower:
|
||||
argchoice:Upper not equal lower:-case
|
||||
|
||||
arg:UT
|
||||
argtype:chooser
|
||||
arglabel:U equal T?
|
||||
argchoice:Yes:-u=t
|
||||
argchoice:No:
|
||||
argvalue:0
|
||||
|
||||
arg:MAT
|
||||
arglabel:Match color
|
||||
argtype:choice_list
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:2
|
||||
|
||||
arg:MIS
|
||||
argtype:choice_list
|
||||
arglabel:Mismatch color
|
||||
argchoice:yellow:1
|
||||
argchoice:violet:2
|
||||
argchoice:red:3
|
||||
argchoice:aqua:4
|
||||
argchoice:green:5
|
||||
argchoice:blue:6
|
||||
argchoice:grey:11
|
||||
argchoice:black:8
|
||||
argvalue:7
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
|
||||
out:out1
|
||||
outformat:colormask
|
||||
|
||||
item:Sequence Consensus
|
||||
itemmethod:(MakeCons in1 $METHOD $MASK > out1)
|
||||
itemhelp:MakeCons.help
|
||||
|
||||
arg:METHOD
|
||||
arglabel:Method
|
||||
argtype:chooser
|
||||
argchoice:IUPAC:-iupac
|
||||
argchoice:Majority:-majority $PERCENT
|
||||
|
||||
arg:MASK
|
||||
argtype:chooser
|
||||
arglabel:Create a new:
|
||||
argchoice:Sequence:
|
||||
argchoice:Selection Mask: | Consto01mask
|
||||
|
||||
arg:PERCENT
|
||||
arglabel:Minimum Percentage for Majority
|
||||
argtype:slider
|
||||
argmin:50
|
||||
argmax:100
|
||||
argvalue:75
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
|
||||
out:out1
|
||||
outformat:gde
|
||||
|
||||
|
||||
#Menu for DNA/RNA
|
||||
|
||||
# blastn - search the selected sequences against a nucleotide BLAST database.
|
||||
# The Match Score and Mismatch Score sliders are passed to blastall as the
|
||||
# nucleotide match reward (-r) and mismatch penalty (-q). The -M option is
|
||||
# the protein substitution matrix and does not accept a numeric score.
|
||||
item:blastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastn -d $BLASTDBDNA -i in1.f -W $WORDLEN -r $MATCH -q $MMSCORE > in1.tmp; kedit in1.tmp; rm in1*)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBDNA
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV-1 Seq. Db.:/usr/local/biotools/db/DNA/hiv17-08-01.fasta2
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:MATCH
|
||||
argtype:slider
|
||||
arglabel:Match Score
|
||||
argmin:1
|
||||
argmax:10
|
||||
argvalue:5
|
||||
|
||||
arg:MMSCORE
|
||||
argtype:slider
|
||||
arglabel:Mismatch Score
|
||||
argmin:-10
|
||||
argmax:-1
|
||||
argvalue:-5
|
||||
|
||||
# blastx - translated nucleotide query against a protein BLAST database.
|
||||
# The database argument is named BLASTDB so that it matches the variable
|
||||
# used in the command line, and the Substitution Matrix choice is passed
|
||||
# to -M instead of a fixed PAM30.
|
||||
item:blastx
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; /usr/local/biotools/blast/blastall -p blastx -d $BLASTDB -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1*)&
|
||||
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:genpept:$GDE_HELP_DIR/BLAST/genpept
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
item:------------------------
|
||||
|
||||
item:Add a new DNA blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p F -o T; /usr/local/biotools/GDE/bin/installBLASTDB.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: enter the file name
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: enter the name of the DB
|
||||
|
||||
menu:seq. datasets
|
||||
item:tttt
|
||||
itemmethod:readseq /usr/local/biotools/GDE/db/ttttt -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:HIV1POLDNA.fasta
|
||||
itemmethod:readseq /usr/local/biotools/GDE/db/HIV1POLDNA.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:structure
|
||||
itemmethod:readseq /usr/local/biotools/GDE/db/structprot.fasta -a -f2 > OUTPUTFILE;/bin/rm -f OUTFILE.tmp
|
||||
out:OUTPUTFILE
|
||||
outformat:genbank
|
||||
|
||||
item:-------------
|
||||
item:add a new dataset
|
||||
itemmethod:mkdir db; cp $file db/ ;xterm -e /usr/local/biotools/GDE/newDATASET.pl $name $file
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the dataset name ?
|
||||
|
||||
arg:file
|
||||
argtype:text
|
||||
arglabel:Enter the dataset file (in FASTA) ?
|
||||
|
||||
|
||||
#Menu for Protein
|
||||
menu:protein
|
||||
# blastp - protein query against a protein BLAST database.
|
||||
# The PAM30 matrix file is copied into the working directory before the
|
||||
# search and removed afterwards together with the temporary in1 files.
|
||||
item:blastp
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM30 .; /usr/local/biotools/blast/blastall -p blastp -d $BLASTDBPROT -i in1.f -W $WORDLEN -M $Matrix > in1.tmp; /usr/openwin/bin/kedit in1.tmp; rm in1* PAM30)&
|
||||
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDBPROT
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:HIV Proteins:/usr/local/biotools/db/hiv17-08-01.PROT.fasta
|
||||
argchoice:ttttt:/usr/local/biotools/db/tttt
|
||||
argchoice:tytuiphn:/usr/local/biotools/db/yejhuh[9hp
|
||||
argchoice:yyyy:/usr/local/biotools/db/test
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:1
|
||||
argmax:5
|
||||
argvalue:3
|
||||
|
||||
item:tblastn
|
||||
itemmethod:(sed "s/[#%]/>/" <in1 > in1.f; cp /usr/local/biotools/db/PAM??? .; tblastn $BLASTDB in1.f W=$WORDLEN M=$Matrix C=$CODE > in1.tmp; kedit in1.tmp; rm in1* PAM???)&
|
||||
|
||||
in:in1
|
||||
informat:flat
|
||||
insave:
|
||||
|
||||
arg:BLASTDB
|
||||
argtype:choice_list
|
||||
arglabel:Which Database
|
||||
argchoice:genbank:$GDE_HELP_DIR/BLAST/genbank
|
||||
argchoice:genbank update:$GDE_HELP_DIR/BLAST/genupdate
|
||||
|
||||
arg:Matrix
|
||||
arglabel:Substitution Matrix:
|
||||
argtype:choice_list
|
||||
argchoice:PAM30:PAM30
|
||||
argchoice:PAM70:PAM70
|
||||
|
||||
arg:WORDLEN
|
||||
argtype:slider
|
||||
arglabel:Word Size
|
||||
argmin:4
|
||||
argmax:18
|
||||
argvalue:12
|
||||
|
||||
arg:CODE
|
||||
argtype:choice_list
|
||||
arglabel:Genetic Code
|
||||
argchoice:Standard or Universal:0
|
||||
argchoice:Vertebrate Mitochondrial:1
|
||||
argchoice:Yeast Mitochondrial:2
|
||||
argchoice:Mold Mitochondrial and Mycoplasma:3
|
||||
argchoice:Invertebrate Mitochondrial:4
|
||||
argchoice:Ciliate Macronuclear:5
|
||||
argchoice:Protozoan Mitochondrial:6
|
||||
argchoice:Plant Mitochondrial:7
|
||||
argchoice:Echinodermate Mitochondrial:8
|
||||
|
||||
|
||||
item:Map View
|
||||
itemmethod:(mapview in1 -pbl $PBL -npp $NPP; /bin/rm -f in1)&
|
||||
itemhelp:mapview.help
|
||||
|
||||
in:in1
|
||||
informat:gde
|
||||
insave:
|
||||
|
||||
arg:PBL
|
||||
arglabel:Pixel Between Lines
|
||||
argtype:slider
|
||||
argvalue:10
|
||||
argmin:1
|
||||
argmax:15
|
||||
|
||||
arg:NPP
|
||||
arglabel:Nucleotides Per Pixel
|
||||
argtype:slider
|
||||
argvalue:1
|
||||
argmin:1
|
||||
argmax:20
|
||||
|
||||
arg:LWIDTH
|
||||
arglabel:Line Thickness
|
||||
argtype:slider
|
||||
argvalue:2
|
||||
argmin:1
|
||||
argmax:5
|
||||
|
||||
item:--------------------------
|
||||
item:Add a new Protein blast db
|
||||
itemmethod:xterm -e formatdb -i $sourcefile -p T -o T; /usr/local/biotools/GDE/bin/installBLASTDBPROT.pl $sourcefile $menuname;
|
||||
|
||||
arg:sourcefile
|
||||
argtype:text
|
||||
arglabel: Enter the file (in FASTA)
|
||||
|
||||
arg:menuname
|
||||
argtype:text
|
||||
arglabel: Enter the name of the DB
|
||||
|
||||
menu:Phylogeny
|
||||
|
||||
|
||||
item:Phylip help
|
||||
itemmethod:(netscape /usr/local/biotools/phylip/doc/$FILE)&
|
||||
|
||||
arg:FILE
|
||||
argtype:choice_list
|
||||
arglabel:Which program?
|
||||
argchoice:clique:clique.html
|
||||
argchoice:consense:consense.html
|
||||
argchoice:contchar:contchar.html
|
||||
argchoice:contml:contml.html
|
||||
argchoice:contrast:contrast.html
|
||||
argchoice:discrete:discrete.html
|
||||
argchoice:distance:distance.html
|
||||
argchoice:dnaboot:dnaboot.html
|
||||
argchoice:dnacomp:dnacomp.html
|
||||
argchoice:dnadist:dnadist.html
|
||||
argchoice:dnainvar:dnainvar.html
|
||||
argchoice:dnaml:dnaml.html
|
||||
argchoice:dnamlk:dnamlk.html
|
||||
argchoice:dnamove:dnamove.html
|
||||
argchoice:dnapars:dnapars.html
|
||||
argchoice:dnapenny:dnapenny.html
|
||||
argchoice:dollop:dollop.html
|
||||
argchoice:dolmove:dolmove.html
|
||||
argchoice:dolpenny:dolpenny.html
|
||||
argchoice:draw:draw.html
|
||||
argchoice:drawgram:drawgram.html
|
||||
argchoice:drawtree:drawtree.html
|
||||
argchoice:factor:factor.html
|
||||
argchoice:fitch:fitch.html
|
||||
argchoice:gendist:gendist.html
|
||||
argchoice:kitsch:kitsch.html
|
||||
argchoice:main:main.html
|
||||
argchoice:mix:mix.html
|
||||
argchoice:move:move.html
|
||||
argchoice:neighbor:neighbor.html
|
||||
argchoice:penny:penny.html
|
||||
argchoice:protpars:protpars.html
|
||||
argchoice:read.me.general:read.me.general.html
|
||||
argchoice:restml:restml.html
|
||||
argchoice:seqboot:seqboot.html
|
||||
argchoice:sequence:sequence.html
|
||||
|
||||
|
||||
|
||||
item:Phylip 3.5
|
||||
itemmethod:(rm -f outfile ; readseq -a -f12 in1 | sed "s/ YF//1" > infile;$PREEDIT /usr/bin/X11/xterm -e $PROGRAM;kedit outfile; treetool outtree; rm in1 )&
|
||||
|
||||
arg:PROGRAM
|
||||
argtype:choice_list
|
||||
arglabel:Which program to run?
|
||||
argchoice:DNAPARS:dnapars
|
||||
argchoice:DNABOOT:dnaboot
|
||||
argchoice:DNAPENNY:dnapenny
|
||||
argchoice:DNAML:dnaml
|
||||
argchoice:DNAMLK:dnamlk
|
||||
argchoice:DNACOMP:dnacomp
|
||||
argchoice:DNAMOVE:dnamove
|
||||
argchoice:DNAINVAR:dnainvar
|
||||
argchoice:PROTPARS:protpars
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
item:Phylip DNA Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 | sed "s/ YF//1" > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e dnadist;mv -f outfile infile; cp infile $DNA; /usr/bin/X11/xterm -e neighbor; cp outtree intree; cp outfile $NEI; $PROGRAM kedit outfile; cp outtree $TREE; treetool outtree; /bin/rm -f in1 infile outfile intree outtree)&
|
||||
|
||||
arg:EXPLAIN
|
||||
argtype:text
|
||||
arglabel:To produce a bootstrapped tree choose DNADIST+NEIGHOR+CONSENSE
|
||||
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:DNADIST+NEIGHBOR:
|
||||
argchoice:DNADIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Run ?
|
||||
argtype:chooser
|
||||
argchoice:Run without Bootstrap:
|
||||
argchoice:Run with Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
|
||||
arg:DNA
|
||||
argtype:text
|
||||
arglabel:Name of DNADIST outfile?
|
||||
|
||||
arg:NEI
|
||||
argtype:text
|
||||
arglabel:Name of NEIGHBOR outfile?
|
||||
|
||||
arg:TREE
|
||||
argtype:text
|
||||
arglabel:Name of TREEFILE ?
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
item:Phylip PROTEIN Distance methods
|
||||
itemmethod:(readseq -a -f12 in1 > infile ;$PROG mv -f outfile infile; /usr/bin/X11/xterm -e protdist;mv -f outfile infile; /usr/bin/X11/xterm -e neighbor; cp outtree intree; $PROGRAM kedit outfile;treetool outtree;/bin/rm -f in1 infile outfile)&
|
||||
|
||||
arg:PROGRAM
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:PROTDIST+NEIGHBOR:
|
||||
argchoice:PROTDIST+NEIGHOR+CONSENSE: /usr/bin/X11/xterm -e consense;
|
||||
|
||||
arg:PROG
|
||||
arglabel:Which method?
|
||||
argtype:chooser
|
||||
argchoice:Bootstrap: /usr/bin/X11/xterm -e seqboot;
|
||||
argchoice:No Bootstrap:
|
||||
|
||||
arg:PREEDIT
|
||||
argtype:chooser
|
||||
arglabel:Edit input before running?
|
||||
argchoice:No:
|
||||
argchoice:Yes:kedit infile;
|
||||
|
||||
in:in1
|
||||
informat:genbank
|
||||
inmask:
|
||||
insave:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
menu:On-Line Res.
|
||||
item:tytyt
|
||||
itemmethod:netscape hnu[phoph &
|
||||
item:SANBI
|
||||
itemmethod:netscape again &
|
||||
item:PlasmoDB
|
||||
itemmethod:netscape http://www.plasmodb.org &
|
||||
item:NCBI
|
||||
itemmethod:netscape http://www.ncbi.nlm.nih.gov &
|
||||
item:sanbi
|
||||
itemmethod:netscape http://www.sanbi.ac.za &
|
||||
item:SANBI
|
||||
itemmethod:netscape http://www.sanbi.ac.za &
|
||||
|
||||
item:GDE for Linux resources at Bioafrica.net
|
||||
itemmethod:netscape http://www.bioafrica.net &
|
||||
|
||||
item:-------------------------
|
||||
item:add a new website
|
||||
itemmethod:xterm -e /usr/local/biotools/GDE/newURL.pl $name $url
|
||||
|
||||
arg:name
|
||||
argtype:text
|
||||
arglabel:Enter the site name
|
||||
|
||||
arg:url
|
||||
argtype:text
|
||||
arglabel:Enter the URL (including http://)
|
|
@ -426,7 +426,7 @@ SetNADData()
|
|||
Fills in the display data structure for an initial monochrome display.
|
||||
All settings are simple defaults, and will need to be modified externally
|
||||
if otherwise. This routine passes back a new NA_DisplayData structure, which
|
||||
can be destroyed after use with a call to cfree().
|
||||
can be destroyed after use with a call to free().
|
||||
|
||||
Copyright (c) 1989-1990, University of Illinois board of trustees. All
|
||||
rights reserved. Written by Steven Smith at the Center for Prokaryote Genome
|
||||
|
|
Binary file not shown.
2680
CORE/BuiltIn.c
2680
CORE/BuiltIn.c
File diff suppressed because it is too large
Load Diff
BIN
CORE/BuiltIn.o
BIN
CORE/BuiltIn.o
Binary file not shown.
|
@ -360,8 +360,8 @@ int fl_make_list()
|
|||
|
||||
|
||||
getcwd(dirname, GBUFSIZ);
|
||||
sprintf(tmpcmd, "cd %s;ls -F > /usr/tmp/.svlffil%d", dirname, pid);
|
||||
sprintf(tmpname, "/usr/tmp/.svlffil%d", pid);
|
||||
sprintf(tmpcmd, "cd %s;ls -F > /tmp/.svlffil%d", dirname, pid);
|
||||
sprintf(tmpname, "/tmp/.svlffil%d", pid);
|
||||
system(tmpcmd);
|
||||
dirp = fopen(tmpname, "r");
|
||||
if (dirp == NULL) /* just a check to make sure */
|
||||
|
|
Binary file not shown.
Binary file not shown.
BIN
CORE/DrawNA.o
BIN
CORE/DrawNA.o
Binary file not shown.
|
@ -342,7 +342,7 @@ Notify_arg arg;
|
|||
|
||||
repeat_cnt = 0;
|
||||
SetNACursor(ddata,EditCan,win,xwin,dpy,gc);
|
||||
cfree(buf);
|
||||
free(buf);
|
||||
}
|
||||
/*
|
||||
* Check mode
|
||||
|
|
BIN
CORE/Edit.o
BIN
CORE/Edit.o
Binary file not shown.
|
@ -410,7 +410,7 @@ DO()
|
|||
xv_set(pframe,FRAME_BUSY,TRUE,0);
|
||||
xv_set(frame,FRAME_BUSY,TRUE,0);
|
||||
system(Action);
|
||||
cfree(Action);
|
||||
free(Action);
|
||||
xv_set(pframe,FRAME_BUSY,FALSE,0);
|
||||
xv_set(frame,FRAME_BUSY,FALSE,0);
|
||||
BlockInput = FALSE;
|
||||
|
@ -503,7 +503,7 @@ GfileFormat file;
|
|||
strncat(temp,Action,i);
|
||||
strncat(temp,method,strlen(method));
|
||||
strcat( temp,&(Action[i+strlen(symbol)]) );
|
||||
cfree(Action);
|
||||
free(Action);
|
||||
Action = temp;
|
||||
}
|
||||
return(Action);
|
||||
|
@ -592,7 +592,7 @@ GmenuItemArg arg;
|
|||
strncat(temp,Action,i-1);
|
||||
strncat(temp,textvalue,strlen(textvalue));
|
||||
strcat( temp,&(Action[i+strlen(symbol)]) );
|
||||
cfree(Action);
|
||||
free(Action);
|
||||
Action = temp;
|
||||
}
|
||||
else
|
||||
|
@ -605,7 +605,7 @@ GmenuItemArg arg;
|
|||
strncat(temp,Action,i);
|
||||
strncat(temp,method,strlen(method));
|
||||
strcat( temp,&(Action[i+strlen(symbol)]) );
|
||||
cfree(Action);
|
||||
free(Action);
|
||||
Action = temp;
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
1148
CORE/FileIO.c
1148
CORE/FileIO.c
File diff suppressed because it is too large
Load Diff
1056
CORE/FileIO.c~
1056
CORE/FileIO.c~
File diff suppressed because it is too large
Load Diff
BIN
CORE/FileIO.o
BIN
CORE/FileIO.o
Binary file not shown.
BIN
CORE/Free.o
BIN
CORE/Free.o
Binary file not shown.
594
CORE/Genbank.c
594
CORE/Genbank.c
|
@ -1,10 +1,12 @@
|
|||
#include <sys/time.h>
|
||||
#include <stdio.h>
|
||||
#include <malloc.h>
|
||||
#include <xview/xview.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <xview/panel.h>
|
||||
#include "menudefs.h"
|
||||
#include <xview/xview.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "menudefs.h"
|
||||
|
||||
/*
|
||||
Copyright (c) 1989-1990, University of Illinois board of trustees. All
|
||||
|
@ -19,223 +21,220 @@ Copyright (c) 1993, Steven Smith, all rights reserved.
|
|||
|
||||
*/
|
||||
|
||||
ReadGen(filename,dataset,type)
|
||||
char *filename;
|
||||
ReadGen(filename, dataset, type) char *filename;
|
||||
NA_Alignment *dataset;
|
||||
int type;
|
||||
{
|
||||
register int done = FALSE,len = 0, j=0;
|
||||
int count,IS_REALLY_AA = FALSE;
|
||||
char Inline[GBUFSIZ],c;
|
||||
char *buffer,*gencomments = NULL,fields[8][GBUFSIZ];
|
||||
int buflen = 0,genclen = 0,curelem = 0,n = 0,flag = 0;
|
||||
register int done = FALSE, len = 0, j = 0;
|
||||
int count, IS_REALLY_AA = FALSE;
|
||||
char Inline[GBUFSIZ], c;
|
||||
char *buffer, *gencomments = NULL, fields[8][GBUFSIZ];
|
||||
int buflen = 0, genclen = 0, curelem = 0, n = 0, flag = 0;
|
||||
int start_col = -1;
|
||||
|
||||
NA_Sequence *this_elem;
|
||||
FILE *file;
|
||||
extern int Default_DNA_Trans[], Default_RNA_Trans[];
|
||||
extern int Default_NA_RTrans[];
|
||||
extern int Default_PROColor_LKUP[],Default_NAColor_LKUP[];
|
||||
extern int Default_PROColor_LKUP[], Default_NAColor_LKUP[];
|
||||
|
||||
ErrorOut("No such file",file = fopen(filename,"r"));
|
||||
ErrorOut("No such file", file = fopen(filename, "r"));
|
||||
|
||||
for(;fgets(Inline,GBUFSIZ,file) != 0;)
|
||||
{
|
||||
if(Inline[strlen(Inline)-1] == '\n')
|
||||
Inline[strlen(Inline)-1] = '\0';
|
||||
if(Find(Inline,"LOCUS"))
|
||||
{
|
||||
for (; fgets(Inline, GBUFSIZ, file) != 0;) {
|
||||
if (Inline[strlen(Inline) - 1] == '\n')
|
||||
Inline[strlen(Inline) - 1] = '\0';
|
||||
if (Find(Inline, "LOCUS")) {
|
||||
curelem = dataset->numelements++;
|
||||
if(curelem == 0)
|
||||
{
|
||||
dataset->element=(NA_Sequence*)
|
||||
Calloc(5,sizeof(NA_Sequence));
|
||||
if (curelem == 0) {
|
||||
dataset->element = (NA_Sequence *)Calloc(
|
||||
5, sizeof(NA_Sequence));
|
||||
dataset->maxnumelements = 5;
|
||||
}
|
||||
else if (curelem==dataset->maxnumelements)
|
||||
{
|
||||
else if (curelem == dataset->maxnumelements) {
|
||||
(dataset->maxnumelements) *= 2;
|
||||
dataset->element =(NA_Sequence*)
|
||||
Realloc(dataset->element,
|
||||
dataset->maxnumelements * sizeof(NA_Sequence));
|
||||
dataset->element = (NA_Sequence *)Realloc(
|
||||
dataset->element, dataset->maxnumelements *
|
||||
sizeof(NA_Sequence));
|
||||
}
|
||||
this_elem = &(dataset->element[curelem]);
|
||||
n = sscanf(Inline,"%s %s %s %s %s %s %s %s",
|
||||
fields[0],fields[1],fields[2],fields[3],fields[4],
|
||||
fields[5],fields[6],fields[7]);
|
||||
if(IS_REALLY_AA)
|
||||
{
|
||||
InitNASeq(this_elem,PROTEIN);
|
||||
n = sscanf(Inline, "%s %s %s %s %s %s %s %s", fields[0],
|
||||
fields[1], fields[2], fields[3], fields[4],
|
||||
fields[5], fields[6], fields[7]);
|
||||
if (IS_REALLY_AA) {
|
||||
InitNASeq(this_elem, PROTEIN);
|
||||
}
|
||||
else if(Find(Inline,"DNA"))
|
||||
{
|
||||
InitNASeq(this_elem,DNA);
|
||||
else if (Find(Inline, "DNA")) {
|
||||
InitNASeq(this_elem, DNA);
|
||||
}
|
||||
else if(Find(Inline,"RNA"))
|
||||
{
|
||||
InitNASeq(this_elem,RNA);
|
||||
else if (Find(Inline, "RNA")) {
|
||||
InitNASeq(this_elem, RNA);
|
||||
}
|
||||
else if(Find(Inline,"MASK"))
|
||||
{
|
||||
InitNASeq(this_elem,MASK);
|
||||
else if (Find(Inline, "MASK")) {
|
||||
InitNASeq(this_elem, MASK);
|
||||
}
|
||||
else if(Find(Inline,"TEXT"))
|
||||
{
|
||||
InitNASeq(this_elem,TEXT);
|
||||
else if (Find(Inline, "TEXT")) {
|
||||
InitNASeq(this_elem, TEXT);
|
||||
}
|
||||
else if(Find(Inline,"PROT"))
|
||||
{
|
||||
InitNASeq(this_elem,PROTEIN);
|
||||
else if (Find(Inline, "PROT")) {
|
||||
InitNASeq(this_elem, PROTEIN);
|
||||
}
|
||||
else
|
||||
InitNASeq(this_elem,DNA);
|
||||
InitNASeq(this_elem, DNA);
|
||||
|
||||
strncpy(this_elem->short_name,fields[1],31);
|
||||
AsciiTime(&(this_elem->t_stamp.origin),fields[n-1]);
|
||||
strncpy(this_elem->short_name, fields[1], 31);
|
||||
AsciiTime(&(this_elem->t_stamp.origin), fields[n - 1]);
|
||||
this_elem->attr = DEFAULT_X_ATTR;
|
||||
|
||||
if( Find(Inline, "Circular") )
|
||||
if (Find(Inline, "Circular"))
|
||||
this_elem->attr |= IS_CIRCULAR;
|
||||
|
||||
gencomments = NULL;
|
||||
genclen = 0;
|
||||
}
|
||||
else if(Find(Inline,"DEFINITION"))
|
||||
strncpy(this_elem->description,&(Inline[12]),79);
|
||||
else if (Find(Inline, "DEFINITION"))
|
||||
strncpy(this_elem->description, &(Inline[12]), 79);
|
||||
|
||||
else if(Find(Inline,"AUTHOR"))
|
||||
strncpy(this_elem->authority,&(Inline[12]),79);
|
||||
else if (Find(Inline, "AUTHOR"))
|
||||
strncpy(this_elem->authority, &(Inline[12]), 79);
|
||||
|
||||
else if(Find(Inline," ORGANISM"))
|
||||
strncpy(this_elem->seq_name,&(Inline[12]),79);
|
||||
else if (Find(Inline, " ORGANISM"))
|
||||
strncpy(this_elem->seq_name, &(Inline[12]), 79);
|
||||
|
||||
else if(Find(Inline,"ACCESSION"))
|
||||
strncpy(this_elem->id,&(Inline[12]),79);
|
||||
else if (Find(Inline, "ACCESSION"))
|
||||
strncpy(this_elem->id, &(Inline[12]), 79);
|
||||
|
||||
else if(Find(Inline,"ORIGIN"))
|
||||
{
|
||||
else if (Find(Inline, "ORIGIN")) {
|
||||
done = FALSE;
|
||||
len = 0;
|
||||
for(;done == FALSE && fgets(Inline,GBUFSIZ,file) != 0;)
|
||||
{
|
||||
if(Inline[0] != '/')
|
||||
{
|
||||
if(buflen == 0)
|
||||
{
|
||||
for (; done == FALSE &&
|
||||
fgets(Inline, GBUFSIZ, file) != 0;) {
|
||||
if (Inline[0] != '/') {
|
||||
if (buflen == 0) {
|
||||
buflen = GBUFSIZ;
|
||||
buffer = Calloc(sizeof(char) ,
|
||||
buflen);
|
||||
buffer = Calloc(sizeof(char),
|
||||
buflen);
|
||||
}
|
||||
|
||||
else if (len+strlen(Inline) >= buflen)
|
||||
{
|
||||
else if (len + strlen(Inline) >=
|
||||
buflen) {
|
||||
buflen += GBUFSIZ;
|
||||
buffer = Realloc(buffer,
|
||||
sizeof(char)*buflen);
|
||||
for(j=buflen-GBUFSIZ
|
||||
;j<buflen;j++)
|
||||
buffer = Realloc(
|
||||
buffer,
|
||||
sizeof(char) * buflen);
|
||||
for (j = buflen - GBUFSIZ;
|
||||
j < buflen; j++)
|
||||
buffer[j] = '\0';
|
||||
}
|
||||
/*
|
||||
* Search for the fist column of data (whitespace-number-whitespace)data
|
||||
*/
|
||||
if(start_col == -1)
|
||||
{
|
||||
for(start_col=0; Inline[start_col] == ' ' ||
|
||||
Inline[start_col] == '\t';start_col++);
|
||||
/*
|
||||
* Search for the first column of
|
||||
*data
|
||||
*(whitespace-number-whitespace)data
|
||||
*/
|
||||
if (start_col == -1) {
|
||||
for (start_col = 0;
|
||||
Inline[start_col] == ' ' ||
|
||||
Inline[start_col] == '\t';
|
||||
start_col++)
|
||||
;
|
||||
|
||||
for(start_col++;strchr("1234567890",
|
||||
Inline[start_col]) != NULL;start_col++);
|
||||
|
||||
for(start_col++; Inline[start_col] == ' ' ||
|
||||
Inline[start_col] == '\t';start_col++);
|
||||
for (start_col++;
|
||||
strchr(
|
||||
"1234567890",
|
||||
Inline[start_col]) !=
|
||||
NULL;
|
||||
start_col++)
|
||||
;
|
||||
|
||||
for (start_col++;
|
||||
Inline[start_col] == ' ' ||
|
||||
Inline[start_col] == '\t';
|
||||
start_col++)
|
||||
;
|
||||
}
|
||||
for(j=start_col;(c = Inline[j]) != '\0';j++)
|
||||
{
|
||||
if((c != '\n') &&
|
||||
((j-start_col + 1) % 11 !=0))
|
||||
for (j = start_col;
|
||||
(c = Inline[j]) != '\0'; j++) {
|
||||
if ((c != '\n') &&
|
||||
((j - start_col + 1) % 11 !=
|
||||
0))
|
||||
buffer[len++] = c;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
AppendNA(buffer,len,&(dataset->
|
||||
element[curelem]));
|
||||
for(j=0;j<len;j++)
|
||||
else {
|
||||
AppendNA(buffer, len,
|
||||
&(dataset->element[curelem]));
|
||||
for (j = 0; j < len; j++)
|
||||
buffer[j] = '\0';
|
||||
len = 0;
|
||||
done = TRUE;
|
||||
dataset->element[curelem].comments
|
||||
= gencomments;
|
||||
dataset->element[curelem].comments_len=
|
||||
dataset->element[curelem].comments =
|
||||
gencomments;
|
||||
dataset->element[curelem].comments_len =
|
||||
genclen - 1;
|
||||
dataset->element[curelem].
|
||||
comments_maxlen = genclen;
|
||||
dataset->element[curelem]
|
||||
.comments_maxlen = genclen;
|
||||
|
||||
gencomments = NULL;
|
||||
genclen = 0;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Test if sequence should be converted by the translation table
|
||||
* If it looks like a protein...
|
||||
*/
|
||||
if(dataset->element[curelem].rmatrix &&
|
||||
IS_REALLY_AA == FALSE)
|
||||
{
|
||||
IS_REALLY_AA = CheckType(dataset->element[curelem].
|
||||
sequence,dataset->element[curelem].seqlen);
|
||||
/*
|
||||
* Test if sequence should be converted by
|
||||
* the translation table. If it looks like a protein...
|
||||
*/
|
||||
if (dataset->element[curelem].rmatrix &&
|
||||
IS_REALLY_AA == FALSE) {
|
||||
IS_REALLY_AA = CheckType(
|
||||
dataset->element[curelem].sequence,
|
||||
dataset->element[curelem].seqlen);
|
||||
|
||||
if(IS_REALLY_AA == FALSE)
|
||||
Ascii2NA(dataset->element[curelem].sequence,
|
||||
if (IS_REALLY_AA == FALSE)
|
||||
Ascii2NA(
|
||||
dataset->element[curelem].sequence,
|
||||
dataset->element[curelem].seqlen,
|
||||
dataset->element[curelem].rmatrix);
|
||||
else
|
||||
/*
|
||||
* Force the sequence to be AA
|
||||
*/
|
||||
/*
|
||||
* Force the sequence to be AA
|
||||
*/
|
||||
{
|
||||
dataset->element[curelem].elementtype = PROTEIN;
|
||||
dataset->element[curelem].rmatrix = NULL;
|
||||
dataset->element[curelem].tmatrix = NULL;
|
||||
dataset->element[curelem].elementtype =
|
||||
PROTEIN;
|
||||
dataset->element[curelem].rmatrix =
|
||||
NULL;
|
||||
dataset->element[curelem].tmatrix =
|
||||
NULL;
|
||||
dataset->element[curelem].col_lut =
|
||||
Default_PROColor_LKUP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (Find(Inline,"ZZZZZ"))
|
||||
{
|
||||
else if (Find(Inline, "ZZZZZ")) {
|
||||
Cfree(gencomments);
|
||||
genclen = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gencomments == NULL)
|
||||
{
|
||||
else {
|
||||
if (gencomments == NULL) {
|
||||
gencomments = String(Inline);
|
||||
genclen = strlen(gencomments)+1;
|
||||
genclen = strlen(gencomments) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
genclen += strlen(Inline)+1;
|
||||
gencomments = Realloc(gencomments,genclen *
|
||||
sizeof(char));
|
||||
strncat(gencomments,Inline,GBUFSIZ);
|
||||
strncat(gencomments,"\n",GBUFSIZ);
|
||||
else {
|
||||
genclen += strlen(Inline) + 1;
|
||||
gencomments = Realloc(gencomments,
|
||||
genclen * sizeof(char));
|
||||
strncat(gencomments, Inline, GBUFSIZ);
|
||||
strncat(gencomments, "\n", GBUFSIZ);
|
||||
}
|
||||
}
|
||||
}
|
||||
Cfree(buffer);
|
||||
fclose(file);
|
||||
for(j=0;j<dataset->numelements;j++)
|
||||
dataset->maxlen = MAX(dataset->maxlen,
|
||||
dataset->element[j].seqlen+dataset->element[j].offset);
|
||||
for (j = 0; j < dataset->numelements; j++)
|
||||
dataset->maxlen =
|
||||
MAX(dataset->maxlen, dataset->element[j].seqlen +
|
||||
dataset->element[j].offset);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
typedef struct mya {
|
||||
int yy;
|
||||
int mm;
|
||||
|
@ -245,7 +244,7 @@ typedef struct mya {
|
|||
int sc;
|
||||
} sA;
|
||||
|
||||
AsciiTime(sA *a,char *asciitime)
|
||||
AsciiTime(sA *a, char *asciitime)
|
||||
{
|
||||
int j;
|
||||
char temp[GBUFSIZ];
|
||||
|
@ -254,186 +253,229 @@ AsciiTime(sA *a,char *asciitime)
|
|||
a->dd = 0;
|
||||
a->yy = 0;
|
||||
a->mm = 0;
|
||||
sscanf(asciitime,"%d%5c%d",&(a->dd),temp,&(a->yy));
|
||||
sscanf(asciitime, "%d%5c%d", &(a->dd), temp, &(a->yy));
|
||||
temp[5] = '\0';
|
||||
for(j=0;j<12;j++)
|
||||
if(strcmp(temp,month[j]) == 0)
|
||||
a->mm = j+1;
|
||||
if(a->dd <0 || a->dd > 31 || a->yy < 0 || a->mm > 11)
|
||||
SetTime(a);
|
||||
for (j = 0; j < 12; j++)
|
||||
if (strcmp(temp, month[j]) == 0) a->mm = j + 1;
|
||||
if (a->dd < 0 || a->dd > 31 || a->yy < 0 || a->mm > 11) SetTime(a);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
WriteGen(aln,filename,method,maskable)
|
||||
NA_Alignment *aln;
|
||||
WriteGen(aln, filename, method, maskable) NA_Alignment *aln;
|
||||
char *filename;
|
||||
int method,maskable;
|
||||
int method, maskable;
|
||||
{
|
||||
int i,j,k,mask = -1;
|
||||
int i, j, k, mask = -1;
|
||||
FILE *file;
|
||||
NA_Sequence *this_elem;
|
||||
extern char month[12][6];
|
||||
char c;
|
||||
if(aln == NULL)
|
||||
return;
|
||||
if(aln->na_ddata == NULL)
|
||||
return;
|
||||
if (aln == NULL) return;
|
||||
if (aln->na_ddata == NULL) return;
|
||||
|
||||
file = fopen(filename,"w");
|
||||
if(file == NULL)
|
||||
{
|
||||
file = fopen(filename, "w");
|
||||
if (file == NULL) {
|
||||
Warning("Cannot open file for output");
|
||||
return(1);
|
||||
return (1);
|
||||
}
|
||||
|
||||
if(maskable && method != SELECT_REGION)
|
||||
for(j=0;j<aln->numelements;j++)
|
||||
if(aln->element[j].elementtype == MASK &&
|
||||
if (maskable && method != SELECT_REGION)
|
||||
for (j = 0; j < aln->numelements; j++)
|
||||
if (aln->element[j].elementtype == MASK &&
|
||||
aln->element[j].selected)
|
||||
mask = j;
|
||||
|
||||
for(j=0;j<aln->numelements;j++)
|
||||
{
|
||||
if((aln->element[j].selected && j!=mask && method != SELECT_REGION)
|
||||
||(aln->element[j].subselected && method == SELECT_REGION)
|
||||
|| (method == ALL))
|
||||
{
|
||||
for (j = 0; j < aln->numelements; j++) {
|
||||
if ((aln->element[j].selected && j != mask &&
|
||||
method != SELECT_REGION) ||
|
||||
(aln->element[j].subselected && method == SELECT_REGION) ||
|
||||
(method == ALL)) {
|
||||
this_elem = &(aln->element[j]);
|
||||
fprintf(file,
|
||||
fprintf(
|
||||
file,
|
||||
"LOCUS %10s%8d bp %4s %10s %2d%5s%4d\n",
|
||||
this_elem->short_name,this_elem->seqlen+this_elem->offset,
|
||||
(this_elem->elementtype == DNA) ? "DNA":
|
||||
(this_elem->elementtype ==RNA)?"RNA":
|
||||
(this_elem->elementtype == MASK)?"MASK":
|
||||
(this_elem->elementtype == PROTEIN)?"PROT":"TEXT",
|
||||
this_elem->attr & IS_CIRCULAR?"Circular":"",
|
||||
this_elem->short_name,
|
||||
this_elem->seqlen + this_elem->offset,
|
||||
(this_elem->elementtype == DNA) ? "DNA"
|
||||
: (this_elem->elementtype == RNA) ? "RNA"
|
||||
: (this_elem->elementtype == MASK) ? "MASK"
|
||||
: (this_elem->elementtype == PROTEIN) ? "PROT"
|
||||
: "TEXT",
|
||||
this_elem->attr & IS_CIRCULAR ? "Circular" : "",
|
||||
this_elem->t_stamp.origin.dd,
|
||||
month[this_elem->t_stamp.origin.mm-1],
|
||||
this_elem->t_stamp.origin.yy>1900?this_elem->t_stamp.origin.yy:
|
||||
this_elem->t_stamp.origin.yy+1900);
|
||||
if(this_elem->description[0])
|
||||
fprintf(file,"DEFINITION %s\n",this_elem->description);
|
||||
if(this_elem->seq_name[0])
|
||||
fprintf(file," ORGANISM %s\n",this_elem->seq_name);
|
||||
if(this_elem->id[0])
|
||||
fprintf(file," ACCESSION %s\n",this_elem->id);
|
||||
if(this_elem->authority[0])
|
||||
fprintf(file," AUTHORS %s\n",this_elem->authority);
|
||||
if(this_elem->comments)
|
||||
fprintf(file,"%s\n",this_elem->comments);
|
||||
fprintf(file,"ORIGIN");
|
||||
if(this_elem->tmatrix)
|
||||
{
|
||||
if(mask == -1)
|
||||
{
|
||||
for(i=0,k=0;k<this_elem->seqlen+this_elem->offset;k++)
|
||||
{
|
||||
if(method == SELECT_REGION)
|
||||
{
|
||||
if(aln->selection_mask[k] == '1')
|
||||
{
|
||||
if(i%60 == 0)
|
||||
fprintf(file,"\n%9d",i+1);
|
||||
if(i%10 == 0)
|
||||
fprintf(file," ");
|
||||
fprintf(file,"%c",this_elem->tmatrix
|
||||
[getelem(this_elem,k)]);
|
||||
month[this_elem->t_stamp.origin.mm - 1],
|
||||
this_elem->t_stamp.origin.yy > 1900
|
||||
? this_elem->t_stamp.origin.yy
|
||||
: this_elem->t_stamp.origin.yy + 1900);
|
||||
if (this_elem->description[0])
|
||||
fprintf(file, "DEFINITION %s\n",
|
||||
this_elem->description);
|
||||
if (this_elem->seq_name[0])
|
||||
fprintf(file, " ORGANISM %s\n",
|
||||
this_elem->seq_name);
|
||||
if (this_elem->id[0])
|
||||
fprintf(file, " ACCESSION %s\n",
|
||||
this_elem->id);
|
||||
if (this_elem->authority[0])
|
||||
fprintf(file, " AUTHORS %s\n",
|
||||
this_elem->authority);
|
||||
if (this_elem->comments)
|
||||
fprintf(file, "%s\n", this_elem->comments);
|
||||
fprintf(file, "ORIGIN");
|
||||
if (this_elem->tmatrix) {
|
||||
if (mask == -1) {
|
||||
for (i = 0, k = 0;
|
||||
k < this_elem->seqlen +
|
||||
this_elem->offset;
|
||||
k++) {
|
||||
if (method == SELECT_REGION) {
|
||||
if (aln->selection_mask
|
||||
[k] == '1') {
|
||||
if (i % 60 == 0)
|
||||
fprintf(
|
||||
file,
|
||||
"\n"
|
||||
"%9"
|
||||
"d",
|
||||
i + 1);
|
||||
if (i % 10 == 0)
|
||||
fprintf(
|
||||
file,
|
||||
" ");
|
||||
fprintf(
|
||||
file, "%c",
|
||||
this_elem->tmatrix
|
||||
[getelem(
|
||||
this_elem,
|
||||
k)]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(i%60 == 0)
|
||||
fprintf(file,"\n%9d",i+1);
|
||||
if(i%10 == 0)
|
||||
fprintf(file," ");
|
||||
fprintf(file,"%c",this_elem->tmatrix
|
||||
[getelem(this_elem,k)]);
|
||||
else {
|
||||
if (i % 60 == 0)
|
||||
fprintf(file,
|
||||
"\n%9d",
|
||||
i + 1);
|
||||
if (i % 10 == 0)
|
||||
fprintf(file,
|
||||
" ");
|
||||
fprintf(
|
||||
file, "%c",
|
||||
this_elem->tmatrix
|
||||
[getelem(
|
||||
this_elem,
|
||||
k)]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(k=0;k<this_elem->seqlen+this_elem->offset;k++)
|
||||
{
|
||||
c =(char)getelem(&(aln->element[mask]),k);
|
||||
if(c != '0' && c!= '-')
|
||||
{
|
||||
if(k%60 == 0)
|
||||
fprintf(file,"\n%9d",k+1);
|
||||
if(k%10 == 0)
|
||||
fprintf(file," ");
|
||||
fprintf(file,"%c",this_elem->tmatrix
|
||||
[getelem(this_elem,k)]);
|
||||
else {
|
||||
for (k = 0; k < this_elem->seqlen +
|
||||
this_elem->offset;
|
||||
k++) {
|
||||
c = (char)getelem(
|
||||
&(aln->element[mask]), k);
|
||||
if (c != '0' && c != '-') {
|
||||
if (k % 60 == 0)
|
||||
fprintf(file,
|
||||
"\n%9d",
|
||||
k + 1);
|
||||
if (k % 10 == 0)
|
||||
fprintf(file,
|
||||
" ");
|
||||
fprintf(
|
||||
file, "%c",
|
||||
this_elem->tmatrix
|
||||
[getelem(
|
||||
this_elem,
|
||||
k)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(mask == -1)
|
||||
{
|
||||
for(i=0,k=0;k<this_elem->seqlen+this_elem->offset;k++)
|
||||
{
|
||||
if(method == SELECT_REGION)
|
||||
{
|
||||
if(aln->selection_mask[k] == '1')
|
||||
{
|
||||
if(i%60 == 0)
|
||||
fprintf(file,"\n%9d",i+1);
|
||||
if(i%10 == 0)
|
||||
fprintf(file," ");
|
||||
fprintf(file,"%c", getelem(this_elem,k));
|
||||
else {
|
||||
if (mask == -1) {
|
||||
for (i = 0, k = 0;
|
||||
k < this_elem->seqlen +
|
||||
this_elem->offset;
|
||||
k++) {
|
||||
if (method == SELECT_REGION) {
|
||||
if (aln->selection_mask
|
||||
[k] == '1') {
|
||||
if (i % 60 == 0)
|
||||
fprintf(
|
||||
file,
|
||||
"\n"
|
||||
"%9"
|
||||
"d",
|
||||
i + 1);
|
||||
if (i % 10 == 0)
|
||||
fprintf(
|
||||
file,
|
||||
" ");
|
||||
fprintf(
|
||||
file, "%c",
|
||||
getelem(
|
||||
this_elem,
|
||||
k));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(i%60 == 0)
|
||||
fprintf(file,"\n%9d",i+1);
|
||||
if(i%10 == 0)
|
||||
fprintf(file," ");
|
||||
fprintf(file,"%c",getelem(this_elem,k));
|
||||
else {
|
||||
if (i % 60 == 0)
|
||||
fprintf(file,
|
||||
"\n%9d",
|
||||
i + 1);
|
||||
if (i % 10 == 0)
|
||||
fprintf(file,
|
||||
" ");
|
||||
fprintf(
|
||||
file, "%c",
|
||||
getelem(this_elem,
|
||||
k));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(k=0;k<this_elem->seqlen+this_elem->offset;k++)
|
||||
{
|
||||
c =(char)getelem(&(aln->element[mask]),k);
|
||||
if(c != '0' && c!= '-')
|
||||
{
|
||||
if(k%60 == 0)
|
||||
fprintf(file,"\n%9d",k+1);
|
||||
if(k%10 == 0)
|
||||
fprintf(file," ");
|
||||
fprintf(file,"%c",getelem(this_elem,k));
|
||||
else {
|
||||
for (k = 0; k < this_elem->seqlen +
|
||||
this_elem->offset;
|
||||
k++) {
|
||||
c = (char)getelem(
|
||||
&(aln->element[mask]), k);
|
||||
if (c != '0' && c != '-') {
|
||||
if (k % 60 == 0)
|
||||
fprintf(file,
|
||||
"\n%9d",
|
||||
k + 1);
|
||||
if (k % 10 == 0)
|
||||
fprintf(file,
|
||||
" ");
|
||||
fprintf(
|
||||
file, "%c",
|
||||
getelem(this_elem,
|
||||
k));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(file,"\n//\n");
|
||||
fprintf(file, "\n//\n");
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
SetTime(sA *a)
|
||||
{
|
||||
struct tm *tim,*localtime();
|
||||
struct tm *tim, *localtime();
|
||||
long clock;
|
||||
|
||||
clock = time(0);
|
||||
tim = localtime(&clock);
|
||||
|
||||
a->yy = tim->tm_year;
|
||||
a->mm = tim->tm_mon+1;
|
||||
a->mm = tim->tm_mon + 1;
|
||||
a->dd = tim->tm_mday;
|
||||
a->hr = tim->tm_hour;
|
||||
a->mn = tim->tm_min;
|
||||
|
@ -442,24 +484,20 @@ SetTime(sA *a)
|
|||
}
|
||||
|
||||
/*
|
||||
* CheckType: Check base composition to see if the sequence
|
||||
* appears to be an amino acid sequence. If it is, pass back
|
||||
* TRUE, else FALSE.
|
||||
*/
|
||||
CheckType(seq,len)
|
||||
char *seq;
|
||||
* CheckType: Check base composition to see if the sequence
|
||||
* appears to be an amino acid sequence. If it is, pass back
|
||||
* TRUE, else FALSE.
|
||||
*/
|
||||
CheckType(seq, len) char *seq;
|
||||
int len;
|
||||
{
|
||||
int j, count1 = 0, count2 = 0;
|
||||
|
||||
int j,count1 = 0,count2 = 0;
|
||||
|
||||
for(j=0;j<len;j++)
|
||||
if(((seq[j]|32) < 'z') && ((seq[j]|32) > 'a'))
|
||||
{
|
||||
for (j = 0; j < len; j++)
|
||||
if (((seq[j] | 32) < 'z') && ((seq[j] | 32) > 'a')) {
|
||||
count1++;
|
||||
if(index("ACGTUNacgtun",seq[j]) == NULL)
|
||||
count2++;
|
||||
if (index("ACGTUNacgtun", seq[j]) == NULL) count2++;
|
||||
}
|
||||
|
||||
return( (count2 > count1/4)?TRUE:FALSE);
|
||||
return ((count2 > count1 / 4) ? TRUE : FALSE);
|
||||
}
|
||||
|
|
BIN
CORE/Genbank.o
BIN
CORE/Genbank.o
Binary file not shown.
958
CORE/HGLfile.c
958
CORE/HGLfile.c
File diff suppressed because it is too large
Load Diff
BIN
CORE/HGLfile.o
BIN
CORE/HGLfile.o
Binary file not shown.
|
@ -5,8 +5,8 @@ SRCS= ParseMenu.c main.c BasicDisplay.c EventHandler.c FileIO.c \
|
|||
DrawNA.c Free.c BuiltIn.c Edit.c Genbank.c Scroll.c ChooseFile.c \
|
||||
CutCopyPaste.c HGLfile.c
|
||||
|
||||
LIBS= -lm -lxview -lolgx -lX11
|
||||
CFLAGS= -g -L/usr/openwin/lib -I/usr/openwin/include
|
||||
LIBS= -lm -lxview -lolgx -lX11 -ltirpc
|
||||
CFLAGS= -g -m32 -L/usr/lib32 -I/usr/include
|
||||
CC = cc
|
||||
# Possible defines, SUN4 SGI DEC HGL
|
||||
DEFINES = -DLINUX
|
||||
|
|
700
CORE/ParseMenu.c
700
CORE/ParseMenu.c
|
@ -1,9 +1,10 @@
|
|||
#include <stdio.h>
|
||||
#include <malloc.h>
|
||||
#include <xview/xview.h>
|
||||
#include <stdio.h>
|
||||
#include <xview/panel.h>
|
||||
#include "menudefs.h"
|
||||
#include <xview/xview.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "menudefs.h"
|
||||
|
||||
/*
|
||||
ParseMenus(): Read in the menu config file, and generate the internal
|
||||
|
@ -24,106 +25,98 @@ int num_menus;
|
|||
|
||||
ParseMenu()
|
||||
{
|
||||
int j,curmenu = -1,curitem = 0;
|
||||
int curchoice = 0 ,curarg = 0,curinput = 0, curoutput = 0;
|
||||
char Inline[GBUFSIZ],temp[GBUFSIZ],head[GBUFSIZ];
|
||||
char tail[GBUFSIZ],*home;
|
||||
int j, curmenu = -1, curitem = 0;
|
||||
int curchoice = 0, curarg = 0, curinput = 0, curoutput = 0;
|
||||
char Inline[GBUFSIZ], temp[GBUFSIZ], head[GBUFSIZ];
|
||||
char tail[GBUFSIZ], *home;
|
||||
Gmenu *thismenu;
|
||||
GmenuItem *thisitem;
|
||||
GmenuItemArg *thisarg;
|
||||
GfileFormat *thisinput,*thisoutput;
|
||||
GfileFormat *thisinput, *thisoutput;
|
||||
FILE *file;
|
||||
char *resize;
|
||||
|
||||
/*
|
||||
* Open the menu configuration file ".GDEmenus"
|
||||
* First search the local directory, then the home directory.
|
||||
*/
|
||||
file=fopen(".GDEmenus","r");
|
||||
if(file == NULL)
|
||||
{
|
||||
home = (char*)getenv("HOME");
|
||||
strcpy(temp,home);
|
||||
strcat(temp,"/.GDEmenus");
|
||||
/*
|
||||
* Open the menu configuration file ".GDEmenus"
|
||||
* First search the local directory, then the home directory.
|
||||
*/
|
||||
file = fopen(".GDEmenus", "r");
|
||||
if (file == NULL) {
|
||||
home = (char *)getenv("HOME");
|
||||
strcpy(temp, home);
|
||||
strcat(temp, "/.GDEmenus");
|
||||
|
||||
file=fopen(temp,"r");
|
||||
if(file == NULL)
|
||||
{
|
||||
home = (char*)getenv("GDE_HELP_DIR");
|
||||
if(home != NULL)
|
||||
{
|
||||
strcpy(temp,home);
|
||||
strcat(temp,"/.GDEmenus");
|
||||
file=fopen(temp,"r");
|
||||
file = fopen(temp, "r");
|
||||
if (file == NULL) {
|
||||
home = (char *)getenv("GDE_HELP_DIR");
|
||||
if (home != NULL) {
|
||||
strcpy(temp, home);
|
||||
strcat(temp, "/.GDEmenus");
|
||||
file = fopen(temp, "r");
|
||||
}
|
||||
if(file == NULL)
|
||||
Error(
|
||||
".GDEmenus file not in the home, local, or $GDE_HELP_DIR directory");
|
||||
if (file == NULL)
|
||||
Error(
|
||||
".GDEmenus file not in the home, local, or "
|
||||
"$GDE_HELP_DIR directory");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the .GDEmenus file, and assemble an internal representation
|
||||
* of the menu/menu-item hierarchy.
|
||||
*/
|
||||
/*
|
||||
* Read the .GDEmenus file, and assemble an internal representation
|
||||
* of the menu/menu-item hierarchy.
|
||||
*/
|
||||
|
||||
for(;getline(file,Inline) != EOF;)
|
||||
{
|
||||
/*
|
||||
* menu: chooses menu to use
|
||||
*/
|
||||
if(Inline[0] == '#');
|
||||
else if(Find(Inline,"menu:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
for (; gde_getline(file, Inline) != EOF;) {
|
||||
/*
|
||||
* menu: chooses menu to use
|
||||
*/
|
||||
if (Inline[0] == '#')
|
||||
;
|
||||
else if (Find(Inline, "menu:")) {
|
||||
crop(Inline, head, temp);
|
||||
curmenu = -1;
|
||||
for(j=0;j<num_menus;j++)
|
||||
if(Find(temp,menu[j].label))
|
||||
curmenu=j;
|
||||
/*
|
||||
* If menu not found, make a new one
|
||||
*/
|
||||
if(curmenu == -1)
|
||||
{
|
||||
for (j = 0; j < num_menus; j++)
|
||||
if (Find(temp, menu[j].label)) curmenu = j;
|
||||
/*
|
||||
* If menu not found, make a new one
|
||||
*/
|
||||
if (curmenu == -1) {
|
||||
curmenu = num_menus++;
|
||||
thismenu = &menu[curmenu];
|
||||
thismenu->label =
|
||||
(char*)calloc(strlen(temp)+1,sizeof(char));
|
||||
thismenu->label = (char *)calloc(
|
||||
strlen(temp) + 1, sizeof(char));
|
||||
|
||||
if(thismenu->label == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thismenu->label,temp);
|
||||
thismenu->numitems = 0;
|
||||
if (thismenu->label == NULL) Error("Calloc");
|
||||
(void)strcpy(thismenu->label, temp);
|
||||
thismenu->numitems = 0;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* item: chooses menu item to use
|
||||
*/
|
||||
else if(Find(Inline,"item:"))
|
||||
{
|
||||
/*
|
||||
* item: chooses menu item to use
|
||||
*/
|
||||
else if (Find(Inline, "item:")) {
|
||||
curarg = -1;
|
||||
curinput = -1;
|
||||
curoutput = -1;
|
||||
crop(Inline,head,temp);
|
||||
crop(Inline, head, temp);
|
||||
curitem = thismenu->numitems++;
|
||||
/*
|
||||
* Resize the item list for this menu (add one item);
|
||||
*/
|
||||
if(curitem == 0)
|
||||
resize = (char*)calloc(1,sizeof(GmenuItem));
|
||||
/*
|
||||
* Resize the item list for this menu (add one
|
||||
*item);
|
||||
*/
|
||||
if (curitem == 0)
|
||||
resize = (char *)calloc(1, sizeof(GmenuItem));
|
||||
else
|
||||
resize = realloc(thismenu->item,
|
||||
thismenu -> numitems*sizeof(GmenuItem) );
|
||||
resize = realloc(
|
||||
thismenu->item,
|
||||
thismenu->numitems * sizeof(GmenuItem));
|
||||
|
||||
if(resize == NULL)
|
||||
Error ("Calloc");
|
||||
thismenu->item =(GmenuItem*)resize;
|
||||
if (resize == NULL) Error("Calloc");
|
||||
thismenu->item = (GmenuItem *)resize;
|
||||
|
||||
thisitem = &(thismenu->item[curitem]);
|
||||
thisitem->label = (char*)calloc(strlen(temp)+1,
|
||||
sizeof(char));
|
||||
thisitem->label =
|
||||
(char *)calloc(strlen(temp) + 1, sizeof(char));
|
||||
thisitem->meta = '\0';
|
||||
thisitem->numinputs = 0;
|
||||
thisitem->numoutputs = 0;
|
||||
|
@ -131,74 +124,66 @@ ParseMenu()
|
|||
thisitem->X = 0;
|
||||
thisitem->help = NULL;
|
||||
|
||||
/*
|
||||
* Create new item
|
||||
*/
|
||||
/*
|
||||
* Create new item
|
||||
*/
|
||||
|
||||
if(thisitem->label == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thisitem->label,temp);
|
||||
if (thisitem->label == NULL) Error("Calloc");
|
||||
(void)strcpy(thisitem->label, temp);
|
||||
}
|
||||
|
||||
/*
|
||||
* itemmethod: generic command line generated by this item
|
||||
*/
|
||||
else if(Find(Inline,"itemmethod:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
thisitem->method =
|
||||
(char*)calloc(strlen(temp)+1,sizeof(char));
|
||||
if(thisitem->method == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thisitem->method,temp);
|
||||
/*
|
||||
* itemmethod: generic command line generated by this item
|
||||
*/
|
||||
else if (Find(Inline, "itemmethod:")) {
|
||||
crop(Inline, head, temp);
|
||||
thisitem->method =
|
||||
(char *)calloc(strlen(temp) + 1, sizeof(char));
|
||||
if (thisitem->method == NULL) Error("Calloc");
|
||||
(void)strcpy(thisitem->method, temp);
|
||||
}
|
||||
/*
|
||||
* Help file
|
||||
*/
|
||||
else if(Find(Inline,"itemhelp:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
/*
|
||||
* Help file
|
||||
*/
|
||||
else if (Find(Inline, "itemhelp:")) {
|
||||
crop(Inline, head, temp);
|
||||
thisitem->help =
|
||||
(char*)calloc(strlen(temp)+1,sizeof(char));
|
||||
if(thisitem->method == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thisitem->help,temp);
|
||||
(char *)calloc(strlen(temp) + 1, sizeof(char));
|
||||
if (thisitem->method == NULL) Error("Calloc");
|
||||
(void)strcpy(thisitem->help, temp);
|
||||
}
|
||||
/*
|
||||
* Meta key equiv
|
||||
*/
|
||||
else if(Find(Inline,"itemmeta:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
thisitem->meta = temp[0];
|
||||
/*
|
||||
* Meta key equiv
|
||||
*/
|
||||
else if (Find(Inline, "itemmeta:")) {
|
||||
crop(Inline, head, temp);
|
||||
thisitem->meta = temp[0];
|
||||
}
|
||||
/*
|
||||
* arg: defines the symbol for a command line arguement.
|
||||
* this is used for substitution into the itemmethod
|
||||
* definition.
|
||||
*/
|
||||
/*
|
||||
* arg: defines the symbol for a command line arguement.
|
||||
* this is used for substitution into the
|
||||
*itemmethod definition.
|
||||
*/
|
||||
|
||||
else if(Find(Inline,"arg:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
curarg=thisitem->numargs++;
|
||||
if(curarg == 0)
|
||||
resize = (char*)calloc(1,sizeof(GmenuItemArg));
|
||||
else if (Find(Inline, "arg:")) {
|
||||
crop(Inline, head, temp);
|
||||
curarg = thisitem->numargs++;
|
||||
if (curarg == 0)
|
||||
resize =
|
||||
(char *)calloc(1, sizeof(GmenuItemArg));
|
||||
else
|
||||
resize = realloc(thisitem->arg,
|
||||
thisitem->numargs*sizeof(GmenuItemArg) );
|
||||
|
||||
resize = realloc(
|
||||
thisitem->arg,
|
||||
thisitem->numargs * sizeof(GmenuItemArg));
|
||||
|
||||
if(resize == NULL)
|
||||
Error("arg: Realloc");
|
||||
if (resize == NULL) Error("arg: Realloc");
|
||||
|
||||
(thisitem->arg) = (GmenuItemArg*)resize;
|
||||
(thisitem->arg) = (GmenuItemArg *)resize;
|
||||
thisarg = &(thisitem->arg[curarg]);
|
||||
thisarg->symbol = (char*)calloc(strlen(temp)+1,
|
||||
sizeof(char));
|
||||
if(thisarg->symbol == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thisarg->symbol,temp);
|
||||
thisarg->symbol =
|
||||
(char *)calloc(strlen(temp) + 1, sizeof(char));
|
||||
if (thisarg->symbol == NULL) Error("Calloc");
|
||||
(void)strcpy(thisarg->symbol, temp);
|
||||
thisarg->optional = FALSE;
|
||||
thisarg->type = 0;
|
||||
thisarg->min = 0;
|
||||
|
@ -208,153 +193,145 @@ ParseMenu()
|
|||
thisarg->textvalue = NULL;
|
||||
thisarg->value = 0;
|
||||
}
|
||||
/*
|
||||
* argtype: Defines the type of argument (menu,chooser, text, slider)
|
||||
*/
|
||||
else if(Find(Inline,"argtype:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
if(strcmp(temp,"text")==0)
|
||||
{
|
||||
thisarg->type=TEXTFIELD;
|
||||
/*
|
||||
* argtype: Defines the type of argument (menu,chooser,
|
||||
*text, slider)
|
||||
*/
|
||||
else if (Find(Inline, "argtype:")) {
|
||||
crop(Inline, head, temp);
|
||||
if (strcmp(temp, "text") == 0) {
|
||||
thisarg->type = TEXTFIELD;
|
||||
thisarg->textvalue =
|
||||
(char*)calloc(GBUFSIZ,sizeof(char));
|
||||
if(thisarg->textvalue == NULL)
|
||||
Error("Calloc");
|
||||
(char *)calloc(GBUFSIZ, sizeof(char));
|
||||
if (thisarg->textvalue == NULL) Error("Calloc");
|
||||
}
|
||||
else if(strcmp(temp,"choice_list")==0)
|
||||
thisarg->type=CHOICE_LIST;
|
||||
else if(strcmp(temp,"choice_menu")==0)
|
||||
thisarg->type=CHOICE_MENU;
|
||||
else if(strcmp(temp,"chooser")==0)
|
||||
thisarg->type=CHOOSER;
|
||||
else if(strcmp(temp,"slider")==0)
|
||||
thisarg->type=SLIDER;
|
||||
else if (strcmp(temp, "choice_list") == 0)
|
||||
thisarg->type = CHOICE_LIST;
|
||||
else if (strcmp(temp, "choice_menu") == 0)
|
||||
thisarg->type = CHOICE_MENU;
|
||||
else if (strcmp(temp, "chooser") == 0)
|
||||
thisarg->type = CHOOSER;
|
||||
else if (strcmp(temp, "slider") == 0)
|
||||
thisarg->type = SLIDER;
|
||||
else
|
||||
Error(sprintf(head,"Unknown argtype %s",temp));
|
||||
Error(
|
||||
sprintf(head, "Unknown argtype %s", temp));
|
||||
}
|
||||
/*
|
||||
* argtext: The default text value of the symbol.
|
||||
* $argument is replaced by this value if it is not
|
||||
* changed in the dialog box by the user.
|
||||
*/
|
||||
else if(Find(Inline,"argtext:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
(void)strcpy(thisarg->textvalue,temp);
|
||||
/*
|
||||
* argtext: The default text value of the symbol.
|
||||
* $argument is replaced by this value if it is not
|
||||
* changed in the dialog box by the user.
|
||||
*/
|
||||
else if (Find(Inline, "argtext:")) {
|
||||
crop(Inline, head, temp);
|
||||
(void)strcpy(thisarg->textvalue, temp);
|
||||
}
|
||||
/*
|
||||
* arglabel: Text label displayed in the dialog box for
|
||||
* this argument. It should be a discriptive label.
|
||||
*/
|
||||
else if(Find(Inline,"arglabel:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
thisarg->label=(char*)calloc(strlen(temp)+1,
|
||||
sizeof(char));
|
||||
if(thisarg->label == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thisarg->label,temp);
|
||||
/*
|
||||
* arglabel: Text label displayed in the dialog box for
|
||||
* this argument. It should be a discriptive
|
||||
*label.
|
||||
*/
|
||||
else if (Find(Inline, "arglabel:")) {
|
||||
crop(Inline, head, temp);
|
||||
thisarg->label =
|
||||
(char *)calloc(strlen(temp) + 1, sizeof(char));
|
||||
if (thisarg->label == NULL) Error("Calloc");
|
||||
(void)strcpy(thisarg->label, temp);
|
||||
}
|
||||
/*
|
||||
* Argument choice values use the following notation:
|
||||
*
|
||||
* argchoice:Displayed value:Method
|
||||
*
|
||||
* Where "Displayed value" is the label displayed in the dialog box
|
||||
* and "Method" is the value passed back on the command line.
|
||||
*/
|
||||
else if(Find(Inline,"argchoice:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
crop(temp,head,tail);
|
||||
/*
|
||||
* Argument choice values use the following notation:
|
||||
*
|
||||
* argchoice:Displayed value:Method
|
||||
*
|
||||
* Where "Displayed value" is the label displayed in the
|
||||
*dialog box and "Method" is the value passed back on the
|
||||
*command line.
|
||||
*/
|
||||
else if (Find(Inline, "argchoice:")) {
|
||||
crop(Inline, head, temp);
|
||||
crop(temp, head, tail);
|
||||
curchoice = thisarg->numchoices++;
|
||||
if(curchoice == 0)
|
||||
resize = (char*)calloc(1,sizeof(GargChoice));
|
||||
if (curchoice == 0)
|
||||
resize = (char *)calloc(1, sizeof(GargChoice));
|
||||
else
|
||||
resize = realloc(thisarg->choice,
|
||||
thisarg->numchoices*sizeof(GargChoice));
|
||||
resize = realloc(
|
||||
thisarg->choice,
|
||||
thisarg->numchoices * sizeof(GargChoice));
|
||||
|
||||
if(resize == NULL)
|
||||
Error("argchoice: Realloc");
|
||||
thisarg->choice = (GargChoice*)resize;
|
||||
if (resize == NULL) Error("argchoice: Realloc");
|
||||
thisarg->choice = (GargChoice *)resize;
|
||||
|
||||
(thisarg->choice[curchoice].label) = NULL;
|
||||
(thisarg->choice[curchoice].method) = NULL;
|
||||
|
||||
(thisarg->choice[curchoice].label) =
|
||||
(char*)calloc(strlen(head)+1,sizeof(char));
|
||||
(char *)calloc(strlen(head) + 1, sizeof(char));
|
||||
|
||||
(thisarg->choice[curchoice].method) =
|
||||
(char*)calloc(strlen(tail)+1,sizeof(char));
|
||||
(char *)calloc(strlen(tail) + 1, sizeof(char));
|
||||
|
||||
if(thisarg->choice[curchoice].method == NULL ||
|
||||
thisarg->choice[curchoice].label == NULL)
|
||||
if (thisarg->choice[curchoice].method == NULL ||
|
||||
thisarg->choice[curchoice].label == NULL)
|
||||
Error("Calloc");
|
||||
|
||||
(void)strcpy(thisarg->choice[curchoice].label,head);
|
||||
(void)strcpy(thisarg->choice[curchoice].method,tail);
|
||||
(void)strcpy(thisarg->choice[curchoice].label, head);
|
||||
(void)strcpy(thisarg->choice[curchoice].method, tail);
|
||||
}
|
||||
/*
|
||||
* argmin: Minimum value for a slider
|
||||
*/
|
||||
else if(Find(Inline,"argmin:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
(void)sscanf(temp,"%d",&(thisarg->min));
|
||||
/*
|
||||
* argmin: Minimum value for a slider
|
||||
*/
|
||||
else if (Find(Inline, "argmin:")) {
|
||||
crop(Inline, head, temp);
|
||||
(void)sscanf(temp, "%d", &(thisarg->min));
|
||||
}
|
||||
/*
|
||||
* argmax: Maximum value for a slider
|
||||
*/
|
||||
else if(Find(Inline,"argmax:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
(void)sscanf(temp,"%d",&(thisarg->max));
|
||||
/*
|
||||
* argmax: Maximum value for a slider
|
||||
*/
|
||||
else if (Find(Inline, "argmax:")) {
|
||||
crop(Inline, head, temp);
|
||||
(void)sscanf(temp, "%d", &(thisarg->max));
|
||||
}
|
||||
/*
|
||||
* argmethod: Command line flag associated with this argument.
|
||||
* Replaces argument in itemmethod description.
|
||||
*/
|
||||
else if(Find(Inline,"argmethod:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
thisarg->method = (char*)calloc(GBUFSIZ,strlen(temp));
|
||||
if(thisarg->method == NULL)
|
||||
Error("Calloc");
|
||||
(void)strcpy(thisarg->method,tail);
|
||||
/*
|
||||
* argmethod: Command line flag associated with this
|
||||
*argument. Replaces argument in itemmethod description.
|
||||
*/
|
||||
else if (Find(Inline, "argmethod:")) {
|
||||
crop(Inline, head, temp);
|
||||
thisarg->method = (char *)calloc(GBUFSIZ, strlen(temp));
|
||||
if (thisarg->method == NULL) Error("Calloc");
|
||||
(void)strcpy(thisarg->method, tail);
|
||||
}
|
||||
/*
|
||||
* argvalue: default value for a slider
|
||||
*/
|
||||
else if(Find(Inline,"argvalue:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
if(thisarg->type == TEXT)
|
||||
strcpy(thisarg->textvalue,temp);
|
||||
/*
|
||||
* argvalue: default value for a slider
|
||||
*/
|
||||
else if (Find(Inline, "argvalue:")) {
|
||||
crop(Inline, head, temp);
|
||||
if (thisarg->type == TEXT)
|
||||
strcpy(thisarg->textvalue, temp);
|
||||
else
|
||||
(void)sscanf(temp,"%d",&(thisarg->value));
|
||||
(void)sscanf(temp, "%d", &(thisarg->value));
|
||||
}
|
||||
/*
|
||||
* argoptional: Flag specifying that an arguement is optional
|
||||
*/
|
||||
else if(Find(Inline,"argoptional:"))
|
||||
/*
|
||||
* argoptional: Flag specifying that an arguement is
|
||||
*optional
|
||||
*/
|
||||
else if (Find(Inline, "argoptional:"))
|
||||
thisarg->optional = TRUE;
|
||||
/*
|
||||
* in: Input file description
|
||||
*/
|
||||
else if(Find(Inline,"in:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
/*
|
||||
* in: Input file description
|
||||
*/
|
||||
else if (Find(Inline, "in:")) {
|
||||
crop(Inline, head, temp);
|
||||
curinput = (thisitem->numinputs)++;
|
||||
if(curinput == 0)
|
||||
resize = (char*)calloc(1,sizeof(GfileFormat));
|
||||
if (curinput == 0)
|
||||
resize = (char *)calloc(1, sizeof(GfileFormat));
|
||||
else
|
||||
resize = realloc(thisitem->input,
|
||||
(thisitem->numinputs)*sizeof(GfileFormat));
|
||||
(thisitem->numinputs) *
|
||||
sizeof(GfileFormat));
|
||||
|
||||
if(resize == NULL)
|
||||
Error("in: Realloc");
|
||||
thisitem->input = (GfileFormat*)resize;
|
||||
if (resize == NULL) Error("in: Realloc");
|
||||
thisitem->input = (GfileFormat *)resize;
|
||||
thisinput = &(thisitem->input)[curinput];
|
||||
thisinput->save = FALSE;
|
||||
thisinput->overwrite = FALSE;
|
||||
|
@ -365,202 +342,181 @@ ParseMenu()
|
|||
thisinput->select = SELECTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* out: Output file description
|
||||
*/
|
||||
/*
|
||||
* out: Output file description
|
||||
*/
|
||||
|
||||
else if(Find(Inline,"out:"))
|
||||
{
|
||||
crop(Inline,head,temp);
|
||||
else if (Find(Inline, "out:")) {
|
||||
crop(Inline, head, temp);
|
||||
curoutput = (thisitem->numoutputs)++;
|
||||
if(curoutput == 0)
|
||||
resize = (char*)calloc(1,sizeof(GfileFormat));
|
||||
if (curoutput == 0)
|
||||
resize = (char *)calloc(1, sizeof(GfileFormat));
|
||||
else
|
||||
resize = realloc(thisitem->output,
|
||||
(thisitem->numoutputs)*sizeof(GfileFormat));
|
||||
(thisitem->numoutputs) *
|
||||
sizeof(GfileFormat));
|
||||
|
||||
if(resize == NULL)
|
||||
Error("out: Realloc");
|
||||
thisitem->output = (GfileFormat*)resize;
|
||||
if (resize == NULL) Error("out: Realloc");
|
||||
thisitem->output = (GfileFormat *)resize;
|
||||
thisoutput = &(thisitem->output)[curoutput];
|
||||
thisitem->output = (GfileFormat*)resize;
|
||||
thisitem->output = (GfileFormat *)resize;
|
||||
thisoutput = &(thisitem->output)[curoutput];
|
||||
thisoutput->save = FALSE;
|
||||
thisoutput->save = FALSE;
|
||||
thisoutput->overwrite = FALSE;
|
||||
thisoutput->format = 0;
|
||||
thisoutput->symbol= String(temp);
|
||||
thisoutput->symbol = String(temp);
|
||||
thisoutput->name = NULL;
|
||||
}
|
||||
else if(Find(Inline,"informat:"))
|
||||
{
|
||||
if(thisinput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
crop(Inline,head,tail);
|
||||
if(Find(tail,"genbank"))
|
||||
else if (Find(Inline, "informat:")) {
|
||||
if (thisinput == NULL) Error("Problem with .GDEmenus");
|
||||
crop(Inline, head, tail);
|
||||
if (Find(tail, "genbank"))
|
||||
thisinput->format = GENBANK;
|
||||
else if(Find(tail,"gde"))
|
||||
else if (Find(tail, "gde"))
|
||||
thisinput->format = GDE;
|
||||
else if(Find(tail,"na_flat"))
|
||||
else if (Find(tail, "na_flat"))
|
||||
thisinput->format = NA_FLAT;
|
||||
else if(Find(tail,"colormask"))
|
||||
else if (Find(tail, "colormask"))
|
||||
thisinput->format = COLORMASK;
|
||||
else if(Find(tail,"flat"))
|
||||
else if (Find(tail, "flat"))
|
||||
thisinput->format = NA_FLAT;
|
||||
else if(Find(tail,"status"))
|
||||
else if (Find(tail, "status"))
|
||||
thisinput->format = STATUS_FILE;
|
||||
else fprintf(stderr,"Warning, unknown file format %s\n"
|
||||
,tail);
|
||||
else
|
||||
fprintf(stderr,
|
||||
"Warning, unknown file format %s\n",
|
||||
tail);
|
||||
}
|
||||
else if(Find(Inline,"insave:"))
|
||||
{
|
||||
if(thisinput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
else if (Find(Inline, "insave:")) {
|
||||
if (thisinput == NULL) Error("Problem with .GDEmenus");
|
||||
thisinput->save = TRUE;
|
||||
}
|
||||
else if(Find(Inline,"inselect:"))
|
||||
{
|
||||
if(thisinput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
crop(Inline,head,tail);
|
||||
if(Find(tail,"one"))
|
||||
thisinput->select = SELECT_ONE;
|
||||
else if(Find(tail,"region"))
|
||||
thisinput->select = SELECT_REGION;
|
||||
else if(Find(tail,"all"))
|
||||
thisinput->select = ALL;
|
||||
else if (Find(Inline, "inselect:")) {
|
||||
if (thisinput == NULL) Error("Problem with .GDEmenus");
|
||||
crop(Inline, head, tail);
|
||||
if (Find(tail, "one"))
|
||||
thisinput->select = SELECT_ONE;
|
||||
else if (Find(tail, "region"))
|
||||
thisinput->select = SELECT_REGION;
|
||||
else if (Find(tail, "all"))
|
||||
thisinput->select = ALL;
|
||||
}
|
||||
else if(Find(Inline,"inmask:"))
|
||||
{
|
||||
if(thisinput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
else if (Find(Inline, "inmask:")) {
|
||||
if (thisinput == NULL) Error("Problem with .GDEmenus");
|
||||
thisinput->maskable = TRUE;
|
||||
}
|
||||
else if(Find(Inline,"outformat:"))
|
||||
{
|
||||
if(thisoutput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
crop(Inline,head,tail);
|
||||
if(Find(tail,"genbank"))
|
||||
thisoutput->format = GENBANK;
|
||||
else if(Find(tail,"gde"))
|
||||
else if (Find(Inline, "outformat:")) {
|
||||
if (thisoutput == NULL) Error("Problem with .GDEmenus");
|
||||
crop(Inline, head, tail);
|
||||
if (Find(tail, "genbank"))
|
||||
thisoutput->format = GENBANK;
|
||||
else if (Find(tail, "gde"))
|
||||
thisoutput->format = GDE;
|
||||
else if(Find(tail,"na_flat"))
|
||||
else if (Find(tail, "na_flat"))
|
||||
thisoutput->format = NA_FLAT;
|
||||
else if(Find(tail,"flat"))
|
||||
else if (Find(tail, "flat"))
|
||||
thisoutput->format = NA_FLAT;
|
||||
else if(Find(tail,"status"))
|
||||
else if (Find(tail, "status"))
|
||||
thisoutput->format = STATUS_FILE;
|
||||
else if(Find(tail,"colormask"))
|
||||
else if (Find(tail, "colormask"))
|
||||
thisoutput->format = COLORMASK;
|
||||
else fprintf(stderr,"Warning, unknown file format %s\n"
|
||||
,tail);
|
||||
else
|
||||
fprintf(stderr,
|
||||
"Warning, unknown file format %s\n",
|
||||
tail);
|
||||
}
|
||||
else if(Find(Inline,"outsave:"))
|
||||
{
|
||||
if(thisoutput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
else if (Find(Inline, "outsave:")) {
|
||||
if (thisoutput == NULL) Error("Problem with .GDEmenus");
|
||||
thisoutput->save = TRUE;
|
||||
}
|
||||
else if(Find(Inline,"outoverwrite:"))
|
||||
{
|
||||
if(thisoutput == NULL)
|
||||
Error("Problem with .GDEmenus");
|
||||
else if (Find(Inline, "outoverwrite:")) {
|
||||
if (thisoutput == NULL) Error("Problem with .GDEmenus");
|
||||
thisoutput->overwrite = TRUE;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Find(): Search the target string for the given key
|
||||
*/
|
||||
Find(target,key)
|
||||
char *key,*target;
|
||||
Find(target, key) char *key, *target;
|
||||
{
|
||||
int i,j,len1,dif,flag = FALSE;
|
||||
dif = (strlen(target)) - (len1 = strlen(key)) +1;
|
||||
int i, j, len1, dif, flag = FALSE;
|
||||
dif = (strlen(target)) - (len1 = strlen(key)) + 1;
|
||||
|
||||
if(len1>0)
|
||||
for(j=0;j<dif && flag == FALSE;j++)
|
||||
{
|
||||
if (len1 > 0)
|
||||
for (j = 0; j < dif && flag == FALSE; j++) {
|
||||
flag = TRUE;
|
||||
for(i=0;i<len1 && flag;i++)
|
||||
flag = (key[i] == target[i+j])?TRUE:FALSE;
|
||||
|
||||
for (i = 0; i < len1 && flag; i++)
|
||||
flag = (key[i] == target[i + j]) ? TRUE : FALSE;
|
||||
}
|
||||
return(flag);
|
||||
return (flag);
|
||||
}
|
||||
|
||||
|
||||
Find2(target,key)
|
||||
char *key,*target;
|
||||
Find2(target, key) char *key, *target;
|
||||
/*
|
||||
* Like find, but returns the index of the leftmost
|
||||
* occurrence, and -1 if not found.
|
||||
*/
|
||||
* Like find, but returns the index of the leftmost
|
||||
* occurrence, and -1 if not found.
|
||||
*/
|
||||
{
|
||||
int i,j,len1,dif,flag = FALSE;
|
||||
dif = (strlen(target)) - (len1 = strlen(key)) +1;
|
||||
int i, j, len1, dif, flag = FALSE;
|
||||
dif = (strlen(target)) - (len1 = strlen(key)) + 1;
|
||||
|
||||
if(len1>0)
|
||||
for(j=0;j<dif && flag == FALSE;j++)
|
||||
{
|
||||
if (len1 > 0)
|
||||
for (j = 0; j < dif && flag == FALSE; j++) {
|
||||
flag = TRUE;
|
||||
for(i=0;i<len1 && flag;i++)
|
||||
flag = (key[i] == target[i+j])?TRUE:FALSE;
|
||||
|
||||
for (i = 0; i < len1 && flag; i++)
|
||||
flag = (key[i] == target[i + j]) ? TRUE : FALSE;
|
||||
}
|
||||
return(flag?j-1:-1);
|
||||
return (flag ? j - 1 : -1);
|
||||
}
|
||||
|
||||
|
||||
Error(msg)
|
||||
char *msg;
|
||||
Error(msg) char *msg;
|
||||
{
|
||||
(void)fprintf(stderr,"%s\n",msg);
|
||||
(void)fprintf(stderr, "%s\n", msg);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
int getline(file,string)
|
||||
int gde_getline(file, string)
|
||||
FILE *file;
|
||||
char string[];
|
||||
{
|
||||
char c;
|
||||
int i;
|
||||
for(i=0;((c=getc(file))!='\n') && (c!=EOF);i++)
|
||||
string[i]=c;
|
||||
for (i = 0; ((c = getc(file)) != '\n') && (c != EOF); i++)
|
||||
string[i] = c;
|
||||
string[i] = '\0';
|
||||
if (i==0 && c==EOF) return (EOF);
|
||||
else return (0);
|
||||
if (i == 0 && c == EOF)
|
||||
return (EOF);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
Crop():
|
||||
Split "this:that[:the_other]"
|
||||
into: "this" and "that[:the_other]"
|
||||
into: "this" and "that[:the_other]"
|
||||
*/
|
||||
|
||||
crop(input,head,tail)
|
||||
char input[],head[],tail[];
|
||||
crop(input, head, tail) char input[], head[], tail[];
|
||||
{
|
||||
/*
|
||||
* Crop needs to be fixed so that whitespace is compressed off the end
|
||||
* of tail
|
||||
*/
|
||||
int offset,end,i,j,length;
|
||||
/*
|
||||
* Crop needs to be fixed so that whitespace is compressed off the
|
||||
* end of tail
|
||||
*/
|
||||
int offset, end, i, j, length;
|
||||
|
||||
length=strlen(input);
|
||||
for(offset=0;offset<length && input[offset] != ':';offset++)
|
||||
head[offset]=input[offset];
|
||||
length = strlen(input);
|
||||
for (offset = 0; offset < length && input[offset] != ':'; offset++)
|
||||
head[offset] = input[offset];
|
||||
head[offset++] = '\0';
|
||||
for(;offset<length && input[offset] == ' ';offset++);
|
||||
for(end=length-1;input[end] ==' ' && end>offset;end--);
|
||||
for (; offset < length && input[offset] == ' '; offset++)
|
||||
;
|
||||
for (end = length - 1; input[end] == ' ' && end > offset; end--)
|
||||
;
|
||||
|
||||
for(j=0,i=offset;i<=end;i++,j++)
|
||||
tail[j]=input[i];
|
||||
for (j = 0, i = offset; i <= end; i++, j++) tail[j] = input[i];
|
||||
tail[j] = '\0';
|
||||
return;
|
||||
}
|
||||
|
|
BIN
CORE/ParseMenu.o
BIN
CORE/ParseMenu.o
Binary file not shown.
BIN
CORE/Scroll.o
BIN
CORE/Scroll.o
Binary file not shown.
|
@ -1,8 +0,0 @@
|
|||
|
||||
========================[ Feb 1, 2002 1:57 PM ]========================
|
||||
NOTE: CoreLib [002.003] FileOpen("HIV1POLDNA.fasta","r") failed
|
||||
Cannot open input database file. Formating failed...
|
||||
|
||||
========================[ Feb 1, 2002 7:27 PM ]========================
|
||||
NOTE: CoreLib [002.003] FileOpen("SIVPOLPRO.fasta","r") failed
|
||||
Cannot open input database file. Formating failed...
|
191
CORE/infile
191
CORE/infile
|
@ -1,191 +0,0 @@
|
|||
10 916
|
||||
contig GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
|
||||
W22140 AAAAANGCCC NNTTCNAAGN GGGGGGGGGG GGGGGGGATA TTTTGCNNAG
|
||||
R.C.W27436 GGGNNNNGNN NNNNNNNNNN NNNNNNAANN NNNNNNNNNN NNNNNNNNNN
|
||||
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
W28762 TCTTGACATT TGTCTCCATT TCAGCAAAAC GANACCTGTG GTGAAGGGAT
|
||||
#10005_2 2 GGnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
R.C.W27652 GGNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
W28762 ---------- ---------- ---------- ---------- ----------
|
||||
W28762(165 GGGNNGGNGN GGNNNGNNGN NNNGGNNNNN NNNTNTGTNT GNNGGNAGGG
|
||||
#10005_2 2 GGGnnGGnGn GGnnnGnnGn nnnGGnnnnn nnnTnTGTnT GnnGGnAGGG
|
||||
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
|
||||
GGGGGCATGA TGNNGAGANC NAAAGAAAGN NCNGGGNGGG AAAAAAGAAG
|
||||
NNNANNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
TTGTGTGCTG GCACTG---- ---------- ---------- ----------
|
||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
NNTNTNANNN NNTTNTANAG TNAAAGNTTG GTNNNNGTNN NTTTGANGAA
|
||||
nnTnTnAnnn nnTTnTAnAG TnAAAGnTTG GTnnnnGTnn nTTTGAnGAA
|
||||
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
|
||||
GAGGNCCCTG GNGGGAGGGG GGNNCGNNTT TNNTGCNCCG GATGGAGGGN
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
nnnnnnnnGn AAnnnnnnnn nnnnnnnnnn nnnnnnnnnT TGAAAACTGT
|
||||
NNNNNNNNGN AANNNNNNNN NNNNNNNNNN NNNNNNNNNT TGAAAACTGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GNTCAANNTG GGGNNNANAN NNGNNNTTGA NTGAAAATGG GGNAANCCCC
|
||||
GnTCAAnnTG GGGnnnAnAn nnGnnnTTGA nTGAAAATGG GGnAAnCCCC
|
||||
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC TGAA-n--Tc TACT---CCG
|
||||
GGGGNTTTTN AAGNNTGTTT NTTTANAAGN AAGAGGGGGA NAAAATTTTT
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAACCGAAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TAnCCAAnTG GAATCCTAAG ACAATTTTCT -cCAwTTCA- sCAAC-CGAA
|
||||
TANCCAANTG GAATCCTAAG ACAATTTTCT TCCAANCCAC CCAAC-CGAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CNTTTTNCCA GTCANCTGGT AAGTCCAAGC TGAA-N--TC TACTC--C-G
|
||||
CnTTTTnCCA GTCAnCTGGT AAGTCCAAGC Tgaa----Tc TACTC--C-G
|
||||
CATGTAA-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
||||
TTNNTTCTNT NNCTNGNNNG GGGGGGGGGG GGGGCCCCCA ATAAGNNNTT
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
CCCTGTGGTG GAGGGAATTN CGTTCTTGGC NCTTCAGACT NCAGGGCAGG
|
||||
---------- ---------- ---------- ----CAGACT GCAGGGNAGG
|
||||
ACCCTGTGGT GrAGGGATTT GTGTGCT-GG CACTGCAGAC TGCAGGGCAG
|
||||
ACCCTGTGGT GGAGGGAATT NCGTTCTTGG CNCTTCAGAC TNCAGGGCAG
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CATGTAACCC C-NAAAGAGT TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
||||
CATGTAa-CC C-AAAAGAGm TGTCCAGAGC CAAGGCTTCT ACCTTCATTG
|
||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
||||
GNGCNCAGAA NNAGGGGGGG GNGGGGGGGC CCCTTTNCTC CNAAAAATTT
|
||||
NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
|
||||
AA-------- ---------- ---------- ---------- ----------
|
||||
AA-------- ---------- ---------- ---------- ----------
|
||||
GAAAGGGCTA GGGCCCAGGG GCTGGGAmAT GCATGAGGT- gCTCGGAGGA
|
||||
GAAAGGGCTA GGGCCCAGGG GCTGGGAAAT GCATGAGGTT GCTCGGAGGA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
||||
TCCCTCTCTG TGCTCAAGGA GTTCCATTCC AGGAGGAAGA GATCTATACC
|
||||
CTaAGCAGAT AGCAAAGaAG ATaATGGAGG AgCAATTGGT CATGGCCtTG
|
||||
CCCCCCNTTT TGGGNAAGGG TGGGGGAAAN NNTTTGGGCA AANAGGGGAA
|
||||
NNNNNNNNNN NNNAANNAGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
|
||||
---------- -------AGG GCTAGGGCCC AGGGGCTGGG AAATGCATGA
|
||||
---------- -------AGG GCTAGGGCCC AGGGGCTGGG ACATGCATGA
|
||||
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
|
||||
GCCTGGCTAA ATCCAAGCAC CAGCACCTGT GAGTCTGCTC TCTTCTCAGC
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CTAAGCAGAT AGCAAAGNAG ATNATGGAGG ANCAATTGGT CATGGCCNTG
|
||||
CTAAGCAGAT AGCAAAGAAG ATAATGGAGG AGCAATTGGT CATGGCCTTG
|
||||
GTTTCCCTCk AAACaACgCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
||||
AAAAAAAGNG GGGGGGGGCG GNTTCCANAA AANAANAAAG GGTNCACCCN
|
||||
GG-TTCTNGG NGGAGCCTGG CTAAANCCAA GCACCAGCAC CTGTGAGTCT
|
||||
GGTTGCTCGG AGGAGCCTGG CTAAATCCAA GCACCAGCAC CTGTGAGTCT
|
||||
GG-TGCTCGG AGGAGCCTGG NTAAATCCAA GCACCAGCAC CTGTGAGTCT
|
||||
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
|
||||
TGGCTCCCAA GTAAACCTGT AGCTTTGCCT CTTCTCCCAG CTCTCGTGCC
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GTTTCCCTCC AAACNACNCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
||||
GTTTCCCTCk AAACAACGCT GCAGATTTAT CTGCACAAAC ATCTCCACTT
|
||||
tmGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
||||
TNGGGGGNCN CCCCCCCCNC NNGNAAATCN TCCCTTTTTT TGANGGGCNA
|
||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT NCCTCTTCTC
|
||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
|
||||
GCTCTCTTCT CAGCTGGCTC CCAAGTAAAC CTGTAGCTTT GCCTCTTCTC
|
||||
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCAAAAG
|
||||
TCCTGAAGGT AGTCCAGGGA ACTGGAATCT ACCCACCTTT CCCCCCNAAG
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
||||
TTGGGGGAAA GGTGGGTAGA TTCCAGTTCC CTGGACTACC TTCAGGAGGC
|
||||
ACGAGAGCTG GGAGAAGAGG cAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
||||
ANNNCATTTN CTTGNCCTTG AAGATTGACC NTGACTGCTC TGGCAAGAAG
|
||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
||||
CCAGCTCTCG TGCCTCCTGA AGGTAGTCCA GGGAACTGGA ATCTACCCAC
|
||||
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTkGA GGGAAACCAA
|
||||
TGGAGATGTT TGTGCAGATA AATCTGCAGC GTTGTTTTGA GGGAAACCAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
||||
ACGAGAGCTG GGAGAAGAGG CAAAGCTACA GGTTTACTTG GGAGCCAGCT
|
||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGaTTTA gCCAGGCTCC
|
||||
AAGAGGTGTC CTTACAGAGA CCTCTTTACT GACCAACTGA AGNATAGACT
|
||||
CTTTCCCCCN AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
|
||||
CTTTCCCCCC NAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGCGTTGTT
|
||||
CTTTCCCCCA AAAGTGGAGA TGTTTGTGCA GATAAATCTG CAGNGTNGTT
|
||||
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
|
||||
GGCCATGACC AATTGCTCCT CCATTATCTT CTTTGCTATC TGCTTAGGGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA NCCAGGCTCC
|
||||
GAGAAGAGAG CAGACTCACA GGTGCTGGTG CTTGGATTTA GCCAGGCTCC
|
||||
tCCgAGkA-- CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
|
||||
TACTGCTGGA CAATCTGCAT GGGCATCACC CCTCCCCGCA TGTAACCC-A
|
||||
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
|
||||
TTGAGGGAAA CCAAGGCCAT GACCAATTGC TCCTCCATTA TCTTCTTTGC
|
||||
TGGAGGGAAA CCANGGCCAT GACCAATTGN TCCTCCATNA TCTNCTTTGC
|
||||
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
|
||||
ATAGATCTCT TCCTCCTGGA ATGGAACTCC TTGAGCACAG AGAGGGACAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TCCGAGC--A CCTCATGCAT GTCCCAGCCC CTGGGCCCTA GCCCT-----
|
||||
TCCGAGc--A CCTCATGCAT mTCCCAGCCC CTGGGCCCTA GCCCT-----
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
AAAGAGGTGT CCAGAGCCAA GGCTTCTACC TTCATTGTCC CTCTCTGTGC
|
||||
TATCTGCTNA GAGNANNCAA NNNAANNNA- ---------- ----------
|
||||
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
|
||||
TATCTGCTTA GGGTATAGAT CTCTTCCTCC TGGAATGGAA CTCCTTGAGC
|
||||
TGAAGGTAGA AGCCTTGGCT CTGGACAmCT CTTTTGGG-t TACATGCG--
|
||||
TGAAGGTAGA AGCCTTGGCT CTGGACACCT CTTTTGGG-T TACATGCGGT
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- TTCCTgCCCT GcAGTCTGAA GnGCCAAG-A -ACGnAATTC
|
||||
TCAAGGAGTT CCATTCCAGG AGGAAGAGAT CTATACCCT- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC ACCTCTTTT-
|
||||
ACAGAGAGGG ACAATGAAGG TAGAAGCCTT GGCTCTGGAC AACTCTTTNG
|
||||
GAGTAgA-tt cAGCTTGGAC TTACCAGnTG ACTGGnAAAA nGGGGGnTTn
|
||||
GAGTANA-NN NA-------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- TTCCTNCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
|
||||
---------- TTCCTGCCCT GCAGTCTGCA GTGCC-AGCA CACA-AAT-C
|
||||
CCTCCACCAC AGGGTTTCG- GTTGGGTGGn TTGGAAGA-A AATTGTCTTA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
GGGTTACATG CGGTGAGTAN ANNNA----- ---------- ----------
|
||||
GGGTTACATG CGG--AGTAG ANTTCAGCTT GGACTTACCA GNTGACTGGN
|
||||
CCCCATTTTC AnTCAAnnnC nnnTnTnnnC CCCAnnTTGA nCTTCnTCAA
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
CCTTCACCAC A-GGTNTCGT TTTGC-TGAA ATGG-AGACA AAT-GTCa-a
|
||||
CCTrCACCAC AGGGTTTCG- GTTGs-TGAA wTGg-AGA-A AATTGTCTTA
|
||||
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
AAAANGGGGG NTTNCCCCAT TTTCANTCAA NNNCNNNTNT NNNCCCCANN
|
||||
AnnnACnnnn ACCAAnCTTT nACTnTAnAA nnnnnTnAnA nnCCCTnCCn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
g-a------- ---------- ---------- ---------- ----------
|
||||
GGATTCCAnT TGGnTAACAG TTTTCAAnnn nnnnnnnnnn nnnnnnnnnn
|
||||
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
TTGANCTTCN TCAAANNNAC NNNNACCAAN CTTTNACTNT ANAANNNNNT
|
||||
nCAnACAnAn nnnnnnnCCn nnnCnnCnnn CCnCnCCnnC CC--------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
nnnnTTnCnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
NANANNCCCT NCCNNCANAC ANANNNNNNN NCCNNNNCNN CNNNCCNCNC
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
---------- ---------- ---------- ---------- ----------
|
||||
nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn
|
||||
nnnnnnnnnn nnnnCC
|
||||
---------- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
CNNCCC---- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
---------- ------
|
||||
nnnnnnnnnn nnnnCC
|
|
@ -1,2 +0,0 @@
|
|||
make
|
||||
cp gde ../bin
|
BIN
CORE/libxview.a
BIN
CORE/libxview.a
Binary file not shown.
192
CORE/main.c
192
CORE/main.c
|
@ -1,14 +1,15 @@
|
|||
#include <malloc.h>
|
||||
#include <pixrect/pixrect.h>
|
||||
#include <stdio.h>
|
||||
#include <xview/defaults.h>
|
||||
#include <xview/icon.h>
|
||||
#include <xview/panel.h>
|
||||
#include <xview/window.h>
|
||||
#include <xview/xview.h>
|
||||
#include <xview/panel.h>
|
||||
#include <xview/window.h>
|
||||
#include <xview/icon.h>
|
||||
#include <pixrect/pixrect.h>
|
||||
#include <malloc.h>
|
||||
#include "menudefs.h"
|
||||
|
||||
#include "defines.h"
|
||||
#include "globals.h"
|
||||
#include "menudefs.h"
|
||||
|
||||
/*
|
||||
Main()
|
||||
|
@ -25,152 +26,121 @@ All rights reserved.
|
|||
*/
|
||||
|
||||
Gmenu menu[100];
|
||||
int num_menus = 0,repeat_cnt = 0;
|
||||
Frame frame,pframe,infoframe;
|
||||
Panel popup,infopanel;
|
||||
Panel_item left_foot,right_foot;
|
||||
Canvas EditCan,EditNameCan;
|
||||
// int num_menus = 0;
|
||||
int repeat_cnt = 0;
|
||||
Frame frame, pframe, infoframe;
|
||||
Panel popup, infopanel;
|
||||
Panel_item left_foot, right_foot;
|
||||
Canvas EditCan, EditNameCan;
|
||||
int DisplayType;
|
||||
GmenuItem *current_item;
|
||||
NA_Alignment *DataSet = NULL;
|
||||
NA_Alignment *Clipboard = NULL;
|
||||
char **TextClip;
|
||||
int TextClipSize = 0,TextClipLength = 0;
|
||||
int TextClipSize = 0, TextClipLength = 0;
|
||||
|
||||
/*
|
||||
* Icon structure (pixmap dependent)
|
||||
*/
|
||||
* Icon structure (pixmap dependent)
|
||||
*/
|
||||
|
||||
|
||||
static short GDEicon[258]={
|
||||
static short GDEicon[258] = {
|
||||
#include "icon_gde"
|
||||
};
|
||||
|
||||
mpr_static(iconpr,64,64,1,GDEicon);
|
||||
mpr_static(iconpr, 64, 64, 1, GDEicon);
|
||||
|
||||
main(argc,argv)
|
||||
int argc;
|
||||
main(argc, argv) int argc;
|
||||
char **argv;
|
||||
{
|
||||
Icon tool_icon; /* obvious */
|
||||
extern char FileName[], current_dir[];
|
||||
|
||||
Icon tool_icon; /* obvious */
|
||||
extern char FileName[],current_dir[];
|
||||
|
||||
int type = GENBANK; /* default file type */
|
||||
int type = GENBANK; /* default file type */
|
||||
DataSet = NULL;
|
||||
Clipboard = (NA_Alignment*)Calloc(1,sizeof(NA_Alignment));
|
||||
DisplayType = NASEQ_ALIGN; /* default data type */
|
||||
Clipboard = (NA_Alignment *)Calloc(1, sizeof(NA_Alignment));
|
||||
DisplayType = NASEQ_ALIGN; /* default data type */
|
||||
Clipboard->maxnumelements = 5;
|
||||
Clipboard->element =(NA_Sequence*)Calloc(Clipboard->maxnumelements,
|
||||
sizeof(NA_Sequence));
|
||||
|
||||
/*
|
||||
* Connect to server, and set up initial XView data types
|
||||
* that are common to ALL display types
|
||||
*/
|
||||
xv_init(XV_INIT_ARGC_PTR_ARGV, &argc,argv,0);
|
||||
|
||||
Clipboard->element = (NA_Sequence *)Calloc(Clipboard->maxnumelements,
|
||||
sizeof(NA_Sequence));
|
||||
|
||||
/*
|
||||
* Main frame (primary window);
|
||||
*/
|
||||
|
||||
frame = xv_create((int) NULL,FRAME,
|
||||
FRAME_NO_CONFIRM,FALSE,
|
||||
FRAME_LABEL, "Genetic Data Environment 2.2",
|
||||
FRAME_INHERIT_COLORS,TRUE,
|
||||
XV_WIDTH,700,
|
||||
XV_HEIGHT,500,
|
||||
FRAME_SHOW_FOOTER,TRUE,
|
||||
0);
|
||||
* Connect to server, and set up initial XView data types
|
||||
* that are common to ALL display types
|
||||
*/
|
||||
xv_init(XV_INIT_ARGC_PTR_ARGV, &argc, argv, 0);
|
||||
|
||||
/*
|
||||
* Popup frame (dialog box window), and default settings in
|
||||
* the dialog box. These are changed to fit each individual
|
||||
* command's needs in EventHandler().
|
||||
*/
|
||||
infoframe = xv_create(frame,FRAME_CMD,
|
||||
FRAME_LABEL,"Messages",
|
||||
WIN_DESIRED_HEIGHT,100,
|
||||
WIN_DESIRED_WIDTH,300,
|
||||
FRAME_SHOW_RESIZE_CORNER,TRUE,
|
||||
FRAME_INHERIT_COLORS,TRUE,
|
||||
FRAME_CLOSED,FALSE,
|
||||
WIN_SHOW,FALSE,
|
||||
0);
|
||||
* Main frame (primary window);
|
||||
*/
|
||||
|
||||
pframe = xv_create(frame,FRAME_CMD,
|
||||
FRAME_CMD_PUSHPIN_IN,TRUE,
|
||||
FRAME_DONE_PROC,FrameDone,
|
||||
XV_HEIGHT,100,
|
||||
XV_WIDTH,300,
|
||||
FRAME_SHOW_RESIZE_CORNER,FALSE,
|
||||
FRAME_CLOSED,FALSE,
|
||||
XV_X,300,
|
||||
XV_Y,150,
|
||||
WIN_SHOW,FALSE,
|
||||
0);
|
||||
frame = xv_create((int)NULL, FRAME, FRAME_NO_CONFIRM, FALSE,
|
||||
FRAME_LABEL, "Genetic Data Environment 2.2.1",
|
||||
FRAME_INHERIT_COLORS, TRUE, XV_WIDTH, 700, XV_HEIGHT,
|
||||
500, FRAME_SHOW_FOOTER, TRUE, 0);
|
||||
|
||||
infopanel = xv_get(infoframe,FRAME_CMD_PANEL);
|
||||
xv_set(infopanel, PANEL_LAYOUT,PANEL_VERTICAL,
|
||||
XV_WIDTH,300,
|
||||
XV_HEIGHT,50,
|
||||
0);
|
||||
/*
|
||||
* Popup frame (dialog box window), and default settings in
|
||||
* the dialog box. These are changed to fit each individual
|
||||
* command's needs in EventHandler().
|
||||
*/
|
||||
infoframe =
|
||||
xv_create(frame, FRAME_CMD, FRAME_LABEL, "Messages",
|
||||
WIN_DESIRED_HEIGHT, 100, WIN_DESIRED_WIDTH, 300,
|
||||
FRAME_SHOW_RESIZE_CORNER, TRUE, FRAME_INHERIT_COLORS,
|
||||
TRUE, FRAME_CLOSED, FALSE, WIN_SHOW, FALSE, 0);
|
||||
|
||||
left_foot = xv_create(infopanel,PANEL_MESSAGE,0);
|
||||
right_foot = xv_create(infopanel,PANEL_MESSAGE,0);
|
||||
pframe = xv_create(frame, FRAME_CMD, FRAME_CMD_PUSHPIN_IN, TRUE,
|
||||
FRAME_DONE_PROC, FrameDone, XV_HEIGHT, 100, XV_WIDTH,
|
||||
300, FRAME_SHOW_RESIZE_CORNER, FALSE, FRAME_CLOSED,
|
||||
FALSE, XV_X, 300, XV_Y, 150, WIN_SHOW, FALSE, 0);
|
||||
|
||||
infopanel = xv_get(infoframe, FRAME_CMD_PANEL);
|
||||
xv_set(infopanel, PANEL_LAYOUT, PANEL_VERTICAL, XV_WIDTH, 300,
|
||||
XV_HEIGHT, 50, 0);
|
||||
|
||||
left_foot = xv_create(infopanel, PANEL_MESSAGE, 0);
|
||||
right_foot = xv_create(infopanel, PANEL_MESSAGE, 0);
|
||||
|
||||
window_fit(infoframe);
|
||||
|
||||
/*
|
||||
popup = xv_create(pframe,PANEL,
|
||||
PANEL_LAYOUT,PANEL_HORIZONTAL,
|
||||
0);
|
||||
*/
|
||||
popup = xv_get(pframe,FRAME_CMD_PANEL);
|
||||
/*
|
||||
popup = xv_create(pframe,PANEL,
|
||||
PANEL_LAYOUT,PANEL_HORIZONTAL,
|
||||
0);
|
||||
*/
|
||||
popup = xv_get(pframe, FRAME_CMD_PANEL);
|
||||
|
||||
xv_create(popup,PANEL_BUTTON,
|
||||
PANEL_LABEL_STRING,"HELP",
|
||||
PANEL_NOTIFY_PROC,HELP,
|
||||
0);
|
||||
xv_create(popup, PANEL_BUTTON, PANEL_LABEL_STRING, "HELP",
|
||||
PANEL_NOTIFY_PROC, HELP, 0);
|
||||
|
||||
xv_create(popup,PANEL_BUTTON,
|
||||
PANEL_LABEL_STRING,"OK",
|
||||
PANEL_NOTIFY_PROC,DO,
|
||||
0);
|
||||
xv_create(popup, PANEL_BUTTON, PANEL_LABEL_STRING, "OK",
|
||||
PANEL_NOTIFY_PROC, DO, 0);
|
||||
|
||||
xv_create(popup,PANEL_BUTTON,
|
||||
PANEL_LABEL_STRING,"Cancel",
|
||||
PANEL_NOTIFY_PROC,DONT,
|
||||
0);
|
||||
xv_create(popup, PANEL_BUTTON, PANEL_LABEL_STRING, "Cancel",
|
||||
PANEL_NOTIFY_PROC, DONT, 0);
|
||||
|
||||
/*
|
||||
* Keep original directory where program was started
|
||||
*/
|
||||
/*
|
||||
* Keep original directory where program was started
|
||||
*/
|
||||
(void)getwd(current_dir);
|
||||
|
||||
ParseMenu();
|
||||
GenMenu(type);
|
||||
if(argc>1)
|
||||
LoadData(argv[1]);
|
||||
if (argc > 1) LoadData(argv[1]);
|
||||
|
||||
/*
|
||||
* Set up the basics of the displays, and off to the main loop.
|
||||
*/
|
||||
* Set up the basics of the displays, and off to the main loop.
|
||||
*/
|
||||
BasicDisplay(DataSet);
|
||||
|
||||
if(DataSet != NULL)
|
||||
((NA_Alignment*)DataSet)->na_ddata = (char*)SetNADData
|
||||
((NA_Alignment*)DataSet,EditCan,EditNameCan);
|
||||
if (DataSet != NULL)
|
||||
((NA_Alignment *)DataSet)->na_ddata = (char *)SetNADData(
|
||||
(NA_Alignment *)DataSet, EditCan, EditNameCan);
|
||||
|
||||
tool_icon = xv_create((int) NULL,ICON,
|
||||
ICON_IMAGE,&iconpr,
|
||||
ICON_LABEL,strlen(FileName)>0?FileName:"GDE",
|
||||
0);
|
||||
tool_icon = xv_create((int)NULL, ICON, ICON_IMAGE, &iconpr, ICON_LABEL,
|
||||
strlen(FileName) > 0 ? FileName : "GDE", 0);
|
||||
|
||||
xv_set(frame,
|
||||
FRAME_ICON,tool_icon,
|
||||
0);
|
||||
xv_set(frame, FRAME_ICON, tool_icon, 0);
|
||||
|
||||
window_main_loop(frame);
|
||||
exit(0);
|
||||
|
|
BIN
CORE/main.o
BIN
CORE/main.o
Binary file not shown.
34
CORE/outfile
34
CORE/outfile
|
@ -1,34 +0,0 @@
|
|||
|
||||
DNA parsimony algorithm, version 3.51c
|
||||
|
||||
|
||||
One most parsimonious tree found:
|
||||
|
||||
|
||||
|
||||
|
||||
+-----------------------#10005_2 2
|
||||
!
|
||||
! +--------------------W28762(165
|
||||
+--9 !
|
||||
! ! ! +--R.C.W27652
|
||||
! ! ! +-----------6
|
||||
! ! ! ! +--#10005_2 2
|
||||
! +--8 !
|
||||
! ! +--5 +--W28762
|
||||
! ! ! ! +--7
|
||||
--1 ! ! ! +--4 +--W28762
|
||||
! ! ! ! ! !
|
||||
! +--2 +-----3 +-----R.C.W27652
|
||||
! ! !
|
||||
! ! +--------R.C.W27436
|
||||
! !
|
||||
! +-----------------W22140
|
||||
!
|
||||
+--------------------------contig
|
||||
|
||||
remember: this is an unrooted tree!
|
||||
|
||||
|
||||
requires a total of 2453.000
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
((#10005_2_2,(W28762(165,(((R.C.W27652,#10005_2_2),(((W28762,W28762),
|
||||
R.C.W27652),R.C.W27436)),W22140))),contig);
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,25 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Accession File Instructions
|
||||
;
|
||||
; 1. Type in one or more GenBank Accession #'s below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of numbers.
|
||||
;
|
||||
; (NOTE: File cannot contain LOCUS names.)
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each accession # on a separate line
|
||||
; SAMPLE ACCESSION FILE:
|
||||
;
|
||||
; M18249
|
||||
; X13383
|
||||
; J03680
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
clu2ig update 3 Feb 94
|
||||
|
||||
NAME
|
||||
clu2ig
|
||||
|
||||
SYNOPSIS
|
||||
clu2ig clustalfile > igfile
|
||||
|
||||
DESCRIPTION
|
||||
Converts interleaved .aln output from Clustal V into
|
||||
sequential .ig (IntelliGenetics) format for use by MASE.
|
||||
|
||||
clustalfile:
|
||||
CLUSTAL V multiple sequence alignment
|
||||
|
||||
name1 AACTTTCG
|
||||
name2 ATCTTTCG
|
||||
* ******
|
||||
|
||||
name1 CCTGCT
|
||||
name2 CCCGCT
|
||||
** ***
|
||||
|
||||
igfile:
|
||||
;
|
||||
name1
|
||||
AACTTTCG
|
||||
CCTGCT
|
||||
:
|
||||
name2
|
||||
ATCTTTCG
|
||||
CCCGCT
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,36 +0,0 @@
|
|||
dbstat update 3 Feb 94
|
||||
|
||||
NAME
|
||||
dbstat - calculates amino acid frequencies in a protein
|
||||
database
|
||||
|
||||
SYNOPSIS
|
||||
dbstat
|
||||
|
||||
DESCRIPTION
|
||||
dbstat reads a file of one or more protein sequences
|
||||
and calculates the amino acid frequencies, both in terms of
|
||||
absolute numbers, and as a fraction of the total.
|
||||
|
||||
input - The input file is the standard .wrp (Pearson) format,
|
||||
such as that produced by getob:
|
||||
|
||||
>name
|
||||
; one or more comment lines (optional)
|
||||
sequence lines
|
||||
|
||||
Comments begin either with semicolon (;) or right arrow (>)
|
||||
characters.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,30 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Expression File Instructions 8/7/95
|
||||
;
|
||||
; 1. Type in one or more GenBank expressions below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of expressions.
|
||||
; or
|
||||
; Copy expressions from another window and Paste into this window.
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; NOTES:
|
||||
; 1) FEATURES will then extract the appropriate sequences.
|
||||
; YOU DON'T NEED TO EDIT OUT THESE COMMENT LINES.
|
||||
; 2) All expressions referring to GenBank entries must begin with a '@'
|
||||
; Literals (ie. sequences to be embedded in the final output)
|
||||
; do NOT begin with a '@'.
|
||||
; 3) Put each expression on a separate line.
|
||||
;
|
||||
; SAMPLE EXPRESSION FILE:
|
||||
;
|
||||
; @J05635:83..1813
|
||||
; ; EcoRI/NotI adaptor {this is a comment line}
|
||||
; AATTGCGGCCGC
|
||||
; @J05635:/product="flagellin A"
|
||||
; @x17548:singed_trans
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Feature Key File Instructions
|
||||
;
|
||||
; 1. Type in one or more GenBank FEATURE Table feature keys below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of feature keys.
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each feature key on a separate line
|
||||
; SAMPLE FEATURE KEY FILE:
|
||||
;
|
||||
; mRNA
|
||||
; CDS
|
||||
; mat_peptide
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,407 +0,0 @@
|
|||
|
||||
FEATURES.DOC update 7 Feb 94
|
||||
|
||||
|
||||
NAME
|
||||
FEATURES - extracts features from GenBank entries
|
||||
|
||||
SYNOPSIS
|
||||
features
|
||||
features expression
|
||||
features [-f featurekey | -F keyfile]
|
||||
[-n name |-a accession | -e expression |
|
||||
-N namefile |-A accfile | -E expfile]
|
||||
[-u dbfile | -U dbfile | -g ]
|
||||
features -h
|
||||
|
||||
DESCRIPTION
|
||||
FEATURES extracts sequence objects from GenBank entries, using
|
||||
the Features Table language. Features can be retrieved either by
|
||||
specifying keywords (eg. CDS, mRNA, exon, intron etc.) or by
|
||||
evaluating expressions. In practical terms, FEATURES is
|
||||
a user interface for GETOB, which actually performs the parsing
|
||||
and extraction of sequence objects. FEATURES can be run either as
|
||||
an interactive program or with command line arguments.
|
||||
|
||||
'features' with no arguments runs the program interactively.
|
||||
'features' followed by an expression retrieves the data directly
|
||||
from GenBank and evaluates the expression. The third form of
|
||||
features requires all arguments to be accompanied by their
|
||||
respective option flags. Finally, 'features -h' prints the
|
||||
SYNOPSIS.
|
||||
|
||||
|
||||
INTERACTIVE EXECUTION
|
||||
FEATURES executed with no arguments runs interactively. An example of the
|
||||
FEATURES menu is shown below:
|
||||
|
||||
___________________________________________________________________
|
||||
FEATURES - Version 7 FEB 94
|
||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
||||
___________________________________________________________________
|
||||
Features: tRNA
|
||||
Entries: EPFCPCG
|
||||
Dataset:
|
||||
___________________________________________________________________
|
||||
Parameter Description Value
|
||||
-------------------------------------------------------------------
|
||||
1).................... FEATURES TO EXTRACT ....................> f
|
||||
f:Type a feature at the keyboard
|
||||
F:Read a list of features from a file
|
||||
2)....................ENTRIES TO BE PROCESSED (choose one).....> n
|
||||
Keyboard input - n:name a:accession # e:expression
|
||||
File input - N:name(s) A:accession #(s) E:expression(s)
|
||||
3)....................WHERE TO GET IT .........................> g
|
||||
u:Genbank dataset g:complete GenBank database
|
||||
U: same as u, but all entries
|
||||
4)....................WHERE TO SEND IT ........................> a
|
||||
s:Each feature to a separate file a:All output to same file
|
||||
---------------------------------------------------------------
|
||||
Type number of your choice or 0 to continue:
|
||||
0
|
||||
Messages will be written to EPFCPCG.msg
|
||||
Final sequence output will be written to EPFCPCG.out
|
||||
Expressions will be written to EPFCPCG.exp
|
||||
Extracting features...
|
||||
|
||||
In the example, FEATURES was instructed to retrieve all tRNAs from
|
||||
the GenBank entry EPFCPCG, which contains the Epifagus plastid
|
||||
genome. By default, the GenBank database was the source of the
|
||||
sequence. Messages indicate the progress of the job. A log describing
|
||||
the extraction of each feature is written to EPFCPCG.msg, while the
|
||||
extracted features themselves are written to EPFCPCG.out. Feature
|
||||
expressions which could be used by FEATURES to reconstruct the .out
|
||||
file, are written to EPFCPCG.exp.
|
||||
|
||||
The first step is to retrieve the EPFCPCG entry from GenBank, which is
|
||||
accomplished by calling FETCH. Next, FEATURES extracts the specified
|
||||
features from the entry.
|
||||
|
||||
An excerpt from EPFCPCG.msg is shown below, describing the extraction
|
||||
of the fifth tRNA found in this entry. To create this tRNA, two exons
|
||||
had to be joined. The qualifier line associated with this feature
|
||||
indicates that it is a Histidine tRNA with a gtg anticodon.
|
||||
|
||||
|
||||
EPFCPCG:anticodon gtg
|
||||
complement
|
||||
(
|
||||
join
|
||||
(
|
||||
70023 70028
|
||||
|
||||
1 69
|
||||
|
||||
)
|
||||
|
||||
)
|
||||
|
||||
|
||||
/product="transfer RNA-His"
|
||||
/gene="His-tRNA"
|
||||
/label=anticodon gtg
|
||||
/note="anticodon gtg"
|
||||
//----------------------------------------------
|
||||
|
||||
|
||||
The actual sequence for this feature, as written to EPFCPCG.out, is
|
||||
written with each exon beginning a new line:
|
||||
|
||||
>EPFCPCG:anticodon gtg
|
||||
ggcggatgtagccaaatggatcaaggtagtggattgtgaatccaacatat
|
||||
gcgggttcaattcccgtcg
|
||||
ttcgcc
|
||||
|
||||
Finally, the expression that was evaluated to create this feature is
|
||||
written to EPFCPCG.exp:
|
||||
|
||||
>EPFCPCG:anticodon gtg
|
||||
@M81884:anticodon gtg
|
||||
|
||||
If EPFCPCG.exp was used as an expression file in option 2 (E) of FEATURES,
|
||||
EPFCPCG.out would be recreated.
|
||||
|
||||
OPTIONS
|
||||
1) FEATURES - choosing f will cause FEATURES to prompt for
|
||||
a feature to extract. If you wish to extract several types of
|
||||
features simultaneously (ie. F), you must construct a file listing the
|
||||
feature keywords. The following example would retrieve both tRNA and
|
||||
rRNA sequences:
|
||||
|
||||
OBJECTS
|
||||
tRNA
|
||||
rRNA
|
||||
SITES
|
||||
|
||||
The words 'OBJECTS' and 'SITES' must enclose the feature keywords,
|
||||
and each keyword must be on a separate line. For a rigorous
|
||||
definition of the input file format, see the GETOB manual pages
|
||||
(getob.doc).
|
||||
|
||||
In the menu shown above, f was chosen, and the user entered tRNA at
|
||||
the prompt. Thus tRNA is now displayed on the Features: line. If
|
||||
features had been specified from a file (suboption F) then the
|
||||
filename containing the feature keywords would be displayed instead.
|
||||
A complete list of legal feature keywords can be found in the GenBank
|
||||
Release notes (gbrel.txt) under the subheading 'Feature Key Names'.
|
||||
|
||||
2) ENTRIES
|
||||
n User is prompted for the name of an entry from which the
|
||||
feature is to be extracted. The name of the entry will appear
|
||||
on the 'Entries' line of the menu.
|
||||
|
||||
N User is prompted for a filename containing one or more
|
||||
entry names. Each name must be on a separate line. The filename
|
||||
will be displayed on the 'Entries' menu line.
|
||||
|
||||
a User is prompted for an accession number, which will appear
|
||||
on the 'Entries' line of the menu.
|
||||
|
||||
A User is prompted for a filename for accession numbers. The filename
|
||||
will appear on the 'Entries:' line.
|
||||
|
||||
e User is prompted for a GenBank Features expression of the
|
||||
form accession:location. 'accession' refers to a GenBank
|
||||
accession number, while 'location' is any legal feature location.
|
||||
A brief description of location syntax can be found under the
|
||||
subheading "Feature Location" in the GenBank release notes
|
||||
(gbrel.txt). See "The DDBJ/EMBL/GenBank Feature Table:
|
||||
Definition" Version 1.04 for a complete definition.
|
||||
E User is prompted for a filename containing one or more Feature
|
||||
expressions. EACH EXPRESSION MUST BEGIN WITH A '@'. All lines beginning
|
||||
with '@' are processed as expressions, and all other lines are
|
||||
copied to the output file unchanged.
|
||||
|
||||
Examples:
|
||||
|
||||
The tRNA shown above could have been extracted by choosing
|
||||
suboption e and entering either of the following expressions:
|
||||
|
||||
M81884:complement(join(70023..70028,1..69))
|
||||
M81884:anticodon gtg
|
||||
|
||||
In the first example, the feature line from the original entry
|
||||
is used as the location. In the second example, the feature is
|
||||
found by its qualifier line, which also appeared in the
|
||||
original entry. It must be noted that the qualifier line must
|
||||
be unique from others in the same entry in its first 15
|
||||
characters after the = .
|
||||
|
||||
The flaL protein coding region of B. licheniformis is described
|
||||
in GenBank entry BLIFALA, accession number M60287 in the
|
||||
following feature:
|
||||
|
||||
CDS 305..640
|
||||
/note="flaD (sin) homologue"
|
||||
/gene="flaL"
|
||||
/label=ORF2
|
||||
/codon_start=1
|
||||
|
||||
This feature could be retrieved using any of the following
|
||||
expressions:
|
||||
|
||||
M60287:305..640
|
||||
M60287:ORF2
|
||||
M60287:/label=ORF2
|
||||
M60287:/gene="flaL"
|
||||
M60287:/note="flaD (sin) homologue"
|
||||
|
||||
Note that the /label= qualifier is special, in that labels are
|
||||
specifically intended as unique tags on a feature. For labels,
|
||||
only the label itself need be specified. Thus, /label=ORF2 is
|
||||
equivalent to ORF2. For other qualifiers, the qualifier keyword
|
||||
(eg. /note=) must be included.
|
||||
|
||||
3) DATABASE (WHERE TO GET IT) - By default, all entries processed will
|
||||
be automatically retrieved from GenBank using FETCH. Specifying 'u'
|
||||
(User-defined database subset) makes it possible to extract features
|
||||
from GenBank subsets created by the user. Usually, retrieval of
|
||||
features is much faster with a User-defined subset, so if you
|
||||
frequently work with sets of genes, it is best to retrieve them
|
||||
en-masse using FETCH, and work with them directly. For example, if
|
||||
you had retrieved a set of Beta-globin sequences into a file called
|
||||
'globin.gen', you could directly extract features from these entries
|
||||
by specifying 'globin' or 'globin.gen' as your User-defined database.
|
||||
If the file extension is '.gen', FEATURES will automatically create
|
||||
temporary files called globin.ano, globin.wrp and globin.ind,
|
||||
containing annotation, sequence, and an index, respectively. These
|
||||
files will be read during feature extraction, and then discarded. If
|
||||
you have already created such files using SPLITDB, simply specify
|
||||
any of 'globin', 'globin.ano', etc. ie. anything, as long as it does
|
||||
not have the .gen file extension.
|
||||
|
||||
'U' rather than 'u' causes ALL entries in the user-defined
|
||||
database to be processed. This means that it is unnecessary to
|
||||
specify entry options (eg -n, -N etc.), as these will be
|
||||
ignored, if given.
|
||||
|
||||
One consequence of these conventions is that the individual GenBank
|
||||
divisions can be processed directly. For example, suppose you were only
|
||||
interested in rodent globins. You could directly access the rodent
|
||||
division of GenBank by specifying the base name of that file division
|
||||
(eg. /home/psgendb/GenBank/gbrod) as your user-defined database. In
|
||||
this case, the files gbrod.ano, gbrod.wrp and gbrod.ind already
|
||||
exist. Again, this approach is faster, since FEATURES would not have
|
||||
to find and retrieve the sequences, but can read directly from the
|
||||
database files. Finally, if you wanted to process all of the entries
|
||||
in the database division, simply use -U. The user is warned that a
|
||||
GenBank division is a huge amount of data, and processing every entry
|
||||
could take a long time.
|
||||
|
||||
4) WHERE TO SEND IT - By default (a), the output for all entries goes
|
||||
to a single set of files, whose names are chosen by FEATURES,
|
||||
depending on the setting of option 2, Entries. If a single name (n) or
|
||||
accession number (a) has been chosen, that will be used as
|
||||
the raw filename. For example, if you were processing the entry
|
||||
WHTCAB, the output files would be WHTCAB.msg and WHTCAB.out. If names
|
||||
(N), accession numbers (A) or expressions (E) were read from a file,
|
||||
the raw name of that file would be used eg. cellulase.nam would result
|
||||
in cellulase.msg and cellulase.out. Finally, if a single expression
|
||||
is processed (e), then the primary accession number in that
|
||||
expression will be used for the filenames. In all cases, FEATURES
|
||||
will tell you the names of the files being written.
|
||||
|
||||
Choosing suboption s, you can specify that the features created for
|
||||
each entry be sent to separate files. In this case, each file will
|
||||
have the name of that entry, with the extension .obj. However, all
|
||||
messages and expressions will still go to a single set of files. While this
|
||||
can be a convenient way of creating separate files when you need them,
|
||||
this option still has the limitation of writing all features for a
|
||||
given entry (if there are more than one) to the same file. Also,
|
||||
successive resolution of features (anything requiring 'getob -r')
|
||||
will not work with this option. This may be corrected in future
|
||||
versions.
|
||||
|
||||
|
||||
COMMAND LINE EXECUTION
|
||||
|
||||
There are two ways of running FEATURES from the command line. If only one
|
||||
argument is supplied, that argument is interpreted as an expression, and
|
||||
the result of that expression (ie. a sequence ) is written to the
|
||||
standard output. .msg, .out and .exp files are NOT created. For example,
|
||||
GenBank entry BACFLALA (M60287) contains the following feature:
|
||||
|
||||
CDS 95..271
|
||||
/label=LORF-
|
||||
/codon_start=1
|
||||
/translation="MNKDKNEKEELDEEWTELIKHALEQGISPDDIRIFLNLGKKSSK
|
||||
PSASIERSHSINPF"
|
||||
Any of
|
||||
|
||||
features M60287:LORF-
|
||||
features M60287:95..271
|
||||
features M60287:/label=LORF-
|
||||
|
||||
would write the open reading frame to the standard output:
|
||||
|
||||
atgaataaagataaaaatgagaaagaagaattggatgaggagtggacaga
|
||||
actgattaaacacgctcttgaacaaggcattagtccagacgatatacgta
|
||||
tttttctcaatttgggtaagaagtcttcaaaaccttccgcatcaattgaa
|
||||
agaagtcattcaataaatcctttctga
|
||||
|
||||
This form of FEATURES is provided to make it easy to pipe output to
|
||||
other programs for further processing. For example
|
||||
|
||||
features M60287:LORF- |ribosome >LORF.protein
|
||||
|
||||
would write the translation of the open reading frame to a file called
|
||||
LORF.protein.
|
||||
|
||||
The full functionality of FEATURES can be accessed using arguments on
|
||||
the command line. In particular, when there are multiple entries to be
|
||||
processed, or multiple features within entries, it is much faster to
|
||||
supply FEATURES with lists of entries, feature keys or expressions.
|
||||
Command line options are similar to suboptions in menu items 1-3 above:
|
||||
|
||||
Feature keys:
|
||||
-f key {feature key}
|
||||
-F filename {file of feature keys}
|
||||
|
||||
Entries:
|
||||
-n name {GenBank LOCUS name}
|
||||
-N filename {file of GenBank LOCUS names}
|
||||
-a accession {GenBank ACCESSION number}
|
||||
-A filename {file of GenBank ACCESSION numbers}
|
||||
-e expression {Feature Table expression}
|
||||
-E filename {file of Feature Table expressions, each begin-
|
||||
ning with '@'}
|
||||
|
||||
Databases:
|
||||
-u filename {GenBank dataset}
|
||||
-U filename { " " " " " " ,
|
||||
process all entries ie. -nNaAeE options
|
||||
will be ignored}
|
||||
-g {GenBank}
|
||||
|
||||
Examples:
|
||||
|
||||
features -f tRNA -n EPFCPCG
|
||||
|
||||
retrieves all tRNAs from GenBank entry EPFCPCG and writes .msg, .out,
|
||||
and .exp files.
|
||||
|
||||
features -e M60287:LORF-
|
||||
|
||||
would retrieve the same open reading frame as in the earlier example.
|
||||
|
||||
|
||||
Since the most time-consuming operation in FEATURES is sequence retrieval,
|
||||
it is often best to retrieve frequently-used sequences as database
|
||||
subsets. For example, a set of GenBank entries for chlorophyll a/b binding
|
||||
protein genes might be stored in a file called CAB.gen.
|
||||
|
||||
features -f CDS -N CAB.nam -u CAB.gen
|
||||
|
||||
would generate the files CAB.msg, CAB.out and CAB.exp containing output
|
||||
for all CDS features in the entries listed in the file CAB.nam.
|
||||
|
||||
features -E CAB.exp -u CAB.gen
|
||||
|
||||
would re-create the output file CAB.out.
|
||||
|
||||
|
||||
|
||||
BUGS
|
||||
FEATURES does no preliminary error checking for syntax of
|
||||
GenBank expressions prior to their evaluation. Expressions that can
|
||||
not be evaluated will be flagged by GETOB in the .msg file.
|
||||
|
||||
At present, little checking is done to test for the presence or
|
||||
correctness of input files. Some errors may cause the program to
|
||||
crash.
|
||||
|
||||
For User-defined datasets, filename expansion is not performed.
|
||||
|
||||
FILES
|
||||
Temporary files:
|
||||
X.term X.ano X.wrp X.ind X.gen {X is raw filename, see 4) }
|
||||
UNRESOLVED.fea UNRESOLVED.out
|
||||
FEA.inf FEA.nam FEA.gen FEA.ano FEA.wrp FEA.ind FEA.msg FEA.out
|
||||
|
||||
SEE ALSO
|
||||
grep(1V) fetch getob splitdb
|
||||
|
||||
TRANSPORTATION NOTES
|
||||
It should be fairly easy to get FEATURES to work even on systems
|
||||
in which GenBank has not been formatted for the XYLEM package.
|
||||
This is because FEATURES does not work directly on the database, but
|
||||
rather retrieves all necessary sequences by calling FETCH. Thus,
|
||||
statements like 'fetch FEA.nam FEA.gen' could be replaced with any
|
||||
command that, given a file containing names or accession numbers,
|
||||
returns a file containing GenBank entries. In principle, you
|
||||
could even implement this sort of command to retrieve entries from
|
||||
the email server (retrieve@ncbi.nlm.nih.gov) at NCBI, although
|
||||
such a setup would undoubtedly be quite slow.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,320 +0,0 @@
|
|||
|
||||
FETCH.DOC update 24 Feb 96
|
||||
|
||||
|
||||
NAME
|
||||
fetch - retrieves database entries by name or accession number
|
||||
|
||||
SYNOPSIS
|
||||
fetch {interactive mode}
|
||||
fetch [options] namefile [output file] {batch mode}
|
||||
|
||||
DESCRIPTION
|
||||
fetch retrieves one or more entries from a database.
|
||||
|
||||
Interactive mode: fetch prompts the user to set search parameters,
|
||||
using an interactive menu:
|
||||
___________________________________________________________________
|
||||
FETCH - Version 7 Feb 94
|
||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
||||
___________________________________________________________________
|
||||
Namefile:
|
||||
Outfile:
|
||||
Database:
|
||||
-------------------------------------------------------------------
|
||||
Parameter Description Value
|
||||
|
||||
1) Name/Acc Name or Accession sequence to get
|
||||
2) Namefile Get list of sequences from Namefile
|
||||
3) WhatToGet a:annotation s:sequence b:both b
|
||||
4) Database g:GenBank p:PIR v:VecBase l:LiMB g
|
||||
G:GenBank dataset P:PIR dataset
|
||||
5) Outfile Send all output to a single file (Outfile)
|
||||
6) Files f:Send each entry to a separate file f
|
||||
-------------------------------------------------------------
|
||||
Type number of your choice or 0 to continue:
|
||||
|
||||
After all parameters have been set, type 0 to commence the search.
|
||||
Messages regarding the progress of the search will be printed.
|
||||
|
||||
(1,2) Which entries to get?
|
||||
If you want to get a single entry, option 1 lets you type in the
|
||||
name of that entry, without having to create a namefile. To get
|
||||
more than one entry, choose option 2, and specify the name of a
|
||||
file containing sequence names or accession numbers.
|
||||
|
||||
namefile is a file containing one or more sequence names or
|
||||
accession numbers, each on a separate line. Names and accession
|
||||
numbers can even be interspersed, in upper or lowercase, and in
|
||||
any order. For example, the namefile prp.nam might contain
|
||||
|
||||
; plant pathogenesis related proteins
|
||||
; (these are sample comment lines)
|
||||
; note that any line containing a semicolon is ignored
|
||||
x06362
|
||||
x05454
|
||||
TOBPR1A1
|
||||
; comments can be interspersed with names.
|
||||
PUMPR13
|
||||
tobpr1ar
|
||||
|
||||
Options 1 & 2 are mutually exclusive. Setting one will negate the
|
||||
other. If option 2 is chosen, the name of the namefile will appear
|
||||
at the top of the menu.
|
||||
|
||||
(3) WhatToGet
|
||||
Use this option to specify whether to get annotation, sequence,
|
||||
or both (default=both).
|
||||
|
||||
(4) Database
|
||||
Use this option to select the database. (default=GenBank).
|
||||
G and P select user-created database subsets containing GenBank
|
||||
or PIR entries, respectively. It is assumed that the database
|
||||
has been split into .ano, .wrp and .ind files using splitdb.
|
||||
For example, if you had created a database subset called PR1.pir,
|
||||
splitdb would create PR1.ano, PR1.wrp and PR1.ind. These are
|
||||
the files actually read by FETCH. When prompted for the name
|
||||
of the database, simply type "PR1", without a file extension.
|
||||
(If you do type a file extension, it will be ignored).
|
||||
|
||||
(5, 6) Where to send output
|
||||
By default, option 6 is set to f, and each entry will be written to
|
||||
a separate file, where the name of the file is the name of the
|
||||
entry, followed by a file extension. If a complete entry is
|
||||
retrieved, the file extension will indicate the type of database
|
||||
(GenBank: .gen; PIR: .pir, Vecbase: .vec; LiMB: .LiMB). If only
|
||||
annotation or sequence are retrieved, the file extensions will be
|
||||
.ano or .wrp, respectively. Using the default, the namefile above
|
||||
would create the following files:
|
||||
|
||||
PUMPR13.gen
|
||||
TOBPR1A1.gen
|
||||
TOBPR1AR.gen
|
||||
TOBPR1CR.gen
|
||||
TOBPR1PS.gen
|
||||
|
||||
By choosing option 5, you can specify the name of an output file
|
||||
for all entries to go to. This filename will appear at the top
|
||||
of the menu. Obviously, options 5 & 6 are mutually exclusive.
|
||||
Note that entries retrieved are written in alphabetical order (sorting by
|
||||
ASCII values), not the order in which they appeared in namefile.
|
||||
|
||||
(Note for remote users only: -f will only work for a single
|
||||
name/accession supplied in 1). -f IS NOT ENABLED FOR NAMEFILES
|
||||
specified in 2).)
|
||||
|
||||
Batch mode:
|
||||
Although it is transparent to the user, all fetch really does
|
||||
is call getloc, saving the user the trouble of knowing which
|
||||
database files to retrieve sequences from, or of having to
|
||||
execute getloc multiple times to retrieve sequences from
|
||||
different database files. Thus, the options are identical to those
|
||||
for getloc:
|
||||
|
||||
-a Write annotation portions of entries only, terminated by '//'.
|
||||
-s Write sequence data only, in Pearson (.wrp) format.
|
||||
-f Write each entry to a separate file.
|
||||
-g GenBank (default)
|
||||
-e EMBL {not implemented}
|
||||
-p PIR (NBRF)
|
||||
-v Vecbase
|
||||
-l LiMB
|
||||
-G GenBank_dataset
|
||||
-P PIR_dataset
|
||||
|
||||
If -f is not specified, outfile must be specified.
|
||||
|
||||
-L force execution of fetch on local host even if
|
||||
$XYLEM_RHOST is set. See "REMOTE EXECUTION" below
|
||||
|
||||
|
||||
PIR_dataset
|
||||
GenBank_dataset
|
||||
This can be either a file of PIR entries, a file of GenBank entries,
|
||||
or a XYLEM dataset created by splitdb. A file of PIR entries must
|
||||
have the file extension ".pir". A file of GenBank entries must have
|
||||
the file extension ".gen". A XYLEM dataset contains PIR entries split
|
||||
among three files by splitdb: annotation (.ano), sequence (.wrp)
|
||||
and index (.ind). These file extensions must be used!
|
||||
|
||||
When specifying a split dataset, only the base name needs to be
|
||||
used. For example given a XYLEM dataset consisting of the files
|
||||
myset.ano, myset.wrp and myset.ind, the following two commands
|
||||
are equivalent:
|
||||
|
||||
fetch -P myset something.nam something.pir
|
||||
fetch -P myset.ano something.nam something.pir
|
||||
|
||||
If the original .pir file had been used, the command would have
|
||||
been
|
||||
|
||||
fetch -P myset.pir something.nam something.pir
|
||||
|
||||
The ability to work directly with .gen or .pir files is quite
|
||||
convenient. However, since FETCH needs to work with a split dataset,
|
||||
FETCH automatically splits .pir or .gen files into .ano, .wrp
|
||||
and .ind files, which are removed when finished. This requires
|
||||
extra disk space and execution time, which could be significant
|
||||
for large datasets.
|
||||
|
||||
EXAMPLES
|
||||
Batch example:
|
||||
fetch -f chitinase.nam
|
||||
will retrieve annotation and sequence for sequences listed in
|
||||
chitinase.nam from GenBank, writing each entry to a separate file
|
||||
with the extension .gen.
|
||||
|
||||
fetch -s -v pbr.nam pbr.wrp
|
||||
will retrieve sequence data only for the entries listed in pbr.nam,
|
||||
from VecBase, and write all sequences to a Pearson format file
|
||||
(ie. readable by fasta) with the name pbr.wrp.
|
||||
|
||||
fetch -G sample sample.nam new.gen
|
||||
fetch -G sample.ano sample.nam new.gen
|
||||
Assumes that a set of GenBank entries has been split by splitdb
|
||||
into sample.ano sample.wrp and sample.ind. The entries listed in
|
||||
sample.nam are written to new.gen.
|
||||
|
||||
|
||||
FILES
|
||||
Database files:
|
||||
The directories for database files are specified by the environment
|
||||
variables $GB (GenBank) $PIR (PIR/NBRF) $VEC(Vecbase) and $LIMB
|
||||
(LiMB).
|
||||
|
||||
Index files are $GB/gbacc.idx for GenBank (this file is supplied
|
||||
with each GenBank release), while the other databases
|
||||
use .ind files generated by splitdb. Split database files MUST
|
||||
have the following file extensions: .ano {annotation}, .wrp
|
||||
{sequence} and .ind {index}. Thus, when creating database files
|
||||
for pir1.dat with splitdb, the output files should be pir1.ano,
|
||||
pir1.wrp and pir1.ind.
|
||||
|
||||
Temporary files:
|
||||
NAMEFILE.fetch
|
||||
PRELIMINARY.fetch
|
||||
TMP.fetch
|
||||
FOUND.fetch
|
||||
FETCHDIR {temporary directory}
|
||||
|
||||
REMOTE EXECUTION
|
||||
Where the databases can not be stored locally, FETCH can call
|
||||
FETCH on another system and retrieve the results. To run
|
||||
FETCH remotely, your .cshrc file should contain the following
|
||||
lines:
|
||||
|
||||
setenv XYLEM_RHOST remotehostname
|
||||
setenv XYLEM_USERID remoteuserid
|
||||
|
||||
where remotehostname is the name of the host on which the
|
||||
databases reside (in XYLEM split format) and remoteuserid
|
||||
is your userid on the remote system. When run remotely,
|
||||
your local copy of FETCH will generate the following
|
||||
commands:
|
||||
|
||||
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID fetch ...
|
||||
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID $RM temporary_files
|
||||
|
||||
Because FETCH uses rsh and rcp, your home directory on both
|
||||
the local and remote systems must have a world-readable
|
||||
file called .rhosts, containing the names of trusted remote
|
||||
hosts and your userid on each host. Before trying to get
|
||||
FETCH to work remotely, make sure that you can rcp and
|
||||
rsh to the remote host.
|
||||
|
||||
Obviously, remote execution of FETCH implies that FETCH
|
||||
must already be installed on the remote host. When FETCH
|
||||
runs another copy of FETCH remotely, it uses the -L option
|
||||
(fetch -L) to ensure that the remote FETCH job executes,
|
||||
rather than calling yet another FETCH on another host.
|
||||
|
||||
|
||||
---------- Remote execution on more than 1 host -----------
|
||||
If more than 1 remote host is available for running FETCH
|
||||
(say, in a clustered environment where many servers mount
|
||||
a common filesystem) the choice of a host can be determined
|
||||
by the csh script choosehost, such that execution of
|
||||
choosehost returns the name of a remote server. To use this
|
||||
approach, the following script, called 'choosehost' should
|
||||
be in your bin directory:
|
||||
|
||||
#!/bin/csh
|
||||
# choosehost - choose a host to use for a remote job.
|
||||
# This script rotates among servers listed in .rexhosts,
|
||||
# by choosing the host at the top of the list and moving
|
||||
# it to the bottom.
|
||||
|
||||
#Rotate the list, putting the current host to the bottom.
|
||||
set HOST = `head -1 $home/.rexhosts`
|
||||
set JOBID = $$
|
||||
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
|
||||
echo $HOST >> /tmp/.rexhosts.$JOBID
|
||||
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
|
||||
|
||||
# Write out the current host name
|
||||
echo $HOST
|
||||
|
||||
You must also have a file in your home directory called
|
||||
.rexhosts, listing remote hosts, such as
|
||||
|
||||
graucho.cc.umanitoba.ca
|
||||
harpo.cc.umanitoba.ca
|
||||
chico.cc.umanitoba.ca
|
||||
zeppo.cc.umanitoba.ca
|
||||
|
||||
Each time choosehost is called, choosehost will rotate the
|
||||
names in the file. For example, starting with the .rexhosts
|
||||
as shown, it will move graucho.cc.umanitoba.ca to the bottom
|
||||
of the file, and write the line 'graucho.cc.umanitoba.ca'
|
||||
to the standard output. The next time choosehost is
|
||||
run, it would write 'harpo.cc.umanitoba.ca', and so on.
|
||||
|
||||
Depending on your local configuration, you may wish to
|
||||
rewrite choosehost. All that is really necessary is that
|
||||
echo `choosehost` should return the name of a valid host.
|
||||
|
||||
Once you have installed choosehost and tested it, you can
|
||||
get FETCH to use choosehost simply by setting
|
||||
|
||||
setenv XYLEM_RHOST choosehost
|
||||
|
||||
in your .cshrc file.
|
||||
|
||||
--------------- Remote filesystems -----------------------
|
||||
Finally, an alternative to remote execution is to remotely mount
|
||||
the file system containing the databases across the network.
|
||||
This has the advantage of simplicity, and means that the
|
||||
databases are available for ALL programs on your local
|
||||
workstation. However, it may still be advantageous to run
|
||||
FETCH remotely, since that will shift much of the computational
|
||||
load to another host.
|
||||
|
||||
BUGS
|
||||
When retrieving entries directly from GenBank, FETCH uses the
|
||||
Accession Number index file gbacc.idx. In this case, FETCH
|
||||
can retrieve all entries containing a given accession number.
|
||||
This capability makes it possible to retrieve an entry using a
|
||||
secondary accession number. However if more than one entry
|
||||
share a secondary accession number, all of those entries will
|
||||
be retrieved. While this behavior might be a bit of an
|
||||
annoyance at times, it can also be useful because it alerts
|
||||
the user to the presence of other, related entries that might
|
||||
be of interest.
|
||||
|
||||
SEE ALSO
|
||||
getloc features
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,365 +0,0 @@
|
|||
|
||||
FINDKEY.DOC update 13 Mar 97
|
||||
|
||||
|
||||
NAME
|
||||
findkey - finds database entries containing one or more keywords
|
||||
|
||||
SYNOPSIS
|
||||
findkey
|
||||
findkey [-pvbmgrdutielnsaxzL] keywordfile [namefile findfile]
|
||||
findkey [-P PIR_dataset] keywordfile [namefile findfile]
|
||||
findkey [-G GenBank_dataset] keywordfile [namefile findfile]
|
||||
|
||||
DESCRIPTION
|
||||
findkey uses the grep family of commands to find lines in database
|
||||
annotation files containing one or more keywords. Next, identify
|
||||
is called to create a .nam file, containing the names of entries
|
||||
containing the keywords, and a .fnd file, containing the actual
|
||||
lines from each entry containing hits. A PIR or GenBank dataset is
|
||||
either a file containing one or more GenBank or PIR entries, or
|
||||
the name of a XYLEM dataset created by splitdb. See FILES below
|
||||
for a more detailed description.
|
||||
|
||||
INTERACTIVE USE
|
||||
findkey prompts the user to set search parameters, using an interactive
|
||||
menu:
|
||||
|
||||
___________________________________________________________________
|
||||
FINDKEY - Version 12 Aug 94
|
||||
Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003
|
||||
___________________________________________________________________
|
||||
Keyfile:
|
||||
Dataset:
|
||||
-------------------------------------------------------------------
|
||||
Parameter Description Value
|
||||
-------------------------------------------------------------------
|
||||
1) Keyword Keyword to find thionin
|
||||
2) Keyfile Get list of keywords from Keyfile
|
||||
3) WhereToLook p:PIR v:VecBase p
|
||||
GenBank - b:bacterial i:invertebrate
|
||||
m:mamalian e:expressed seq. tag
|
||||
g:phage l:plant
|
||||
r:primate n:rna
|
||||
d:rodent s:synthetic
|
||||
u:unannotated a:viral
|
||||
t:vertebrate x:patented
|
||||
z:STS
|
||||
G: GenBank dataset P: PIR dataset
|
||||
-------------------------------------------------------------
|
||||
Type number of your choice or 0 to continue:
|
||||
0
|
||||
Searching /home/psgendb/PIR/pir1.ano...
|
||||
Sequence names will be written to thionin~pir.nam
|
||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
||||
Searching /home/psgendb/PIR/pir2.ano...
|
||||
Sequence names will be written to thionin~pir.nam
|
||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
||||
Searching /home/psgendb/PIR/pir3.ano...
|
||||
Sequence names will be written to thionin~pir.nam
|
||||
Lines containing keyword(s) will be written to thionin~pir.fnd
|
||||
|
||||
As shown in the example above, the keyword thionin was specified
|
||||
as the keyword to search for. By default, option 3 is set to p,
|
||||
and the PIR protein database is searched. Messages describe the
|
||||
progress of the search. Since PIR is broken up into two divisions
|
||||
(new and protein) both are searched, but all output is written to
|
||||
thionin~pir.nam and thionin~pir.fnd.
|
||||
|
||||
OPTIONS
|
||||
(1,2) Which keywords to search for?
|
||||
If you want to search for a single keyword, option 1 lets you type
|
||||
the keyword, without having to create a file. To search for more
|
||||
than one keyword, choose option 2, and specify the name of a
|
||||
file containing the keywords. For example, entries containing
|
||||
genes for antibiotic resistance might be found using the
|
||||
following keyword file:
|
||||
|
||||
ampicillin
|
||||
chloramphenicol
|
||||
kanamycin
|
||||
neomycin
|
||||
tetracycline
|
||||
|
||||
Note: keyword searches are case insensitive.
|
||||
|
||||
As you might expect, it takes longer to search for multiple
|
||||
keywords than a single keyword.
|
||||
|
||||
Options 1 & 2 are mutually exclusive. Setting one will negate the
|
||||
other. If option 2 is chosen, the name of the keyword file will
|
||||
appear at the top of the menu.
|
||||
|
||||
Finally, it is probably not a good idea to search GenBank
|
||||
entries using very short keywords consisting only of letters.
|
||||
This is because GenBank entries now include a /translation
|
||||
field containing the amino acid sequence of each protein
|
||||
coding sequence. Consequently, 3 or 4 letter keywords
|
||||
consisting of legal amino acid symbols (eg. CAP, recA) will
|
||||
turn up fairly often in protein translations.
|
||||
|
||||
(3) WhereToLook
|
||||
Use this option to specify the database to be searched. In the
|
||||
case of GenBank, only one division at a time may be searched.
|
||||
User-created database subsets containing PIR (P) or GenBank (G)
|
||||
entries may also be searched. User-created database subsets
|
||||
must be in the .ano/.wrp/.ind form created by splitdb.
|
||||
|
||||
OUTPUT
|
||||
The output filenames take the following form:
|
||||
|
||||
name~ex1.ex2
|
||||
|
||||
The 'name' part of the filename is either the keyword searched for,
|
||||
if option 1 was chosen, or the name of the keyword file, if option 2
|
||||
obtains. 'ex1' indicates the database division that was searched. For
|
||||
PIR and VecBase, ex1 is 'pir' and 'vec', respectively. For GenBank,
|
||||
ex1 is as follows:
|
||||
|
||||
bct - bacterial
|
||||
inv - invertebrate
|
||||
mam - other mammalian
|
||||
est - expressed sequence tag
|
||||
phg - phage
|
||||
pln - plant (includes fungi)
|
||||
pri - primate
|
||||
rna - structural RNAs
|
||||
rod - rodent
|
||||
syn - synthetic sequences
|
||||
sts - sequence tagged sites
|
||||
una - unannotated (new) sequences
|
||||
vrl - viral
|
||||
vrt - other vertebrate
|
||||
|
||||
'ex2' distinguishes the files containing the names of entries
|
||||
containing keywords (.nam) and the files containing the lines found
|
||||
in each entry (.fnd).
|
||||
|
||||
The .nam file can be used directly as a namefile for fetch, getloc,
|
||||
or getob.
|
||||
|
||||
COMMAND LINE USE
|
||||
|
||||
OPTIONS
|
||||
p search PIR (default)
|
||||
P PIR dataset search dbfile, containing PIR entries
|
||||
v search VecBase
|
||||
b search Genbank bacterial division
|
||||
m search Genbank mammalian division
|
||||
g search Genbank phage division
|
||||
r search Genbank primate division
|
||||
d search Genbank rodent division
|
||||
u search Genbank unannotated division
|
||||
t search Genbank vertebrate division
|
||||
i search Genbank invertebrate division
|
||||
l search Genbank plant division
|
||||
n search Genbank rna division
|
||||
s search Genbank synthetic division
|
||||
a search Genbank viral division
|
||||
x search Genbank patented division
|
||||
e search Genbank exp.seq.tag division
|
||||
z search GenBank STS division
|
||||
S search GenBank Genom. Survey division
|
||||
h search GenBank High Thrput. division
|
||||
G GenBank dataset search dbfile, containing GenBank entries
|
||||
|
||||
L force execution of findkey on local host
|
||||
even if $XYLEM_RHOST is set. See "REMOTE
|
||||
EXECUTION" below
|
||||
|
||||
FILES
|
||||
|
||||
keywordfile - contains keywords to search for
|
||||
|
||||
namefile - LOCUS names of hits are written to this file
|
||||
|
||||
findfile - for each hit, a report listing the LOCUS name and the
|
||||
lines matching the keyword is written to this file.
|
||||
|
||||
If namefile and findfile are not specified on the command line,
|
||||
filenames will be created as described above for interactive
|
||||
use.
|
||||
|
||||
PIR_dataset
|
||||
GenBank_dataset
|
||||
This can be either a file of PIR entries, a file of GenBank entries,
|
||||
or a XYLEM dataset created by splitdb. A file of PIR entries must
|
||||
have the file extension ".pir". A file of GenBank entries must have
|
||||
the file extension ".gen". A XYLEM dataset contains PIR entries split
|
||||
among three files by splitdb: annotation (.ano), sequence (.wrp)
|
||||
and index (.ind). These file extensions must be used!
|
||||
|
||||
When specifying a split dataset, only the base name needs to be
|
||||
used. For example given a XYLEM dataset consisting of the files
|
||||
myset.ano, myset.wrp and myset.ind, the following two commands
|
||||
are equivalent:
|
||||
|
||||
findkey -P myset something.kw
|
||||
findkey -P myset.ano something.kw
|
||||
|
||||
If the original .pir file had been used, the command would have
|
||||
been
|
||||
|
||||
findkey -P myset.pir something.kw
|
||||
|
||||
The ability to work directly with .gen or .pir files is quite
|
||||
convenient. However, since FINDKEY needs to work with a split dataset,
|
||||
FINDKEY automatically splits .pir or .gen files into .ano, .wrp
|
||||
and .ind files, which are removed when finished. This requires
|
||||
extra disk space and execution time, which could be significant
|
||||
for large datasets.
|
||||
|
||||
EXAMPLES
|
||||
If the list of antibiotics shown above was stored in the file
|
||||
antibiotic.kw, and option 3 was set to 'b', then the annotation
|
||||
portion of the GenBank bacterial division would be searched, and
|
||||
all lines containing any of these keywords would be written to
|
||||
antibiotic~bct.fnd. The corresponding GenBank entry names would
|
||||
appear in antibiotic~bct.nam.
|
||||
|
||||
The same keyword file could be used to search other database files.
|
||||
If VecBase was searched, the output files would be antibiotic~vec.fnd
|
||||
and antibiotic~vec.nam. These filename conventions make it easy
|
||||
to search different database divisions, and to keep track of where
|
||||
data came from.
|
||||
|
||||
Command line examples:
|
||||
|
||||
findkey thionin.kw
|
||||
|
||||
would be equivalent to the interactive example shown above. In
|
||||
this case, the file thionin.kw contains the word 'thionin'.
|
||||
(Note that since PIR is the default, -p need not be supplied.)
|
||||
|
||||
findkey -b antibiotic.kw drugs.nam drugs.fnd
|
||||
|
||||
would search the GenBank bacterial division for the keywords
|
||||
contained in antibiotic.kw, and write the output to drugs.nam
|
||||
and drugs.fnd.
|
||||
|
||||
FILES
|
||||
Database files:
|
||||
The directories for database files are specified by the environment
|
||||
variables $GB (GenBank), $PIR (PIR/NBRF) and $VEC (VecBase).
|
||||
Annotation (.ano) and index (.ind) are those generated by splitdb.
|
||||
|
||||
Temporary files:
|
||||
$jobid.fnd
|
||||
$jobid.nam
|
||||
$jobid.grep
|
||||
|
||||
where $jobid is a unique jobid generated by the shell
|
||||
|
||||
REMOTE EXECUTION
|
||||
Where the databases can not be stored locally, FINDKEY can call
|
||||
FINDKEY on another system and retrieve the results. To run
|
||||
FINDKEY remotely, your .cshrc file should contain the following
|
||||
lines:
|
||||
|
||||
setenv XYLEM_RHOST remotehostname
|
||||
setenv XYLEM_USERID remoteuserid
|
||||
|
||||
where remotehostname is the name of the host on which the
|
||||
databases reside (in XYLEM split format) and remoteuserid
|
||||
is your userid on the remote system. When run remotely,
|
||||
your local copy of FINDKEY will generate the following
|
||||
commands:
|
||||
|
||||
rcp filename $XYLEM_USERID@$XYLEM_RHOST:filename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID findkey ...
|
||||
rcp $XYLEM_USERID@$XYLEM_RHOST:outputfilename outputfilename
|
||||
rsh $XYLEM_RHOST -l $XYLEM_USERID rm temporary_files
|
||||
|
||||
Because FINDKEY uses rsh and rcp, your home directory on both
|
||||
the local and remote systems must have a world-readable
|
||||
file called .rhosts, containing the names of trusted remote
|
||||
hosts and your userid on each host. Before trying to get
|
||||
FINDKEY to work remotely, make sure that you can rcp and
|
||||
rsh to the remote host.
|
||||
|
||||
Obviously, remote execution of FINDKEY implies that FINDKEY
|
||||
must already be installed on the remote host. When FINDKEY
|
||||
runs another copy of FINDKEY remotely, it uses the -L option
|
||||
(findkey -L) to ensure that the remote FINDKEY job executes,
|
||||
rather than calling yet another FINDKEY on another host.
|
||||
|
||||
---------- Remote execution on more than 1 host -----------
|
||||
If more than 1 remote host is available for running FINDKEY
|
||||
(say, in a clustered environment where many servers mount
|
||||
a common filesystem) the choice of a host can be determined
|
||||
by the csh script choosehost, such that execution of
|
||||
choosehost returns the name of a remote server. To use this
|
||||
approach, the following script, called 'choosehost' should
|
||||
be in your bin directory:
|
||||
|
||||
#!/bin/csh
|
||||
# choosehost - choose a host to use for a remote job.
|
||||
# This script rotates among servers listed in .rexhosts,
|
||||
# by choosing the host at the top of the list and moving
|
||||
# it to the bottom.
|
||||
|
||||
#Rotate the list, putting the current host to the bottom.
|
||||
set HOST = `head -1 $home/.rexhosts`
|
||||
set JOBID = $$
|
||||
tail +2 $home/.rexhosts > /tmp/.rexhosts.$JOBID
|
||||
echo $HOST >> /tmp/.rexhosts.$JOBID
|
||||
/usr/bin/mv /tmp/.rexhosts.$JOBID $home/.rexhosts
|
||||
|
||||
# Write out the current host name
|
||||
echo $HOST
|
||||
|
||||
You must also have a file in your home directory called
|
||||
.rexhosts, listing remote hosts, such as
|
||||
|
||||
graucho.cc.umanitoba.ca
|
||||
harpo.cc.umanitoba.ca
|
||||
chico.cc.umanitoba.ca
|
||||
zeppo.cc.umanitoba.ca
|
||||
|
||||
Each time choosehost is called, choosehost will rotate the
|
||||
names in the file. For example, starting with the .rexhosts
|
||||
as shown, it will move graucho.cc.umanitoba.ca to the bottom
|
||||
of the file, and write the line 'graucho.cc.umanitoba.ca'
|
||||
to the standard output. The next time choosehost is
|
||||
run, it would write 'harpo.cc.umanitoba.ca', and so on.
|
||||
|
||||
Depending on your local configuration, you may wish to
|
||||
rewrite choosehost. All that is really necessary is that
|
||||
echo `choosehost` should return the name of a valid host.
|
||||
|
||||
Once you have installed choosehost and tested it, you can
|
||||
get FINDKEY to use choosehost simply by setting
|
||||
|
||||
setenv XYLEM_RHOST choosehost
|
||||
|
||||
in your .cshrc file.
|
||||
|
||||
--------------- Remote filesystems -----------------------
|
||||
Finally, an alternative to remote execution is to remotely mount
|
||||
the file system containing the databases across the network.
|
||||
This has the advantage of simplicity, and means that the
|
||||
databases are available for ALL programs on your local
|
||||
workstation. However, it may still be advantageous to run
|
||||
FINDKEY remotely, since that will shift much of the computational
|
||||
load to another host.
|
||||
|
||||
|
||||
BUGS
|
||||
At present, regular expression characters cannot be used for
|
||||
keyword searches.
|
||||
|
||||
SEE ALSO
|
||||
grep(1V) identify splitdb
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,65 +0,0 @@
|
|||
|
||||
GETLOC.DOC update 30 May 95
|
||||
|
||||
|
||||
NAME
|
||||
getloc - retrieve database entries listed in namefile to outfile.
|
||||
|
||||
SYNOPSIS
|
||||
getloc [-asfcgepvl] namefile [anofile] [seqfile] indfile outfile
|
||||
|
||||
DESCRIPTION
|
||||
getloc reads a list of names from namefile and recreates
|
||||
entries by combining the annotation and sequence portions of each
|
||||
entry from anofile and seqfile. getloc will work most quickly
|
||||
when the namefile is in alphabetical order, but it will also
|
||||
work on unsorted lists. The following options affect the output:
|
||||
|
||||
a Write annotation portions of entries only, terminated by '//'.
|
||||
seqfile is not included on command line.
|
||||
|
||||
s Write sequence data only, in Pearson (.wrp) format.
|
||||
anofile is not included on command line.
|
||||
|
||||
f Write each entry to a separate file. The filename will
|
||||
consist of the LOCUS name, followed by .ano for annotation
|
||||
only, .wrp for sequence only, or .gen for complete GenBank
|
||||
format.
|
||||
|
||||
c namefile contains accession numbers, rather than names
|
||||
|
||||
The following options identify the type of database being read:
|
||||
|
||||
g GenBank (default)
|
||||
e EMBL
|
||||
p PIR (NBRF)
|
||||
v Vecbase
|
||||
l LiMB
|
||||
|
||||
namefile consists of an alphabetically ordered list of LOCUS names,
|
||||
each on a separate line. Indfile could be used to create a
|
||||
namefile by simply editing out some subset of names. (This can also
|
||||
be done using the Unix comm command.) If the entire indfile was
|
||||
used, the entire database would be recreated, minus the header
|
||||
information that might have been present in the original, but
|
||||
deleted by splitdb.
|
||||
|
||||
NOTE
|
||||
Getloc automatically expands leading blanks that have been
|
||||
compressed using splitdb -c. See splitdb.doc for more information.
|
||||
|
||||
SEE ALSO
|
||||
splitdb, comm(1).
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,327 +0,0 @@
|
|||
|
||||
GETOB 21 Dec 94
|
||||
|
||||
|
||||
NAME
|
||||
getob - Get an object from GenBank
|
||||
|
||||
SYNOPSIS
|
||||
getob [-frcn] infile namefile anofile seqfile indfile message
|
||||
[outfile] expfile
|
||||
|
||||
DESCRIPTION
|
||||
getob extracts 'objects' (subsequences) from GenBank entries, using
|
||||
the features table, and writes them to outfile (.out). A log
|
||||
describing the construction of each object is written to message
|
||||
(.msg). If -r is not set, a list of expressions that would recreate
|
||||
the .out file if evaluated by getob -r, is written to expfile (.exp).
|
||||
|
||||
The following options are available:
|
||||
|
||||
f Write each entry to a separate file. The name will consist
|
||||
of the entry name, and the extension '.obj'.
|
||||
|
||||
r Resolve expressions from namefile into objects.
|
||||
Expressions take the form:
|
||||
|
||||
@[<database>::]<accession>:<location>
|
||||
|
||||
In effect, r makes it possible to use getob to resolve
|
||||
features that span more than one entry, such as segmented
|
||||
files. In the first run of the program, features that require
|
||||
data from outside the entry in which they are defined will be
|
||||
written to outfile with those externally-defined parts rep-
|
||||
resented using the '@' notation described above. During a
|
||||
subsequent run, the outfile from the previous run is used as
|
||||
namefile. When r is set, all lines not beginning with '@' (ie.
|
||||
name lines and sequence lines) are simply copied to the new
|
||||
outfile. When an '@' is encountered, the expression is parsed
|
||||
into accession number and location. The entry with the
|
||||
specified accession number is located in indfile, and read from
|
||||
anofile and seqfile. It is then evaluated, and the result
|
||||
written to outfile in place of the '@' expression.
|
||||
|
||||
getob can also be used to get specific labeled objects from
|
||||
a given entry. Examples:
|
||||
|
||||
@k30576:polyprotein
|
||||
@k30576:/label=polyprotein
|
||||
@x10345:/product="hsp70"
|
||||
@j00879:group(1..2200,mutation_37)
|
||||
|
||||
The first two constructs given above are equivalent. Both
|
||||
will extract the feature called polyprotein. The third
|
||||
construct shows that any feature label can be specified. If
|
||||
none is specified, as in the first example, then /label= is
|
||||
assumed. One limitation, however, is that the label sought
|
||||
must be unique within the entry in its first 15 characters
|
||||
including double quotes ("). Otherwise, only the first
|
||||
matching label expression will be evaluated. Finally, the
|
||||
last example shows that a mutant sequence can be constructed
|
||||
by first specifying an expression that evaluates to a
|
||||
sequence (ie. 1..2200) and then a labeled expression that
|
||||
upon evaluation, uses replace() to modify that sequence. The
|
||||
usage shown in examples 3 & 4 above represent extensions to
|
||||
the DDBJ/EMBL/GenBank Features Table Format.
|
||||
|
||||
As touched on briefly above, the r option makes it possible
|
||||
to construct objects that include recursive references to
|
||||
other entries (eg. segmented files) by iterative calls to
|
||||
getob. The 'features' command automates this process. The basic
|
||||
algorithm is as follows:
|
||||
|
||||
getob infile namefile anofile seqfile indfile ...
|
||||
|
||||
#Pull out all lines containing indirect references
|
||||
grep '@' outfile > unresolved.grep
|
||||
|
||||
while (unresolved.grep is not empty)
|
||||
|
||||
#extract accession numbers to be retrieved
|
||||
cut -c2-7 unresolved.grep > unresolved.nam
|
||||
|
||||
#retrieve the sequences into a new file, and create
|
||||
#a database subset to be used by getob
|
||||
fetch unresolved.nam new.gen
|
||||
splitdb new.gen new.ano new.wrp new.ind
|
||||
|
||||
#run getob again to resolve indirect references
|
||||
getob -r infile outfile new.ano new.wrp new.ind ...
|
||||
|
||||
#Pull out all lines containing indirect references
|
||||
grep '@' outfile > unresolved.grep
|
||||
end
|
||||
|
||||
c NAMEFILE contains accession numbers, rather than locus names
|
||||
|
||||
n By default, the qualifier 'codon_start' is used to determine
|
||||
how many n's, if necessary, must be added to the 5' end of
|
||||
CDS, mat_peptide, or sig_peptide, to preserve the reading
|
||||
frame. To turn OFF this feature, -n must be set. -n must be set
|
||||
for GenBank Releases 67.0 and earlier.
|
||||
|
||||
infile contains commands indicating what data is to be pulled from
|
||||
each entry. Two types of output may be presented, GenBank or
|
||||
OBJECTS. These are described below:
|
||||
|
||||
1) GenBank output - If the word 'GENBANK' is the first line in
|
||||
infile, a pseudo-GenBank entry will be recreated. This option
|
||||
is only intended for debugging purposes and will probably be
|
||||
removed in later releases.
|
||||
|
||||
2) Object format - This option instructs getob to write part or
|
||||
all of each sequence, along with site annotation, by specifying
|
||||
feature key names. The syntax for infile is shown below:
|
||||
|
||||
Backus-Naur format: Example:
|
||||
----------------------------------------------------------
|
||||
OBJECTS OBJECTS
|
||||
<feature key> tRNA
|
||||
{<feature key> rRNA
|
||||
. . . SITES
|
||||
<feature key>} stem_loop
|
||||
SITES
|
||||
{<feature key>
|
||||
. . .
|
||||
<feature key>}
|
||||
|
||||
In the example above, getob is instructed to extract all tRNA or
|
||||
rRNA sequences from each entry, and annotate the position of each
|
||||
stem/loop structure. Note that the SITES coordinates written to the
|
||||
file tell the positions of those SITES relative to the start of the
|
||||
object, rather than the original location in the sequence. As above,
|
||||
each word begins a separate line.
|
||||
|
||||
While the -r option does not use infile, at least a dummy infile
|
||||
must be included in the command line. This dummy file need only
|
||||
contain two lines:
|
||||
|
||||
OBJECTS
|
||||
SITES
|
||||
|
||||
NOTE: SITES IS NOT YET IMPLEMENTED! Although inclusion of SITES in
|
||||
the input file will have no effect, the word SITES must still be
|
||||
present after the last feature key.
|
||||
|
||||
|
||||
namefile
|
||||
namefile consists of a list of LOCUS names or accession numbers,
|
||||
each on a separate line. Names or accession numbers should appear
|
||||
in the order in which they appear in the database file. Unordered
|
||||
namefiles will slow the progress of the search. Since only the
|
||||
first non-blank field of each line in namefile is read, indfile
|
||||
could be used to create a namefile. If the entire indfile was
|
||||
used, the entire database file would be processed. A sample
|
||||
namefile requesting four sequences by LOCUS name is shown below:
|
||||
|
||||
POTPR1A
|
||||
POTPSTH2
|
||||
POTPSTH21
|
||||
POTSTHA
|
||||
|
||||
anofile, seqfile, and indfile
|
||||
The database subset containing GenBank entries must be divided
|
||||
among annotation, sequence and an index by splitdb.
|
||||
|
||||
message
|
||||
message contains a log describing the parsing of each object.
|
||||
For annotative purposes, qualifier lines from the object are
|
||||
included along with the location expression being parsed.
|
||||
The beginning of a typical message file is shown below:
|
||||
|
||||
GETOB Version 0.962 14 May 1992
|
||||
|
||||
POTPR1A:CDS1
|
||||
join
|
||||
(
|
||||
295 603
|
||||
|
||||
1011 1355
|
||||
|
||||
)
|
||||
|
||||
|
||||
/note="pathogenesis-related protein (prp1)"
|
||||
/codon_start=1
|
||||
/translation="MAEVKLLGLRYSPFSHRVEWALKIKGVKYEFIEEDLQNKSPLLL
|
||||
QSNPIHKKIPVLIHNGKCICESMVILEYIDEAFEGPSILPKDPYDRALARFWAKYVED
|
||||
KGAAVWKSFFSKGEEQEKAKEEAYEMLKILDNEFKDKKCFVGDKFGFADIVANGAALY
|
||||
LGILEEVSGIVLATSEKFPNFCAWRDEYCTQNEEYFPSRDELLIRYRAYIQPVDASK"
|
||||
//----------------------------------------------
|
||||
|
||||
In the example above, getob was instructed to retrieve all CDS
|
||||
features from the database subset. The message for the entry
|
||||
POTPR1A is shown, along with a reconstruction of the location
|
||||
expression that was evaluated to create the object. In this
|
||||
case, protein coding sequences from two exons had to be joined
|
||||
to create the object.
|
||||
|
||||
outfile
|
||||
outfile contains the actual objects constructed, consisting of
|
||||
sites found and sequences. The beginning of a typical output file
|
||||
is shown below:
|
||||
|
||||
>POTPR1A:CDS1
|
||||
atggcagaagtgaagttgcttggtctaaggtatagtccttttagccatag
|
||||
agttgaatgggctctaaaaattaagggagtgaaatatgaatttatagagg
|
||||
aagatttacaaaataagagccctttacttcttcaatctaatccaattcac
|
||||
aagaaaattccagtgttaattcacaatggcaagtgcatttgtgagtctat
|
||||
ggtcattcttgaatacattgatgaggcatttgaaggcccttccattttgc
|
||||
ctaaagacccttatgatcgcgctttagcacgattttgggctaaatacgtc
|
||||
gaagataag
|
||||
ggggcagcagtgtggaaaagtttcttttcgaaaggagaggaacaagagaa
|
||||
agctaaagaggaagcttatgagatgttgaaaattcttgataatgagttca
|
||||
aggacaagaagtgctttgttggtgacaaatttggatttgctgatattgtt
|
||||
gcaaatggtgcagcactttatttgggaattcttgaagaagtatctggaat
|
||||
tgttttggcaacaagtgaaaaatttccaaatttttgtgcttggagagatg
|
||||
aatattgcacacaaaacgaggaatattttccttcaagagatgaattgctt
|
||||
atccgttaccgagcctacattcagcctgttgatgcttcaaaatga
|
||||
|
||||
In the example, the CDS from entry POTPR1A has been written in
|
||||
two chunks, corresponding to the two exon portions of the coding
|
||||
sequence. Each location retrieved in constructing the object is
|
||||
written as a separate block of sequence. By comparing message file
|
||||
to outfile, it is possible to verify the correctness of the
|
||||
operation.
|
||||
|
||||
Numbers are appended to the sequence names to indicate
|
||||
which CDS in the entry has been retrieved. Thus, if two CDS
|
||||
features were present, the second one would be named >POTPR1A:CDS2.
|
||||
For compatibility with the FASTA programs of Pearson, the name line
|
||||
begins with a '>'.
|
||||
|
||||
expfile
|
||||
The expression evaluated to create this feature is written
|
||||
to expfile:
|
||||
|
||||
>POTPR1A:CDS1
|
||||
@J03679:join(295..603,1011..1355)
|
||||
|
||||
expfile is only created if -r is not set. It is intended as a way
|
||||
of automating the creation of a feature expression file for use
|
||||
in generating customized datasets. Expressions in expfile can be
|
||||
deleted or modified, or new expressions added, to tailor the
|
||||
dataset to individual needs. To generate a dataset from expfile:
|
||||
|
||||
getob -r infile expfile anofile seqfile indfile message outfile
|
||||
|
||||
EXTENSIONS TO THE FEATURE TABLE LANGUAGE
|
||||
|
||||
1) poly(<absolute_location>|<literal>|<feature_name>,x)
|
||||
|
||||
This operator evaluates an absolute location, literal, or
|
||||
feature name (ie. any location not containing functional
|
||||
operators) and writes it x times. The most obvious
|
||||
application of poly is to create spacers to represent regions
|
||||
of unknown sequence between sequences that are known. For
|
||||
example, the restriction map of a 4kb EcoR1 fragment with a
|
||||
Hind3 site 1000 bp from one end could be represented as follows:
|
||||
|
||||
join("gaattc",poly("n",1000),"aagctt",poly("n",3000),"gaattc")
|
||||
|
||||
2) The following feature keys are recognized by GETOB, although
|
||||
not included in the language definition. While they will not
|
||||
appear in GenBank entries, they could be used in user-created
|
||||
GenBank-format files:
|
||||
|
||||
contig
|
||||
This feature key is meant to be used to assemble large
|
||||
sequence segments from smaller segments, possibly using the
|
||||
poly() operator.
|
||||
|
||||
chromosome
|
||||
Intended to annotate the complete sequence of a chromosome. This
|
||||
feature may be constructed by a join of two or more contigs.
|
||||
|
||||
Use of these keywords is illustrated in the features table
|
||||
shown below, which could be used to construct a model of part
|
||||
of the E.coli chromosome, spanning map units 763.4 to 1031.4 kb:
|
||||
|
||||
contig join(J01619:1..13063,poly("n",7140),
|
||||
J03939:1..1363,poly("n",14380),
|
||||
X02306:complement(1..1622),poly("n",14710),
|
||||
J04423:1..5793,poly("n",22500),
|
||||
X03722:1..2400,poly("n",123750),
|
||||
one-of(X05017:complement(1..1854),X05017:1..1854))
|
||||
/label=Eco_contig8
|
||||
/map=763.4-950.6kb
|
||||
contig join(V00352:1..2412,poly("n",28800),M15273:1..3409)
|
||||
/label=Eco_contig9
|
||||
/map=972.9-1001.7kb
|
||||
contig join(X02826:1..1357,poly("n",13540),
|
||||
J01654:complement(1..2270))
|
||||
/label=Eco_contig10
|
||||
/map=1016.5-1031.4kb
|
||||
chromosome join(Eco_contig8,poly("n",22300),
|
||||
Eco_contig9,poly("n",14800),
|
||||
Eco_contig10)
|
||||
/label=Ecoli_chromosome
|
||||
|
||||
NOTES
|
||||
1) If the const DEBUG is set to true in the Pascal source code, getob
|
||||
writes messages to the standard output, indicating the progress of
|
||||
processing for each entry read in. By default, DEBUG=false.
|
||||
This feature is solely for debugging purposes and will be removed in
|
||||
later releases.
|
||||
|
||||
2) GETOB automatically expands leading blanks that have been
|
||||
compressed using splitdb -c. See splitdb.doc for more information.
|
||||
|
||||
SEE ALSO
|
||||
features, splitdb, getloc
|
||||
The DDBJ/EMBL/GenBank Feature Table: Definition, Version 1.04
|
||||
September 1, 1992
|
||||
GenBank Release Notes for Release 79.0.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,83 +0,0 @@
|
|||
|
||||
IDENTIFY update 3 Feb 94
|
||||
|
||||
|
||||
NAME
|
||||
identify - creates a file of locus names corresponding to lines
|
||||
found by grep in a GenBank annotation file.
|
||||
|
||||
SYNOPSIS
|
||||
identify grepfile indfile namefile findfile
|
||||
|
||||
DESCRIPTION
|
||||
grepfile is created using the Unix grep command to search a .ano
|
||||
file created by splitgb. For example, to find all lines containing
|
||||
the word 'chlorophyll' in plant.ano, use
|
||||
|
||||
grep -n -i 'chlorophyll' plant.ano > plant.grep
|
||||
|
||||
In the example shown, the -n option causes each line written to
|
||||
plant.grep to be preceded by the number of that line in plant.ano.
|
||||
(The -i option causes grep to ignore case.) Identify can use the
|
||||
indfile to determine which entry a given numbered line was found
|
||||
in, and writes the corresponding LOCUS name to namefile. In
|
||||
addition, all lines found in a given entry are re-written to
|
||||
findfile without the line numbers, and preceded by the LOCUS name
|
||||
for that entry.
|
||||
|
||||
EXAMPLES
|
||||
Suppose you wanted to obtain a list of names for all plant
|
||||
sequences which code for proteins. The task is complicated by the
|
||||
fact that many fungal sequences are included in the GenBank plant
|
||||
file. You could begin by searching plant.ano (containing all
|
||||
GenBank plant entries) for the word 'Planta':
|
||||
|
||||
grep -n 'Planta' plant.ano > Planta.grep
|
||||
|
||||
However, we want to eliminate all fungal sequences, as well as all
|
||||
sequences for RNAs other than mRNAs. If we create the file
|
||||
bad.str containing the keywords
|
||||
|
||||
Mycophyta
|
||||
tRNA
|
||||
rRNA
|
||||
uRNA
|
||||
|
||||
we can then type
|
||||
|
||||
grep -n -f bad.str plant.ano > bad.grep
|
||||
|
||||
bad.grep now contains all lines containing the offending keywords.
|
||||
We next use identify to find the names of the entries found by
|
||||
grep.
|
||||
|
||||
identify Planta.grep plant.ind Planta.nam Planta.fnd
|
||||
identify bad.grep plant.ind bad.nam bad.fnd
|
||||
|
||||
Next, we can use the Unix comm command to compare the two .nam
|
||||
files and produce an output file containing only names which are
|
||||
present in Planta.nam but not bad.nam:
|
||||
|
||||
comm -23 Planta.nam bad.nam > plants.nam
|
||||
|
||||
The file plants.nam now contains names of either plant cDNA or
|
||||
genomic sequences which do not code for structural RNAs.
|
||||
At this point, getloc could be used to create a sub-database containing
|
||||
only those entries listed in plants.nam. See documentation for
|
||||
getloc for a more detailed discussion.
|
||||
|
||||
SEE ALSO
|
||||
grep, fgrep, egrep, ngrep, comm, splitgb, getloc
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,23 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FINDKEY/GDE Keyword File Instructions
|
||||
;
|
||||
; 1. Type in one or more keywords below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of keywords.
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FINDKEY will then perform the keyword search. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each keyword on a separate line
|
||||
; SAMPLE KEYWORD FILE:
|
||||
;
|
||||
; maize
|
||||
; corn
|
||||
; Z.mays
|
||||
; Zea
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FETCH/GDE Name/Accession File Instructions
|
||||
;
|
||||
; 1. Type in one or more LOCUS names or Accession #'s below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of names or accession #'s.
|
||||
; or
|
||||
; Copy names or accession #'s from another window and Paste into this window.
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FETCH will then retrieve the data. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each name on a separate line
|
||||
; SAMPLE NAME/ACCESSION FILE:
|
||||
;
|
||||
; X30412
|
||||
; PSDRR1
|
||||
; PEADRRG
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
;---------------------------------------------------------------------------
|
||||
; FEATURES/GDE Name File Instructions
|
||||
;
|
||||
; 1. Type in one or more GenBank LOCUS names below,
|
||||
; or
|
||||
; Place cursor at end of this file and choose 'Include File' in the FILE
|
||||
; menu to read in a file of names.
|
||||
;
|
||||
; (NOTE: File can not contain accession numbers.)
|
||||
;
|
||||
; 2. Choose 'Save Current File' in the File menu
|
||||
; 3. Quit this window
|
||||
;
|
||||
; FEATURES will then extract the appropriate sequences. YOU DON'T NEED TO EDIT
|
||||
; OUT THESE COMMENT LINES.
|
||||
;
|
||||
; NOTE: Put each name on a separate line
|
||||
; SAMPLE NAME FILE:
|
||||
;
|
||||
; PEADRRA
|
||||
; PSDRR1
|
||||
; PEADRRG
|
||||
;
|
||||
;---------------------------------------------------------------------------
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
printdoc update 3 Feb 94
|
||||
|
||||
NAME
|
||||
printdoc - prints documentation files
|
||||
|
||||
SYNOPSIS
|
||||
printdoc filename
|
||||
|
||||
DESCRIPTION
|
||||
printdoc uses the file extension to decide how to print a
|
||||
documentation file. If necessary, a filter such as pr or nroff
|
||||
is used to format the file before sending to the appropriate
|
||||
printer. A list of file extensions recognized by printdoc is
|
||||
given below. If no file extension is given, or the extension is
|
||||
not in the list, printdoc assumes .doc.
|
||||
|
||||
.doc - (default) Uses pr to print the text, using the default
|
||||
settings provided by pr (56 text lines per page plus a 5 line
|
||||
header and footer). Printing is at 12 cpi, front only. This works
|
||||
reasonably well for most unformatted documentation files,
|
||||
provided that the line length doesn't exceed 80 char. This
|
||||
option assumes that a half-inch left margin is automatically
|
||||
provided by the printer.
|
||||
|
||||
.tex - Assumes that document is already pre-formatted. Thus,
|
||||
no headers or footers are provided, and it is assumed that
|
||||
the top and bottom of pages are padded with blanks or header/
|
||||
footer lines as needed. Form-feed characters (^L) may be
|
||||
included in the text to force page breaks.
|
||||
|
||||
.ps - Assumes file is in PostScript format. Sends it to the
|
||||
PostScript printer.
|
||||
|
||||
.nroff - Assumes file is formatted for use by nroff, using the
|
||||
standard macro set (nroff -ms).
|
||||
|
||||
.nroff.me - Assumes file is formatted for use by nroff, using the
|
||||
e macro set (nroff -me).
|
||||
|
||||
TRANSPORTATION NOTES
|
||||
For reasons which should be obvious, this script needs major
|
||||
rewriting at each site, since the available printers will
|
||||
be of different types and have different names.
|
||||
|
||||
SEE ALSO
|
||||
pr, pr(V), xlp, nroff
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
prot2nuc update 10 Aug 94
|
||||
|
||||
NAME
|
||||
prot2nuc - reverse translates protein into nucleic acid
|
||||
|
||||
SYNOPSIS
|
||||
prot2nuc [-ln -gn] < input > output
|
||||
|
||||
DESCRIPTION
|
||||
prot2nuc reads a file containing an amino acid sequence
|
||||
and writes the corresponding reverse translated nucleic acid
|
||||
sequence, using the standard IUPAC-IUB ambiguity codes to output.
|
||||
The amino acid sequence may contain internal stop '*' characters.
|
||||
That is, all legal amino acid characters will be processed.
|
||||
|
||||
-ln print n amino acids/codons per line. (default = 25)
|
||||
|
||||
-gn number the amino acid sequence every n amino acids/codons.
|
||||
(default = 5)
|
||||
|
||||
If l is not evenly divisible by g, the defaults are used.
|
||||
|
||||
input - If the first line of the file begins with '>' or ';',
|
||||
input will be read as the standard .wrp (Pearson) format,
|
||||
such as that produced by getob:
|
||||
|
||||
>name
|
||||
sequence lines
|
||||
|
||||
|
||||
Otherwise, it will be assumed that the file ONLY contains
|
||||
sequence, and all legal IUPAC/IUB amino acid characters will be
|
||||
read as sequence.
|
||||
|
||||
output - The output begins with a header, listing both the
|
||||
1 and 3 letter amino acid codes [J. Biol. Chem. 243, 3557-3559
|
||||
(1968)], as well as the nucleic acid ambiguity codes [Cornish-
|
||||
Bowden (1985) Nucl. Acids Res. 13:3021-3030.]. The amino acid
|
||||
sequence, along with its reverse translation, are then printed on
|
||||
lines of l amino acids/codons, numbering every g amino acids/codons.
|
||||
Non-ambiguous nucleotides appear capitalized, while ambiguous
|
||||
nucleotides are in lowercase. A sample output file appears below:
|
||||
|
||||
PROT2NUC Version 8/10/94
|
||||
|
||||
IUPAC-IUP AMINO ACID SYMBOLS
|
||||
[J. Biol. Chem. 243, 3557-3559 (1968)]
|
||||
|
||||
Phe F Leu L Ile I
|
||||
Met M Val V Ser S
|
||||
Pro P Thr T Ala A
|
||||
Tyr Y His H Gln Q
|
||||
Asn N Lys K Asp D
|
||||
Glu E Cys C Trp W
|
||||
Arg R Gly G STOP *
|
||||
Asx B Glx Z UNKNOWN X
|
||||
|
||||
|
||||
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE
|
||||
[Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.]
|
||||
|
||||
Symbol Meaning | Symbol Meaning
|
||||
------------------------------------+---------------------------------
|
||||
G Guanine | k G or T
|
||||
A Adenine | s G or C
|
||||
C Cytosine | w A or T
|
||||
T Thymine | h A or C or T
|
||||
U Uracil | b G or T or C
|
||||
r Purine (A or G) | v G or C or A
|
||||
y Pyrimidine (C or T) | d G or T or A
|
||||
m A or C | n G or A or T or C
|
||||
|
||||
pI39
|
||||
5 10 15 20
|
||||
M E K K S L A A L S F L L L L V L F V A
|
||||
ATGGArAArAArTCnCTnGCnGCnCTnTCnTTyCTnCTnCTnCTnGTnCTnTTyGTnGCn
|
||||
AGyTTr TTrAGy TTrTTrTTrTTr TTr
|
||||
|
||||
25 30 35 40
|
||||
Q E I V V T E A N T C E H L A D T Y R G
|
||||
CArGArAThGTnGTnACnGArGCnAAyACnTGyGArCAyCTnGCnGAyACnTAyCGnGGn
|
||||
TTr AGr
|
||||
|
||||
45 50 55 60
|
||||
V C F T N A S C D D H C K N K A H L I S
|
||||
GTnTGyTTyACnAAyGCnTCnTGyGAyGAyCAyTGyAArAAyAArGCnCAyCTnAThTCn
|
||||
AGy TTr AGy
|
||||
|
||||
65 70
|
||||
G T C H D W K C F C T Q N C
|
||||
GGnACnTGyCAyGAyTGGAArTGyTTyTGyACnCArAAyTGy
|
||||
|
||||
|
||||
With the Universal Genetic code, ambiguity symbols make it possible
|
||||
to represent all possible codons for an amino acid using two output
|
||||
lines. It is important to realize that the ambiguities on each line
|
||||
can not be combined. For example, CTn and TTr represent all codons for
|
||||
Leucine. However, attempting to combine them into a single triplet,
|
||||
yTn, would be incorrect. For example, TTT and TTC are codons for
|
||||
Phenylalanine, not Leucine.
|
||||
|
||||
FUTURE PLANS
|
||||
1. It wouldn't be hard to have the output printed as nucleic acid
|
||||
sequences in Pearson format, so that the output could be read back
|
||||
into GDE. I don't know why you would want to do this, but it could
|
||||
be done.
|
||||
2. Right now, only the Universal Genetic Code is used, but it should
|
||||
be possible to read in alternative genetic codes, have prot2nuc
|
||||
figure out the ambiguity rules (as is already done in ribosome) and
|
||||
print out the appropriate ambiguous codons.
|
||||
3. It might be useful to have each possible codon printed out, rather
|
||||
than ambiguous codons. This would take up a lot more space and
|
||||
wouldn't be as pretty. If there's a lot of demand I could do this.
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
|
@ -1,107 +0,0 @@
|
|||
reform update 3 Feb 94
|
||||
|
||||
NAME
|
||||
reform - reformats multiply-aligned sequences for printing.
|
||||
|
||||
SYNOPSIS
|
||||
reform [-gpcnm] [-fx] [-sn] [-ln] [file {ralign only}]
|
||||
or
|
||||
ralign file parameters | reform [-gpcn] [-sn] [-ln] file
|
||||
|
||||
DESCRIPTION
|
||||
|
||||
g Gaps are to be represented by dashes (-).
|
||||
p Bases which agree with the consensus are
|
||||
represented by periods (.).
|
||||
c Positions at which all sequences agree are
|
||||
capitalized in the consensus.
|
||||
n Sequence data is nucleic acid. Default is protein.
|
||||
fx Specify input file format, where x is
|
||||
r:RALIGN (default) p:PEARSON i:MBCRR-MASE (Intelligenetics)
|
||||
m Input file contains multiline format sequences already aligned,
|
||||
as opposed to ralign output. This option is obsolete, and is
|
||||
equivalent to -fp.
|
||||
ln The output linelength is set to n.
|
||||
Default is 70.
|
||||
sn numbering starts with n (default=0)
|
||||
|
||||
file Sequence file as described in ralign docu-
|
||||
mentation. reform needs to re-read the
|
||||
sequence file read by ralign to get the
|
||||
names of the sequences, which ralign ignores.
|
||||
This filename is only included for ralign output.
|
||||
If -m is set, file is ignored, and sequence names
|
||||
must be read from the input.
|
||||
|
||||
Note that positions in the consensus at which no nucleotide is in the
|
||||
majority are represented by n's (for nucleic acids) or x's (for proteins),
|
||||
rather than periods, as in ralign.
|
||||
|
||||
Gaps in the input sequences may be represented by either blanks or dashes.
|
||||
|
||||
INPUT FILE FORMATS
|
||||
|
||||
(a) ralign (default, -fr)
|
||||
As described in ralign documentation, the input file (which is assumed to
|
||||
be ralign output) must have each sequence on a single long line. All
|
||||
characters on a given line will be included in the alignment. All lines
|
||||
must be exactly the same length. For example, if ralign had read
|
||||
sequence from a file called 'allcab.seq' and written output to 'allcab.ralign',
|
||||
the following command might be used:
|
||||
|
||||
reform allcab.seq <allcab.ralign >allcab.ref
|
||||
|
||||
(b) Pearson (-fp, -m)
|
||||
Compatible with sequence files used by Pearson's fasta programs as shown:
|
||||
>name1
|
||||
sequence1
|
||||
>name2
|
||||
sequence2
|
||||
...
|
||||
>namen
|
||||
sequencen
|
||||
|
||||
Sequences may run over many lines and line length does not have to be
|
||||
uniform. However, both dashes ('-') and blanks (' ') will be read in
|
||||
as gaps in the alignment. A right arrow (>) at the beginning of a line
|
||||
indicates the name line at the beginning of a new sequence.
|
||||
|
||||
Any line beginning with a semicolon (';') will be considered a comment,
|
||||
and will be ignored.
|
||||
|
||||
(c) MBCRR-MASE (Intelligenetics) (-fi)
|
||||
Compatible with .mase files produced by MBCRR's mase and pima programs,
|
||||
which use the Intelligenetics format as shown:
|
||||
|
||||
;one or more comment lines
|
||||
name1
|
||||
sequence1
|
||||
;one or more comment lines
|
||||
name2
|
||||
sequence2
|
||||
...
|
||||
;one or more comment lines
|
||||
namen
|
||||
sequencen
|
||||
|
||||
Sequences may run over many lines and line length does not have to be
|
||||
uniform. However, both dashes ('-') and blanks (' ') will be read in
|
||||
as gaps in the alignment. Each sequence MUST begin with at least one
|
||||
comment line. When a comment line is encountered, that signals the
|
||||
beginning of a new sequence. The first line after the comment is read
|
||||
as the name, and the sequence begins on the next line after that.
|
||||
|
||||
SEE ALSO ralign, mase
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,84 +0,0 @@
|
|||
ribosome update 3 Feb 94
|
||||
|
||||
NAME
|
||||
ribosome - translates nucleic acid into protein
|
||||
|
||||
SYNOPSIS
|
||||
ribosome [-g gcfile] < input > output
|
||||
|
||||
DESCRIPTION
|
||||
ribosome reads a file of one or more nucleic acid sequences
|
||||
and writes the corresponding amino acid sequence, in the standard
|
||||
one letter code, to output. Ribosome begins translating at the
|
||||
first nucleotide in each input sequence and continues to the end.
|
||||
If the length of the translated sequence is not divisible by 3,
|
||||
ribosome pads the final codon with N's and attempts to use ambi-
|
||||
guity rules to translate the final codon. Based on the genetic
|
||||
code used, ribosome derives a set of rules to resolve all ambi-
|
||||
guities that can possibly be resolved.
|
||||
|
||||
-g read in an alternative genetic code from gcfile. If this
|
||||
option is not specified, ribosome uses the universal
|
||||
genetic code.
|
||||
|
||||
gcfile - This file specifies an alternative genetic code. An
|
||||
example is shown below. ribosome reads the first 64 legal
|
||||
capital letters as amino acids. Consequently, lowercase letters
|
||||
can be used for annotation purposes, as shown in the example.
|
||||
All non-amino acid characters are ignored.
|
||||
|
||||
sgc2 - yeast mitochondrial genetic code
|
||||
|
||||
second position
|
||||
first position ------------------------------- third position
|
||||
(5' end) u c a g (3' end)
|
||||
-----------------------------------------------------------------
|
||||
u F S Y C u
|
||||
F S Y C c
|
||||
L S * W a
|
||||
L S * W g
|
||||
-----------------------------------------------------------------
|
||||
c T P H R u
|
||||
T P H R c
|
||||
T P Q R a
|
||||
T P Q R g
|
||||
-----------------------------------------------------------------
|
||||
a I T N S u
|
||||
I T N S c
|
||||
M T K R a
|
||||
M T K R g
|
||||
-----------------------------------------------------------------
|
||||
g V A D G u
|
||||
V A D G c
|
||||
V A E G a
|
||||
V A E G g
|
||||
|
||||
|
||||
input - If the first line of the file begins with '>' or ';',
|
||||
input will be read as the standard .wrp (Pearson) format,
|
||||
such as that produced by getob:
|
||||
|
||||
>name
|
||||
; one or more comment lines (optional)
|
||||
sequence lines
|
||||
|
||||
|
||||
Otherwise, it will be assumed that the file ONLY contains
|
||||
sequence, and all legal IUPAC/IUB DNA characters will be
|
||||
read as sequence.
|
||||
|
||||
SEE ALSO
|
||||
getob
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,66 +0,0 @@
|
|||
shuffle.doc update 3 Feb 94
|
||||
|
||||
SYNOPSIS
|
||||
shuffle -sn [-wn -on]
|
||||
|
||||
DESCRIPTION
|
||||
Shuffles sequences locally. See Lipman DJ, Wilbur WJ, Smith TF
|
||||
and Waterman MS (1984) On the statistical significance of nucleic
|
||||
acid similarities. Nucl. Acids Res. 12:215-226.
|
||||
-sn n is a random integer between 0 and 32767. This number
|
||||
must be provided for each run.
|
||||
|
||||
-wn n is an integer, indicating the width of the window for
|
||||
random localization. If w exceeds the length of a sequence,
|
||||
or is negative, the entire sequence is scrambled as a single
|
||||
window. This is also the case if w is not specified.
|
||||
|
||||
-on n is an integer, indicating the number of nucleotides
|
||||
overlap between adjacent windows. It should never exceed
|
||||
the window size. o defaults to 0 if not specified.
|
||||
|
||||
If w and o are specified, overlapping windows of w nucleotides
|
||||
are shuffled, thus preserving the local characteristic base
|
||||
composition. Windows overlap by o nucleotides.
|
||||
If w and o are not specified, each sequence is shuffled globally,
|
||||
thus preserving the overall base composition, but not the local
|
||||
variations in composition.
|
||||
|
||||
Any number of sequences may be processed from a single input
|
||||
file. In Pearson-format files, each new sequence begins with a
|
||||
'>' comment line, indicating the name and a short description of
|
||||
the sequence.
|
||||
|
||||
No distinction is made between protein or nucleic acid sequences.
|
||||
That is, shuffle will read any of the following characters as
|
||||
sequence:
|
||||
|
||||
T,U,C,A,G,N,R,Y,M,W,S,K,D,H,V,B,L,Z,F,P,E,I,Q,X,*,-
|
||||
|
||||
where '*' is the result of translating a stop codon, and '-'
|
||||
is a gap generated during sequence alignment. Lowercase is
|
||||
also accepted.
|
||||
|
||||
EXAMPLE
|
||||
A sample output file is shown below. Note that the first two
|
||||
lines of output are comment lines, listing the version of the
|
||||
program and the parameters used in the run.
|
||||
|
||||
>SHUFFLE VERSION 11/ 8/93
|
||||
>RANDOM SEED: 9873 WINDOW: 12 OVERLAP: 3
|
||||
>BAZFAZ - Borborigmus azerbi F-actin-zeta gene
|
||||
ctgagtagctagtcctaaatagttagtccatagtactagtacgggtcgtt
|
||||
cacccttgggcagtg.....(etc.)
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,141 +0,0 @@
|
|||
|
||||
SPLITDB update 28 Mar 98
|
||||
|
||||
|
||||
NAME
|
||||
splitdb - split GenBank files into annotation, sequence, and index
|
||||
|
||||
SYNOPSIS
|
||||
splitdb [-gepvlct] dbfile anofile seqfile indfile
|
||||
|
||||
DESCRIPTION
|
||||
Splitdb splits a database (dbfile) among three files: anofile, seqfile
|
||||
and indfile. Splitdb ignores any header information that might be in the
|
||||
file and begins processing at the first entry.
|
||||
|
||||
anofile contains the annotation portion of each entry. Entries are
|
||||
terminated with '//' or '///' (PIR only). Trailing blanks present in
|
||||
dbfile are omitted in anofile.
|
||||
|
||||
seqfile contains the sequence data for each entry. Each sequence
|
||||
entry begins with a header line, followed by sequence data on
|
||||
succeeding lines of 75 characters per line. The header line
|
||||
includes the header flag character '>' in column 1, followed by the
|
||||
name, followed by the first 50 characters of the 1st
|
||||
DEFINITION line. An example is shown below:
|
||||
|
||||
>UNHOR1 - Unicorn horn protein 1, complete cDNA sequence
|
||||
attcctctatagtctattctagctagccaaataggttagatggctgtcttactacttacgc
|
||||
...
|
||||
|
||||
Removal of blanks and numbers from sequence lines makes split
|
||||
datasets about 8-9% smaller than the original GenBank files.
|
||||
|
||||
indfile is an index which tells the line numbers for each entry in
|
||||
anofile and seqfile. It is assumed to be in alphabetical order by
|
||||
name. Each line contains a name and accession number, followed by the
|
||||
line numbers on which the annotation and sequence data begin in anofile
|
||||
and seqfile, respectively. Thus the file plants.ind might contain:
|
||||
|
||||
|
||||
A15660 TA156608 1 1
|
||||
A15671 A15671 33 11
|
||||
A15673 A15673 65 25
|
||||
A15675 AK156751 97 36
|
||||
A15677 BA156770 128 46
|
||||
A16780 BA167807 160 57
|
||||
A16782 A16782 192 70
|
||||
ATHRPRP1C GM905105 225 83
|
||||
etc...
|
||||
|
||||
Note that indfile is a perfectly legitimate .nam file, for use with
|
||||
programs such as getloc, getob, or comm.
|
||||
|
||||
|
||||
The following options identify the type of database being read:
|
||||
|
||||
-g GenBank (default)
|
||||
-e EMBL
|
||||
-p PIR (NBRF)
|
||||
-v Vecbase
|
||||
-l LiMB
|
||||
|
||||
Other options:
|
||||
-c Compress 3 or more leading blanks in annotation lines
|
||||
to take the form <CRUNCHFLAG><CRUNCHCHAR>, where CRUNCHFLAG
|
||||
is the ASCII character specified by the Pascal const
|
||||
CRUNCHOFFSET, which is set to 33 ("!") in the current
|
||||
implementation. For each annotation line read, if the
|
||||
number of leading blanks is >=3, splitdb sets CRUNCHCHAR
|
||||
to CRUNCHOFFSET+the number of blanks. Thus, for lines
|
||||
with 3, 4, or 5 leading blanks, CRUNCHCHAR would be
|
||||
'$', '%' and '&', respectively. GETLOC and GETOB
|
||||
automatically expand crunched blanks when CRUNCHFLAG
|
||||
is encountered on an input line. Empirical observations
|
||||
indicate that the -c option decreases the size of
|
||||
GenBank files by about 10%.
|
||||
|
||||
This compression method may fail when the number of
|
||||
leading blanks exceeds 127-CRUNCHOFFSET. However,
|
||||
none of the above mentioned databases currently
|
||||
supports any datafield with anywhere near that number
|
||||
of leading blanks.
|
||||
|
||||
-t (GenBank only) Append all information in the first
|
||||
ORGANISM to the end of each line in indfile. For example,
|
||||
the entry which begins:
|
||||
|
||||
LOCUS GORMTDLOOZ 282 bp DNA UNA 11-MAR-1996
|
||||
DEFINITION GGGOMT493; Gorilla gorilla gorilla (BomBom, ISIS 438, Audubon
|
||||
Zoological Gardens) mitochondrial D-loop DNA.
|
||||
ACCESSION L76759
|
||||
NID g1222584
|
||||
KEYWORDS D-loop.
|
||||
SOURCE Mitochondrion Gorilla gorilla gorilla (individual_isolate BomBom,
|
||||
ISIS 438, Audubon Zoological Gardens, sub_species gorilla) male
|
||||
DNA.
|
||||
ORGANISM Mitochondrion Gorilla gorilla gorilla
|
||||
Eukaryotae; mitochondrial eukaryotes; Metazoa; Chordata;
|
||||
Vertebrata; Eutheria; Primates; Catarrhini; Hominidae; Gorilla.
|
||||
|
||||
might be indexed as
|
||||
|
||||
GORMTDLOOZ L76759 1 1 Mitochondrion Gorilla gorilla gorilla
|
||||
|
||||
This is useful for taxonomic studies, or as a way of making
|
||||
it easy to create subsets from a single index. Thus,
|
||||
'grep gorilla primates.ind' would print all lines in the
|
||||
file that contained the word gorilla. The output from
|
||||
this command could be used as a .nam file for extracting
|
||||
just gorilla sequences from a larger dataset using
|
||||
fetch.
|
||||
|
||||
|
||||
NOTES
|
||||
1. Header lines that aren't part of entries are automatically
|
||||
stripped out during processing. For example, in a file containing
|
||||
GenBank entries, all lines up to the first occurrence of 'LOCUS'
|
||||
starting in column 1, are ignored. Similarly for PIR, processing
|
||||
begins on the first line containing 'ENTRY' beginning in column 1.
|
||||
2. GenBank/EMBL/DDBJ entries created on or after Feb. 1, 1996,
|
||||
have accession numbers of 8 characters, rather than 6. Previously
|
||||
assigned accession numbers will remain at 6 characters. Splitdb has
|
||||
been updated to write all accession numbers to the .ind file, left
|
||||
justified in a field of 8 characters, in columns 14-21 of the .ind
|
||||
file.
|
||||
|
||||
SEE ALSO
|
||||
getloc, getob, comm(1) (Unix command).
|
||||
|
||||
AUTHOR
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB Canada R3T 2N2
|
||||
Phone: 204-474-6085
|
||||
FAX: 204-261-5732
|
||||
frist@cc.umanitoba.ca
|
||||
|
||||
REFERENCE
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
|
@ -1,125 +0,0 @@
|
|||
|
||||
|
||||
XYLEM.DOC update 10 Aug 1994
|
||||
|
||||
XYLEM: TOOLS FOR MANIPULATION OF GENETIC DATABASES
|
||||
Brian Fristensky, University of Manitoba
|
||||
|
||||
Fristensky, B. (1993) Feature expressions: creating and manipulating
|
||||
sequence datasets. Nucleic Acids Research 21:5997-6003.
|
||||
|
||||
SPLITDB - Splits files containing one or more GenBank entries into
|
||||
annotation, sequence, and index files. Indexfiles can also serve as
|
||||
namefiles for GETLOC. Sequence files are in the format required for
|
||||
use with the Pearson programs (FASTA,LFASTA etc.).
|
||||
|
||||
GETLOC - Reads a file containing LOCUS names (namefile) and
|
||||
retrieves either annotation, sequence, or both from a split
|
||||
database or database subset created by SPLITDB.
|
||||
|
||||
FETCH - A c-shell script that provides a convenient menu-driven
|
||||
front end for retrieval of database entries using GETLOC.
|
||||
|
||||
FINDKEY - A c-shell script that provides a convenient menu-driven
|
||||
front end for keyword searches of database annotation files,
|
||||
using IDENTIFY.
|
||||
|
||||
IDENTIFY- Given line-numbered output from grep, IDENTIFY uses the
|
||||
index file to determine which entries contained the keywords
|
||||
searched for by grep. It then produces a namefile for use by
|
||||
GETLOC. Namefiles can serve as logical databases, and utilities
|
||||
such as the Unix comm command can perform logical operations on
|
||||
these namefiles to produce database subsets.
|
||||
|
||||
FEATURES/GETOB - Given a namefile, pulls objects (mRNA, tRNA, CDS
|
||||
etc.) from each of the named entries, using the new
|
||||
DDBJ/EMBL/GenBank International Features Table Format. A future
|
||||
version will also allow the annotation of sites within objects that
|
||||
are extracted.
|
||||
|
||||
DBSTAT - Calculates amino acid frequencies in a protein database.
|
||||
|
||||
RIBOSOME - Given a file of one or more nucleic acids (eg. output
|
||||
from GETOB) , RIBOSOME translates them into protein, using either
|
||||
the universal genetic code or an alternative genetic code supplied
|
||||
by the user. All ambiguities that can be resolved are translated.
|
||||
|
||||
PROT2NUC - reverse translates a sequence from protein to nucleic
|
||||
acid, using IUPAC-IUB ambiguity codes.
|
||||
|
||||
SHUFFLE - Given a random seed, shuffles each sequence in a Pearson-
|
||||
format (.wrp) file. Shuffling is done locally in overlapping windows
|
||||
across the length of a given sequence. The window size and overlap
|
||||
length can be specified by the user.
|
||||
|
||||
REFORM - Reformats multiply aligned nucleic acid or protein
|
||||
sequences for publication. Output for M. Waterman's RALIGN
|
||||
program, or the MBCRR MASE editor, can be directly used as input.
|
||||
A variety of options are available for representing gaps, consensus
|
||||
sequences and other features.
|
||||
|
||||
Fristensky (Cornell) Sequence Analysis Package - General purpose
|
||||
sequence analysis package written in Standard Pascal. Features
|
||||
include: sequence numbering, formatting, & translation, restriction
|
||||
site searches & mapping, matrix similarity searches, TESTCODE
|
||||
analysis, base composition analysis. All programs are interactive
|
||||
and read free-format, BIONET, and GenBank files.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
XYLEM DATABASE TOOLS
|
||||
|
||||
|
||||
|
||||
----------
|
||||
| .gen | getloc
|
||||
|----------|<--------------------------
|
||||
| GenBank | |
|
||||
---------- |
|
||||
| |
|
||||
| splitgb |
|
||||
/|\ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
/ | \ |
|
||||
v v v |
|
||||
---------- ---------- ---------- |
|
||||
| .ano | | .wrp | | .ind | |
|
||||
|----------| |----------| |----------| |
|
||||
|annotation| | sequence | | index | |
|
||||
---------- ---------- ---------- |
|
||||
| \ | / |
|
||||
| \ | / |
|
||||
| \ | / |
|
||||
| \ | / |
|
||||
grep -n | \ | / |
|
||||
| \ | / |
|
||||
| | |
|
||||
| | -------------------------------+
|
||||
| ^ |
|
||||
v | getob |
|
||||
---------- ---------- v
|
||||
| .grep | identify | .nam | ----------
|
||||
|----------| --------->|----------| | .wrp |
|
||||
| numbered | | LOCUS | ----------
|
||||
|file lines| ---------- | eg. mRNA |
|
||||
---------- | ^ | tRNA |
|
||||
| | | rRNA |
|
||||
| | | CDS |
|
||||
--comm-- ----------
|
||||
(logical operations on
|
||||
sets of names)
|
||||
|
||||
Dr. Brian Fristensky
|
||||
Dept. of Plant Science
|
||||
University of Manitoba
|
||||
Winnipeg, MB R3T 2N2 CANADA
|
||||
204-474-6085
|
||||
frist@cc.umanitoba.ca
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
[Desktop Entry]
|
||||
X-Desktop-File-Install-Version=0.26
|
||||
Name=GDE
|
||||
Comment=Genetic Data Environment
|
||||
Icon=/usr/share/icons/Gde.svg
|
||||
Categories=Science
|
||||
Exec=gde %f
|
||||
Type=Application
|
6258
GDE2.0_manual.ps
6258
GDE2.0_manual.ps
File diff suppressed because one or more lines are too long
446
HGL_SRC/Alloc.c
446
HGL_SRC/Alloc.c
|
@ -1,133 +1,122 @@
|
|||
#include <stdio.h>
|
||||
#include "global_defs.h"
|
||||
#include <ctype.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "global_defs.h"
|
||||
/*
|
||||
* Alloc.c
|
||||
* Memory functions for Harvard Genome Laboratory.
|
||||
* Last revised 6/3/91
|
||||
*
|
||||
* Print error message, and die
|
||||
*/
|
||||
void ErrorOut(code,string)
|
||||
int code;
|
||||
* Alloc.c
|
||||
* Memory functions for Harvard Genome Laboratory.
|
||||
* Last revised 6/3/91
|
||||
*
|
||||
* Print error message, and die
|
||||
*/
|
||||
void ErrorOut(code, string) int code;
|
||||
char *string;
|
||||
{
|
||||
if (code == 0)
|
||||
{
|
||||
fprintf(stderr,"Error:%s\n",string);
|
||||
exit(1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Calloc count*size bytes with memory aligned to size.
|
||||
* Return pointer to new block.
|
||||
*/
|
||||
char *Calloc(count,size)
|
||||
int count,size;
|
||||
/*unsigned count,size;*/
|
||||
{
|
||||
char *temp;
|
||||
temp = calloc(count,(unsigned)size);
|
||||
|
||||
if(count*size == 0)
|
||||
fprintf(stderr,"Allocate ZERO blocks?\n");
|
||||
ErrorOut(temp,"Cannot allocate memory");
|
||||
return(temp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reallocate memory at block, expand to size.
|
||||
* Return pointer to (possibly) new block.
|
||||
*/
|
||||
char *Realloc(block,size)
|
||||
char *block;
|
||||
unsigned size;
|
||||
{
|
||||
char *temp;
|
||||
temp=realloc(block,size);
|
||||
ErrorOut(temp,"Cannot change memory size");
|
||||
return(temp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free block Allocated by Calloc.
|
||||
* Return error code from free().
|
||||
*/
|
||||
|
||||
void Cfree(block)
|
||||
char* block;
|
||||
{
|
||||
extern void Warning();
|
||||
if(block != NULL)
|
||||
{
|
||||
#ifdef SUN4
|
||||
if(free(block) == 0)
|
||||
Warning("Error in Cfree...");
|
||||
#endif
|
||||
}
|
||||
/* else
|
||||
Warning("Error in Cfree, NULL block");
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Print Warning message to stderr.
|
||||
*/
|
||||
void Warning(s)
|
||||
char *s;
|
||||
{
|
||||
fprintf(stderr,"Warning:%s\n",s);
|
||||
if (code == 0) {
|
||||
fprintf(stderr, "Error:%s\n", string);
|
||||
exit(1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get array element from a sequence structure. The index
|
||||
* is relative to the alignment.
|
||||
*/
|
||||
char GetElem(seq,indx)
|
||||
Sequence *seq; /*Sequence to search*/
|
||||
int indx; /*Index relative to the global offset*/
|
||||
* Calloc count*size bytes with memory aligned to size.
|
||||
* Return pointer to new block.
|
||||
*/
|
||||
char *Calloc(count, size)
|
||||
int count, size;
|
||||
/*unsigned count,size;*/
|
||||
{
|
||||
if((indx<seq->offset) || (indx >= seq->offset + seq->seqlen))
|
||||
return('-');
|
||||
else
|
||||
return((char)(seq->c_elem[indx-seq->offset]));
|
||||
char *temp;
|
||||
temp = calloc(count, (unsigned)size);
|
||||
|
||||
if (count * size == 0) fprintf(stderr, "Allocate ZERO blocks?\n");
|
||||
ErrorOut(temp, "Cannot allocate memory");
|
||||
return (temp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace the array element at seq[indx] with elem. The index
|
||||
* is relative to the alignment.
|
||||
*/
|
||||
|
||||
void ReplaceElem(seq,indx,elem)
|
||||
Sequence *seq; /*Sequence */
|
||||
int indx; /*Position to overwrite (replace) */
|
||||
unsigned char elem; /*Character to replace with */
|
||||
* Reallocate memory at block, expand to size.
|
||||
* Return pointer to (possibly) new block.
|
||||
*/
|
||||
char *Realloc(block, size)
|
||||
char *block;
|
||||
unsigned size;
|
||||
{
|
||||
int j;
|
||||
char *temp;
|
||||
temp = realloc(block, size);
|
||||
ErrorOut(temp, "Cannot change memory size");
|
||||
return (temp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free block Allocated by Calloc.
|
||||
* Return error code from free().
|
||||
*/
|
||||
|
||||
void Cfree(block) char *block;
|
||||
{
|
||||
extern void Warning();
|
||||
if (block != NULL) {
|
||||
#ifdef SUN4
|
||||
if (free(block) == 0) Warning("Error in Cfree...");
|
||||
#endif
|
||||
}
|
||||
/* else
|
||||
Warning("Error in Cfree, NULL block");
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Print Warning message to stderr.
|
||||
*/
|
||||
void Warning(s) char *s;
|
||||
{
|
||||
fprintf(stderr, "Warning:%s\n", s);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get array element from a sequence structure. The index
|
||||
* is relative to the alignment.
|
||||
*/
|
||||
char GetElem(seq, indx)
|
||||
Sequence *seq; /*Sequence to search*/
|
||||
int indx; /*Index relative to the global offset*/
|
||||
{
|
||||
if ((indx < seq->offset) || (indx >= seq->offset + seq->seqlen))
|
||||
return ('-');
|
||||
else
|
||||
return ((char)(seq->c_elem[indx - seq->offset]));
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace the array element at seq[indx] with elem. The index
|
||||
* is relative to the alignment.
|
||||
*/
|
||||
|
||||
void ReplaceElem(seq, indx, elem) Sequence *seq; /*Sequence */
|
||||
int indx; /*Position to overwrite (replace) */
|
||||
unsigned char elem; /*Character to replace with */
|
||||
{
|
||||
int j;
|
||||
extern char *Calloc();
|
||||
int width;
|
||||
|
||||
/*
|
||||
* If no c_elem has been allocated yet...
|
||||
*/
|
||||
/* if(index("abcdefghijklmnopqrstuvwxyz-0123456789",elem)==0)
|
||||
fprintf(stderr,"Warning (ReplaceElem) elem = %c\n",elem);
|
||||
*/
|
||||
width = seq->offset-indx;
|
||||
if(seq->seqlen == 0 && elem != '-')
|
||||
{
|
||||
if(seq->seqmaxlen == 0 || seq->c_elem == NULL)
|
||||
{
|
||||
seq->c_elem = Calloc(4,sizeof(char));
|
||||
/*
|
||||
* If no c_elem has been allocated yet...
|
||||
*/
|
||||
/* if(index("abcdefghijklmnopqrstuvwxyz-0123456789",elem)==0)
|
||||
fprintf(stderr,"Warning (ReplaceElem) elem =
|
||||
%c\n",elem);
|
||||
*/
|
||||
width = seq->offset - indx;
|
||||
if (seq->seqlen == 0 && elem != '-') {
|
||||
if (seq->seqmaxlen == 0 || seq->c_elem == NULL) {
|
||||
seq->c_elem = Calloc(4, sizeof(char));
|
||||
seq->offset = indx;
|
||||
seq->seqmaxlen = 4;
|
||||
}
|
||||
|
@ -135,118 +124,109 @@ unsigned char elem; /*Character to replace with */
|
|||
seq->c_elem[0] = elem;
|
||||
seq->offset = indx;
|
||||
}
|
||||
/*
|
||||
* If inserting before the c_elem (< offset)
|
||||
*/
|
||||
else if((indx<seq->offset) && (elem!='-'))
|
||||
{
|
||||
/*
|
||||
* If inserting before the c_elem (< offset)
|
||||
*/
|
||||
else if ((indx < seq->offset) && (elem != '-')) {
|
||||
seq->seqmaxlen += width;
|
||||
seq->c_elem = Realloc(seq->c_elem,seq->seqmaxlen*sizeof(char));
|
||||
for(j=seq->seqmaxlen-1;j>=width;j--)
|
||||
seq->c_elem[j] = seq->c_elem[j-width];
|
||||
for(j=0;j<width;j++)
|
||||
seq->c_elem[j] = '-';
|
||||
seq->c_elem =
|
||||
Realloc(seq->c_elem, seq->seqmaxlen * sizeof(char));
|
||||
for (j = seq->seqmaxlen - 1; j >= width; j--)
|
||||
seq->c_elem[j] = seq->c_elem[j - width];
|
||||
for (j = 0; j < width; j++) seq->c_elem[j] = '-';
|
||||
seq->c_elem[0] = elem;
|
||||
seq->seqlen += width;
|
||||
seq->offset = indx;
|
||||
}
|
||||
/*
|
||||
* if inserting after c_elem (indx > offset + seqlen)
|
||||
*/
|
||||
else if((indx>=seq->offset+seq->seqlen) && (elem!='-'))
|
||||
{
|
||||
if(indx-seq->offset >= seq->seqmaxlen)
|
||||
{
|
||||
seq->seqmaxlen = indx-seq->offset+256;
|
||||
seq->c_elem = Realloc(seq->c_elem,seq->seqmaxlen*
|
||||
sizeof(char));
|
||||
/*
|
||||
* if inserting after c_elem (indx > offset + seqlen)
|
||||
*/
|
||||
else if ((indx >= seq->offset + seq->seqlen) && (elem != '-')) {
|
||||
if (indx - seq->offset >= seq->seqmaxlen) {
|
||||
seq->seqmaxlen = indx - seq->offset + 256;
|
||||
seq->c_elem =
|
||||
Realloc(seq->c_elem, seq->seqmaxlen * sizeof(char));
|
||||
}
|
||||
for(j=seq->seqlen;j<seq->seqmaxlen;j++)
|
||||
for (j = seq->seqlen; j < seq->seqmaxlen; j++)
|
||||
seq->c_elem[j] = '-';
|
||||
seq->c_elem[indx-seq->offset] = elem;
|
||||
seq->seqlen = indx-seq->offset+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(indx-(seq->offset)>=0 && indx-(seq->offset)<seq->seqlen)
|
||||
seq->c_elem[indx-(seq->offset)] = elem;
|
||||
else if(elem!='-')
|
||||
fprintf(stderr,"%c better be a -\n",elem);
|
||||
seq->c_elem[indx - seq->offset] = elem;
|
||||
seq->seqlen = indx - seq->offset + 1;
|
||||
}
|
||||
return;
|
||||
else {
|
||||
if (indx - (seq->offset) >= 0 &&
|
||||
indx - (seq->offset) < seq->seqlen)
|
||||
seq->c_elem[indx - (seq->offset)] = elem;
|
||||
else if (elem != '-')
|
||||
fprintf(stderr, "%c better be a -\n", elem);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* InsertElem is a modification of InsertElems, and should be
|
||||
* optimized. s.s.5/6/91
|
||||
*/
|
||||
int InsertElem(a,b,ch)
|
||||
Sequence *a; /* Sequence */
|
||||
int b; /*Position to insert BEFORE*/
|
||||
char ch; /*element to insert */
|
||||
* InsertElem is a modification of InsertElems, and should be
|
||||
* optimized. s.s.5/6/91
|
||||
*/
|
||||
int InsertElem(a, b, ch)
|
||||
Sequence *a; /* Sequence */
|
||||
int b; /*Position to insert BEFORE*/
|
||||
char ch; /*element to insert */
|
||||
{
|
||||
char c[2];
|
||||
c[0]=ch;
|
||||
c[1] = '\0';
|
||||
|
||||
return (InsertElems(a,b,c));
|
||||
char c[2];
|
||||
c[0] = ch;
|
||||
c[1] = '\0';
|
||||
|
||||
return (InsertElems(a, b, c));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Make a copy of Sequence one, place in Sequence two
|
||||
*/
|
||||
void SeqCopy(one,two)
|
||||
Sequence *one,*two;
|
||||
* Make a copy of Sequence one, place in Sequence two
|
||||
*/
|
||||
void SeqCopy(one, two) Sequence *one, *two;
|
||||
{
|
||||
int j;
|
||||
*two = *one;
|
||||
if(two->seqmaxlen)
|
||||
two->c_elem = Calloc(one->seqmaxlen,sizeof(char));
|
||||
if(two->commentsmaxlen)
|
||||
two->comments = Calloc(one->commentsmaxlen,sizeof(char));
|
||||
for(j=0;j<one->seqlen;j++)
|
||||
two->c_elem[j] = one->c_elem[j];
|
||||
for(j=0;j<one->commentslen;j++)
|
||||
if (two->seqmaxlen) two->c_elem = Calloc(one->seqmaxlen, sizeof(char));
|
||||
if (two->commentsmaxlen)
|
||||
two->comments = Calloc(one->commentsmaxlen, sizeof(char));
|
||||
for (j = 0; j < one->seqlen; j++) two->c_elem[j] = one->c_elem[j];
|
||||
for (j = 0; j < one->commentslen; j++)
|
||||
two->comments[j] = one->comments[j];
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Normalize seq (remove leading indels in the c_elem;
|
||||
*/
|
||||
void SeqNormal(seq)
|
||||
Sequence *seq;
|
||||
* Normalize seq (remove leading indels in the c_elem;
|
||||
*/
|
||||
void SeqNormal(seq) Sequence *seq;
|
||||
{
|
||||
int len,j,shift_width,trailer;
|
||||
int len, j, shift_width, trailer;
|
||||
char *c_elem;
|
||||
len = seq->seqlen;
|
||||
|
||||
c_elem = seq->c_elem;
|
||||
|
||||
if(len == 0) return;
|
||||
if (len == 0) return;
|
||||
|
||||
for(shift_width=0; (shift_width<len) && (c_elem[shift_width] == '-');
|
||||
shift_width++);
|
||||
for (shift_width = 0;
|
||||
(shift_width < len) && (c_elem[shift_width] == '-'); shift_width++)
|
||||
;
|
||||
|
||||
for(j=0;j<len-shift_width;j++)
|
||||
c_elem[j] = c_elem[j+shift_width];
|
||||
for (j = 0; j < len - shift_width; j++)
|
||||
c_elem[j] = c_elem[j + shift_width];
|
||||
|
||||
seq->seqlen -= shift_width;
|
||||
seq->offset += shift_width;
|
||||
for(trailer=seq->seqlen-1;(c_elem[trailer] =='-' ||
|
||||
c_elem[trailer] == '\0') && trailer>=0;
|
||||
trailer--)
|
||||
c_elem[trailer] = '\0';
|
||||
seq->seqlen = trailer+1;
|
||||
for (trailer = seq->seqlen - 1;
|
||||
(c_elem[trailer] == '-' || c_elem[trailer] == '\0') &&
|
||||
trailer >= 0;
|
||||
trailer--)
|
||||
c_elem[trailer] = '\0';
|
||||
seq->seqlen = trailer + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
void SeqRev(seq,min,max)
|
||||
Sequence *seq;
|
||||
int min,max;
|
||||
void SeqRev(seq, min, max) Sequence *seq;
|
||||
int min, max;
|
||||
/*
|
||||
SeqRev will reverse a given sequence within a window from
|
||||
min to max (inclusive). The idea is to allow several sequences
|
||||
|
@ -260,72 +240,68 @@ int min,max;
|
|||
*/
|
||||
{
|
||||
int j;
|
||||
char temp1,temp2;
|
||||
char temp1, temp2;
|
||||
extern char GetElem();
|
||||
extern void ReplaceElem();
|
||||
|
||||
for(j=0;j<= (max-min)/2;j++)
|
||||
{
|
||||
temp1 = GetElem(seq,min+j);
|
||||
temp2 = GetElem(seq,max-j);
|
||||
ReplaceElem(seq,min+j,(unsigned char)temp2);
|
||||
ReplaceElem(seq,max-j,(unsigned char)temp1);
|
||||
for (j = 0; j <= (max - min) / 2; j++) {
|
||||
temp1 = GetElem(seq, min + j);
|
||||
temp2 = GetElem(seq, max - j);
|
||||
ReplaceElem(seq, min + j, (unsigned char)temp2);
|
||||
ReplaceElem(seq, max - j, (unsigned char)temp1);
|
||||
}
|
||||
|
||||
seq->direction *= -1;
|
||||
|
||||
|
||||
SeqNormal(seq);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* sequence complementing. */
|
||||
void SeqComp(seq)
|
||||
Sequence *seq;
|
||||
void SeqComp(seq) Sequence *seq;
|
||||
{
|
||||
int j;
|
||||
unsigned char in,out,case_bit;
|
||||
int j;
|
||||
unsigned char in, out, case_bit;
|
||||
char *c;
|
||||
static int tmatr[16] = {'-','a','c','m','g','r','s','v',
|
||||
't','w','y','h','k','d','b','n'};
|
||||
|
||||
static int matr[128] = {
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0x01,0x0e,0x02,0x0d,0,0,0x04,0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,
|
||||
0x08,0x08,0x07,0x09,0x00,0x0a,0,0,0,0,0,0,0,0x01,0x0e,0x02,0x0d,0,0,0x04,
|
||||
0x0b,0,0,0x0c,0,0x03,0x0f,0,0x05,0,0x05,0x06,0x08,0x08,0x07,0x09,0x00,0x0a,
|
||||
0,0,0,0,0x00,0
|
||||
};
|
||||
|
||||
static int tmatr[16] = {'-', 'a', 'c', 'm', 'g', 'r', 's', 'v',
|
||||
't', 'w', 'y', 'h', 'k', 'd', 'b', 'n'};
|
||||
|
||||
static int matr[128] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01,
|
||||
0x0e, 0x02, 0x0d, 0, 0, 0x04, 0x0b, 0, 0, 0x0c, 0,
|
||||
0x03, 0x0f, 0, 0x05, 0, 0x05, 0x06, 0x08, 0x08, 0x07, 0x09,
|
||||
0x00, 0x0a, 0, 0, 0, 0, 0, 0, 0, 0x01, 0x0e,
|
||||
0x02, 0x0d, 0, 0, 0x04, 0x0b, 0, 0, 0x0c, 0, 0x03,
|
||||
0x0f, 0, 0x05, 0, 0x05, 0x06, 0x08, 0x08, 0x07, 0x09, 0x00,
|
||||
0x0a, 0, 0, 0, 0, 0x00, 0};
|
||||
|
||||
c = seq->c_elem;
|
||||
for(j=0;j<seq->seqlen;j++)
|
||||
{
|
||||
/*
|
||||
* Save Case bit...
|
||||
*/
|
||||
case_bit = c[j] & 32;
|
||||
for (j = 0; j < seq->seqlen; j++) {
|
||||
/*
|
||||
* Save Case bit...
|
||||
*/
|
||||
case_bit = c[j] & 32;
|
||||
out = 0;
|
||||
in = matr[c[j]];
|
||||
if(in&1)
|
||||
out|=8;
|
||||
if(in&2)
|
||||
out|=4;
|
||||
if(in&4)
|
||||
out|=2;
|
||||
if(in&8)
|
||||
out|=1;
|
||||
if (in & 1) out |= 8;
|
||||
if (in & 2) out |= 4;
|
||||
if (in & 4) out |= 2;
|
||||
if (in & 8) out |= 1;
|
||||
|
||||
if(case_bit == 0)
|
||||
c[j] = toupper(tmatr[out]);
|
||||
if (case_bit == 0)
|
||||
c[j] = toupper(tmatr[out]);
|
||||
else
|
||||
c[j] = tmatr[out];
|
||||
c[j] = tmatr[out];
|
||||
}
|
||||
|
||||
seq->direction *= -1;
|
||||
seq->strandedness = ( seq->strandedness == 2)?1:
|
||||
( seq->strandedness == 1)?2:
|
||||
0;
|
||||
seq->strandedness = (seq->strandedness == 2) ? 1
|
||||
: (seq->strandedness == 1) ? 2
|
||||
: 0;
|
||||
return;
|
||||
|
||||
}
|
||||
|
|
BIN
HGL_SRC/Alloc.o
BIN
HGL_SRC/Alloc.o
Binary file not shown.
Binary file not shown.
Binary file not shown.
4792
HGL_SRC/HGLfuncs.c
4792
HGL_SRC/HGLfuncs.c
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -409,8 +409,8 @@ int fl_make_list()
|
|||
|
||||
|
||||
getcwd(dirname, GBUFSIZ);
|
||||
sprintf(tmpcmd, "cd %s;ls -aF > /usr/tmp/.svlffil%d", dirname, pid);
|
||||
sprintf(tmpname, "/usr/tmp/.svlffil%d", pid);
|
||||
sprintf(tmpcmd, "cd %s;ls -aF > /tmp/.svlffil%d", dirname, pid);
|
||||
sprintf(tmpname, "/tmp/.svlffil%d", pid);
|
||||
system(tmpcmd);
|
||||
dirp = fopen(tmpname, "r");
|
||||
if (dirp == NULL) /* just a check to make sure */
|
||||
|
|
Binary file not shown.
BIN
HGL_SRC/MakeCons
BIN
HGL_SRC/MakeCons
Binary file not shown.
|
@ -1,11 +1,11 @@
|
|||
|
||||
CC = cc
|
||||
#FLAGS = -g
|
||||
OPENWINHOME = /usr/openwin
|
||||
FLAGS = -m32
|
||||
OPENWINHOME = /usr
|
||||
MFILE =
|
||||
INCDIR = -I$(OPENWINHOME)/include
|
||||
LIBDIR = -L$(OPENWINHOME)/lib
|
||||
LIBS = -lxview -lolgx -lX11
|
||||
INCDIR = -I/usr/include
|
||||
LIBDIR = -L/usr/lib32
|
||||
LIBS = -lxview -lolgx -lX11 -ltirpc
|
||||
|
||||
libs.o = Alloc.o HGLfuncs.o
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,5 +0,0 @@
|
|||
#/bin/csh
|
||||
make all
|
||||
cp Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool ../bin
|
||||
rm Consto01mask MakeCons PrintStrat Translate heapsortHGL mapview DotPlotTool
|
||||
rm *.o
|
BIN
HGL_SRC/mapview
BIN
HGL_SRC/mapview
Binary file not shown.
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 20 KiB |
|
@ -0,0 +1,28 @@
|
|||
|
||||
<div align="center">
|
||||
<img src="./LinuxGDE.svg" width = "300" alt="logo" align=center />
|
||||
</div>
|
||||
|
||||
# Genetic Data Environment
|
||||
|
||||
GDE was originally distributed for SunOS in the 1990s by [Smith et al. (1994)](https://doi.org/10.1093/bioinformatics/10.6.671). Thanks to the efforts of [Oliveira et al. (2003)](http://dx.doi.org/10.1093/bioinformatics/19.1.153), GDE was later ported to Linux.
|
||||
|
||||
This software is derived partly from the source code fixed by Oliveira et al. (2003) and partly from the original GDE source code written for SunOS. Both parts have been fixed again so that the source code compiles with GCC 12.
|
||||
|
||||
GDE is now redistributed here, after GDE (SunOS) had been offline for 25 years and GDE (Linux) had been offline for 16 years.
|
||||
|
||||
|
||||
# Usage
|
||||
|
||||
Although xview, the library GDE depends on, has been orphaned by many distributions, I still maintain this software to give a glimpse of how biologists worked during the last century.
|
||||
|
||||
To use it, you must install the xview libraries and header files. Note that xview does not work well on the x86_64 architecture.
|
||||
|
||||
The easiest way is to install it from BioArchLinux.
|
||||
|
||||
|
||||
# Maintenance
|
||||
|
||||
I plan to maintain this Linux version of GDE and keep its components up to date with current Linux systems. Version 2.2.1 restores the basic functionality. Further development aims to make GDE interoperate with modern phylogenetics software and to fix the broken LoopTool.
|
||||
|
||||
If someone can help migrate it to x86_64 architecture, I would be very thankful.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,149 @@
|
|||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
struct data_format {
|
||||
int length;
|
||||
char *nuc;
|
||||
int offset;
|
||||
char name[64];
|
||||
char type;
|
||||
};
|
||||
|
||||
char *Realloc(char *block, int size);
|
||||
char *Calloc(int count, int size);
|
||||
int ErrorOut(int code, char *string);
|
||||
int Errorout(char *string);
|
||||
int ReadFlat(FILE *file, struct data_format align[], int maxseqs);
|
||||
int WriteData(FILE *file, struct data_format data[], int count);
|
||||
|
||||
/*
 * ReadFlat - read a GDE flat-file alignment from `file` into `align`.
 *
 * A record starts with a header line whose first character is the record
 * type ('>', '#', '%', '"' or '@'), followed by the name and an optional
 * "(offset)" suffix.  Every following line, up to the next header, is
 * appended to that record's sequence data.
 *
 * file    - open input stream; a NULL stream is reported through Errorout().
 * align   - caller-supplied array with room for `maxseqs` records.
 * maxseqs - capacity of `align`.
 *
 * Returns the number of records read.  Exits with status 1 when the input
 * contains no header at all, and through Errorout() when the input holds
 * more than `maxseqs` records.
 *
 * Each nuc buffer is allocated with one byte more than `maxlen`, so the
 * terminating '\0' always fits even when the data fills the buffer.
 */
int ReadFlat(FILE *file, struct data_format align[], int maxseqs)
{
	int len = 0, count = -1, offset;
	unsigned maxlen = 1024;
	size_t linelen;
	char cinline[1025];
	char *paren;
	extern char *Calloc(), *Realloc();

	if (file == NULL) Errorout("Cannot open data file");

	while (fgets(cinline, 1024, file) != NULL) {
		/*
		 * Drop the newline only if there is one: the last line of a
		 * file, or a line longer than the buffer, has none and must
		 * not lose its final character.
		 */
		linelen = strlen(cinline);
		if (linelen > 0 && cinline[linelen - 1] == '\n')
			cinline[--linelen] = '\0';

		switch (cinline[0]) {
		case '>':
		case '#':
		case '%':
		case '"':
		case '@':
			/* Optional "(offset)" suffix; cut the header there. */
			offset = 0;
			paren = strchr(cinline, '(');
			if (paren != NULL) {
				sscanf(paren + 1, "%d", &offset);
				*paren = '\0';
			}

			/* Close the previous record, if any. */
			if (count != -1) {
				align[count].length = len;
				align[count].nuc[len] = '\0';
				maxlen = len;
			}

			count++;
			/* align[] has exactly maxseqs slots: 0 .. maxseqs-1 */
			if (count >= maxseqs)
				Errorout("Sorry, alignment is too large");

			align[count].nuc =
			    Calloc((int)(maxlen + 1), (int)sizeof(char));
			align[count].type = cinline[0];
			align[count].offset = offset;
			if (align[count].nuc == NULL)
				Errorout("Calloc problem");

			/* name[] is 64 bytes; never let sscanf overrun it. */
			align[count].name[0] = '\0';
			sscanf((char *)(cinline + 1), "%63s",
			       align[count].name);
			len = 0;
			break;

		default:
			/* Text before the first header belongs to no record. */
			if (count == -1) break;

			if (len + linelen > maxlen) {
				maxlen = (maxlen + (unsigned)linelen) * 2;
				align[count].nuc = Realloc(align[count].nuc,
							   (int)(maxlen + 1));
			}
			memcpy(align[count].nuc + len, cinline, linelen);
			len += (int)linelen;
			break;
		}
	}
	if (count == -1) exit(1);

	align[count].length = len;
	align[count].nuc[len] = '\0';
	return (++count);
}
|
||||
|
||||
/*
 * Errorout - report a fatal error and terminate the program.
 *
 * Writes `string` and a newline to stderr, then exits with status 1.
 * Control never returns to the caller; the int return type exists only
 * to match the prototype used by the rest of the file.
 */
int Errorout(char *string)
{
	(void)fprintf(stderr, "%s\n", string);
	exit(1);
	/* NOTREACHED */
}
|
||||
|
||||
int WriteData(FILE *file, struct data_format data[], int count)
|
||||
{
|
||||
int i, j;
|
||||
for (j = 0; j < count; j++) {
|
||||
if (data[j].offset)
|
||||
fprintf(file, "\n%c%s(%d)", data[j].type, data[j].name,
|
||||
data[j].offset);
|
||||
else
|
||||
fprintf(file, "\n%c%s", data[j].type, data[j].name);
|
||||
|
||||
for (i = 0; i < data[j].length; i++) {
|
||||
if (i % 60 == 0) fputc('\n', file);
|
||||
fputc(data[j].nuc[i], file);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ErrorOut - abort with a message when `code` is zero.
 *
 * code   - value to check; zero means failure.
 * string - text printed after "Error:" on stderr when `code` is zero.
 *
 * Returns 0 when `code` is non-zero; otherwise prints the message and
 * exits with status 1.
 */
int ErrorOut(int code, char *string)
{
	if (code != 0)
		return 0;

	fprintf(stderr, "Error:%s\n", string);
	exit(1);
}
|
||||
|
||||
/*
 * Calloc - zero-filled allocation that never returns NULL.
 *
 * count - number of elements.
 * size  - size of one element in bytes.
 *
 * Returns a pointer to count*size zeroed bytes.  A request for zero bytes
 * is rounded up to one byte, because calloc() may legitimately return
 * NULL for it and that must not be mistaken for an out-of-memory error.
 * On a negative argument or allocation failure the message is written to
 * stderr (stdout carries sequence data in these tools) and the program
 * exits with status -1.
 */
char *Calloc(int count, int size)
{
	char *temp = NULL;

	if (count >= 0 && size >= 0) {
		if (count == 0 || size == 0)
			temp = (char *)calloc(1, 1);
		else
			temp = (char *)calloc((size_t)count, (size_t)size);
	}

	if (temp == NULL) {
		fprintf(stderr, "Error in Calloc\n");
		exit(-1);
	}
	return (temp);
}
|
||||
|
||||
/*
 * Realloc - resize a heap block; never returns NULL.
 *
 * block - block previously obtained from Calloc/Realloc, or NULL.
 * size  - new size in bytes.
 *
 * Returns a pointer to the (possibly moved) block.  A size of zero is
 * rounded up to one byte, because realloc(p, 0) may free the block and
 * return NULL, which would otherwise look like an allocation failure.
 * On a negative size or allocation failure the message is written to
 * stderr and the program exits with status -1.
 */
char *Realloc(char *block, int size)
{
	char *temp = NULL;

	if (size >= 0)
		temp = (char *)realloc(block, size > 0 ? (size_t)size : 1);

	if (temp == NULL) {
		fprintf(stderr, "Error in Realloc\n");
		exit(-1);
	}
	return (temp);
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
CC = cc
|
||||
FLAGS = -lm
|
||||
|
||||
all:CAP2 Restriction count findall varpos lsadt sho_helix Zuk_to_gen
|
||||
|
||||
CAP2: CAP2.c
|
||||
$(CC) CAP2.c -O -o CAP2
|
||||
Restriction: Restriction.c
|
||||
$(CC) Restriction.c -O -o Restriction
|
||||
Zuk_to_gen: Zuk_to_gen.c
|
||||
$(CC) Zuk_to_gen.c -O -o Zuk_to_gen
|
||||
count: count.c
|
||||
$(CC) count.c -O -o count $(FLAGS)
|
||||
findall: findall.c
|
||||
$(CC) findall.c -O -o findall
|
||||
lsadt: lsadt.c
|
||||
$(CC) lsadt.c -O -o lsadt $(FLAGS)
|
||||
sho_helix: sho_helix.c
|
||||
$(CC) sho_helix.c -O -o sho_helix
|
||||
varpos: varpos.c
|
||||
$(CC) varpos.c -O -o varpos
|
|
@ -0,0 +1,70 @@
|
|||
#include "Flatio.c"
|
||||
#define WIDTH 50
|
||||
|
||||
main()
|
||||
{
|
||||
struct data_format data[10000];
|
||||
int i,j,k,numseqs,maxlen = 0,minlen=999999999;
|
||||
int lines_printed;
|
||||
int len[1000];
|
||||
char a,b;
|
||||
|
||||
numseqs = ReadFlat(stdin,data,10000);
|
||||
if(numseqs == 0)
|
||||
exit(1);
|
||||
|
||||
for(k=0;k<numseqs;k++)
|
||||
{
|
||||
minlen = MIN(minlen,data[k].offset);
|
||||
maxlen = MAX(maxlen,data[j].length+data[k].offset);
|
||||
}
|
||||
|
||||
for(j=minlen;j<maxlen;j+=WIDTH)
|
||||
{
|
||||
lines_printed = FALSE;
|
||||
for (i=0;i<numseqs;i++)
|
||||
{
|
||||
data[i].name[19] = '\0';
|
||||
if(((data[i].offset > j+WIDTH) ||
|
||||
(data[i].offset+data[i].length<j)));
|
||||
else
|
||||
{
|
||||
lines_printed = TRUE;
|
||||
printf("\n%20s%5d ", data[i].name,
|
||||
indx(j,&(data[i])));
|
||||
for(k=j;k<j+WIDTH;k++)
|
||||
{
|
||||
if((k<data[i].length+data[i].offset)
|
||||
&& (k>=data[i].offset))
|
||||
putchar(data[i].nuc[k-data[i].offset]);
|
||||
else putchar(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
if(lines_printed)
|
||||
{
|
||||
printf("\n |---------|---------|---------|---------|---------\n");
|
||||
printf(" %6d %6d %6d %6d %6d\n\n",j+1,j+11,j+21,j+31,j+41);
|
||||
}
|
||||
}
|
||||
putchar('\n');
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
||||
int indx(pos,seq)
|
||||
int pos;
|
||||
struct data_format *seq;
|
||||
{
|
||||
int j,count=0;
|
||||
if(pos < seq->offset)
|
||||
return (0);
|
||||
if(pos>seq->offset+seq->length)
|
||||
pos = seq->offset+seq->length;
|
||||
pos -= seq->offset;
|
||||
for(j=0;j<pos;j++)
|
||||
if(seq->nuc[j] != '-')
|
||||
if(seq->nuc[j] != '~')
|
||||
count++;
|
||||
return (count);
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright 1991 Steven Smith at the Harvard Genome Lab.
|
||||
* All rights reserved.
|
||||
*/
|
||||
#include "Flatio.c"
|
||||
|
||||
|
||||
main(ac,av)
|
||||
int ac;
|
||||
char **av;
|
||||
{
|
||||
struct data_format data[10000];
|
||||
FILE *file;
|
||||
int i,j,k,color,numseqs,numenzymes,nextpos,len;
|
||||
char enzymes[80][80],dummy[80];
|
||||
if(ac<3)
|
||||
{
|
||||
fprintf(stderr,"Usage: %s enzyme_file seq_file\n",av[0]);
|
||||
exit(-1);
|
||||
}
|
||||
file = fopen(av[2],"r");
|
||||
if(file == NULL)
|
||||
exit(-1);
|
||||
|
||||
numseqs = ReadFlat(file,data,10000);
|
||||
|
||||
file = fopen(av[1],"r");
|
||||
if(file == NULL)
|
||||
exit(-1);
|
||||
|
||||
for(numenzymes = 0;
|
||||
fscanf(file,"%s %s",enzymes[numenzymes],dummy)>0;
|
||||
numenzymes++);
|
||||
|
||||
for(i=0;i<numseqs;i++)
|
||||
{
|
||||
/*
|
||||
if(numseqs>1)
|
||||
*/
|
||||
printf("name:%s\n",data[i].name);
|
||||
printf("length:%d\n",strlen(data[i].nuc));
|
||||
if(numseqs>1)
|
||||
printf("nodash:\n");
|
||||
printf("start:\n");
|
||||
for(j=0;j<data[i].length;)
|
||||
{
|
||||
for(;data[i].nuc[j] == '-' && j<data[i].length;)
|
||||
{
|
||||
printf("8\n");
|
||||
j++;
|
||||
}
|
||||
if((nextpos = FindNext(data[i].nuc,j,enzymes,numenzymes
|
||||
,&len,&color)) != -1)
|
||||
{
|
||||
for(k=j;k<nextpos;k++)
|
||||
printf("8\n");
|
||||
for(k=j+nextpos;k<j+nextpos+len;k++)
|
||||
printf("%d\n",color);
|
||||
j=nextpos+len;
|
||||
}
|
||||
else
|
||||
for(;j<data[i].length;j++)
|
||||
printf("8\n");
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
||||
/*
 * FindNext - locate the first enzyme site in `target` at or after
 * `offset`.
 *
 * target     - NUL-terminated sequence to search.
 * offset     - first position at which a site may start.
 * enzymes    - table of NUL-terminated recognition patterns.
 * numenzymes - number of entries in `enzymes`.
 * match_len  - out: length of the matched pattern (0 when none matched).
 * color      - out: (index of the matched enzyme) % 6 + 1; left
 *              untouched when nothing matched.
 *
 * Returns the start position of the earliest match, or -1 when no
 * pattern matches.  When two enzymes match at the same position the one
 * listed first wins.
 *
 * Unlike the original, a search that fails for one enzyme is not
 * recorded as a candidate position, so it cannot hide a real match of a
 * later enzyme, and a match that ends on the last character of `target`
 * is reported instead of being turned into -1.
 */
int FindNext(char *target, int offset, char enzymes[][80], int numenzymes,
	     int *match_len, int *color)
{
	int i, j, k, closest, len1, dif;
	int tlen = (int)strlen(target);
	int found = 0;
	int Comp();	/* defined later in this file */

	closest = tlen;
	*match_len = 0;
	for (k = 0; k < numenzymes; k++) {
		len1 = (int)strlen(enzymes[k]);
		if (len1 <= 0)
			continue;

		/* one past the last position where the pattern still fits */
		dif = tlen - len1 + 1;

		for (j = offset; j < dif; j++) {
			for (i = 0; i < len1; i++)
				if (!Comp(enzymes[k][i], target[i + j]))
					break;
			if (i == len1)
				break;	/* every pattern position matched at j */
		}

		if (j < dif && j < closest) {
			closest = j;
			*color = k % 6 + 1;
			*match_len = len1;
			found = 1;
		}
	}
	return (found ? closest : -1);
}
|
||||
|
||||
/*
 * iupac_bits - bit set for a nucleotide code: A=1, C=2, G=4, T/U=8,
 * ambiguity codes are the OR of the bases they stand for.  Upper and
 * lower case give the same value; every other character gives 0.
 */
static int iupac_bits(int ch)
{
	/* chars above 127 arrive as negative or large ints: no base */
	if (ch < 0 || ch > 127)
		return 0;

	switch (ch & 0xDF) {	/* clear bit 5: folds a-z onto A-Z */
	case 'A': return 0x01;
	case 'C': return 0x02;
	case 'M': return 0x03;
	case 'G': return 0x04;
	case 'R': return 0x05;
	case 'S': return 0x06;
	case 'V': return 0x07;
	case 'T': return 0x08;
	case 'U': return 0x08;
	/*
	 * W (A or T) was 0 in the original table while X held 0x09: the
	 * two entries were transposed, so W never matched anything.
	 */
	case 'W': return 0x09;
	/* X is not an IUPAC code; it keeps its original table value. */
	case 'X': return 0x09;
	case 'Y': return 0x0a;
	case 'H': return 0x0b;
	case 'K': return 0x0c;
	case 'D': return 0x0d;
	case 'B': return 0x0e;
	case 'N': return 0x0f;
	default:  return 0;
	}
}

/*
 * Comp - test whether two nucleotide codes are compatible.
 *
 * a, b - characters to compare (case-insensitive).
 *
 * Returns the bases the two codes have in common as a bit set, i.e.
 * non-zero when they can match and 0 otherwise.  Gaps and any other
 * non-nucleotide character never match.
 */
int Comp(int a, int b)
{
	return (iupac_bits(a) & iupac_bits(b));
}
|
|
@ -0,0 +1,88 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* One named sequence as passed to WriteGen(). */
typedef struct Sequence {
    int   len;        /* number of characters of nuc[] to write out */
    char  name[80];   /* sequence name (NUL-terminated) */
    char  type[8];    /* type tag printed on the LOCUS line, e.g. "RNA" */
    char *nuc;        /* sequence characters; points at caller storage */
} Sequence;
|
||||
|
||||
main()
|
||||
{
|
||||
char a[5000], b[5000], cinline[132];
|
||||
int pos1, pos2, pos3, i, j, k, FLAG;
|
||||
Sequence pair[2];
|
||||
|
||||
for (j = 0; j < 5000; j++) b[j] = '-';
|
||||
FLAG = (int)gets(cinline);
|
||||
for (j = 0; FLAG; j++) {
|
||||
FLAG = (int)gets(cinline);
|
||||
sscanf(cinline, "%d", &pos1);
|
||||
if ((sscanf(cinline, "%*6c %c %d %d %d", &(a[j]), &k, &pos2,
|
||||
&pos3) == 4) &&
|
||||
(FLAG)) {
|
||||
if (pos3 != 0) {
|
||||
if (pos1 < pos3) {
|
||||
b[pos1 - 1] = '[';
|
||||
b[pos3 - 1] = ']';
|
||||
}
|
||||
else {
|
||||
b[pos3 - 1] = '[';
|
||||
b[pos1 - 1] = ']';
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
pair[0].len = j;
|
||||
strcpy(pair[0].name, "HELIX");
|
||||
strcpy(pair[0].type, "TEXT");
|
||||
|
||||
pair[1].len = j;
|
||||
/*
|
||||
sscanf(cinline,"%*24c
|
||||
%s",pair[1].name);
|
||||
*/
|
||||
strcpy(pair[1].name, "Sequence");
|
||||
strcpy(pair[1].type, "RNA");
|
||||
|
||||
pair[0].nuc = b;
|
||||
pair[1].nuc = a;
|
||||
|
||||
WriteGen(pair, stdout, 2);
|
||||
for (j = 0; j < 5000; j++) b[j] = '-';
|
||||
j = -1;
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
WriteGen(seq, file, numseq) Sequence *seq;
|
||||
FILE *file;
|
||||
int numseq;
|
||||
{
|
||||
register i, j;
|
||||
char temp[14];
|
||||
|
||||
for (j = 0; j < numseq; j++) fprintf(file, "%-.12s\n", seq[j].name);
|
||||
|
||||
fprintf(file, "ZZZZZZZZZZ\n");
|
||||
|
||||
for (j = 0; j < numseq; j++) {
|
||||
strcpy(temp, seq[j].name);
|
||||
for (i = strlen(temp); i < 13; i++) temp[i] = ' ';
|
||||
temp[i] = '\0';
|
||||
fprintf(file, "LOCUS %-.12s %s %d BP\n", temp,
|
||||
seq[j].type, seq[j].len);
|
||||
|
||||
fprintf(file, "ORIGIN");
|
||||
for (i = 0; i < seq[j].len; i++) {
|
||||
if (i % 60 == 0) fprintf(file, "\n%9d", i + 1);
|
||||
if (i % 10 == 0) fprintf(file, " ");
|
||||
fprintf(file, "%c", seq[j].nuc[i]);
|
||||
}
|
||||
fprintf(file, "\n//\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -0,0 +1,393 @@
|
|||
/*
|
||||
* Copyright 1991 Steven Smith at the Harvard Genome Lab.
|
||||
* All rights reserved.
|
||||
*/
|
||||
#include <math.h>
|
||||
|
||||
#include "Flatio.c"
|
||||
|
||||
#define FALSE 0
#define TRUE 1

/* Values of the global `correction` (selected with -c=...). */
#define JUKES 0
#define OLSEN 1
#define NONE 2

/*
 * Smaller of two values.  The whole expansion is parenthesised so the macro
 * can be used inside a larger expression; each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#define Min(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
int width, start, jump, usecase, sim, correction;
|
||||
int tbl, numseq, num, denom, special;
|
||||
char argtyp[255], argval[255];
|
||||
|
||||
float acwt = 1.0, agwt = 1.0, auwt = 1.0, ucwt = 1.0, ugwt = 1.0, gcwt = 1.0;
|
||||
|
||||
float dist[200][200];
|
||||
|
||||
struct data_format data[10000];
|
||||
float parta[200], partc[200], partg[200], partu[200], setdist();
|
||||
|
||||
main(ac, av) int ac;
|
||||
char **av;
|
||||
{
|
||||
int i, j, k;
|
||||
extern int ReadFlat();
|
||||
FILE *file;
|
||||
|
||||
width = 1;
|
||||
jump = 1;
|
||||
if (ac == 1) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-sim] [-case] [-c=<none,olsen,jukes>] ",
|
||||
av[0]);
|
||||
fprintf(stderr, "[-t] alignment_flat_file\n");
|
||||
exit(1);
|
||||
}
|
||||
for (j = 1; j < ac - 1; j++) {
|
||||
getarg(av, j, argtyp, argval);
|
||||
if (strcmp(argtyp, "-s=") == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%d", &start);
|
||||
start--;
|
||||
}
|
||||
else if (strcmp(argtyp, "-m=") == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%d", &width);
|
||||
}
|
||||
else if (strcmp(argtyp, "-j=") == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%d", &jump);
|
||||
}
|
||||
else if (strcmp(argtyp, "-case") == 0)
|
||||
usecase = TRUE;
|
||||
|
||||
else if (strcmp(argtyp, "-sim") == 0)
|
||||
sim = TRUE;
|
||||
|
||||
else if (strcmp(argtyp, "-c=") == 0) {
|
||||
if (strcmp(argval, "olsen") == 0)
|
||||
correction = OLSEN;
|
||||
|
||||
else if (strcmp(argval, "none") == 0)
|
||||
correction = NONE;
|
||||
|
||||
else if (strcmp(argval, "jukes") == 0)
|
||||
correction = JUKES;
|
||||
|
||||
else
|
||||
fprintf(stderr, "Correction type %s %s\n",
|
||||
argval, "unknown, using JUKES");
|
||||
}
|
||||
else if (strcmp("-t", argtyp) == 0)
|
||||
tbl = TRUE;
|
||||
|
||||
else if (strcmp("-ac=", argtyp) == 0 ||
|
||||
strcmp("-ca=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &acwt);
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-au=", argtyp) == 0 ||
|
||||
strcmp("-ua=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &auwt);
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-ag=", argtyp) == 0 ||
|
||||
strcmp("-ga=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &agwt);
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-uc=", argtyp) == 0 ||
|
||||
strcmp("-cu=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &ucwt);
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-ug=", argtyp) == 0 ||
|
||||
strcmp("-gu=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &ugwt);
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-gc=", argtyp) == 0 ||
|
||||
strcmp("-cg=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &gcwt);
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-transition=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &ucwt);
|
||||
agwt = ucwt;
|
||||
special = TRUE;
|
||||
}
|
||||
else if (strcmp("-transversion=", argtyp) == 0) {
|
||||
j++;
|
||||
sscanf(argval, "%f", &gcwt);
|
||||
ugwt = gcwt;
|
||||
acwt = gcwt;
|
||||
auwt = gcwt;
|
||||
special = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
file = fopen(av[ac - 1], "r");
|
||||
if ((file == NULL) || (ac == 1)) {
|
||||
fprintf(stderr, "Error opening input file %s\n", av[ac - 1]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
numseq = ReadFlat(file, data, 10000);
|
||||
|
||||
fclose(file);
|
||||
SetPart();
|
||||
|
||||
for (j = 0; j < numseq - 1; j++)
|
||||
for (k = j + 1; k < numseq; k++) {
|
||||
Compare(j, k, &num, &denom);
|
||||
dist[j][k] = setdist(num, denom, j, k);
|
||||
}
|
||||
|
||||
Report();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
Compare(a, b, num, denom) int a, b, *num, *denom;
|
||||
{
|
||||
int mn, i, j, casefix, match, blank;
|
||||
float fnum = 0.0;
|
||||
struct data_format *da, *db;
|
||||
char ac, bc;
|
||||
|
||||
casefix = (usecase) ? 0 : 32;
|
||||
*num = 0;
|
||||
*denom = 0;
|
||||
|
||||
da = &data[a];
|
||||
db = &data[b];
|
||||
mn = Min(da->length, db->length);
|
||||
|
||||
for (j = 0; j < mn; j += jump) {
|
||||
match = TRUE;
|
||||
blank = TRUE;
|
||||
for (i = 0; i < width; i++) {
|
||||
ac = da->nuc[j + i] | casefix;
|
||||
bc = db->nuc[j + i] | casefix;
|
||||
if (ac == 't') ac = 'u';
|
||||
if (ac == 'T') ac = 'U';
|
||||
if (bc == 't') bc = 'u';
|
||||
if (bc == 'T') bc = 'U';
|
||||
|
||||
if ((ac == '-') || (ac | 32) == 'n' || (ac == ' ') ||
|
||||
(bc == '-') || (bc | 32) == 'n' || (bc == ' '))
|
||||
;
|
||||
|
||||
else {
|
||||
blank = FALSE;
|
||||
if (ac != bc) {
|
||||
match = FALSE;
|
||||
switch (ac) {
|
||||
case 'a':
|
||||
if (bc == 'c')
|
||||
fnum += acwt;
|
||||
else if (bc == 'g')
|
||||
fnum += agwt;
|
||||
else if (bc == 'u')
|
||||
fnum += auwt;
|
||||
break;
|
||||
|
||||
case 'c':
|
||||
if (bc == 'a')
|
||||
fnum += acwt;
|
||||
else if (bc == 'g')
|
||||
fnum += gcwt;
|
||||
else if (bc == 'u')
|
||||
fnum += ucwt;
|
||||
break;
|
||||
|
||||
case 'g':
|
||||
if (bc == 'a')
|
||||
fnum += agwt;
|
||||
else if (bc == 'c')
|
||||
fnum += gcwt;
|
||||
else if (bc == 'u')
|
||||
fnum += ugwt;
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
if (bc == 'a')
|
||||
fnum += auwt;
|
||||
else if (bc == 'c')
|
||||
fnum += ucwt;
|
||||
else if (bc == 'g')
|
||||
fnum += ugwt;
|
||||
break;
|
||||
|
||||
case 't':
|
||||
if (bc == 'a')
|
||||
fnum += auwt;
|
||||
else if (bc == 'c')
|
||||
fnum += ucwt;
|
||||
else if (bc == 'g')
|
||||
fnum += ugwt;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if ((blank == FALSE) && match) {
|
||||
(*num)++;
|
||||
(*denom)++;
|
||||
}
|
||||
else if (!blank)
|
||||
(*denom)++;
|
||||
}
|
||||
}
|
||||
if (special) (*num) = *denom - (int)fnum;
|
||||
return 0;
|
||||
}
|
||||
|
||||
float setdist(num, denom, a, b)
|
||||
int num, denom, a, b;
|
||||
{
|
||||
float cor;
|
||||
switch (correction) {
|
||||
case OLSEN:
|
||||
cor = parta[a] * parta[b] + partc[a] * partc[b] +
|
||||
partg[a] * partg[b] + partu[a] * partu[b];
|
||||
break;
|
||||
|
||||
case JUKES:
|
||||
cor = 0.25;
|
||||
break;
|
||||
|
||||
case NONE:
|
||||
cor = 0.0;
|
||||
break;
|
||||
|
||||
default:
|
||||
cor = 0.0;
|
||||
break;
|
||||
};
|
||||
|
||||
if (correction == NONE)
|
||||
return (1.0 - (float)num / (float)denom);
|
||||
else
|
||||
return (-(1.0 - cor) * log(1.0 / (1.0 - cor) *
|
||||
((float)num / (float)denom - cor)));
|
||||
}
|
||||
|
||||
/*
 * getarg - split argument av[ndx] into an option name and a value.
 *
 * The name is everything up to the first '=', blank or end of string; when
 * the terminator is '=', it is kept as the last character of the name
 * (e.g. "-c=olsen" gives atype "-c=" and aval "olsen").  The value runs
 * from after the '=' to the next blank or the end of the string.  When
 * there is no '=', aval is set to the empty string.
 *
 * atype and aval must each have room for 255 bytes; longer names or values
 * are truncated to fit.
 *
 * Always returns 0.
 */
int getarg(av, ndx, atype, aval)
char **av, atype[], aval[];
int ndx;
{
    enum { BUFSZ = 255 }; /* size of the buffers the callers pass in */
    const char *arg = av[ndx];
    int src, n;
    char c;

    n = 0;
    for (src = 0; (c = arg[src]) != ' ' && c != '=' && c != '\0'; src++)
        if (n < BUFSZ - 2) /* leave room for '=' and the terminator */
            atype[n++] = c;
    if (c == '=')
        atype[n++] = c;
    atype[n] = '\0';

    /* Never leave a value from an earlier call behind. */
    aval[0] = '\0';
    if (c == '=') {
        n = 0;
        for (src++; (c = arg[src]) != '\0' && c != ' '; src++)
            if (n < BUFSZ - 1)
                aval[n++] = c;
        aval[n] = '\0';
    }
    return 0;
}
|
||||
|
||||
SetPart()
|
||||
{
|
||||
int a, c, g, u, tot, i, j;
|
||||
char nuc;
|
||||
|
||||
for (j = 0; j < numseq; j++) {
|
||||
a = 0;
|
||||
c = 0;
|
||||
g = 0;
|
||||
u = 0;
|
||||
tot = 0;
|
||||
|
||||
for (i = 0; i < data[j].length; i++) {
|
||||
nuc = data[j].nuc[i] | 32;
|
||||
switch (nuc) {
|
||||
case 'a':
|
||||
a++;
|
||||
tot++;
|
||||
break;
|
||||
|
||||
case 'c':
|
||||
c++;
|
||||
tot++;
|
||||
break;
|
||||
|
||||
case 'g':
|
||||
g++;
|
||||
tot++;
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
u++;
|
||||
tot++;
|
||||
break;
|
||||
|
||||
case 't':
|
||||
u++;
|
||||
tot++;
|
||||
break;
|
||||
};
|
||||
}
|
||||
parta[j] = (float)a / (float)tot;
|
||||
partc[j] = (float)c / (float)tot;
|
||||
partg[j] = (float)g / (float)tot;
|
||||
partu[j] = (float)u / (float)tot;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Report()
|
||||
{
|
||||
int i, ii, jj, j, k;
|
||||
|
||||
if (tbl) printf("#\n#-\n#-\n#-\n#-\n");
|
||||
for (jj = 0, j = 0; j < numseq; j++) {
|
||||
if (tbl) printf("%2d: %-.15s|", jj + 1, data[j].name);
|
||||
|
||||
for (i = 0; i < j; i++) {
|
||||
if (sim)
|
||||
printf("%6.1f", 100 - dist[i][j] * 100.0);
|
||||
else
|
||||
printf("%6.1f", dist[i][j] * 100.0);
|
||||
}
|
||||
printf("\n");
|
||||
jj++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * find - report whether string b occurs anywhere inside string a.
 *
 * Returns 1 when b is found, 0 otherwise.  An empty b is found in any
 * non-empty a; when a is empty the result is always 0.
 */
int find(b, a)
char *a, *b;
{
    int needle_len, hay_len;
    int at, k;

    needle_len = strlen(b);
    hay_len = strlen(a);

    for (at = 0; at < hay_len; at++) {
        /* Length of the common prefix of b and a starting at `at`. */
        k = 0;
        while (k < needle_len && a[at + k] == b[k])
            k++;
        if (k == needle_len)
            return 1;
    }
    return 0;
}
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Copyright 1991 Steven Smith at the Harvard Genome Lab.
|
||||
* All rights reserved.
|
||||
*/
|
||||
#include "Flatio.c"
|
||||
|
||||
int main(ac, av)
|
||||
int ac;
|
||||
char **av;
|
||||
{
|
||||
struct data_format data[10000];
|
||||
int Match = 2, Mismatch = 8;
|
||||
int i, j, k, l, numseqs, mis, Case = 32;
|
||||
int slen, pcnt, pos;
|
||||
int UT = FALSE;
|
||||
char c;
|
||||
if (ac < 3) {
|
||||
fprintf(stderr,
|
||||
"usage: %s search_string %%mismatch [-case] [-match "
|
||||
"color] [-mismatch color]\n",
|
||||
av[0]);
|
||||
fprintf(stderr, " [-u=t]\n");
|
||||
exit(0);
|
||||
}
|
||||
for (j = 3; j < ac; j++) {
|
||||
if (strcmp("-case", av[j]) == 0) Case = 0;
|
||||
if (strcmp("-match", av[j]) == 0)
|
||||
sscanf(av[j + 1], "%d", &Match);
|
||||
if (strcmp("-u=t", av[j]) == 0) UT = TRUE;
|
||||
if (strcmp("-mismatch", av[j]) == 0)
|
||||
sscanf(av[j + 1], "%d", &Mismatch);
|
||||
}
|
||||
numseqs = ReadFlat(stdin, data, 10000);
|
||||
|
||||
slen = strlen(av[1]);
|
||||
sscanf(av[2], "%d", &pcnt);
|
||||
pcnt *= slen;
|
||||
pcnt /= 100;
|
||||
|
||||
if (UT)
|
||||
for (j = 0; j <= strlen(av[1]); j++) {
|
||||
if (av[1][j] == 't') av[1][j] = 'u';
|
||||
if (av[1][j] == 'T') av[1][j] = 'U';
|
||||
}
|
||||
|
||||
for (i = 0; i < numseqs; i++) {
|
||||
if (UT)
|
||||
for (j = 0; data[i].nuc[j] != '\0'; j++) {
|
||||
if (data[i].nuc[j] == 't')
|
||||
data[i].nuc[j] = 'u';
|
||||
else if (data[i].nuc[j] == 'T')
|
||||
data[i].nuc[j] = 'U';
|
||||
}
|
||||
printf("name:%s\n", data[i].name);
|
||||
printf("length:%d\n", strlen(data[i].nuc));
|
||||
printf("start:\n");
|
||||
for (j = 0; j < data[i].length; j++) {
|
||||
mis = 0;
|
||||
for (k = 0, pos = j; k < slen && pos < data[i].length;
|
||||
k++, pos++) {
|
||||
c = data[i].nuc[pos];
|
||||
for (; (c == ' ' || c == '-' || c == '~') &&
|
||||
pos < data[i].length;)
|
||||
c = data[i].nuc[++pos];
|
||||
c |= Case;
|
||||
|
||||
if (data[i].type == '#') {
|
||||
if (CompIUP(c, (av[1][k] | Case)) ==
|
||||
FALSE)
|
||||
mis++;
|
||||
}
|
||||
else {
|
||||
if (c != (av[1][k] | Case)) mis++;
|
||||
}
|
||||
}
|
||||
if (k == slen && mis <= pcnt) {
|
||||
for (k = j; k < pos; k++) printf("%d\n", Match);
|
||||
j = pos - 1;
|
||||
}
|
||||
else
|
||||
printf("%d\n", Mismatch);
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
 * CompIUP - test whether two IUPAC nucleotide codes are compatible.
 *
 * Each character maps to a 4-bit set (A = 1, C = 2, G = 4, T/U = 8;
 * ambiguity codes are the OR of the bases they cover).  The result is the
 * intersection of the two sets: non-zero when the codes share a base.
 *
 * Characters of different case (bit 32 differs) never match; callers that
 * want a case-insensitive comparison fold both characters first.
 * Characters outside 0..127 never match either.
 */
int CompIUP(a, b)
char a, b;
{
    /* Indexed by ASCII code; non-nucleotide characters are the empty set. */
    static int matr[128] = {
        /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /*  32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /*  48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* @A-O */ 0, 0x01, 0x0e, 0x02, 0x0d, 0, 0, 0x04,
                   0x0b, 0, 0, 0x0c, 0, 0x03, 0x0f, 0,
        /* P-_  */ 0, 0, 0x05, 0x06, 0x08, 0x08, 0x07, 0x09,
                   0, 0x0a, 0, 0, 0, 0, 0, 0,
        /* `a-o */ 0, 0x01, 0x0e, 0x02, 0x0d, 0, 0, 0x04,
                   0x0b, 0, 0, 0x0c, 0, 0x03, 0x0f, 0,
        /* p-DEL*/ 0, 0, 0x05, 0x06, 0x08, 0x08, 0x07, 0x09,
                   0, 0x0a, 0, 0, 0, 0, 0, 0
    };
    int ia = (unsigned char)a;
    int ib = (unsigned char)b;

    /*
     * '!=' binds tighter than '&', so the old test "a & 32 != b & 32"
     * was always 0 and the case check never fired.
     */
    if ((a & 32) != (b & 32))
        return 0;

    /* Guard the table lookup: plain char may be signed or hold 8-bit data. */
    if (ia > 127 || ib > 127)
        return 0;

    return (matr[ia] & matr[ib]);
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,87 @@
|
|||
#include "Flatio.c"
|
||||
/* Colour values written to the colour mask (see match()). */
#define BLACK  8 /* both positions are gaps */
#define RED    3 /* first position is a gap, its partner is not */
#define BLUE   6 /* A with T/U, or G with C */
#define YELLOW 1 /* any other combination */
#define AQUA   4 /* G with T/U */
|
||||
main()
|
||||
{
|
||||
struct data_format data[10000];
|
||||
int i,j,k,numseqs,mask = -1;
|
||||
int pair[20000],stack[20000],spt = 0;
|
||||
char ch;
|
||||
|
||||
numseqs = ReadFlat(stdin,data,10000);
|
||||
if(numseqs == 0)
|
||||
exit(1);
|
||||
for(j=0;j<numseqs;j++)
|
||||
if(data[j].type == '"')
|
||||
mask = j;
|
||||
if(mask == -1)
|
||||
exit(1);
|
||||
|
||||
for(j=0;j<data[mask].length;j++)
|
||||
{
|
||||
if(data[mask].nuc[j] == '[')
|
||||
stack[spt++] = j;
|
||||
else if(data[mask].nuc[j] == ']')
|
||||
{
|
||||
i = stack[--spt];
|
||||
pair[j] = i;
|
||||
pair[i] = j;
|
||||
}
|
||||
else
|
||||
pair[j] = -1;
|
||||
}
|
||||
|
||||
for(j=0;j<numseqs;j++)
|
||||
if(j!=mask)
|
||||
{
|
||||
printf("name:%s\nlength:%d\nstart:\n",
|
||||
data[j].name,data[j].length);
|
||||
i = MIN(data[mask].length,data[j].length);
|
||||
for(k=0;k<i;k++)
|
||||
if(pair[k] != -1)
|
||||
printf("%d\n",match(data[j].nuc[k],
|
||||
data[j].nuc[pair[k]]));
|
||||
else
|
||||
printf("8\n");
|
||||
for(k=0;k<data[j].length - data[mask].length;k++)
|
||||
printf("8\n");
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int match(a,b)
|
||||
char a,b;
|
||||
{
|
||||
char aa,bb;
|
||||
aa=a|32;
|
||||
bb=b|32;
|
||||
|
||||
printf(stderr,"%c %c\n",aa,bb);
|
||||
|
||||
if(a=='-' || a=='~')
|
||||
{
|
||||
if((b=='-') || (b=='~'))
|
||||
return(BLACK);
|
||||
else
|
||||
return(RED);
|
||||
}
|
||||
else if(aa=='a' && (bb=='t' || bb=='u'))
|
||||
return(BLUE);
|
||||
else if(bb=='a' && (aa=='t' || aa=='u'))
|
||||
return(BLUE);
|
||||
else if(bb=='c' && aa=='g' )
|
||||
return(BLUE);
|
||||
else if(bb=='g' && aa=='c' )
|
||||
return(BLUE);
|
||||
else if(aa=='g' && (bb=='t' || bb=='u'))
|
||||
return(AQUA);
|
||||
else if(bb=='g' && (aa=='t' || aa=='u'))
|
||||
return(AQUA);
|
||||
else return(YELLOW);
|
||||
}
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
#include "Flatio.c"
|
||||
/* Larger of two values; each argument may be evaluated twice. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
/*
|
||||
* Varpos.c- An extremely simple program for showing which positions
|
||||
* are varying in an alignment. Use this as a model for other
|
||||
* external functions.
|
||||
*
|
||||
* Read in a flat file alignment, pass back an alignment color
|
||||
* mask.
|
||||
*
|
||||
* Copyright 1991/1992 Steven Smith, Harvard Genome lab.
|
||||
*
|
||||
*/
|
||||
|
||||
main(ac,av)
|
||||
int ac;
|
||||
char **av;
|
||||
{
|
||||
struct data_format data[10000];
|
||||
int i,j,k,numseqs,rev = FALSE;
|
||||
int maxlen = -99999,
|
||||
score = 0,
|
||||
minoffset = 99999;
|
||||
char ch;
|
||||
if(ac>2)
|
||||
{
|
||||
fprintf(stderr,"Usage %s [-rev]<gde_flat_file>gde_color_mask\n", av[0]);
|
||||
exit(1);
|
||||
}
|
||||
if(ac == 2)
|
||||
if(strcmp(av[1],"-rev") == 0)
|
||||
rev = TRUE;
|
||||
|
||||
numseqs = ReadFlat(stdin,data,10000);
|
||||
|
||||
if(numseqs == 0)
|
||||
exit(1);
|
||||
|
||||
for(j=0;j<numseqs;j++)
|
||||
{
|
||||
if(data[j].length+data[j].offset > maxlen)
|
||||
maxlen = data[j].length+data[j].offset;
|
||||
if(data[j].offset < minoffset)
|
||||
minoffset = data[j].offset;
|
||||
}
|
||||
|
||||
printf("length:%d\n",maxlen);
|
||||
printf("offset:%d\n",minoffset);
|
||||
printf("start:\n");
|
||||
for(j=0;j<maxlen;j++)
|
||||
{
|
||||
int a=0,c=0,g=0,u=0;
|
||||
|
||||
for(k=0;k<numseqs;k++)
|
||||
if(data[k].length+data[k].offset > j)
|
||||
{
|
||||
if(j>data[k].offset)
|
||||
ch=data[k].nuc[j-data[k].offset] | 32;
|
||||
else
|
||||
ch = '-';
|
||||
|
||||
if(ch=='a')a++;
|
||||
if(ch=='c')c++;
|
||||
if(ch=='g')g++;
|
||||
if(ch=='u')u++;
|
||||
if(ch=='t')u++;
|
||||
}
|
||||
|
||||
score=MAX(a,c);
|
||||
score=MAX(score,g);
|
||||
score=MAX(score,u);
|
||||
if(a+c+g+u)
|
||||
{
|
||||
if(rev)
|
||||
score=(score*6/(a+c+g+u)+8);
|
||||
else
|
||||
score=((8-score*6/(a+c+g+u))+8);
|
||||
}
|
||||
else
|
||||
score=8;
|
||||
printf("%d\n",score);
|
||||
}
|
||||
exit(0);
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#CFLAGS = -e -Bstatic
|
||||
CFLAGS = -g
|
||||
FC = g77
|
||||
FC = gfortran
|
||||
|
||||
all: lrna crna
|
||||
|
||||
|
|
|
@ -1,33 +1,33 @@
|
|||
implicit integer (a-z)
|
||||
parameter (maxn=1500,maxn2=3000)
|
||||
parameter (fldmax=maxn2)
|
||||
|
||||
c parameter (maxn=625,fldmax=2*maxn)
|
||||
parameter (maxn=1500,maxn2=3000)
|
||||
parameter (fldmax=maxn2)
|
||||
parameter (infinity=16000,sortmax=30000)
|
||||
parameter (mxbits=(maxn*(maxn+1)+31)/32)
|
||||
parameter (maxtloops=40)
|
||||
parameter (maxsiz=10000)
|
||||
|
||||
integer*2 vst(maxn*maxn),wst1(maxn*maxn),wst2(maxn*maxn)
|
||||
integer*2 vst(maxn*maxn),wst(maxn*maxn)
|
||||
integer poppen(4),maxpen
|
||||
real prelog
|
||||
|
||||
dimension newnum(maxsiz),hstnum(maxn2),force(maxn2),numseq(maxn2),
|
||||
. work1(maxn2,0:2),work2(maxn2),
|
||||
dimension newnum(maxsiz),hstnum(fldmax),force(fldmax),
|
||||
. numseq(fldmax), work(fldmax,0:2),
|
||||
. stack(5,5,5,5),tstk(5,5,5,5),dangle(5,5,5,2),hairpin(30)
|
||||
dimension bulge(30),inter(30),eparam(10),cntrl(10),nsave(2)
|
||||
c common /main/ newnum,hstnum,force,work1,work2,
|
||||
common /main/ newnum,hstnum,force,work1,work2,
|
||||
. stack,tstk,dangle,hairpin,bulge,inter,eparam,cntrl,nsave,n,
|
||||
. numseq,poppen,prelog,maxpen,vst,wst1,wst2
|
||||
common /main/ vst,wst,newnum,hstnum,force,numseq,work,stack,tstk,
|
||||
. dangle,hairpin,bulge,inter,eparam,cntrl,nsave,poppen,maxpen,prelog
|
||||
|
||||
character*1 seq(maxsiz)
|
||||
c character*5 inbuf
|
||||
character*10 progtitle
|
||||
character*30 seqlab
|
||||
common /seq/ seq,seqlab
|
||||
data progtitle/'crna'/
|
||||
|
||||
dimension list(100,4)
|
||||
common /list/ list,listsz
|
||||
common /nm/ vmin
|
||||
data progtitle/'lrna'/
|
||||
common /nm/ n,vmin
|
||||
|
||||
dimension basepr(maxn)
|
||||
common /traceback/ basepr
|
||||
|
@ -40,21 +40,3 @@ c character*5 inbuf
|
|||
|
||||
integer*2 tloop(maxtloops,2),numoftloops
|
||||
common/tloops/tloop,numoftloops
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,53 +0,0 @@
|
|||
#!/bin/csh
# Top-level build script: creates bin/ and runs the build step of each
# component directory in turn.

# Do not fail when the script is run a second time.
if (! -d bin) mkdir bin

#echo "Making blast..."
#cd BLAST
#Install.sh
#cd ..

echo "Making clustal..."
cd CLUSTAL
make
cd ..

echo "Making core GDE editor"
cd CORE
install.csh
cd ..

echo "Making FASTA"
cd FASTA
install.csh
cd ..

echo "Making Harvard Genome Lab functions"
cd HGL_SRC
install.csh
cd ..

echo "Making looptool"
cd LOOPTOOL
make
cd ..

echo "Making PHYLIP"
cd PHYLIP
install.csh
cd ..

echo "Making ReadSeq"
cd READSEQ
install.csh
cd ..

echo "Making other support programs"
cd SUPPORT
make
cd ..

echo "Making Zuker MFOLD"
cd ZUKER
install.csh
cd ..
|
BIN
bin/LoopTool
BIN
bin/LoopTool
Binary file not shown.
BIN
bin/Restriction
BIN
bin/Restriction
Binary file not shown.
BIN
bin/Zuk_to_gen
BIN
bin/Zuk_to_gen
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue