init
This commit is contained in:
commit
f8b7bfff1b
949 changed files with 253751 additions and 0 deletions
296
README.txt
Normal file
296
README.txt
Normal file
|
@ -0,0 +1,296 @@
|
||||||
|
General Information
|
||||||
|
(Not for the faint hearted)
|
||||||
|
|
||||||
|
30 September 1992
|
||||||
|
|
||||||
|
|
||||||
|
0. Introduction
|
||||||
|
---------------
|
||||||
|
|
||||||
|
This document contains information on the following subjects:
|
||||||
|
|
||||||
|
1. Installing the Staden Package on SPARCstations and DECstations
|
||||||
|
2. Installing the Staden Package on Other Machines
|
||||||
|
3. A Quick Guide to What's on the Release Tape
|
||||||
|
4. Overview of Data Flow During Sequence Assembly
|
||||||
|
5. Acknowledgements
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
1. Installing the Staden Package on SPARCstations and DECstations
|
||||||
|
-----------------------------------------------------------------
|
||||||
|
|
||||||
|
We are endeavouring to make the installation of the Staden Package as
|
||||||
|
quick and as easy as possible. In this current release we provide
|
||||||
|
statically linked sparc and mips executables as well as all sources.
|
||||||
|
|
||||||
|
To install the package:
|
||||||
|
|
||||||
|
1) Create a new directory for the software. You may have to log on as
|
||||||
|
superuser to do this.
|
||||||
|
|
||||||
|
% mkdir -p /home/BioSW/staden
|
||||||
|
|
||||||
|
2) Place the distribution tape in the drive and download the package:
|
||||||
|
|
||||||
|
-sun-
|
||||||
|
% tar xvf /dev/rst0
|
||||||
|
...system messages...
|
||||||
|
|
||||||
|
-dec-
|
||||||
|
% tar xvf /dev/rmt0h
|
||||||
|
...system messages...
|
||||||
|
|
||||||
|
3) Users of the C Shell should add the following to their .login
|
||||||
|
file:
|
||||||
|
|
||||||
|
setenv STADENROOT /home/BioSW/staden
|
||||||
|
source $STADENROOT/staden.login
|
||||||
|
|
||||||
|
Users of the Bourne shell should add the following to their .profile
|
||||||
|
file:
|
||||||
|
|
||||||
|
STADENROOT=/home/BioSW/staden
|
||||||
|
export STADENROOT
|
||||||
|
. $STADENROOT/staden.profile
|
||||||
|
|
||||||
|
|
||||||
|
4) When the user next logs onto the work station the required
|
||||||
|
initialisation will automatically be performed, and the programs in
|
||||||
|
the Staden package can be run. Refer to the help/*.MEM files for
|
||||||
|
information on the various programs. (eg help on xdap is in
|
||||||
|
help/DAP.MEM)
|
||||||
|
|
||||||
|
|
||||||
|
2. Installing the Staden Package on Other Machines
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
This is a little more difficult as you will need to remake all the
|
||||||
|
executables. Your system configuration may also mean that some changes
|
||||||
|
will need to be made, though hopefully only to makefiles. We provide
|
||||||
|
a script to aid installation (we hope!), but you may prefer to make
|
||||||
|
all the components manually.
|
||||||
|
|
||||||
|
To remake the Staden package you will require the following:
|
||||||
|
1) A Fortran77 compiler
|
||||||
|
2) An ANSI C compiler
|
||||||
|
3) X11 Release 4, including the Athena Widget libraries.
|
||||||
|
|
||||||
|
Start by following steps 1 through 3 above, to unload the sources and
|
||||||
|
perform initialisations. Read the rest of this document and the other
|
||||||
|
help files. Look at the make files. Follow your nose!
|
||||||
|
|
||||||
|
If you have any problems or successes porting our software to other
|
||||||
|
platforms we would love to hear from you. We would also appreciate
|
||||||
|
receiving your general comments on the package.
|
||||||
|
|
||||||
|
Rodger Staden (principal author)
|
||||||
|
phone: +44 223 402389 email: rs@mrc-lmba.cam.ac.uk
|
||||||
|
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||||
|
Simon Dear:
|
||||||
|
phone: +44 223 402266 email: sd@mrc-lmba.cam.ac.uk
|
||||||
|
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||||
|
James Bonfield:
|
||||||
|
phone: +44 223 402499 email: jkb@mrc-lmba.cam.ac.uk
|
||||||
|
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
3. A Quick Guide to What's on the Release Tape
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
The directory structure on this tape is very important. Once set up, the Staden
|
||||||
|
package expects things to be in a predefined place. The root directory
|
||||||
|
of the structure is referred to by the environment variable
|
||||||
|
STADENROOT. Below this there should be at least the following:
|
||||||
|
|
||||||
|
1) bin/
|
||||||
|
All executable files and scripts should be in this directory.
|
||||||
|
$STADENROOT/bin is added to the search path by the script staden.login
|
||||||
|
(or staden.profile if you are using the Bourne Shell). Though you are
|
||||||
|
not forced to keep programs here, we find it is the simplest place to
|
||||||
|
keep them.
|
||||||
|
|
||||||
|
2) help/
|
||||||
|
All on-line help files are in this directory. Files of the form *.MEM
|
||||||
|
or *.mem are formatted ascii files and can be printed for personal
|
||||||
|
reference. The script staden.login sets up many environment variables
|
||||||
|
that refer to files in this directory, as well as modifying
|
||||||
|
XFILESEARCHPATH, which is used by X programs.
|
||||||
|
|
||||||
|
3) manl/
|
||||||
|
Local manual pages for ted and the staden package are in this directory. The
|
||||||
|
environment variable MANPATH is modified in staden.login to search
|
||||||
|
here too.
|
||||||
|
|
||||||
|
4) staden.login and staden.profile
|
||||||
|
These two files are scripts to set up environment variables required
|
||||||
|
by the Staden package. C Shell users should source staden.login from
|
||||||
|
their .login file, and Bourne Shell users should "source" staden.profile
|
||||||
|
from their .profile file. See "Installing the Staden Package on
|
||||||
|
SPARCstations and DECstations", Part 3.
|
||||||
|
|
||||||
|
5) tables/
|
||||||
|
Configuration files for the Staden package are in this directory.
|
||||||
|
Various environment variables are set in staden.login to refer to
|
||||||
|
files in this directory.
|
||||||
|
|
||||||
|
Also of use are the following:
|
||||||
|
|
||||||
|
doc/ - Miscellaneous documentation.
|
||||||
|
userdata/ - Sample databases
|
||||||
|
src/ - program sources
|
||||||
|
ReleaseNotes - Notes on this and future releases
|
||||||
|
Staden_install - Installation script
|
||||||
|
SequenceLibraries - Notes on the use and installation of sequence libraries
|
||||||
|
|
||||||
|
|
||||||
|
Program Sources
|
||||||
|
---------------
|
||||||
|
|
||||||
|
All the program sources are found in the directories in $STADENROOT/src:
|
||||||
|
|
||||||
|
0) Misc/
|
||||||
|
Sources for a library of useful routines used by the staden package.
|
||||||
|
** Should be made before the programs in staden/ **
|
||||||
|
|
||||||
|
1) staden/
|
||||||
|
Sources for the Staden suite: mep, xmep, nip, xnip, nipl, pip, xpip,
|
||||||
|
pipl, sap (now superseded by dap), xsap (now superseded by xdap), sip,
|
||||||
|
xsip, sipl, dap, xdap, splitp1, splitp2, splitp3, gip and convert_project.
|
||||||
|
|
||||||
|
2) ted/
|
||||||
|
Sources for the trace display and sequence editing program ted.
|
||||||
|
|
||||||
|
3) abi/
|
||||||
|
Sample scripts and programs for handling ABI 373A data files.
|
||||||
|
|
||||||
|
4) alf/
|
||||||
|
Sample scripts and programs for handling Pharmacia A.L.F. data files.
|
||||||
|
|
||||||
|
Each directory has appropriate makefiles and README files.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4. Overview of Data Flow During Sequence Assembly
|
||||||
|
-------------------------------------------------
|
||||||
|
|
||||||
|
During a sequence assembly project the data can enter the sequence
|
||||||
|
assembly program from various routes (See Figure below).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Fluorescent Based
|
||||||
|
Sequencing Machine
|
||||||
|
Chromatogram Autoradiogram
|
||||||
|
|
||||||
|
ABI 373A Pharmacia A.L.F. |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
| alfsplit |
|
||||||
|
| | |
|
||||||
|
+--------+--------+ |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
ted (gip)
|
||||||
|
| |
|
||||||
|
+----------------+----------------+
|
||||||
|
|
|
||||||
|
|
|
||||||
|
xdap
|
||||||
|
|
||||||
|
|
||||||
|
Figure 1: Data Flow Through The Staden Suite
|
||||||
|
|
||||||
|
|
||||||
|
The Pharmacia A.L.F. data files in their original format consist of
|
||||||
|
one file for the (up to 10) samples that were on the gel. The program
|
||||||
|
alfsplit divides the file up so that each sample is in a file of
|
||||||
|
its own. From then on each gel reading can be handled individually.
|
||||||
|
Whether these files can be transferred back to the Compaq for
|
||||||
|
reprocessing is unknown.
|
||||||
|
|
||||||
|
All data from fluorescent based sequencing machines must pass through
|
||||||
|
the trace editing program ted. Ted allows vector sequence at the
|
||||||
|
5' end and unreliable data at the 3' end to be clipped. The sequence
|
||||||
|
can be edited if desired, though we should stress that this is NOT
|
||||||
|
RECOMMENDED when used in conjunction with xdap. Ted translates all
|
||||||
|
Pharmacia A.L.F. uncertainty codes to a hyphen ("-") and outputs the
|
||||||
|
clipped sequence, along with additional information on the position
|
||||||
|
and content of cutoffs, to a file.
|
||||||
|
|
||||||
|
People wanting to use xdap with ABI and Pharmacia files, but who have
|
||||||
|
written their own trace clipping software should be aware that xdap
|
||||||
|
requires information to be passed in the sequence file so that
|
||||||
|
traces can be displayed. You may want to modify your software to be
|
||||||
|
compatible with our file format. The file consists of five parts:
|
||||||
|
|
||||||
|
1) Cut off information (Optional).
|
||||||
|
Format is ";%6d%6d%6d%-4s%-16s", where
|
||||||
|
field 1 = total number of bases called
|
||||||
|
2 = number of bases in the clipped sequence at the 5' end
|
||||||
|
3 = number of bases in the sequence in this file
|
||||||
|
4 = type of trace file.
|
||||||
|
"ALF " - Pharmacia A.L.F.
|
||||||
|
"ABI " - ABI 373A
|
||||||
|
"SCF " - SCF
|
||||||
|
"PLN " - Text only
|
||||||
|
5 = name of trace file.
|
||||||
|
|
||||||
|
2) Content of the clipped sequence at the 5' end (Optional).
|
||||||
|
The sequence can extend over several lines. Each line must
|
||||||
|
begin with ";<" and should be less than 80 characters in
|
||||||
|
length.
|
||||||
|
|
||||||
|
3) Content of the clipped sequence at the 3' end (Optional).
|
||||||
|
The sequence can extend over several lines. Each line must
|
||||||
|
begin with ";>" and should be less than 80 characters in
|
||||||
|
length.
|
||||||
|
|
||||||
|
4) Initial tags for the sequence (Optional)
|
||||||
|
Format is: ";;%4s %6d %6d %s\n", where
|
||||||
|
field 1 = type of tag to be created (see $STADTABL/TAGDB)
|
||||||
|
2 = position of tag
|
||||||
|
3 = length of tag
|
||||||
|
4 = annotation for tag (optional)
|
||||||
|
This feature is only available in the program xbap, which
|
||||||
|
at the time of writing is not yet being distributed with
|
||||||
|
the package.
|
||||||
|
|
||||||
|
5) The sequence, which can extend over several lines. Each
|
||||||
|
line should be less than 80 characters in length.
|
||||||
|
|
||||||
|
Here is a sample file:
|
||||||
|
|
||||||
|
; 660 55 450ABI a21d12.s1RES
|
||||||
|
;<AGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCGGTTCCTTCTGG
|
||||||
|
;<ATATC
|
||||||
|
;>-GATAAGCTGATTTG-TTT-CCATTATGGC-GGTTTGAGCCTC-G-GGTC
|
||||||
|
;>GACCACTCGGTGTGCCAGGAAGGGGTCTGAAATTGAATGGGTTATCACTA
|
||||||
|
;>GGCGACGTTT--TTTTCAAATTCCGGGCTAAATTTTACGGC-GGA-CGGT
|
||||||
|
;>TCCG-
|
||||||
|
;;COMM 1 10 M13mp18 subclone
|
||||||
|
CAAGACATTTTGAAATACTTGGAATACTGAATCCAAGATGTGGAACATTA
|
||||||
|
GACATATCCGTGTGCTCAACAATCGACATTTGATCCACTGATGAAAATGT
|
||||||
|
TCTTCGTTTAGAATTTCTCATAGCATCAGCCACTTTTGCATAATACTCGA
|
||||||
|
TTGAAGGTTCATGGAAAAAGCTGCGTAGAAGGCATGTCATTGTGCTTACG
|
||||||
|
AGCCATTTCGGATATCTTGTGAATTTAGCAGGAAGTTCTGTAACTGGTTG
|
||||||
|
GAATTCAAATATATCAGTTCTTCTTCCTGGATCTCGTCCTTTTTGCACTA
|
||||||
|
AAACCATTGCGATTGCATCCGGATTCTGAGTAAGAGCCACTACAGCTTTA
|
||||||
|
TGATACAGGCTCTTGTTATTCCTTTCGTGCTCGAATGGGAACTTTCCAGT
|
||||||
|
GGCACAAAAATATAGTGTACATCCCAGAGCCCATAGATCACATGTTCCGA
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
5. Acknowledgements
|
||||||
|
|
||||||
|
We would like to thank Applied Biosystems, Inc. and Pharmacia LKB
|
||||||
|
Biotechnology for their cooperation in agreeing to our routines
|
||||||
|
accessing the data files of their fluorescent sequencing machines.
|
||||||
|
|
||||||
|
373A sequence data file formats are the exclusive property of Applied
|
||||||
|
Biosystems, Inc.
|
||||||
|
|
||||||
|
ALF sequence data file formats are the exclusive property of Pharmacia
|
||||||
|
LKB Biotechnology, Inc.
|
||||||
|
|
190
ReleaseNotes
Normal file
190
ReleaseNotes
Normal file
|
@ -0,0 +1,190 @@
|
||||||
|
Release Notes for Staden Package 1992.3
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
Installation guide
|
||||||
|
------------------
|
||||||
|
|
||||||
|
The file doc/install.PS contains installation instructions.
|
||||||
|
|
||||||
|
|
||||||
|
Manual for the Staden Package
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
There is now a 135 page manual on the Staden Package. It is currently
|
||||||
|
being distributed as a Word4 document on a Macintosh floppy disk.
|
||||||
|
|
||||||
|
|
||||||
|
Feedback and bug reports
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
We welcome comments and suggestions on all aspects of the package and are
|
||||||
|
best contacted by email: rs@uk.ac.cam.mrc-lmb and sd@uk.ac.cam.mrc-lmb.
|
||||||
|
All abnormal terminations are bugs and we would like to be told of them
|
||||||
|
so they can be fixed. We recommend that you request an update at least once
|
||||||
|
a year as the package is evolving very rapidly.
|
||||||
|
|
||||||
|
Note due to popular demand we have decided to release new routines earlier
|
||||||
|
than in the past so please report bugs. The documentation for additions may
|
||||||
|
be sparser than before, or non-existent, but if there is something with which
|
||||||
|
you need help, email us.
|
||||||
|
|
||||||
|
|
||||||
|
Changes this release
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
|
||||||
|
The assembly programs bap and xbap have several new functions:
|
||||||
|
1. Find single stranded regions and try to fill them with "hidden"
|
||||||
|
data from the adjacent readings.
|
||||||
|
2. Find single stranded regions (includes ends of contigs) and
|
||||||
|
select primers and templates for double stranding them (joining
|
||||||
|
them).
|
||||||
|
3. Pre assembly screening for readings to find those that align
|
||||||
|
best. Optionally the hidden data can also be included in the
|
||||||
|
comparison (part of assembly function).
|
||||||
|
4. Find pairs of readings taken from opposite ends of the same
|
||||||
|
template (ie forward and reverse read pairs). List or plot their
|
||||||
|
positions.
|
||||||
|
5. A new function to check that readings have been assembled into
|
||||||
|
the correct positions. It aligns the hidden (previously termed "unused")
|
||||||
|
parts of readings with the consensus they overlap to see how well
|
||||||
|
they align. Poor alignments are reported.
|
||||||
|
6. During assembly each reading is now allowed to match up to 100
|
||||||
|
different places.
|
||||||
|
|
||||||
|
It might be guessed from the above that we are trying to improve our
|
||||||
|
ability to deal with the assembly of human data. Hence, also the next
|
||||||
|
addition.
|
||||||
|
|
||||||
|
A new experimental program (rep) for screening readings for Alu
|
||||||
|
sequences prior to assembly. The Alu containing segments are tagged
|
||||||
|
so they can be seen in the contig editor. A library of Alu sequences
|
||||||
|
is included in /tables/alus. The program is quite slow as it compares
|
||||||
|
each reading in both orientations with all of the Alu sequences (126
|
||||||
|
of them) in order to find the best match. Only time and more data will
|
||||||
|
tell how sensitive it is, and whether the current default score of 0.6
|
||||||
|
is "correct". BEWARE rep modifies the original reading files to include
|
||||||
|
the tag information. The only information is in /help/alu.help
|
||||||
|
|
||||||
|
A new program for extracting sets of sequences and their annotations
|
||||||
|
from the sequence libraries (lip). The only information is in
|
||||||
|
/help/lip.help
|
||||||
|
|
||||||
|
Changes to the xterm user interface. These routines have been completely
|
||||||
|
rewritten. One addition is that now ?? in response to a question will
|
||||||
|
allow the user to get help on any function in a program. help is also
|
||||||
|
improved in the x version.
|
||||||
|
|
||||||
|
|
||||||
|
Changes last release
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
|
||||||
|
DAP, XDAP have been replaced by BAP and XBAP (see below)
|
||||||
|
|
||||||
|
A new function for examining repeats has been added to NIP
|
||||||
|
|
||||||
|
A new repeat search has been added to SIP
|
||||||
|
|
||||||
|
Some outputs have been changed to produce FASTA format files
|
||||||
|
instead of PIR.
|
||||||
|
|
||||||
|
MEP now allows searches for motifs in which any 8 out of a string
|
||||||
|
of 20 can be switched on.
|
||||||
|
|
||||||
|
The manual has been updated.
|
||||||
|
|
||||||
|
Keyword and author searches on sequence libraries
|
||||||
|
|
||||||
|
All programs that use the libraries can now perform author
|
||||||
|
and keyword searches on all libraries (only nip did so before).
|
||||||
|
|
||||||
|
Postscript output
|
||||||
|
|
||||||
|
All graphics can now be saved to disk in postscript form by
|
||||||
|
use of a sub-option in "Redirect output".
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Sequence assembly
|
||||||
|
|
||||||
|
BAP, XBAP replace DAP and XDAP. A program to convert DAP databases to BAP
|
||||||
|
databases (convert) is included. BAP databases can contain up to 8000 readings
|
||||||
|
and a consensus of 500,000 bases. A minor edit and recompilation will allow
|
||||||
|
up to 99,999 readings. The space is used more efficiently now as the databases
|
||||||
|
grow as the number of readings increases. Reading names can be 16 characters
|
||||||
|
in length. In addition:
|
||||||
|
|
||||||
|
1) Assembly is 4 times as fast as in the DAP.
|
||||||
|
|
||||||
|
2) Find internal joins is 5 times as fast and now brings up the join editor
|
||||||
|
with the two contigs in the correct orientation and aligned.
|
||||||
|
|
||||||
|
3) The assembly routines align pads better, plus a new automatic function can
|
||||||
|
also be used to align them prior to editing.
|
||||||
|
|
||||||
|
4) The contig editor has been greatly speeded up and its functionality
|
||||||
|
has been enhanced.
|
||||||
|
|
||||||
|
5) A routine for selecting oligos for primer walking is included.
|
||||||
|
|
||||||
|
6) A new routine allows batches of readings to be removed from a database.
|
||||||
|
|
||||||
|
7) We have also included routines for making SCF files, for getting the
|
||||||
|
sequence from SCF files, and one for marking the poor quality data in
|
||||||
|
readings. See the manual.
|
||||||
|
|
||||||
|
Sequence library formats
|
||||||
|
|
||||||
|
The standard sequence library indexing method is now that used on the
|
||||||
|
EMBL CD-ROM. The libraries (EMBL nucleotide and SWISSPROT protein) can be
|
||||||
|
left on the CD-ROM or copied to disk. We include in the package programs
|
||||||
|
for creating this type of index for EMBL updates, PIR in codata format,
|
||||||
|
NRL3D and GenBank. If the indexes are created all programs can read all
|
||||||
|
these libraries. Programs and scripts for this task are contained in the
|
||||||
|
directory indexseqlibs.
|
||||||
|
The keyword and author searches are particularly fast and the
|
||||||
|
keyword index is based on ALL text in the files - not just the keywords.
|
||||||
|
|
||||||
|
Feature table formats
|
||||||
|
|
||||||
|
The programs now use the new feature table format common to EMBL
|
||||||
|
and GenBank, but retain the old format for SWISSPROT which has not yet
|
||||||
|
changed.
|
||||||
|
|
||||||
|
For details of the above see file SequenceLibraries.
|
||||||
|
|
||||||
|
Pattern searches
|
||||||
|
|
||||||
|
Pipl and Nipl now have the facility to find only the best scoring
|
||||||
|
match for each sequence. The prompt is "? report all matches", so typing
|
||||||
|
only return means all matches will be shown and typing n means only the
|
||||||
|
highest scoring will be reported. It is particularly useful when employed
|
||||||
|
to create alignments. The corresponding help file has not been updated.
|
||||||
|
Also to incorporate long unix file names the pattern files no longer include
|
||||||
|
the annotation "filename".
|
||||||
|
|
||||||
|
|
||||||
|
Nip
|
||||||
|
|
||||||
|
Option 38 in nip "translate and list" has been removed as the
|
||||||
|
more flexible routines of option 39 incorporate all its functionality. Many
|
||||||
|
options that relate to feature tables have been modified but their help files
|
||||||
|
are not yet up to date.
|
||||||
|
|
||||||
|
|
||||||
|
Vep
|
||||||
|
|
||||||
|
A program (vep) for automatic excising of vector (either
|
||||||
|
sequencing vector or cosmid vector) sequences from readings is now
|
||||||
|
included in the package.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Rodger Staden, Simon Dear, James Bonfield
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
420
SequenceLibraries
Normal file
420
SequenceLibraries
Normal file
|
@ -0,0 +1,420 @@
|
||||||
|
Notes on library handling
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Contents of this document:
|
||||||
|
|
||||||
|
I) Introduction
|
||||||
|
II) Details of file organisation and use
|
||||||
|
III) Options currently available
|
||||||
|
IV) Installation guide
|
||||||
|
V) New feature table handling routines
|
||||||
|
VI) Indexing the sequence libraries
|
||||||
|
|
||||||
|
|
||||||
|
Section I Introduction
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Available sequence libraries
|
||||||
|
|
||||||
|
There are a number of different sequence libraries for nucleotide and protein:
|
||||||
|
PIR, GenBank, EMBL, Swissprot, and the Japanese Databank. Even after all the
|
||||||
|
years of their existence they still use different formats for their data. This
|
||||||
|
provides tedious and unrewarding work for software developers. Recently EMBL
|
||||||
|
and GenBank agreed a new and common way of writing their feature tables, which
|
||||||
|
is a great help, although the rest of their format is different. Swissprot still
|
||||||
|
uses the old embl style feature table format and PIR yet another.
|
||||||
|
|
||||||
|
All the libraries distribute their data on magnetic tapes and EMBL and GenBank
|
||||||
|
have started to distribute on cdrom. The EMBL cdrom also contains Swissprot.
|
||||||
|
The GenBank and EMBL cdroms use different formats and have different contents.
|
||||||
|
The EMBL cdrom has useful indexes sorted alphabetically: those for entry name
|
||||||
|
and accession number, brief descriptions, keywords and freetext indexes are
|
||||||
|
already available and others are expected. These indexes point to the data for
|
||||||
|
each entry, and can be used to extract the data for any entry quickly.
|
||||||
|
|
||||||
|
Moving to unix
|
||||||
|
|
||||||
|
The VAX version of our package used PIR format which meant reformatting all
|
||||||
|
libraries other than PIR into that format. This required, at least
|
||||||
|
temporarily, having space for two copies of the libraries, and quite a lot of
|
||||||
|
cpu time. The software for doing this was provided by PIR, and is very VAX
|
||||||
|
specific and hence will not run under unix. For the unix version of our package
|
||||||
|
I have decided to use the EMBL cdrom format and its indexes as the primary
|
||||||
|
format. The current programs also support the use of PIR format libraries
|
||||||
|
without indexes - ie just the sequence and annotation files.
|
||||||
|
|
||||||
|
Indexing GenBank, EMBL updates, PIR and NRL3D
|
||||||
|
|
||||||
|
We include programs to create indexes for the above libraries. See below and
|
||||||
|
the README file in indexseqlibs. The programs can read all the above libraries
|
||||||
|
once the indexes are created. The indexing programs index the data in its
|
||||||
|
distributed form: WE DO NOT REFORMAT OR COPY THE LIBRARIES but simply create
|
||||||
|
indexes to the original files. Obviously this saves a lot of disk space, and
|
||||||
|
for those content to use only embl and swissprot from the cdrom, almost no disk
|
||||||
|
space is required. We haven't tried it yet, but for genbank on cdrom, the only
|
||||||
|
extra disk space required would be for the indexes.
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Section II Details of file organisation and use
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
|
The following strategy has been used to try to deal with alternate
|
||||||
|
and changing sequence library formats.
|
||||||
|
|
||||||
|
1) libraries are described at several levels:
|
||||||
|
|
||||||
|
a) the top level file is a list of available libraries which contains:
|
||||||
|
the library type, the name of the file containing the name of
|
||||||
|
each library's individual files, and the prompt to appear on
|
||||||
|
the user's screen: LTYPE LOGNAM PROMPT
|
||||||
|
|
||||||
|
b) the file containing the names of the library's individual files
|
||||||
|
contains flags to define the file types: FTYPE LOGNAM
|
||||||
|
|
||||||
|
c) the individual library files
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
2) library types handled:
|
||||||
|
|
||||||
|
a) EMBL/SWISSPROT in distributed format with cdrom index format
|
||||||
|
LTYPE = 'A'
|
||||||
|
b) GenBank in distributed format with cdrom index format LTYPE = 'C'
|
||||||
|
c) PIR/NRL3D in CODATA format with cdrom index format LTYPE = 'B'
|
||||||
|
d) PIR/NBRF .seq files can be read sequentially as "personal files
|
||||||
|
in PIR format" and do not appear in the list of available libraries.
|
||||||
|
e) FASTA format files can be read sequentially as "personal files
|
||||||
|
in FASTA format" and do not appear in the list of available
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
3) EMBL, SWISSPROT and other libraries for which EMBL-style indexes have been
|
||||||
|
created
|
||||||
|
|
||||||
|
current file types:
|
||||||
|
|
||||||
|
A division.lookup
|
||||||
|
B entryname.index
|
||||||
|
C accession.target
|
||||||
|
D accession.hits
|
||||||
|
E brief description
|
||||||
|
F freetext.target
|
||||||
|
G freetext.hits
|
||||||
|
H author.target
|
||||||
|
I author.hits
|
||||||
|
|
||||||
|
|
||||||
|
Library list
|
||||||
|
level 1
|
||||||
|
|
|
||||||
|
|
|
||||||
|
-----------------------------------------------------------
|
||||||
|
| | |
|
||||||
|
lib 1 file list lib 2 file list lib 3 file list
|
||||||
|
level 2
|
||||||
|
| |
|
||||||
|
-------- ---------
|
||||||
|
level 3
|
||||||
|
file 1 file 1
|
||||||
|
file 2 file 2
|
||||||
|
. .
|
||||||
|
file n file n
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
Example
|
||||||
|
-------
|
||||||
|
|
||||||
|
Level 1
|
||||||
|
|
||||||
|
File name: sequence.libs
|
||||||
|
Environment variable: SEQUENCELIBRARIES
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
A EMBLFILES EMBL nucleotide library ! in cdrom format
|
||||||
|
C GENBFILES GenBank nucleotide library!
|
||||||
|
A SWISSFILES SWISSPROT protein library! in cdrom format
|
||||||
|
B PIRFILES PIR protein library!
|
||||||
|
B NRL3DFILES NRL3D protein library!
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
The libraries have types A,B,C. The logical names are EMBLFILES and
|
||||||
|
SWISSFILES, etc and the prompts are 'EMBL nucleotide library' and
|
||||||
|
'SWISSPROT protein library', etc. Anything to the right of a ! is a comment.
|
||||||
|
|
||||||
|
Level 2: the list of library files (using embl as an example)
|
||||||
|
|
||||||
|
File name: embl.files
|
||||||
|
Environment variable: EMBLFILES
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
A EMBLDIVPATH/embl_div.lkp
|
||||||
|
B EMBLINDPATH/entrynam.idx
|
||||||
|
C EMBLINDPATH/acnum.trg
|
||||||
|
D EMBLINDPATH/acnum.hit
|
||||||
|
E EMBLINDPATH/brief.idx
|
||||||
|
F EMBLINDPATH/freetext.trg
|
||||||
|
G EMBLINDPATH/freetext.hit
|
||||||
|
H EMBLINDPATH/author.trg
|
||||||
|
I EMBLINDPATH/author.hit
|
||||||
|
|
||||||
|
|
||||||
|
Level 3: the sequence and annotation files (eg 15 for embl, 1 for swissprot).
|
||||||
|
|
||||||
|
Paths and file names:
|
||||||
|
|
||||||
|
EMBLPATH/bb.dat
|
||||||
|
EMBLPATH/fun.dat
|
||||||
|
EMBLPATH/inv.dat
|
||||||
|
EMBLPATH/mam.dat
|
||||||
|
EMBLPATH/org.dat
|
||||||
|
EMBLPATH/patent.dat
|
||||||
|
EMBLPATH/phg.dat
|
||||||
|
EMBLPATH/pln.dat
|
||||||
|
EMBLPATH/pri.dat
|
||||||
|
EMBLPATH/pro.dat
|
||||||
|
EMBLPATH/rod.dat
|
||||||
|
EMBLPATH/syn.dat
|
||||||
|
EMBLPATH/una.dat
|
||||||
|
EMBLPATH/vrl.dat
|
||||||
|
EMBLPATH/vrt.dat
|
||||||
|
|
||||||
|
All files from the division lookup file down are exactly as they appear on the
|
||||||
|
cdrom. The division lookup file relates numbers stored in the indexes to
|
||||||
|
actual division (or data) files stored on the disk. We rewrite it so the
|
||||||
|
directory structure and file names can be chosen locally. Its format is
|
||||||
|
I6,1x,A. An example is given below.
|
||||||
|
|
||||||
|
Division lookup file
|
||||||
|
|
||||||
|
File name: STADTABL/embl_div.lkp
|
||||||
|
Environment variable path EMBLDIVPATH
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
1 EMBLPATH/bb.dat
|
||||||
|
2 EMBLPATH/fun.dat
|
||||||
|
3 EMBLPATH/inv.dat
|
||||||
|
4 EMBLPATH/mam.dat
|
||||||
|
5 EMBLPATH/org.dat
|
||||||
|
6 EMBLPATH/patent.dat
|
||||||
|
7 EMBLPATH/phg.dat
|
||||||
|
8 EMBLPATH/pln.dat
|
||||||
|
9 EMBLPATH/pri.dat
|
||||||
|
10 EMBLPATH/pro.dat
|
||||||
|
11 EMBLPATH/rod.dat
|
||||||
|
12 EMBLPATH/syn.dat
|
||||||
|
13 EMBLPATH/una.dat
|
||||||
|
14 EMBLPATH/vrl.dat
|
||||||
|
15 EMBLPATH/vrt.dat
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
Section III Options currently available
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
Facilities currently offered in nip,pip,sip,nipl,pipl,sipl:
|
||||||
|
|
||||||
|
Get a sequence by knowing its entry name
|
||||||
|
Get a sequence's annotation by knowing its entry name
|
||||||
|
Get an entry name by knowing its accession number
|
||||||
|
Search the freetext index
|
||||||
|
Search the author index
|
||||||
|
|
||||||
|
Facilities currently offered in nipl,pipl,sipl:
|
||||||
|
|
||||||
|
Search whole library
|
||||||
|
Search only a list of entry names
|
||||||
|
Search all but a list of entry names
|
||||||
|
|
||||||
|
Outline of each type of operation
|
||||||
|
|
||||||
|
Looking for an entry by name: the programs will open the library description
|
||||||
|
file and read the names of its files and their file types. Then they will open
|
||||||
|
the entrynam.idx file, and find the sequence offset, annotation offset and
|
||||||
|
division number. Then open the division lookup file, find the file name for the
|
||||||
|
division required, open that file, seek to the required byte and get the data.
|
||||||
|
|
||||||
|
Looking for an entry by accession number: the programs will open the library
|
||||||
|
description file and read the names of its files and their file types. Then
|
||||||
|
they open the acnum.trg and acnum.hit files. The acnum.trg file is read to find
|
||||||
|
the accession number and a pointer to the acnum.hit file and the number of
|
||||||
|
hits. That file is read and the corresponding entry names displayed. At
|
||||||
|
present no further action is performed, although I expect to list out the
|
||||||
|
titles for the entries found.
|
||||||
|
|
||||||
|
Searching the whole of a library: the programs will open the library
|
||||||
|
description file and read the names of its files and their file types. Then
|
||||||
|
they open the division lookup file, read the names and numbers of the sequence
|
||||||
|
files, open all of them, then open the entryname file. Then the library is
|
||||||
|
processed sequentially by reading the entry names, their sequence offsets and
|
||||||
|
division numbers from the entry names file, and then the sequence from the
|
||||||
|
appropriate data file.
|
||||||
|
|
||||||
|
Searching the whole of a library using a list of entry names to include: the
|
||||||
|
programs will open the library description file and read the names of its files
|
||||||
|
and their file types. Then they open the division lookup file, read the names
|
||||||
|
and numbers of the sequence files, open all of them, then open the entryname
|
||||||
|
file. Then the library is processed by reading the list of entry names and
|
||||||
|
finding the names in the entry names file to get their sequence offsets and
|
||||||
|
division numbers, and then the sequence from the appropriate data file. It will
|
||||||
|
stop when it reaches the end of the list of entry names. The list of entry
|
||||||
|
names can be in any order.
|
||||||
|
|
||||||
|
Searching the whole of a library using a list of entry names to exclude: the
|
||||||
|
programs will open the library description file and read the names of its files
|
||||||
|
and their file types. Then they open the division lookup file, read the names
|
||||||
|
and numbers of the sequence files, open all of them, then open the entryname
|
||||||
|
file. Then the library is processed sequentially by reading the list of entry
|
||||||
|
names, reading the next entry in the entry names file to make sure it does not
|
||||||
|
match, then getting the sequence offsets and division numbers, and then the
|
||||||
|
sequence from the appropriate data file. If the next name matches the name on
|
||||||
|
the list of entry names, it will be skipped, and the next name to exclude read.
|
||||||
|
If the list of excluded names is finished the rest of the library is searched
|
||||||
|
sequentially. The list of entry names must be in the same order as those in the
|
||||||
|
library (i.e. sorted alphabetically).
|
||||||
|
|
||||||
|
Searching a whole library using a PIR format file is performed by reading it
|
||||||
|
sequentially. If a list of entry names is used it must be in the same order as
|
||||||
|
the entries in the library file.
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Section IV Installation guide
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
EMBL CDROM
|
||||||
|
|
||||||
|
The data can be left on the cdrom or copied to hard disk. The files
|
||||||
|
staden.login and staden.profile source the file $STADTABL/libraries.config.csh
|
||||||
|
and $STADTABL/libraries.config.sh respectively. Refer to these files to see what
|
||||||
|
is required to install, add or move a sequence library that you want to be used
|
||||||
|
by the programs.
|
||||||
|
|
||||||
|
Other libraries (PIR, Genbank, EMBL updates)
|
||||||
|
|
||||||
|
Create the indexes then edit the files that tell the programs where the data is
|
||||||
|
stored. The files staden.login and staden.profile source the file
|
||||||
|
$STADTABL/libraries.config. Refer to this file to see what is required to
|
||||||
|
install, add or move a sequence library that you want to be used by the
|
||||||
|
programs.
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
Section V New feature table handling facilities
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
|
As mentioned above EMBL and GenBank have recently introduced new feature tables
|
||||||
|
for annotating the sequences. They are a great improvement on the previous ones
|
||||||
|
and, among other things, now permit correct translation of spliced genes.
|
||||||
|
Various options within nip have been added or modified to take advantage of
|
||||||
|
these changes. The routine to translate DNA to protein and write the protein
|
||||||
|
to disk now gives correct results for spliced genes. The routine to translate
|
||||||
|
DNA to protein and display the two together now gives correct translations
|
||||||
|
except for the amino acids spanning intron/exon junctions. The routine to plot
|
||||||
|
maps from feature tables can use the new style. The open reading frame finding
|
||||||
|
routine writes out its results in the new style. The routine that finds open
|
||||||
|
reading frames and writes their translations to disk also writes a title in the
|
||||||
|
form of a new style feature table entry. The feature table format output from
|
||||||
|
the pattern searches in nip also uses the new style.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Section VI Indexing the sequence libraries
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
|
We handle EMBL, SwissProt, and GenBank in their distributed format, plus
|
||||||
|
PIR and NRL3D in codata format. All programs and scripts are in directory
|
||||||
|
indexseqlibs.
|
||||||
|
|
||||||
|
Currently we produce entryname index, accession number index, freetext index,
|
||||||
|
and brief index (brief index contains the entry name, the primary accession
|
||||||
|
number, the sequence length and an 80 character description).
|
||||||
|
|
||||||
|
To produce any of the indexes requires the creation of several intermediate
|
||||||
|
files and the indexing programs are written so that the intermediate files
|
||||||
|
are the same for all libraries. This means that only the programs that read
|
||||||
|
the distributed form of each library need to be unique to that library, and
|
||||||
|
all the other processing programs can be used for all libraries.
|
||||||
|
|
||||||
|
|
||||||
|
However, even though the indexes have the same format, programs (like nip)
|
||||||
|
that read the libraries need to treat each library separately because their
|
||||||
|
actual contents are written differently.
|
||||||
|
|
||||||
|
Making the entry name index
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Common program entryname2
|
||||||
|
|
||||||
|
EMBL emblentryname1
|
||||||
|
SwissProt emblentryname1
|
||||||
|
|
||||||
|
GenBank genbentryname1
|
||||||
|
|
||||||
|
PIR pirentryname1
|
||||||
|
NRL3D pirentryname1
|
||||||
|
|
||||||
|
|
||||||
|
Making the accession number index
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
Common programs access2 access3 access4
|
||||||
|
|
||||||
|
EMBL emblaccess1
|
||||||
|
SwissProt emblaccess1
|
||||||
|
|
||||||
|
GenBank genbaccess1
|
||||||
|
|
||||||
|
PIR piraccess1 piraccess2
|
||||||
|
NRL3D No accession numbers
|
||||||
|
|
||||||
|
Making the brief index
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Common program title2
|
||||||
|
|
||||||
|
EMBL embltitle1
|
||||||
|
SwissProt embltitle1
|
||||||
|
|
||||||
|
GenBank genbtitle1
|
||||||
|
|
||||||
|
PIR pirtitle1 pirtitle2 (pir3 has no accession numbers)
|
||||||
|
NRL3D pirtitle2
|
||||||
|
|
||||||
|
Scripts
|
||||||
|
-------
|
||||||
|
|
||||||
|
emblentryname.script
|
||||||
|
emblaccession.script
|
||||||
|
embltitle.script
|
||||||
|
|
||||||
|
swissentryname.script
|
||||||
|
swissaccession.script
|
||||||
|
swisstitle.script
|
||||||
|
|
||||||
|
genbentryname.script
|
||||||
|
genbaccession.script
|
||||||
|
genbtitle.script
|
||||||
|
|
||||||
|
pirentryname.script
|
||||||
|
piraccession.script
|
||||||
|
pirtitle.script
|
||||||
|
|
||||||
|
nrl3dentryname.script
|
||||||
|
nrl3dtitle.script
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
453
Staden_install-alpha
Normal file
453
Staden_install-alpha
Normal file
|
@ -0,0 +1,453 @@
|
||||||
|
#! /bin/csh -f
|
||||||
|
#
|
||||||
|
# staden_install - version 2.4
|
||||||
|
#
|
||||||
|
# This is a prototype installation program.
|
||||||
|
#
|
||||||
|
# 9 March 1992
|
||||||
|
# Modified for installation on Sun, Alliant, etc
|
||||||
|
# No longer install 2rs
|
||||||
|
#
|
||||||
|
# 20 November 1992
|
||||||
|
# Now includes convert, cop, frog, getMCH and scf
|
||||||
|
#
|
||||||
|
# 25 November 1992
|
||||||
|
# SGI supported
|
||||||
|
#
|
||||||
|
# 19 May 1993
|
||||||
|
# DEC Alpha, Solaris supported
|
||||||
|
#
|
||||||
|
# Written by sd@uk.ac.cam.mrc-lmb
|
||||||
|
#
|
||||||
|
|
||||||
|
# prelim
|
||||||
|
set prog = $0 ; set prog = $prog:t
|
||||||
|
|
||||||
|
# Machines supported: al sun dec sgi alpha solaris
|
||||||
|
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||||
|
set MACHINE = alpha
|
||||||
|
|
||||||
|
# For local (MRC-LMB) setup only
|
||||||
|
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||||
|
set LOCAL = NO
|
||||||
|
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -n "Staden Package installation procedure - "
|
||||||
|
switch (${MACHINE})
|
||||||
|
case "al":
|
||||||
|
echo "Alliant FX/2800 Concentrix version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "sun":
|
||||||
|
echo "SunOS version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "dec":
|
||||||
|
echo "DEC Ultrix (mips) version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "sgi":
|
||||||
|
echo "Silicon Graphics Iris version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "alpha":
|
||||||
|
echo "DEC Alpha OSF/1 version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "solaris":
|
||||||
|
echo "Solaris version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
default:
|
||||||
|
echo "Panic. Unknown version"
|
||||||
|
exit 1
|
||||||
|
endsw
|
||||||
|
echo ""
|
||||||
|
echo "* starting initialization...please wait."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Binary fork of source directory
|
||||||
|
if ($LOCAL == "YES") then
|
||||||
|
set DIR_BINARIES = ${MACHINE}-binaries
|
||||||
|
set DIR_PROGS = ${MACHINE}-bin
|
||||||
|
else
|
||||||
|
set DIR_BINARIES = .
|
||||||
|
set DIR_PROGS = bin
|
||||||
|
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||||
|
endif
|
||||||
|
|
||||||
|
init:
|
||||||
|
# Set useful shell variables
|
||||||
|
set YES="YES";
|
||||||
|
set NO="NO"
|
||||||
|
|
||||||
|
# set/unset some .cshrc envs.
|
||||||
|
unset noclobber
|
||||||
|
set noglob
|
||||||
|
|
||||||
|
# set interrupt trap
|
||||||
|
onintr end_failure
|
||||||
|
|
||||||
|
# Make dir command
|
||||||
|
set MKDIR = "mkdir"
|
||||||
|
|
||||||
|
# Copy command
|
||||||
|
set CP = "cp -p"
|
||||||
|
|
||||||
|
# Install command
|
||||||
|
#set INSTALL = "install"
|
||||||
|
#set INSTALL = "mv"
|
||||||
|
set INSTALL = "cp"
|
||||||
|
|
||||||
|
# Set up default responses
|
||||||
|
set DEF_STADEN_ROOT = `pwd`
|
||||||
|
|
||||||
|
set DEF_REQ_NONX = "$YES"
|
||||||
|
set DEF_REQ_X = "$YES"
|
||||||
|
set DEF_REQ_TED = "$YES"
|
||||||
|
set DEF_REQ_MISC = "$YES"
|
||||||
|
|
||||||
|
# directories
|
||||||
|
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||||
|
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||||
|
set DIR_MISC = $DIR_SRC/Misc
|
||||||
|
set DIR_STADEN = $DIR_SRC/staden
|
||||||
|
set DIR_TED = $DIR_SRC/ted
|
||||||
|
set DIR_ABI = $DIR_SRC/abi
|
||||||
|
set DIR_ALF = $DIR_SRC/alf
|
||||||
|
set DIR_BAP = $DIR_SRC/bap
|
||||||
|
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||||
|
set DIR_CONVERT = $DIR_SRC/convert
|
||||||
|
set DIR_COP = $DIR_SRC/cop
|
||||||
|
set DIR_FROG = $DIR_SRC/frog
|
||||||
|
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||||
|
set DIR_SCF = $DIR_SRC/scf
|
||||||
|
|
||||||
|
|
||||||
|
main:
|
||||||
|
|
||||||
|
|
||||||
|
preamble:
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "* Please answer the following questions."
|
||||||
|
echo " Default answers to questions are given in square brackets."
|
||||||
|
echo " If you require help at any stage respond with a ? to the question."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_staden_root:
|
||||||
|
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||||
|
|
||||||
|
ask_require_nonx_progs:
|
||||||
|
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||||
|
set ANS_REQ_NONX = $<
|
||||||
|
if ("$ANS_REQ_NONX" == "?") then
|
||||||
|
echo "* If you do not have X windows on your system you will require"
|
||||||
|
echo " these. However, you will require Tektronics terminal emulation."
|
||||||
|
echo " If you do not require all of the non-X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
else if ("$ANS_REQ_NONX" != "") then
|
||||||
|
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||||
|
set ANS_REQ_NONX=$YES
|
||||||
|
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||||
|
set ANS_REQ_NONX=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||||
|
endif
|
||||||
|
|
||||||
|
ask_require_x_progs:
|
||||||
|
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||||
|
set ANS_REQ_X = $<
|
||||||
|
if ("$ANS_REQ_X" == "?") then
|
||||||
|
echo "* These are the programs that require X windows."
|
||||||
|
echo " If you do not require all of the X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
goto ask_require_x_progs
|
||||||
|
else if ("$ANS_REQ_X" != "") then
|
||||||
|
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||||
|
set ANS_REQ_X=$YES
|
||||||
|
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||||
|
set ANS_REQ_X=$NO
|
||||||
|
else
|
||||||
|
# Unrecognised answer: re-ask this (X programs) question, not the previous one
goto ask_require_x_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_X=$DEF_REQ_X
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_ted:
|
||||||
|
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||||
|
set ANS_REQ_TED = $<
|
||||||
|
if ("$ANS_REQ_TED" == "?") then
|
||||||
|
echo "* This is the trace editor program. It allows you to look at"
|
||||||
|
echo " traces obtained from automated fluorescent sequencing machines."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_ted
|
||||||
|
else if ("$ANS_REQ_TED" != "") then
|
||||||
|
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||||
|
set ANS_REQ_TED=$YES
|
||||||
|
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||||
|
set ANS_REQ_TED=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_ted
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_TED=$DEF_REQ_TED
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_misc:
|
||||||
|
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||||
|
set ANS_REQ_MISC = $<
|
||||||
|
if ("$ANS_REQ_MISC" == "?") then
|
||||||
|
echo "* Other programs include:"
|
||||||
|
echo " alfsplit"
|
||||||
|
echo " getABISampleName"
|
||||||
|
echo ""
|
||||||
|
goto ask_require_misc
|
||||||
|
else if ("$ANS_REQ_MISC" != "") then
|
||||||
|
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||||
|
set ANS_REQ_MISC=$YES
|
||||||
|
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||||
|
set ANS_REQ_MISC=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_misc
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
time_taken_warning:
|
||||||
|
echo ""
|
||||||
|
echo "The installation procedure is now ready to start."
|
||||||
|
echo ""
|
||||||
|
echo "**** Warning:"
|
||||||
|
echo " The installation will take considerable time to complete. If you"
|
||||||
|
echo " are installing the whole Staden Package from scratch it could"
|
||||||
|
echo " take as long as an hour for all exectuables to be compiled and"
|
||||||
|
echo " installed."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_goahead:
|
||||||
|
echo -n "Proceed with the installation [YES]? "
|
||||||
|
set ANSWER=$<
|
||||||
|
if ("$ANSWER" == "?") then
|
||||||
|
echo "* Final confirmation to proceed with the installation. Answer"
|
||||||
|
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||||
|
echo ""
|
||||||
|
goto ask_goahead
|
||||||
|
else if ("$ANSWER" != "") then
|
||||||
|
if ("$ANSWER" =~ [nN]*) then
|
||||||
|
goto chickens_exit
|
||||||
|
else if ("$ANSWER" !~ [yY]*) then
|
||||||
|
goto ask_goahead
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
installation_proper:
|
||||||
|
|
||||||
|
# make binaries directory if it doesn't exist
|
||||||
|
|
||||||
|
if (! -d $DIR_BIN) then
|
||||||
|
$MKDIR $DIR_BIN
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||||
|
echo ""
|
||||||
|
echo "+ Compiling miscellaneous library"
|
||||||
|
|
||||||
|
pushd $DIR_MISC > /dev/null
|
||||||
|
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_NONX" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing non X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE nprogs lprogs
|
||||||
|
$INSTALL mep $DIR_BIN
|
||||||
|
$INSTALL nip $DIR_BIN
|
||||||
|
$INSTALL pip $DIR_BIN
|
||||||
|
$INSTALL sap $DIR_BIN
|
||||||
|
$INSTALL sapf $DIR_BIN
|
||||||
|
$INSTALL sip $DIR_BIN
|
||||||
|
$INSTALL splitp1 $DIR_BIN
|
||||||
|
$INSTALL splitp2 $DIR_BIN
|
||||||
|
$INSTALL splitp3 $DIR_BIN
|
||||||
|
$INSTALL sethelp $DIR_BIN
|
||||||
|
$INSTALL gip $DIR_BIN
|
||||||
|
$INSTALL nipl $DIR_BIN
|
||||||
|
$INSTALL pipl $DIR_BIN
|
||||||
|
$INSTALL sipl $DIR_BIN
|
||||||
|
$INSTALL dap $DIR_BIN
|
||||||
|
$INSTALL nipf $DIR_BIN
|
||||||
|
$INSTALL vep $DIR_BIN
|
||||||
|
$INSTALL rep $DIR_BIN
|
||||||
|
$INSTALL lip $DIR_BIN
|
||||||
|
#$INSTALL convert_project $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE bap
|
||||||
|
$INSTALL bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_TED" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing Trace editor"
|
||||||
|
|
||||||
|
pushd $DIR_TED > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE ted
|
||||||
|
$INSTALL ted $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_X" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xprogs
|
||||||
|
$INSTALL xmep $DIR_BIN
|
||||||
|
$INSTALL xnip $DIR_BIN
|
||||||
|
$INSTALL xpip $DIR_BIN
|
||||||
|
$INSTALL xsap $DIR_BIN
|
||||||
|
$INSTALL xsip $DIR_BIN
|
||||||
|
$INSTALL xdap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xbap
|
||||||
|
$INSTALL xbap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing miscellaneous programs"
|
||||||
|
|
||||||
|
pushd $DIR_ABI > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL getABISampleName $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_ALF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE alfsplit
|
||||||
|
$INSTALL alfsplit $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_CONVERT > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE convert
|
||||||
|
$INSTALL convert $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_COP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL cop $DIR_BIN
|
||||||
|
$INSTALL cop-bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_FROG > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE frog
|
||||||
|
$INSTALL frog $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_GETMCH > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE trace2seq
|
||||||
|
$INSTALL trace2seq $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_SCF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE makeSCF
|
||||||
|
$INSTALL makeSCF $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
installation_done:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation completed"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo " Some further initialisation is required in order to use the"
|
||||||
|
echo " package. csh users should insert the following in their .login"
|
||||||
|
echo " files:"
|
||||||
|
echo " "
|
||||||
|
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||||
|
echo ' source $STADENROOT/staden.login'
|
||||||
|
echo " "
|
||||||
|
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||||
|
echo " their .profile:"
|
||||||
|
echo " "
|
||||||
|
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||||
|
echo " export STADENROOT"
|
||||||
|
echo ' . $STADENROOT/staden.profile'
|
||||||
|
echo " "
|
||||||
|
echo " These initialisations will alter the shell's search path so that"
|
||||||
|
echo " it can find the programs in the STADEN Package"
|
||||||
|
echo " "
|
||||||
|
|
||||||
|
normal_exit:
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
chickens_exit:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation cancelled"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
end_failure:
|
||||||
|
unset noglob
|
||||||
|
echo ""
|
||||||
|
echo "Aborted STADEN Package installation on `date`"
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
453
Staden_install-dec
Normal file
453
Staden_install-dec
Normal file
|
@ -0,0 +1,453 @@
|
||||||
|
#! /bin/csh -f
|
||||||
|
#
|
||||||
|
# staden_install - version 2.4
|
||||||
|
#
|
||||||
|
# This is a prototype installation program.
|
||||||
|
#
|
||||||
|
# 9 March 1992
|
||||||
|
# Modified for installation on Sun, Alliant, etc
|
||||||
|
# No longer install 2rs
|
||||||
|
#
|
||||||
|
# 20 November 1992
|
||||||
|
# Now includes convert, cop, frog, getMCH and scf
|
||||||
|
#
|
||||||
|
# 25 November 1992
|
||||||
|
# SGI supported
|
||||||
|
#
|
||||||
|
# 19 May 1993
|
||||||
|
# DEC Alpha, Solaris supported
|
||||||
|
#
|
||||||
|
# Written by sd@uk.ac.cam.mrc-lmb
|
||||||
|
#
|
||||||
|
|
||||||
|
# prelim
|
||||||
|
set prog = $0 ; set prog = $prog:t
|
||||||
|
|
||||||
|
# Machines supported: al sun dec sgi alpha solaris
|
||||||
|
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||||
|
set MACHINE = dec
|
||||||
|
|
||||||
|
# For local (MRC-LMB) setup only
|
||||||
|
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||||
|
set LOCAL = NO
|
||||||
|
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -n "Staden Package installation procedure - "
|
||||||
|
switch (${MACHINE})
|
||||||
|
case "al":
|
||||||
|
echo "Alliant FX/2800 Concentrix version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "sun":
|
||||||
|
echo "SunOS version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "dec":
|
||||||
|
echo "DEC Ultrix (mips) version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "sgi":
|
||||||
|
echo "Silicon Graphics Iris version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "alpha":
|
||||||
|
echo "DEC Alpha OSF/1 version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "solaris":
|
||||||
|
echo "Solaris version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
default:
|
||||||
|
echo "Panic. Unknown version"
|
||||||
|
exit 1
|
||||||
|
endsw
|
||||||
|
echo ""
|
||||||
|
echo "* starting initialization...please wait."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Binary fork of source directory
|
||||||
|
if ($LOCAL == "YES") then
|
||||||
|
set DIR_BINARIES = ${MACHINE}-binaries
|
||||||
|
set DIR_PROGS = ${MACHINE}-bin
|
||||||
|
else
|
||||||
|
set DIR_BINARIES = .
|
||||||
|
set DIR_PROGS = bin
|
||||||
|
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||||
|
endif
|
||||||
|
|
||||||
|
init:
|
||||||
|
# Set useful shell variables
|
||||||
|
set YES="YES";
|
||||||
|
set NO="NO"
|
||||||
|
|
||||||
|
# set/unset some .cshrc envs.
|
||||||
|
unset noclobber
|
||||||
|
set noglob
|
||||||
|
|
||||||
|
# set interrupt trap
|
||||||
|
onintr end_failure
|
||||||
|
|
||||||
|
# Make dir command
|
||||||
|
set MKDIR = "mkdir"
|
||||||
|
|
||||||
|
# Copy command
|
||||||
|
set CP = "cp -p"
|
||||||
|
|
||||||
|
# Install command
|
||||||
|
#set INSTALL = "install"
|
||||||
|
#set INSTALL = "mv"
|
||||||
|
set INSTALL = "cp"
|
||||||
|
|
||||||
|
# Set up default responses
|
||||||
|
set DEF_STADEN_ROOT = `pwd`
|
||||||
|
|
||||||
|
set DEF_REQ_NONX = "$YES"
|
||||||
|
set DEF_REQ_X = "$YES"
|
||||||
|
set DEF_REQ_TED = "$YES"
|
||||||
|
set DEF_REQ_MISC = "$YES"
|
||||||
|
|
||||||
|
# directories
|
||||||
|
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||||
|
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||||
|
set DIR_MISC = $DIR_SRC/Misc
|
||||||
|
set DIR_STADEN = $DIR_SRC/staden
|
||||||
|
set DIR_TED = $DIR_SRC/ted
|
||||||
|
set DIR_ABI = $DIR_SRC/abi
|
||||||
|
set DIR_ALF = $DIR_SRC/alf
|
||||||
|
set DIR_BAP = $DIR_SRC/bap
|
||||||
|
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||||
|
set DIR_CONVERT = $DIR_SRC/convert
|
||||||
|
set DIR_COP = $DIR_SRC/cop
|
||||||
|
set DIR_FROG = $DIR_SRC/frog
|
||||||
|
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||||
|
set DIR_SCF = $DIR_SRC/scf
|
||||||
|
|
||||||
|
|
||||||
|
main:
|
||||||
|
|
||||||
|
|
||||||
|
preamble:
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "* Please answer the following questions."
|
||||||
|
echo " Default answers to questions are given in square brackets."
|
||||||
|
echo " If you require help at any stage respond with a ? to the question."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_staden_root:
|
||||||
|
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||||
|
|
||||||
|
ask_require_nonx_progs:
|
||||||
|
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||||
|
set ANS_REQ_NONX = $<
|
||||||
|
if ("$ANS_REQ_NONX" == "?") then
|
||||||
|
echo "* If you do not have X windows on your system you will require"
|
||||||
|
echo " these. However, you will require Tektronics terminal emulation."
|
||||||
|
echo " If you do not require all of the non-X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
else if ("$ANS_REQ_NONX" != "") then
|
||||||
|
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||||
|
set ANS_REQ_NONX=$YES
|
||||||
|
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||||
|
set ANS_REQ_NONX=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||||
|
endif
|
||||||
|
|
||||||
|
ask_require_x_progs:
|
||||||
|
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||||
|
set ANS_REQ_X = $<
|
||||||
|
if ("$ANS_REQ_X" == "?") then
|
||||||
|
echo "* These are the programs that require X windows."
|
||||||
|
echo " If you do not require all of the X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
goto ask_require_x_progs
|
||||||
|
else if ("$ANS_REQ_X" != "") then
|
||||||
|
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||||
|
set ANS_REQ_X=$YES
|
||||||
|
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||||
|
set ANS_REQ_X=$NO
|
||||||
|
else
|
||||||
|
# Unrecognised answer: re-ask this (X programs) question, not the previous one
goto ask_require_x_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_X=$DEF_REQ_X
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_ted:
|
||||||
|
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||||
|
set ANS_REQ_TED = $<
|
||||||
|
if ("$ANS_REQ_TED" == "?") then
|
||||||
|
echo "* This is the trace editor program. It allows you to look at"
|
||||||
|
echo " traces obtained from automated fluorescent sequencing machines."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_ted
|
||||||
|
else if ("$ANS_REQ_TED" != "") then
|
||||||
|
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||||
|
set ANS_REQ_TED=$YES
|
||||||
|
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||||
|
set ANS_REQ_TED=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_ted
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_TED=$DEF_REQ_TED
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_misc:
|
||||||
|
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||||
|
set ANS_REQ_MISC = $<
|
||||||
|
if ("$ANS_REQ_MISC" == "?") then
|
||||||
|
echo "* Other programs include:"
|
||||||
|
echo " alfsplit"
|
||||||
|
echo " getABISampleName"
|
||||||
|
echo ""
|
||||||
|
goto ask_require_misc
|
||||||
|
else if ("$ANS_REQ_MISC" != "") then
|
||||||
|
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||||
|
set ANS_REQ_MISC=$YES
|
||||||
|
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||||
|
set ANS_REQ_MISC=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_misc
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
time_taken_warning:
|
||||||
|
echo ""
|
||||||
|
echo "The installation procedure is now ready to start."
|
||||||
|
echo ""
|
||||||
|
echo "**** Warning:"
|
||||||
|
echo " The installation will take considerable time to complete. If you"
|
||||||
|
echo " are installing the whole Staden Package from scratch it could"
|
||||||
|
echo " take as long as an hour for all exectuables to be compiled and"
|
||||||
|
echo " installed."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_goahead:
|
||||||
|
echo -n "Proceed with the installation [YES]? "
|
||||||
|
set ANSWER=$<
|
||||||
|
if ("$ANSWER" == "?") then
|
||||||
|
echo "* Final confirmation to proceed with the installation. Answer"
|
||||||
|
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||||
|
echo ""
|
||||||
|
goto ask_goahead
|
||||||
|
else if ("$ANSWER" != "") then
|
||||||
|
if ("$ANSWER" =~ [nN]*) then
|
||||||
|
goto chickens_exit
|
||||||
|
else if ("$ANSWER" !~ [yY]*) then
|
||||||
|
goto ask_goahead
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
installation_proper:
|
||||||
|
|
||||||
|
# make binaries directory if it doesn't exist
|
||||||
|
|
||||||
|
if (! -d $DIR_BIN) then
|
||||||
|
$MKDIR $DIR_BIN
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||||
|
echo ""
|
||||||
|
echo "+ Compiling miscellaneous library"
|
||||||
|
|
||||||
|
pushd $DIR_MISC > /dev/null
|
||||||
|
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_NONX" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing non X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE nprogs lprogs
|
||||||
|
$INSTALL mep $DIR_BIN
|
||||||
|
$INSTALL nip $DIR_BIN
|
||||||
|
$INSTALL pip $DIR_BIN
|
||||||
|
$INSTALL sap $DIR_BIN
|
||||||
|
$INSTALL sapf $DIR_BIN
|
||||||
|
$INSTALL sip $DIR_BIN
|
||||||
|
$INSTALL splitp1 $DIR_BIN
|
||||||
|
$INSTALL splitp2 $DIR_BIN
|
||||||
|
$INSTALL splitp3 $DIR_BIN
|
||||||
|
$INSTALL sethelp $DIR_BIN
|
||||||
|
$INSTALL gip $DIR_BIN
|
||||||
|
$INSTALL nipl $DIR_BIN
|
||||||
|
$INSTALL pipl $DIR_BIN
|
||||||
|
$INSTALL sipl $DIR_BIN
|
||||||
|
$INSTALL dap $DIR_BIN
|
||||||
|
$INSTALL nipf $DIR_BIN
|
||||||
|
$INSTALL vep $DIR_BIN
|
||||||
|
$INSTALL rep $DIR_BIN
|
||||||
|
$INSTALL lip $DIR_BIN
|
||||||
|
#$INSTALL convert_project $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE bap
|
||||||
|
$INSTALL bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_TED" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing Trace editor"
|
||||||
|
|
||||||
|
pushd $DIR_TED > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE ted
|
||||||
|
$INSTALL ted $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_X" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xprogs
|
||||||
|
$INSTALL xmep $DIR_BIN
|
||||||
|
$INSTALL xnip $DIR_BIN
|
||||||
|
$INSTALL xpip $DIR_BIN
|
||||||
|
$INSTALL xsap $DIR_BIN
|
||||||
|
$INSTALL xsip $DIR_BIN
|
||||||
|
$INSTALL xdap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xbap
|
||||||
|
$INSTALL xbap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing miscellaneous programs"
|
||||||
|
|
||||||
|
pushd $DIR_ABI > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL getABISampleName $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_ALF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE alfsplit
|
||||||
|
$INSTALL alfsplit $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_CONVERT > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE convert
|
||||||
|
$INSTALL convert $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_COP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL cop $DIR_BIN
|
||||||
|
$INSTALL cop-bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_FROG > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE frog
|
||||||
|
$INSTALL frog $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_GETMCH > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE trace2seq
|
||||||
|
$INSTALL trace2seq $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_SCF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE makeSCF
|
||||||
|
$INSTALL makeSCF $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
installation_done:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation completed"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo " Some further initialisation is required in order to use the"
|
||||||
|
echo " package. csh users should insert the following in their .login"
|
||||||
|
echo " files:"
|
||||||
|
echo " "
|
||||||
|
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||||
|
echo ' source $STADENROOT/staden.login'
|
||||||
|
echo " "
|
||||||
|
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||||
|
echo " their .profile:"
|
||||||
|
echo " "
|
||||||
|
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||||
|
echo " export STADENROOT"
|
||||||
|
echo ' . $STADENROOT/staden.profile'
|
||||||
|
echo " "
|
||||||
|
echo " These initialisations will alter the shell's search path so that"
|
||||||
|
echo " it can find the programs in the STADEN Package"
|
||||||
|
echo " "
|
||||||
|
|
||||||
|
normal_exit:
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
chickens_exit:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation cancelled"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
end_failure:
|
||||||
|
unset noglob
|
||||||
|
echo ""
|
||||||
|
echo "Aborted STADEN Package installation on `date`"
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
453
Staden_install-sgi
Normal file
453
Staden_install-sgi
Normal file
|
@ -0,0 +1,453 @@
|
||||||
|
#! /bin/csh -f
|
||||||
|
#
|
||||||
|
# staden_install - version 2.4
|
||||||
|
#
|
||||||
|
# This is a prototype installation program.
|
||||||
|
#
|
||||||
|
# 9 March 1992
|
||||||
|
# Modified for installation on Sun, Alliant, etc
|
||||||
|
# No longer install 2rs
|
||||||
|
#
|
||||||
|
# 20 November 1992
|
||||||
|
# Now includes convert, cop, frog, getMCH and scf
|
||||||
|
#
|
||||||
|
# 25 November 1992
|
||||||
|
# SGI supported
|
||||||
|
#
|
||||||
|
# 19 May 1993
|
||||||
|
# DEC Alpha, Solaris supported
|
||||||
|
#
|
||||||
|
# Written by sd@uk.ac.cam.mrc-lmb
|
||||||
|
#
|
||||||
|
|
||||||
|
# prelim
|
||||||
|
# Basename of the invoked script: the :t modifier keeps only the tail
# component of the path in $0.
# NOTE(review): $prog appears to be referenced only by the commented-out
# MACHINE/LOCAL auto-detection lines; the active code hard-codes both values.
set prog = $0 ; set prog = $prog:t
|
||||||
|
|
||||||
|
# Machines supported: al sun dec sgi alpha solaris
|
||||||
|
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||||
|
set MACHINE = sgi
|
||||||
|
|
||||||
|
# For local (MRC-LMB) setup only
|
||||||
|
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||||
|
set LOCAL = NO
|
||||||
|
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -n "Staden Package installation procedure - "
|
||||||
|
switch (${MACHINE})
|
||||||
|
case "al":
|
||||||
|
echo "Alliant FX/2800 Concentrix version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "sun":
|
||||||
|
echo "SunOS version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "dec":
|
||||||
|
echo "DEC Ultrix (mips) version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "sgi":
|
||||||
|
echo "Silicon Graphics Iris version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "alpha":
|
||||||
|
echo "DEC Alpha OSF/1 version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "solaris":
|
||||||
|
echo "Solaris version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
default:
|
||||||
|
echo "Panic. Unknown version"
|
||||||
|
exit 1
|
||||||
|
endsw
|
||||||
|
echo ""
|
||||||
|
echo "* starting initialization...please wait."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
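# Build subdirectory entered inside each source directory (DIR_BINARIES)
# and name of the install directory under the Staden root (DIR_PROGS)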
|
||||||
|
if ($LOCAL == "YES") then
|
||||||
|
set DIR_BINARIES = ${MACHINE}-binaries
|
||||||
|
set DIR_PROGS = ${MACHINE}-bin
|
||||||
|
else
|
||||||
|
set DIR_BINARIES = .
|
||||||
|
set DIR_PROGS = bin
|
||||||
|
# Non-local builds run make in the source directory itself, so select the
# machine-specific makefile (makefile-<MACHINE>) explicitly.
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||||
|
endif
|
||||||
|
|
||||||
|
init:
|
||||||
|
# Set useful shell variables
|
||||||
|
set YES="YES";
|
||||||
|
set NO="NO"
|
||||||
|
|
||||||
|
# set/unset some .cshrc envs.
|
||||||
|
unset noclobber
|
||||||
|
set noglob
|
||||||
|
|
||||||
|
# set interrupt trap
|
||||||
|
onintr end_failure
|
||||||
|
|
||||||
|
# Make dir command
|
||||||
|
set MKDIR = "mkdir"
|
||||||
|
|
||||||
|
# Copy command
|
||||||
|
set CP = "cp -p"
|
||||||
|
|
||||||
|
# Install command
|
||||||
|
#set INSTALL = "install"
|
||||||
|
#set INSTALL = "mv"
|
||||||
|
set INSTALL = "cp"
|
||||||
|
|
||||||
|
# Set up default responses
|
||||||
|
set DEF_STADEN_ROOT = `pwd`
|
||||||
|
|
||||||
|
set DEF_REQ_NONX = "$YES"
|
||||||
|
set DEF_REQ_X = "$YES"
|
||||||
|
set DEF_REQ_TED = "$YES"
|
||||||
|
set DEF_REQ_MISC = "$YES"
|
||||||
|
|
||||||
|
# directories
|
||||||
|
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||||
|
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||||
|
set DIR_MISC = $DIR_SRC/Misc
|
||||||
|
set DIR_STADEN = $DIR_SRC/staden
|
||||||
|
set DIR_TED = $DIR_SRC/ted
|
||||||
|
set DIR_ABI = $DIR_SRC/abi
|
||||||
|
set DIR_ALF = $DIR_SRC/alf
|
||||||
|
set DIR_BAP = $DIR_SRC/bap
|
||||||
|
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||||
|
set DIR_CONVERT = $DIR_SRC/convert
|
||||||
|
set DIR_COP = $DIR_SRC/cop
|
||||||
|
set DIR_FROG = $DIR_SRC/frog
|
||||||
|
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||||
|
set DIR_SCF = $DIR_SRC/scf
|
||||||
|
|
||||||
|
|
||||||
|
main:
|
||||||
|
|
||||||
|
|
||||||
|
preamble:
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "* Please answer the following questions."
|
||||||
|
echo " Default answers to questions are given in square brackets."
|
||||||
|
echo " If you require help at any stage respond with a ? to the question."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_staden_root:
|
||||||
|
# No question is asked here: the installation root is always the default,
# i.e. the directory the script was started from (DEF_STADEN_ROOT is `pwd`).
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||||
|
|
||||||
|
ask_require_nonx_progs:
|
||||||
|
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||||
|
set ANS_REQ_NONX = $<
|
||||||
|
if ("$ANS_REQ_NONX" == "?") then
|
||||||
|
echo "* If you do not have X windows on your system you will require"
|
||||||
|
echo " these. However, you will require Tektronix terminal emulation."
|
||||||
|
echo " If you do not require all of the non-X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
else if ("$ANS_REQ_NONX" != "") then
|
||||||
|
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||||
|
set ANS_REQ_NONX=$YES
|
||||||
|
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||||
|
set ANS_REQ_NONX=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||||
|
endif
|
||||||
|
|
||||||
|
ask_require_x_progs:
|
||||||
|
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||||
|
set ANS_REQ_X = $<
|
||||||
|
if ("$ANS_REQ_X" == "?") then
|
||||||
|
echo "* These are the programs that require X windows."
|
||||||
|
echo " If you do not require all of the X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
goto ask_require_x_progs
|
||||||
|
else if ("$ANS_REQ_X" != "") then
|
||||||
|
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||||
|
set ANS_REQ_X=$YES
|
||||||
|
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||||
|
set ANS_REQ_X=$NO
|
||||||
|
else
|
||||||
|
# Unrecognised answer: repeat the X-programs question (not the non-X one,
# which would discard the answer already given to it).
goto ask_require_x_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_X=$DEF_REQ_X
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_ted:
|
||||||
|
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||||
|
set ANS_REQ_TED = $<
|
||||||
|
if ("$ANS_REQ_TED" == "?") then
|
||||||
|
echo "* This is the trace editor program. It allows you to look at"
|
||||||
|
echo " traces obtained from automated fluorescent sequencing machines."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_ted
|
||||||
|
else if ("$ANS_REQ_TED" != "") then
|
||||||
|
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||||
|
set ANS_REQ_TED=$YES
|
||||||
|
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||||
|
set ANS_REQ_TED=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_ted
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_TED=$DEF_REQ_TED
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_misc:
|
||||||
|
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||||
|
set ANS_REQ_MISC = $<
|
||||||
|
if ("$ANS_REQ_MISC" == "?") then
|
||||||
|
echo "* Other programs include:"
|
||||||
|
echo " alfsplit"
|
||||||
|
echo " getABISampleName"
|
||||||
|
echo ""
|
||||||
|
goto ask_require_misc
|
||||||
|
else if ("$ANS_REQ_MISC" != "") then
|
||||||
|
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||||
|
set ANS_REQ_MISC=$YES
|
||||||
|
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||||
|
set ANS_REQ_MISC=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_misc
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
time_taken_warning:
|
||||||
|
echo ""
|
||||||
|
echo "The installation procedure is now ready to start."
|
||||||
|
echo ""
|
||||||
|
echo "**** Warning:"
|
||||||
|
echo " The installation will take considerable time to complete. If you"
|
||||||
|
echo " are installing the whole Staden Package from scratch it could"
|
||||||
|
echo " take as long as an hour for all executables to be compiled and"
|
||||||
|
echo " installed."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_goahead:
|
||||||
|
echo -n "Proceed with the installation [YES]? "
|
||||||
|
set ANSWER=$<
|
||||||
|
if ("$ANSWER" == "?") then
|
||||||
|
echo "* Final confirmation to proceed with the installation. Answer"
|
||||||
|
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||||
|
echo ""
|
||||||
|
goto ask_goahead
|
||||||
|
else if ("$ANSWER" != "") then
|
||||||
|
if ("$ANSWER" =~ [nN]*) then
|
||||||
|
goto chickens_exit
|
||||||
|
else if ("$ANSWER" !~ [yY]*) then
|
||||||
|
goto ask_goahead
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
installation_proper:
|
||||||
|
|
||||||
|
# make binaries directory if it doesn't exist
|
||||||
|
|
||||||
|
if (! -d $DIR_BIN) then
|
||||||
|
$MKDIR $DIR_BIN
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||||
|
echo ""
|
||||||
|
echo "+ Compiling miscellaneous library"
|
||||||
|
|
||||||
|
pushd $DIR_MISC > /dev/null
|
||||||
|
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_NONX" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing non X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE nprogs lprogs
|
||||||
|
$INSTALL mep $DIR_BIN
|
||||||
|
$INSTALL nip $DIR_BIN
|
||||||
|
$INSTALL pip $DIR_BIN
|
||||||
|
$INSTALL sap $DIR_BIN
|
||||||
|
$INSTALL sapf $DIR_BIN
|
||||||
|
$INSTALL sip $DIR_BIN
|
||||||
|
$INSTALL splitp1 $DIR_BIN
|
||||||
|
$INSTALL splitp2 $DIR_BIN
|
||||||
|
$INSTALL splitp3 $DIR_BIN
|
||||||
|
$INSTALL sethelp $DIR_BIN
|
||||||
|
$INSTALL gip $DIR_BIN
|
||||||
|
$INSTALL nipl $DIR_BIN
|
||||||
|
$INSTALL pipl $DIR_BIN
|
||||||
|
$INSTALL sipl $DIR_BIN
|
||||||
|
$INSTALL dap $DIR_BIN
|
||||||
|
$INSTALL nipf $DIR_BIN
|
||||||
|
$INSTALL vep $DIR_BIN
|
||||||
|
$INSTALL rep $DIR_BIN
|
||||||
|
$INSTALL lip $DIR_BIN
|
||||||
|
#$INSTALL convert_project $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE bap
|
||||||
|
$INSTALL bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_TED" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing Trace editor"
|
||||||
|
|
||||||
|
pushd $DIR_TED > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE ted
|
||||||
|
$INSTALL ted $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_X" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xprogs
|
||||||
|
$INSTALL xmep $DIR_BIN
|
||||||
|
$INSTALL xnip $DIR_BIN
|
||||||
|
$INSTALL xpip $DIR_BIN
|
||||||
|
$INSTALL xsap $DIR_BIN
|
||||||
|
$INSTALL xsip $DIR_BIN
|
||||||
|
$INSTALL xdap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xbap
|
||||||
|
$INSTALL xbap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing miscellaneous programs"
|
||||||
|
|
||||||
|
pushd $DIR_ABI > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL getABISampleName $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_ALF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE alfsplit
|
||||||
|
$INSTALL alfsplit $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_CONVERT > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE convert
|
||||||
|
$INSTALL convert $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_COP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL cop $DIR_BIN
|
||||||
|
$INSTALL cop-bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_FROG > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE frog
|
||||||
|
$INSTALL frog $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_GETMCH > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE trace2seq
|
||||||
|
$INSTALL trace2seq $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_SCF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE makeSCF
|
||||||
|
$INSTALL makeSCF $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
installation_done:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation completed"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo " Some further initialisation is required in order to use the"
|
||||||
|
echo " package. csh users should insert the following in their .login"
|
||||||
|
echo " files:"
|
||||||
|
echo " "
|
||||||
|
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||||
|
echo ' source $STADENROOT/staden.login'
|
||||||
|
echo " "
|
||||||
|
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||||
|
echo " their .profile:"
|
||||||
|
echo " "
|
||||||
|
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||||
|
echo " export STADENROOT"
|
||||||
|
echo ' . $STADENROOT/staden.profile'
|
||||||
|
echo " "
|
||||||
|
echo " These initialisations will alter the shell's search path so that"
|
||||||
|
echo " it can find the programs in the STADEN Package"
|
||||||
|
echo " "
|
||||||
|
|
||||||
|
normal_exit:
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
chickens_exit:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation cancelled"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
end_failure:
|
||||||
|
unset noglob
|
||||||
|
echo ""
|
||||||
|
echo "Aborted STADEN Package installation on `date`"
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
453
Staden_install-solaris
Normal file
453
Staden_install-solaris
Normal file
|
@ -0,0 +1,453 @@
|
||||||
|
#! /bin/csh -f
|
||||||
|
#
|
||||||
|
# staden_install - version 2.4
|
||||||
|
#
|
||||||
|
# This is a prototype installation program.
|
||||||
|
#
|
||||||
|
# 9 March 1992
|
||||||
|
# Modified for installation on Sun, Alliant, etc
|
||||||
|
# No longer install 2rs
|
||||||
|
#
|
||||||
|
# 20 November 1992
|
||||||
|
# Now includes convert, cop, frog, getMCH and scf
|
||||||
|
#
|
||||||
|
# 25 November 1992
|
||||||
|
# SGI supported
|
||||||
|
#
|
||||||
|
# 19 May 1993
|
||||||
|
# DEC Alpha, Solaris supported
|
||||||
|
#
|
||||||
|
# Written by sd@uk.ac.cam.mrc-lmb
|
||||||
|
#
|
||||||
|
|
||||||
|
# prelim
|
||||||
|
# Basename of the invoked script: the :t modifier keeps only the tail
# component of the path in $0.
# NOTE(review): $prog appears to be referenced only by the commented-out
# MACHINE/LOCAL auto-detection lines; the active code hard-codes both values.
set prog = $0 ; set prog = $prog:t
|
||||||
|
|
||||||
|
# Machines supported: al sun dec sgi alpha solaris
|
||||||
|
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||||
|
set MACHINE = solaris
|
||||||
|
|
||||||
|
# For local (MRC-LMB) setup only
|
||||||
|
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||||
|
set LOCAL = NO
|
||||||
|
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -n "Staden Package installation procedure - "
|
||||||
|
switch (${MACHINE})
|
||||||
|
case "al":
|
||||||
|
echo "Alliant FX/2800 Concentrix version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "sun":
|
||||||
|
echo "SunOS version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "dec":
|
||||||
|
echo "DEC Ultrix (mips) version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "sgi":
|
||||||
|
echo "Silicon Graphics Iris version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "alpha":
|
||||||
|
echo "DEC Alpha OSF/1 version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "solaris":
|
||||||
|
echo "Solaris version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
default:
|
||||||
|
echo "Panic. Unknown version"
|
||||||
|
exit 1
|
||||||
|
endsw
|
||||||
|
echo ""
|
||||||
|
echo "* starting initialization...please wait."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
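# Build subdirectory entered inside each source directory (DIR_BINARIES)
# and name of the install directory under the Staden root (DIR_PROGS)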
|
||||||
|
if ($LOCAL == "YES") then
|
||||||
|
set DIR_BINARIES = ${MACHINE}-binaries
|
||||||
|
set DIR_PROGS = ${MACHINE}-bin
|
||||||
|
else
|
||||||
|
set DIR_BINARIES = .
|
||||||
|
set DIR_PROGS = bin
|
||||||
|
# Non-local builds run make in the source directory itself, so select the
# machine-specific makefile (makefile-<MACHINE>) explicitly.
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||||
|
endif
|
||||||
|
|
||||||
|
init:
|
||||||
|
# Set useful shell variables
|
||||||
|
set YES="YES";
|
||||||
|
set NO="NO"
|
||||||
|
|
||||||
|
# set/unset some .cshrc envs.
|
||||||
|
unset noclobber
|
||||||
|
set noglob
|
||||||
|
|
||||||
|
# set interrupt trap
|
||||||
|
onintr end_failure
|
||||||
|
|
||||||
|
# Make dir command
|
||||||
|
set MKDIR = "mkdir"
|
||||||
|
|
||||||
|
# Copy command
|
||||||
|
set CP = "cp -p"
|
||||||
|
|
||||||
|
# Install command
|
||||||
|
#set INSTALL = "install"
|
||||||
|
#set INSTALL = "mv"
|
||||||
|
set INSTALL = "cp"
|
||||||
|
|
||||||
|
# Set up default responses
|
||||||
|
set DEF_STADEN_ROOT = `pwd`
|
||||||
|
|
||||||
|
set DEF_REQ_NONX = "$YES"
|
||||||
|
set DEF_REQ_X = "$YES"
|
||||||
|
set DEF_REQ_TED = "$YES"
|
||||||
|
set DEF_REQ_MISC = "$YES"
|
||||||
|
|
||||||
|
# directories
|
||||||
|
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||||
|
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||||
|
set DIR_MISC = $DIR_SRC/Misc
|
||||||
|
set DIR_STADEN = $DIR_SRC/staden
|
||||||
|
set DIR_TED = $DIR_SRC/ted
|
||||||
|
set DIR_ABI = $DIR_SRC/abi
|
||||||
|
set DIR_ALF = $DIR_SRC/alf
|
||||||
|
set DIR_BAP = $DIR_SRC/bap
|
||||||
|
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||||
|
set DIR_CONVERT = $DIR_SRC/convert
|
||||||
|
set DIR_COP = $DIR_SRC/cop
|
||||||
|
set DIR_FROG = $DIR_SRC/frog
|
||||||
|
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||||
|
set DIR_SCF = $DIR_SRC/scf
|
||||||
|
|
||||||
|
|
||||||
|
main:
|
||||||
|
|
||||||
|
|
||||||
|
preamble:
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "* Please answer the following questions."
|
||||||
|
echo " Default answers to questions are given in square brackets."
|
||||||
|
echo " If you require help at any stage respond with a ? to the question."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_staden_root:
|
||||||
|
# No question is asked here: the installation root is always the default,
# i.e. the directory the script was started from (DEF_STADEN_ROOT is `pwd`).
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||||
|
|
||||||
|
ask_require_nonx_progs:
|
||||||
|
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||||
|
set ANS_REQ_NONX = $<
|
||||||
|
if ("$ANS_REQ_NONX" == "?") then
|
||||||
|
echo "* If you do not have X windows on your system you will require"
|
||||||
|
echo " these. However, you will require Tektronix terminal emulation."
|
||||||
|
echo " If you do not require all of the non-X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
else if ("$ANS_REQ_NONX" != "") then
|
||||||
|
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||||
|
set ANS_REQ_NONX=$YES
|
||||||
|
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||||
|
set ANS_REQ_NONX=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||||
|
endif
|
||||||
|
|
||||||
|
ask_require_x_progs:
|
||||||
|
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||||
|
set ANS_REQ_X = $<
|
||||||
|
if ("$ANS_REQ_X" == "?") then
|
||||||
|
echo "* These are the programs that require X windows."
|
||||||
|
echo " If you do not require all of the X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
goto ask_require_x_progs
|
||||||
|
else if ("$ANS_REQ_X" != "") then
|
||||||
|
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||||
|
set ANS_REQ_X=$YES
|
||||||
|
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||||
|
set ANS_REQ_X=$NO
|
||||||
|
else
|
||||||
|
# Unrecognised answer: repeat the X-programs question (not the non-X one,
# which would discard the answer already given to it).
goto ask_require_x_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_X=$DEF_REQ_X
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_ted:
|
||||||
|
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||||
|
set ANS_REQ_TED = $<
|
||||||
|
if ("$ANS_REQ_TED" == "?") then
|
||||||
|
echo "* This is the trace editor program. It allows you to look at"
|
||||||
|
echo " traces obtained from automated fluorescent sequencing machines."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_ted
|
||||||
|
else if ("$ANS_REQ_TED" != "") then
|
||||||
|
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||||
|
set ANS_REQ_TED=$YES
|
||||||
|
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||||
|
set ANS_REQ_TED=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_ted
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_TED=$DEF_REQ_TED
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_misc:
|
||||||
|
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||||
|
set ANS_REQ_MISC = $<
|
||||||
|
if ("$ANS_REQ_MISC" == "?") then
|
||||||
|
echo "* Other programs include:"
|
||||||
|
echo " alfsplit"
|
||||||
|
echo " getABISampleName"
|
||||||
|
echo ""
|
||||||
|
goto ask_require_misc
|
||||||
|
else if ("$ANS_REQ_MISC" != "") then
|
||||||
|
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||||
|
set ANS_REQ_MISC=$YES
|
||||||
|
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||||
|
set ANS_REQ_MISC=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_misc
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
time_taken_warning:
|
||||||
|
echo ""
|
||||||
|
echo "The installation procedure is now ready to start."
|
||||||
|
echo ""
|
||||||
|
echo "**** Warning:"
|
||||||
|
echo " The installation will take considerable time to complete. If you"
|
||||||
|
echo " are installing the whole Staden Package from scratch it could"
|
||||||
|
echo " take as long as an hour for all executables to be compiled and"
|
||||||
|
echo " installed."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_goahead:
|
||||||
|
echo -n "Proceed with the installation [YES]? "
|
||||||
|
set ANSWER=$<
|
||||||
|
if ("$ANSWER" == "?") then
|
||||||
|
echo "* Final confirmation to proceed with the installation. Answer"
|
||||||
|
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||||
|
echo ""
|
||||||
|
goto ask_goahead
|
||||||
|
else if ("$ANSWER" != "") then
|
||||||
|
if ("$ANSWER" =~ [nN]*) then
|
||||||
|
goto chickens_exit
|
||||||
|
else if ("$ANSWER" !~ [yY]*) then
|
||||||
|
goto ask_goahead
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
installation_proper:
|
||||||
|
|
||||||
|
# make binaries directory if it doesn't exist
|
||||||
|
|
||||||
|
if (! -d $DIR_BIN) then
|
||||||
|
$MKDIR $DIR_BIN
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||||
|
echo ""
|
||||||
|
echo "+ Compiling miscellaneous library"
|
||||||
|
|
||||||
|
pushd $DIR_MISC > /dev/null
|
||||||
|
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_NONX" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing non X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE nprogs lprogs
|
||||||
|
$INSTALL mep $DIR_BIN
|
||||||
|
$INSTALL nip $DIR_BIN
|
||||||
|
$INSTALL pip $DIR_BIN
|
||||||
|
$INSTALL sap $DIR_BIN
|
||||||
|
$INSTALL sapf $DIR_BIN
|
||||||
|
$INSTALL sip $DIR_BIN
|
||||||
|
$INSTALL splitp1 $DIR_BIN
|
||||||
|
$INSTALL splitp2 $DIR_BIN
|
||||||
|
$INSTALL splitp3 $DIR_BIN
|
||||||
|
$INSTALL sethelp $DIR_BIN
|
||||||
|
$INSTALL gip $DIR_BIN
|
||||||
|
$INSTALL nipl $DIR_BIN
|
||||||
|
$INSTALL pipl $DIR_BIN
|
||||||
|
$INSTALL sipl $DIR_BIN
|
||||||
|
$INSTALL dap $DIR_BIN
|
||||||
|
$INSTALL nipf $DIR_BIN
|
||||||
|
$INSTALL vep $DIR_BIN
|
||||||
|
$INSTALL rep $DIR_BIN
|
||||||
|
$INSTALL lip $DIR_BIN
|
||||||
|
#$INSTALL convert_project $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE bap
|
||||||
|
$INSTALL bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_TED" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing Trace editor"
|
||||||
|
|
||||||
|
pushd $DIR_TED > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE ted
|
||||||
|
$INSTALL ted $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_X" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xprogs
|
||||||
|
$INSTALL xmep $DIR_BIN
|
||||||
|
$INSTALL xnip $DIR_BIN
|
||||||
|
$INSTALL xpip $DIR_BIN
|
||||||
|
$INSTALL xsap $DIR_BIN
|
||||||
|
$INSTALL xsip $DIR_BIN
|
||||||
|
$INSTALL xdap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xbap
|
||||||
|
$INSTALL xbap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing miscellaneous programs"
|
||||||
|
|
||||||
|
pushd $DIR_ABI > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL getABISampleName $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_ALF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE alfsplit
|
||||||
|
$INSTALL alfsplit $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_CONVERT > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE convert
|
||||||
|
$INSTALL convert $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_COP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL cop $DIR_BIN
|
||||||
|
$INSTALL cop-bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_FROG > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE frog
|
||||||
|
$INSTALL frog $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_GETMCH > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE trace2seq
|
||||||
|
$INSTALL trace2seq $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_SCF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE makeSCF
|
||||||
|
$INSTALL makeSCF $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
installation_done:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation completed"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo " Some further initialisation is required in order to use the"
|
||||||
|
echo " package. csh users should insert the following in their .login"
|
||||||
|
echo " files:"
|
||||||
|
echo " "
|
||||||
|
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||||
|
echo ' source $STADENROOT/staden.login'
|
||||||
|
echo " "
|
||||||
|
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||||
|
echo " their .profile:"
|
||||||
|
echo " "
|
||||||
|
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||||
|
echo " export STADENROOT"
|
||||||
|
echo ' . $STADENROOT/staden.profile'
|
||||||
|
echo " "
|
||||||
|
echo " These initialisations will alter the shell's search path so that"
|
||||||
|
echo " it can find the programs in the STADEN Package"
|
||||||
|
echo " "
|
||||||
|
|
||||||
|
normal_exit:
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
chickens_exit:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation cancelled"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
end_failure:
|
||||||
|
unset noglob
|
||||||
|
echo ""
|
||||||
|
echo "Aborted STADEN Package installation on `date`"
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
453
Staden_install-sun
Normal file
453
Staden_install-sun
Normal file
|
@ -0,0 +1,453 @@
|
||||||
|
#! /bin/csh -f
|
||||||
|
#
|
||||||
|
# staden_install - version 2.4
|
||||||
|
#
|
||||||
|
# This is a prototype installation program.
|
||||||
|
#
|
||||||
|
# 9 March 1992
|
||||||
|
# Modified for installation on Sun, Alliant, etc
|
||||||
|
# No longer install 2rs
|
||||||
|
#
|
||||||
|
# 20 November 1992
|
||||||
|
# Now includes convert, cop, frog, getMCH and scf
|
||||||
|
#
|
||||||
|
# 25 November 1992
|
||||||
|
# SGI supported
|
||||||
|
#
|
||||||
|
# 19 May 1993
|
||||||
|
# DEC Alpha, Solaris supported
|
||||||
|
#
|
||||||
|
# Written by sd@uk.ac.cam.mrc-lmb
|
||||||
|
#
|
||||||
|
|
||||||
|
# prelim
|
||||||
|
set prog = $0 ; set prog = $prog:t
|
||||||
|
|
||||||
|
# Machines supported: al sun dec sgi alpha solaris
|
||||||
|
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||||
|
set MACHINE = sun
|
||||||
|
|
||||||
|
# For local (MRC-LMB) setup only
|
||||||
|
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||||
|
set LOCAL = NO
|
||||||
|
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -n "Staden Package installation procedure - "
|
||||||
|
switch (${MACHINE})
|
||||||
|
case "al":
|
||||||
|
echo "Alliant FX/2800 Concentrix version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "sun":
|
||||||
|
echo "SunOS version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
case "dec":
|
||||||
|
echo "DEC Ultrix (mips) version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "sgi":
|
||||||
|
echo "Silicon Graphics Iris version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "alpha":
|
||||||
|
echo "DEC Alpha OSF/1 version"
|
||||||
|
set MAKE = "gmake -sk"
|
||||||
|
breaksw
|
||||||
|
case "solaris":
|
||||||
|
echo "Solaris version"
|
||||||
|
set MAKE = "make -sk"
|
||||||
|
breaksw
|
||||||
|
default:
|
||||||
|
echo "Panic. Unknown version"
|
||||||
|
exit 1
|
||||||
|
endsw
|
||||||
|
echo ""
|
||||||
|
echo "* starting initialization...please wait."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Binary fork of source directory
|
||||||
|
if ($LOCAL == "YES") then
|
||||||
|
set DIR_BINARIES = ${MACHINE}-binaries
|
||||||
|
set DIR_PROGS = ${MACHINE}-bin
|
||||||
|
else
|
||||||
|
set DIR_BINARIES = .
|
||||||
|
set DIR_PROGS = bin
|
||||||
|
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||||
|
endif
|
||||||
|
|
||||||
|
init:
|
||||||
|
# Set useful shell variables
|
||||||
|
set YES="YES";
|
||||||
|
set NO="NO"
|
||||||
|
|
||||||
|
# set/unset some .cshrc envs.
|
||||||
|
unset noclobber
|
||||||
|
set noglob
|
||||||
|
|
||||||
|
# set interrupt trap
|
||||||
|
onintr end_failure
|
||||||
|
|
||||||
|
# Make dir command
|
||||||
|
set MKDIR = "mkdir"
|
||||||
|
|
||||||
|
# Copy command
|
||||||
|
set CP = "cp -p"
|
||||||
|
|
||||||
|
# Install command
|
||||||
|
#set INSTALL = "install"
|
||||||
|
#set INSTALL = "mv"
|
||||||
|
set INSTALL = "cp"
|
||||||
|
|
||||||
|
# Set up default responses
|
||||||
|
set DEF_STADEN_ROOT = `pwd`
|
||||||
|
|
||||||
|
set DEF_REQ_NONX = "$YES"
|
||||||
|
set DEF_REQ_X = "$YES"
|
||||||
|
set DEF_REQ_TED = "$YES"
|
||||||
|
set DEF_REQ_MISC = "$YES"
|
||||||
|
|
||||||
|
# directories
|
||||||
|
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||||
|
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||||
|
set DIR_MISC = $DIR_SRC/Misc
|
||||||
|
set DIR_STADEN = $DIR_SRC/staden
|
||||||
|
set DIR_TED = $DIR_SRC/ted
|
||||||
|
set DIR_ABI = $DIR_SRC/abi
|
||||||
|
set DIR_ALF = $DIR_SRC/alf
|
||||||
|
set DIR_BAP = $DIR_SRC/bap
|
||||||
|
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||||
|
set DIR_CONVERT = $DIR_SRC/convert
|
||||||
|
set DIR_COP = $DIR_SRC/cop
|
||||||
|
set DIR_FROG = $DIR_SRC/frog
|
||||||
|
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||||
|
set DIR_SCF = $DIR_SRC/scf
|
||||||
|
|
||||||
|
|
||||||
|
main:
|
||||||
|
|
||||||
|
|
||||||
|
preamble:
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "* Please answer the following questions."
|
||||||
|
echo " Default answers to questions are given in square brackets."
|
||||||
|
echo " If you require help at any stage respond with a ? to the question."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_staden_root:
|
||||||
|
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||||
|
|
||||||
|
ask_require_nonx_progs:
|
||||||
|
# Ask whether to build the non-X programs and store YES/NO in ANS_REQ_NONX.
|
||||||
|
# "?" prints help and re-asks; an empty reply takes DEF_REQ_NONX;
|
||||||
|
# replies starting with y/Y or n/N map to YES/NO; anything else re-asks.
|
||||||
|
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||||
|
set ANS_REQ_NONX = $<
|
||||||
|
if ("$ANS_REQ_NONX" == "?") then
|
||||||
|
echo "* If you do not have X windows on your system you will require"
|
||||||
|
echo " these. However, you will require Tektronix terminal emulation."
|
||||||
|
echo " If you do not require all of the non-X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
else if ("$ANS_REQ_NONX" != "") then
|
||||||
|
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||||
|
set ANS_REQ_NONX=$YES
|
||||||
|
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||||
|
set ANS_REQ_NONX=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_nonx_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||||
|
endif
|
||||||
|
|
||||||
|
ask_require_x_progs:
|
||||||
|
# Ask whether to build the X programs and store YES/NO in ANS_REQ_X.
|
||||||
|
# "?" prints help and re-asks; an empty reply takes DEF_REQ_X;
|
||||||
|
# replies starting with y/Y or n/N map to YES/NO; anything else re-asks.
|
||||||
|
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||||
|
set ANS_REQ_X = $<
|
||||||
|
if ("$ANS_REQ_X" == "?") then
|
||||||
|
echo "* These are the programs that require X windows."
|
||||||
|
echo " If you do not require all of the X programs, you should abort"
|
||||||
|
echo " and manually make the ones you require."
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
goto ask_require_x_progs
|
||||||
|
else if ("$ANS_REQ_X" != "") then
|
||||||
|
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||||
|
set ANS_REQ_X=$YES
|
||||||
|
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||||
|
set ANS_REQ_X=$NO
|
||||||
|
else
|
||||||
|
# Unrecognised reply: repeat THIS question. Jumping back to the non-X
|
||||||
|
# question would discard the answer the user has already given to it.
|
||||||
|
goto ask_require_x_progs
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_X=$DEF_REQ_X
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_ted:
|
||||||
|
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||||
|
set ANS_REQ_TED = $<
|
||||||
|
if ("$ANS_REQ_TED" == "?") then
|
||||||
|
echo "* This is the trace editor program. It allows you to look at"
|
||||||
|
echo " traces obtained from automated fluorescent sequencing machines."
|
||||||
|
echo ""
|
||||||
|
goto ask_require_ted
|
||||||
|
else if ("$ANS_REQ_TED" != "") then
|
||||||
|
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||||
|
set ANS_REQ_TED=$YES
|
||||||
|
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||||
|
set ANS_REQ_TED=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_ted
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_TED=$DEF_REQ_TED
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ask_require_misc:
|
||||||
|
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||||
|
set ANS_REQ_MISC = $<
|
||||||
|
if ("$ANS_REQ_MISC" == "?") then
|
||||||
|
echo "* Other programs include:"
|
||||||
|
echo " alfsplit"
|
||||||
|
echo " getABISampleName"
|
||||||
|
echo ""
|
||||||
|
goto ask_require_misc
|
||||||
|
else if ("$ANS_REQ_MISC" != "") then
|
||||||
|
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||||
|
set ANS_REQ_MISC=$YES
|
||||||
|
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||||
|
set ANS_REQ_MISC=$NO
|
||||||
|
else
|
||||||
|
goto ask_require_misc
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
time_taken_warning:
|
||||||
|
# Tell the user how long a full build may take before asking for the
|
||||||
|
# final go-ahead.
|
||||||
|
echo ""
|
||||||
|
echo "The installation procedure is now ready to start."
|
||||||
|
echo ""
|
||||||
|
echo "**** Warning:"
|
||||||
|
echo " The installation will take considerable time to complete. If you"
|
||||||
|
echo " are installing the whole Staden Package from scratch it could"
|
||||||
|
echo " take as long as an hour for all executables to be compiled and"
|
||||||
|
echo " installed."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
ask_goahead:
|
||||||
|
echo -n "Proceed with the installation [YES]? "
|
||||||
|
set ANSWER=$<
|
||||||
|
if ("$ANSWER" == "?") then
|
||||||
|
echo "* Final confirmation to proceed with the installation. Answer"
|
||||||
|
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||||
|
echo ""
|
||||||
|
goto ask_goahead
|
||||||
|
else if ("$ANSWER" != "") then
|
||||||
|
if ("$ANSWER" =~ [nN]*) then
|
||||||
|
goto chickens_exit
|
||||||
|
else if ("$ANSWER" !~ [yY]*) then
|
||||||
|
goto ask_goahead
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
installation_proper:
|
||||||
|
|
||||||
|
# make binaries directory if it doesn't exist
|
||||||
|
|
||||||
|
if (! -d $DIR_BIN) then
|
||||||
|
$MKDIR $DIR_BIN
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||||
|
echo ""
|
||||||
|
echo "+ Compiling miscellaneous library"
|
||||||
|
|
||||||
|
pushd $DIR_MISC > /dev/null
|
||||||
|
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_NONX" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing non X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE nprogs lprogs
|
||||||
|
$INSTALL mep $DIR_BIN
|
||||||
|
$INSTALL nip $DIR_BIN
|
||||||
|
$INSTALL pip $DIR_BIN
|
||||||
|
$INSTALL sap $DIR_BIN
|
||||||
|
$INSTALL sapf $DIR_BIN
|
||||||
|
$INSTALL sip $DIR_BIN
|
||||||
|
$INSTALL splitp1 $DIR_BIN
|
||||||
|
$INSTALL splitp2 $DIR_BIN
|
||||||
|
$INSTALL splitp3 $DIR_BIN
|
||||||
|
$INSTALL sethelp $DIR_BIN
|
||||||
|
$INSTALL gip $DIR_BIN
|
||||||
|
$INSTALL nipl $DIR_BIN
|
||||||
|
$INSTALL pipl $DIR_BIN
|
||||||
|
$INSTALL sipl $DIR_BIN
|
||||||
|
$INSTALL dap $DIR_BIN
|
||||||
|
$INSTALL nipf $DIR_BIN
|
||||||
|
$INSTALL vep $DIR_BIN
|
||||||
|
$INSTALL rep $DIR_BIN
|
||||||
|
$INSTALL lip $DIR_BIN
|
||||||
|
#$INSTALL convert_project $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE bap
|
||||||
|
$INSTALL bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_TED" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing Trace editor"
|
||||||
|
|
||||||
|
pushd $DIR_TED > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE ted
|
||||||
|
$INSTALL ted $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_X" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing X programs"
|
||||||
|
|
||||||
|
pushd $DIR_STADEN > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xprogs
|
||||||
|
$INSTALL xmep $DIR_BIN
|
||||||
|
$INSTALL xnip $DIR_BIN
|
||||||
|
$INSTALL xpip $DIR_BIN
|
||||||
|
$INSTALL xsap $DIR_BIN
|
||||||
|
$INSTALL xsip $DIR_BIN
|
||||||
|
$INSTALL xdap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_OSP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_BAP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE xbap
|
||||||
|
$INSTALL xbap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
if ("$ANS_REQ_MISC" == "$YES") then
|
||||||
|
echo ""
|
||||||
|
echo "+ Installing miscellaneous programs"
|
||||||
|
|
||||||
|
pushd $DIR_ABI > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL getABISampleName $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_ALF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE alfsplit
|
||||||
|
$INSTALL alfsplit $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_CONVERT > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE convert
|
||||||
|
$INSTALL convert $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_COP > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE all
|
||||||
|
$INSTALL cop $DIR_BIN
|
||||||
|
$INSTALL cop-bap $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_FROG > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE frog
|
||||||
|
$INSTALL frog $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_GETMCH > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE trace2seq
|
||||||
|
$INSTALL trace2seq $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
pushd $DIR_SCF > /dev/null
|
||||||
|
cd $DIR_BINARIES
|
||||||
|
$MAKE makeSCF
|
||||||
|
$INSTALL makeSCF $DIR_BIN
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
installation_done:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation completed"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo " Some further initialisation is required in order to use the"
|
||||||
|
echo " package. csh users should insert the following in their .login"
|
||||||
|
echo " files:"
|
||||||
|
echo " "
|
||||||
|
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||||
|
echo ' source $STADENROOT/staden.login'
|
||||||
|
echo " "
|
||||||
|
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||||
|
echo " their .profile:"
|
||||||
|
echo " "
|
||||||
|
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||||
|
echo " export STADENROOT"
|
||||||
|
echo ' . $STADENROOT/staden.profile'
|
||||||
|
echo " "
|
||||||
|
echo " These initialisations will alter the shell's search path so that"
|
||||||
|
echo " it can find the programs in the STADEN Package"
|
||||||
|
echo " "
|
||||||
|
|
||||||
|
normal_exit:
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
chickens_exit:
|
||||||
|
echo ""
|
||||||
|
echo "+ Installation cancelled"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
end_failure:
|
||||||
|
unset noglob
|
||||||
|
echo ""
|
||||||
|
echo "Aborted STADEN Package installation on `date`"
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
91
Version-1993.0.7
Normal file
91
Version-1993.0.7
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
Wed Jul 7
|
||||||
|
*Version-1993.0.7*
|
||||||
|
New xbap and ted.
|
||||||
|
Can use Ctrl as well as Meta to shift cutoffs in contig editor.
|
||||||
|
Code to read in ABI traces now robust to ABI problem files, where
|
||||||
|
called base order is not base position order.
|
||||||
|
|
||||||
|
Thu Jul 1
|
||||||
|
*Version-1993.0.6*
|
||||||
|
New xbap and bap, to fix bugs.
|
||||||
|
Break Contig was sometimes not recalculating consensus length correctly.
|
||||||
|
Contig Edit was truncating reading name lengths at 10 characters.
|
||||||
|
|
||||||
|
Thu Jun 16
|
||||||
|
*Version-1993.0.5*
|
||||||
|
New xbap and bap executables. RS changed assembly in bap so that
|
||||||
|
when entry is not permitted the program asks for the percentage
|
||||||
|
mismatch - this allows display of alignments for all levels of
|
||||||
|
mismatch.
|
||||||
|
|
||||||
|
Mon Jun 14 14:54:43 BST 1993
|
||||||
|
*Version-1993.0.4*
|
||||||
|
Bug in xdap. It was compiled with xbap's edUtils.h by mistake.
|
||||||
|
|
||||||
|
Fri Jun 11 17:50:13 BST 1993
|
||||||
|
*Version-1993.0.3*
|
||||||
|
Bugs in bap/xbap fixed. New executables included.
|
||||||
|
|
||||||
|
Thu Jun 3 13:53:38 BST 1993
|
||||||
|
*Version-1993.0.2*
|
||||||
|
Bugs in bap/xbap fixed. New executables included.
|
||||||
|
|
||||||
|
Thu May 20 14:45:38 BST 1993
|
||||||
|
*Version-1993.0.1*
|
||||||
|
Changes to makefiles and Staden_install
|
||||||
|
|
||||||
|
Fri Mar 5 11:27:22 GMT 1993
|
||||||
|
*Version-1993.0*
|
||||||
|
Now for DEC Alpha and Solaris
|
||||||
|
bap/xbap now includes double stranding and auto-creation of oligos
|
||||||
|
|
||||||
|
Tue Jan 26 11:54:36 GMT 1993
|
||||||
|
*Version-1992.3.1*
|
||||||
|
Bug fixes
|
||||||
|
1. indexseqlibs/genbentryname1.c
|
||||||
|
2. convert bugs + new programs
|
||||||
|
|
||||||
|
Mon Nov 23 13:50:39 WET 1992
|
||||||
|
*Version-1992.3*
|
||||||
|
Includes bap/xbap and utility programs
|
||||||
|
|
||||||
|
|
||||||
|
Wed Sep 30 11:18:09 BST 1992
|
||||||
|
*Version-1992.2.1*
|
||||||
|
Source changes since last release
|
||||||
|
bug fixes to postscript output, sequence library programs
|
||||||
|
New sun and dec executables
|
||||||
|
|
||||||
|
|
||||||
|
Thu Aug 27 15:27:05 BST 1992
|
||||||
|
|
||||||
|
*Version-1992.2*
|
||||||
|
|
||||||
|
|
||||||
|
Mon Jul 27 13:01:37 WET 1992
|
||||||
|
|
||||||
|
*Version-1992.1.3*
|
||||||
|
Miscellaneous bug fixes and enhancements
|
||||||
|
New sun and dec executables
|
||||||
|
|
||||||
|
|
||||||
|
Tue Jun 16 16:07:41 BST 1992
|
||||||
|
|
||||||
|
*Version-1992.1.2*
|
||||||
|
Sun sparc executables now linked with cc and not gcc.
|
||||||
|
New makefile-sun files
|
||||||
|
New sources for hitNtrg.c and freetext4.c (indexseqlibs), and
|
||||||
|
tagU2.c (staden)
|
||||||
|
|
||||||
|
|
||||||
|
Wed May 27 17:12:36 BST 1992
|
||||||
|
|
||||||
|
*Version-1992.1.1*
|
||||||
|
Inclusion of vep (vector excision program), plus minor changes and bug fixes
|
||||||
|
|
||||||
|
|
||||||
|
Tue May 26 11:10:28 WET 1992
|
||||||
|
|
||||||
|
*Version-1992.1*
|
||||||
|
This version includes the port to DEC Ultrix (mips)
|
||||||
|
|
BIN
bin/alfsplit
Normal file
BIN
bin/alfsplit
Normal file
Binary file not shown.
BIN
bin/bap
Normal file
BIN
bin/bap
Normal file
Binary file not shown.
BIN
bin/convert
Normal file
BIN
bin/convert
Normal file
Binary file not shown.
BIN
bin/cop
Normal file
BIN
bin/cop
Normal file
Binary file not shown.
BIN
bin/cop-bap
Normal file
BIN
bin/cop-bap
Normal file
Binary file not shown.
BIN
bin/dap
Normal file
BIN
bin/dap
Normal file
Binary file not shown.
BIN
bin/frog
Normal file
BIN
bin/frog
Normal file
Binary file not shown.
BIN
bin/getABISampleName
Normal file
BIN
bin/getABISampleName
Normal file
Binary file not shown.
BIN
bin/gip
Normal file
BIN
bin/gip
Normal file
Binary file not shown.
BIN
bin/lip
Normal file
BIN
bin/lip
Normal file
Binary file not shown.
BIN
bin/makeSCF
Normal file
BIN
bin/makeSCF
Normal file
Binary file not shown.
BIN
bin/mep
Normal file
BIN
bin/mep
Normal file
Binary file not shown.
BIN
bin/nip
Normal file
BIN
bin/nip
Normal file
Binary file not shown.
BIN
bin/nipf
Normal file
BIN
bin/nipf
Normal file
Binary file not shown.
BIN
bin/nipl
Normal file
BIN
bin/nipl
Normal file
Binary file not shown.
BIN
bin/pip
Normal file
BIN
bin/pip
Normal file
Binary file not shown.
BIN
bin/pipl
Normal file
BIN
bin/pipl
Normal file
Binary file not shown.
BIN
bin/rep
Normal file
BIN
bin/rep
Normal file
Binary file not shown.
BIN
bin/sap
Normal file
BIN
bin/sap
Normal file
Binary file not shown.
BIN
bin/sapf
Normal file
BIN
bin/sapf
Normal file
Binary file not shown.
BIN
bin/sethelp
Normal file
BIN
bin/sethelp
Normal file
Binary file not shown.
BIN
bin/sip
Normal file
BIN
bin/sip
Normal file
Binary file not shown.
BIN
bin/sipl
Normal file
BIN
bin/sipl
Normal file
Binary file not shown.
BIN
bin/splitp1
Normal file
BIN
bin/splitp1
Normal file
Binary file not shown.
BIN
bin/splitp2
Normal file
BIN
bin/splitp2
Normal file
Binary file not shown.
BIN
bin/splitp3
Normal file
BIN
bin/splitp3
Normal file
Binary file not shown.
BIN
bin/ted
Normal file
BIN
bin/ted
Normal file
Binary file not shown.
BIN
bin/trace2seq
Normal file
BIN
bin/trace2seq
Normal file
Binary file not shown.
BIN
bin/vep
Normal file
BIN
bin/vep
Normal file
Binary file not shown.
BIN
bin/xbap
Normal file
BIN
bin/xbap
Normal file
Binary file not shown.
BIN
bin/xbap.1
Normal file
BIN
bin/xbap.1
Normal file
Binary file not shown.
BIN
bin/xdap
Normal file
BIN
bin/xdap
Normal file
Binary file not shown.
BIN
bin/xmep
Normal file
BIN
bin/xmep
Normal file
Binary file not shown.
BIN
bin/xnip
Normal file
BIN
bin/xnip
Normal file
Binary file not shown.
BIN
bin/xpip
Normal file
BIN
bin/xpip
Normal file
Binary file not shown.
BIN
bin/xsap
Normal file
BIN
bin/xsap
Normal file
Binary file not shown.
BIN
bin/xsip
Normal file
BIN
bin/xsip
Normal file
Binary file not shown.
32
doc/Converting_Sap_Databases
Normal file
32
doc/Converting_Sap_Databases
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
Converting Sap Databases To Be Used With XDAP SD 10 July 1991
|
||||||
|
=======================================================================
|
||||||
|
|
||||||
|
The sequence assembly programmes dap and xdap are based on the programs
|
||||||
|
sap and xsap, with major modifications. For a concise summary of the
|
||||||
|
new features I refer you to Rodger and my paper, "A sequence assembly
|
||||||
|
and editing program for efficient management of large projects"
|
||||||
|
(Nucleic Acids Research, in press)
|
||||||
|
|
||||||
|
The need for storing extra information in project databases has
|
||||||
|
resulted in the creation of two files. For users who wish to use old
|
||||||
|
(sap) databases with xdap, additional files must be created to use all
|
||||||
|
the new features. The program 'convert_project' does this. It is
|
||||||
|
interactive, and asks you for names of relevant files, version numbers
|
||||||
|
etc. Here is a sample program dialogue:
|
||||||
|
|
||||||
|
|
||||||
|
% convert_project
|
||||||
|
Database conversion program
|
||||||
|
Converts *.RD? file to *.TG? and *.CC? files
|
||||||
|
|
||||||
|
Project name ? test
|
||||||
|
Version ? 0
|
||||||
|
Conversion completed.
|
||||||
|
|
||||||
|
|
||||||
|
Further, please ensure that the file TAGDB is in your project
|
||||||
|
directory. Copies can be found in $STADTABL. Alternatively ensure that
|
||||||
|
the TAGDB environment variable is set to $STADTABL/TAGDB
|
||||||
|
|
||||||
|
setenv TAGDB $STADTABL/TAGDB
|
||||||
|
|
30
doc/README
Normal file
30
doc/README
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
Processing and printing LaTeX sources
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
Given a source file src.tex, run LaTeX to generate the bibliographic
|
||||||
|
references:
|
||||||
|
|
||||||
|
latex src
|
||||||
|
|
||||||
|
Now run BibTeX to search the bibliography for them:
|
||||||
|
|
||||||
|
bibtex src
|
||||||
|
|
||||||
|
Now run LaTeX twice, first to pick up the references, second to bind
|
||||||
|
forward references:
|
||||||
|
|
||||||
|
latex src
|
||||||
|
latex src
|
||||||
|
|
||||||
|
This will have generated a src.dvi output file. Now we convert this
|
||||||
|
to PostScript:
|
||||||
|
|
||||||
|
dvi2ps src.dvi >src.ps
|
||||||
|
|
||||||
|
Now we can print this out:
|
||||||
|
|
||||||
|
lpr src.ps
|
||||||
|
|
||||||
|
Most of the above is only necessary if you are building something from
|
||||||
|
scratch, but it's best to go through it anyway until you fully
|
||||||
|
understand how LaTeX works.
|
131
doc/gip-menu.PS
Normal file
131
doc/gip-menu.PS
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
%!
|
||||||
|
/cm {28.2 mul} def
|
||||||
|
/BOXSIZE 2 cm def
|
||||||
|
|
||||||
|
/boxcen % stack: str x k boxcen -> x str dx dy (k = box width in BOXSIZE units; dx dy feed a following rmoveto)
|
||||||
|
{
|
||||||
|
% move to centre of box
|
||||||
|
BOXSIZE mul 2 div BOXSIZE 2 div rmoveto % consumes k: shift by (k*BOXSIZE/2, BOXSIZE/2)
|
||||||
|
exch % bring str to the top so it can be measured
|
||||||
|
% move back by correct amount to ensure letter is in centre of box
|
||||||
|
dup stringwidth
|
||||||
|
pop 2 div neg % halve & neg x offset
|
||||||
|
% y offset appears to be zero! - so use constant 'square' char (eg X)
|
||||||
|
(X) stringwidth pop 2 div neg % dy = minus half the width of (X), standing in for half the glyph height
|
||||||
|
} def
|
||||||
|
|
||||||
|
/letter % stack: str k letter -> - ; appends a box k*BOXSIZE wide by BOXSIZE high to the path, shows str centred in it
|
||||||
|
{
|
||||||
|
dup BOXSIZE mul 0 rlineto % first edge: k*BOXSIZE along x
|
||||||
|
0 BOXSIZE rlineto
|
||||||
|
dup BOXSIZE mul neg 0 rlineto
|
||||||
|
0 BOXSIZE neg rlineto
|
||||||
|
closepath
|
||||||
|
gsave % text placement and stroke happen on a saved copy of the graphics state
|
||||||
|
dup boxcen rmoveto % boxcen leaves k str dx dy; rmoveto applies the centring offset
|
||||||
|
show
|
||||||
|
stroke % draws the path built so far, including the box outline
|
||||||
|
grestore % restores the path and current point saved by gsave
|
||||||
|
BOXSIZE mul 0 rmoveto % consumes k: advance k*BOXSIZE along x for the next box
|
||||||
|
} def
|
||||||
|
|
||||||
|
/nextline {0 BOXSIZE neg rmoveto} def
|
||||||
|
|
||||||
|
/line
|
||||||
|
{
|
||||||
|
gsave
|
||||||
|
1 letter
|
||||||
|
1 letter
|
||||||
|
1 letter
|
||||||
|
1 letter
|
||||||
|
grestore
|
||||||
|
nextline
|
||||||
|
} def
|
||||||
|
|
||||||
|
/Times-Roman findfont 50 scalefont setfont
|
||||||
|
newpath
|
||||||
|
5 setlinewidth
|
||||||
|
200 650 translate
|
||||||
|
0 0 moveto
|
||||||
|
%2 setlinecap
|
||||||
|
|
||||||
|
gsave
|
||||||
|
(A) (G) (C) (T) line
|
||||||
|
(3) (4) (1) (2) line
|
||||||
|
(B) (H) (D) (V) line
|
||||||
|
(M) (N) (K) (L) line
|
||||||
|
(-) (X) (Y) (R) line
|
||||||
|
(8) (7) (6) (5) line
|
||||||
|
/Times-Roman findfont 25 scalefont setfont
|
||||||
|
gsave
|
||||||
|
(DELETE) 2 letter
|
||||||
|
(RESET) 2 letter
|
||||||
|
grestore
|
||||||
|
nextline
|
||||||
|
/Times-Roman findfont 35 scalefont setfont
|
||||||
|
gsave
|
||||||
|
(STOP) 4 letter
|
||||||
|
grestore
|
||||||
|
nextline
|
||||||
|
gsave
|
||||||
|
(START) 4 letter
|
||||||
|
grestore
|
||||||
|
nextline
|
||||||
|
gsave
|
||||||
|
(CONFIRM) 4 letter
|
||||||
|
grestore
|
||||||
|
nextline
|
||||||
|
% yukky from here on
|
||||||
|
gsave
|
||||||
|
0 BOXSIZE rmoveto
|
||||||
|
1 cm 0 rlineto stroke
|
||||||
|
grestore
|
||||||
|
(ORIGIN) dup 4 boxcen rmoveto show pop
|
||||||
|
(ORIGIN) stringwidth neg exch neg exch rmoveto
|
||||||
|
(X) stringwidth exch 2 div rmoveto
|
||||||
|
-5 0 rmoveto
|
||||||
|
2 setlinewidth
|
||||||
|
-45 21 rlineto
|
||||||
|
6 0 rlineto
|
||||||
|
-6 0 rmoveto
|
||||||
|
0 -6 rlineto
|
||||||
|
stroke
|
||||||
|
grestore
|
||||||
|
2 setlinewidth
|
||||||
|
0 BOXSIZE 1.4 mul rmoveto
|
||||||
|
6 6 rlineto
|
||||||
|
-6 -6 rmoveto
|
||||||
|
6 -6 rlineto
|
||||||
|
-6 6 rmoveto
|
||||||
|
80 0 rlineto
|
||||||
|
5 -6 rmoveto
|
||||||
|
/Times-Roman findfont 30 scalefont setfont
|
||||||
|
(8 cm) show
|
||||||
|
5 6 rmoveto
|
||||||
|
76 0 rlineto
|
||||||
|
-6 6 rlineto
|
||||||
|
6 -6 rmoveto
|
||||||
|
-6 -6 rlineto
|
||||||
|
stroke
|
||||||
|
0 0 moveto
|
||||||
|
BOXSIZE .4 mul neg BOXSIZE rmoveto
|
||||||
|
currentpoint translate
|
||||||
|
newpath
|
||||||
|
0 0 moveto
|
||||||
|
90 rotate
|
||||||
|
-6 6 rlineto
|
||||||
|
6 -6 rmoveto
|
||||||
|
-6 -6 rlineto
|
||||||
|
6 6 rmoveto
|
||||||
|
-244 0 rlineto
|
||||||
|
-84 0 rmoveto
|
||||||
|
0 -6 rmoveto
|
||||||
|
(20 cm) show
|
||||||
|
0 6 rmoveto
|
||||||
|
-84 0 rmoveto
|
||||||
|
-227 0 rlineto
|
||||||
|
6 6 rlineto
|
||||||
|
-6 -6 rmoveto
|
||||||
|
6 -6 rlineto
|
||||||
|
stroke
|
||||||
|
showpage
|
2426
doc/install.PS
Normal file
2426
doc/install.PS
Normal file
File diff suppressed because it is too large
Load diff
172
doc/install.tex
Normal file
172
doc/install.tex
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
\documentstyle[a4,11pt]{article}
|
||||||
|
|
||||||
|
\title{Installing the Staden Package}
|
||||||
|
\author{Simon Dear}
|
||||||
|
\date{21 May 1993}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\maketitle
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\section{Introduction}
|
||||||
|
|
||||||
|
On the accompanying tape you will find executables for
|
||||||
|
one of SunOS 4.x, Sun
|
||||||
|
Solaris 2.x, DEC Ultrix, DEC OSF/1 and Silicon Graphics SGI operating systems.
|
||||||
|
Also there are sources for all the programs in the Staden package.
|
||||||
|
Programs in the package are:
|
||||||
|
\begin{description}
|
||||||
|
|
||||||
|
\item[mep and xmep] Motif exploration program.
|
||||||
|
\item[nip and xnip] Nucleotide interpretation program.
|
||||||
|
\item[nipl] Nucleotide interpretation program (library).
|
||||||
|
Searches nucleotide libraries for patterns of motifs.
|
||||||
|
\item[pip and xpip] Protein interpretation program.
|
||||||
|
\item[pipl] Protein interpretation program (library).
|
||||||
|
Searches protein libraries for patterns of motifs.
|
||||||
|
\item[sip and xsip] Similarity investigation program.
|
||||||
|
\item[sipl] Similarity investigation program (library).
|
||||||
|
Compares a probe protein or nucleic acid sequence against
|
||||||
|
a library of sequences.
|
||||||
|
\item[sap and xsap] The original sequence assembly program.
|
||||||
|
\item[bap and xbap] Our latest, most advanced sequence assembly program.
|
||||||
|
\item[dap and xdap] An obsolete assembly program, superseded by {\em bap}.
|
||||||
|
\item[lip] Library interface program.
|
||||||
|
\item[rep] Repeat examination program.
|
||||||
|
\item[ted] X windows utility for displaying and editing
|
||||||
|
fluorescent sequencing machine traces.
|
||||||
|
\item[splitp1, splitp2 and splitp3] Refer to help/SPLITP.MEM.
|
||||||
|
\item[sethelp] Builds online help files.
|
||||||
|
\item[gip] Gel input program.
|
||||||
|
\item[convert] Converts between {\em xdap\/} and {\em xbap\/} databases.
|
||||||
|
\item[cop and cop-bap] Checks completed {\em xdap\/} and {\em xbap\/}
|
||||||
|
databases for editing errors.
|
||||||
|
\item[trace2seq] Extracts sequence from trace files.
|
||||||
|
\item[getABISampleName] Extracts sample names from ABI trace files.
|
||||||
|
\item[makeSCF] Converts existing trace files to the compact
|
||||||
|
SCF format.
|
||||||
|
\item[alfsplit] Splits the Pharmacia A.L.F. gel
|
||||||
|
file into multiple files, one for each sample.
|
||||||
|
\item[frog] Relabels lanes in ABI trace files.
|
||||||
|
\item[+ numerous scripts (including {\em squirrel (v1.4)\/})]
|
||||||
|
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Requirements}
|
||||||
|
|
||||||
|
You will need a tape drive to read the software off the distribution
|
||||||
|
tape (QIC-150, TK50, or Exabyte). You will also need a large amount of
|
||||||
|
disk storage to accommodate the whole package. For release
|
||||||
|
version-1993.0, requirements were
|
||||||
|
31Mb (SunOS 4.x),
|
||||||
|
36Mb (Sun Solaris 2.x),
|
||||||
|
30Mb (DEC Ultrix),
|
||||||
|
37Mb (DEC OSF/1)
|
||||||
|
and
|
||||||
|
27Mb (Silicon Graphics SGI.)
|
||||||
|
|
||||||
|
|
||||||
|
To compile the Staden package you will require:
|
||||||
|
\begin{itemize}
|
||||||
|
\item An ANSI C compiler.
|
||||||
|
\item A FORTRAN-77 compiler.
|
||||||
|
\item X11 (Release 4 or 5).
|
||||||
|
\item GNU make (except with SunOS and Solaris 2.x.)
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\section{Installation}
|
||||||
|
|
||||||
|
To install the package,
|
||||||
|
\begin{enumerate}
|
||||||
|
\item Create a directory for where you would like the software to be
|
||||||
|
placed. You may have to be superuser to do this.
|
||||||
|
\begin{verbatim} mkdir /home/Staden\end{verbatim}
|
||||||
|
\item Change to this directory.
|
||||||
|
\begin{verbatim} cd /home/Staden\end{verbatim}
|
||||||
|
\item Place the tape into the tape unit.
|
||||||
|
\item Extract the software off the distribution tape (NOTE: the device name may be
|
||||||
|
different on your machine):
|
||||||
|
\begin{verbatim} tar xvf /dev/rst0\end{verbatim}
|
||||||
|
\item C shell users should set the environment variable {\bf STADENROOT}
|
||||||
|
to be the directory where the package is installed and source the file
|
||||||
|
{\em staden.login} found there. This is best done by adding lines to their
|
||||||
|
{\em .login} file:
|
||||||
|
\begin{verbatim}
|
||||||
|
setenv STADENROOT /home/Staden
|
||||||
|
source $STADENROOT/staden.login
|
||||||
|
\end{verbatim}
|
||||||
|
Users of the Bourne shell, sh, should similarly add lines to their {\em .profile} file:
|
||||||
|
\begin{verbatim}
|
||||||
|
STADENROOT=/home/Staden
|
||||||
|
export STADENROOT
|
||||||
|
. $STADENROOT/staden.profile
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
The startup routines set environment variables and modify the shell's
|
||||||
|
search path so that it can find the programs in the Staden Package.
|
||||||
|
When users next log on to the system, they will be able to use the
|
||||||
|
programs.
|
||||||
|
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
|
|
||||||
|
\section {Installation on Unsupported Platforms}
|
||||||
|
|
||||||
|
Install the software as you would for a supported machine. You will
|
||||||
|
need to remake all executables. The script {\em Staden\_install} can
|
||||||
|
be used to help recompile the package. A large number of
|
||||||
|
assumptions have been made, and you may need to change the makefiles
|
||||||
|
to suit your system.
|
||||||
|
|
||||||
|
The sources have been organised into subdirectories of the directory
|
||||||
|
{\bf src}. In {\bf Misc} are routines common to many programs. They
|
||||||
|
should be made first. In {\bf staden} are all the programs of the
|
||||||
|
Staden suite ({\em mep}, {\em nip}, {\em pip}, {\em sap}, {\em sip},
|
||||||
|
{\em dap}, {\em gip}, {\em vep}, {\em lip} and {\em rep}) with the
|
||||||
|
exception of {\em bap}. Code for our latest sequence assembly program
|
||||||
|
{\em bap} is in directories {\bf bap} and {\bf bap/osp-bits}. Make
|
||||||
|
the objects in {\bf staden} first, then the ones in {\bf
|
||||||
|
bap/osp-bits}, and finally the ones in {\bf bap}. In {\bf ted} is the
|
||||||
|
trace editing program.
|
||||||
|
|
||||||
|
|
||||||
|
\section {Other Software Provided}
|
||||||
|
|
||||||
|
Other software and scripts can be found in the {\bf alf\/}, {\bf
|
||||||
|
abi\/}, {\bf cop\/}, {\bf getMCH\/}, {\bf scf\/}, {\bf frog\/} and {\bf
|
||||||
|
scripts}
|
||||||
|
directories.
|
||||||
|
Each directory contains documentation describing the programs
|
||||||
|
contained.
|
||||||
|
|
||||||
|
Since release version-1993.0 we have distributed the {\em squirrel (v1.4)}
|
||||||
|
package. Please read the disclaimer that accompanies this software.
|
||||||
|
Additional sources and scripts can be found in {\bf expGetSeq}, {\bf vepe},
|
||||||
|
{\bf newted} and {\bf squirrel-1.4} directories.
|
||||||
|
|
||||||
|
Many scripts (including {\em squirrel}) and filters were developed at the MRC-LMB for
|
||||||
|
{\bf INTERNAL USE ONLY}.
|
||||||
|
We are aware that people elsewhere will want to develop
|
||||||
|
similar software.
|
||||||
|
We include them in the Staden Package merely as {\bf EXAMPLES} of
|
||||||
|
what has been achieved elsewhere.
|
||||||
|
{\bf THESE SCRIPTS WILL NOT WORK ON YOUR SYSTEM WITHOUT MODIFICATION.}
|
||||||
|
|
||||||
|
\section {When All Else Fails...}
|
||||||
|
If you have any problems please contact the authors,
|
||||||
|
\mbox{Rodger Staden}
|
||||||
|
\mbox{(\em rs@mrc-lmba.cam.ac.uk\/)},
|
||||||
|
\mbox{Simon Dear}
|
||||||
|
\mbox{(\em sd@mrc-lmba.cam.ac.uk\/)}
|
||||||
|
and
|
||||||
|
\mbox{James Bonfield}
|
||||||
|
\mbox{(\em jkb@mrc-lmba.cam.ac.uk\/)},
|
||||||
|
by email or by writing to us at:
|
||||||
|
MRC Laboratory of Molecular Biology, Hills Road, Cambridge, \mbox{CB2 2QH}, U.K.
|
||||||
|
We also welcome general comments on the package.
|
||||||
|
|
||||||
|
\end{document}
|
5154
doc/manual.rtf
Normal file
5154
doc/manual.rtf
Normal file
File diff suppressed because it is too large
Load diff
3033
doc/ted.PS
Normal file
3033
doc/ted.PS
Normal file
File diff suppressed because it is too large
Load diff
213
doc/ted.tex
Normal file
213
doc/ted.tex
Normal file
|
@ -0,0 +1,213 @@
|
||||||
|
\documentstyle[12pt]{article}
|
||||||
|
|
||||||
|
\title{A trace display and editing program for data from fluorescence based
|
||||||
|
sequencing machines}
|
||||||
|
\author{Timothy Gleeson \and LaDeana Hillier}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\maketitle
|
||||||
|
\section*{}
|
||||||
|
\subsection*{}
|
||||||
|
\subsubsection*{ABSTRACT}
|
||||||
|
|
||||||
|
``Ted'' ({\em T}race {\em ed}itor)
|
||||||
|
is a graphical editor for sequence and trace data from automated
|
||||||
|
fluorescence sequencing machines. It provides facilities
|
||||||
|
for viewing sequence and trace data (in top or bottom strand
|
||||||
|
orientation), for editing the base sequence, for
|
||||||
|
automated or manual trimming of the head (vector) and tail
|
||||||
|
(uncertain data) from the sequence, for vertical and horizontal trace
|
||||||
|
scaling, for keeping a history of sequence editing, and for output of
|
||||||
|
the edited sequence. Ted has been used extensively in the C.
|
||||||
|
elegans genome sequencing project,
|
||||||
|
both as a stand-alone program and integrated into
|
||||||
|
the Staden sequence assembly package, and has
|
||||||
|
greatly aided in the efficiency
|
||||||
|
and accuracy of sequence editing. It runs in the X
|
||||||
|
windows environment on Sun workstations and is available from the
|
||||||
|
authors. Ted currently supports sequence and trace data from the ABI
|
||||||
|
373A and Pharmacia A.L.F. sequencers.
|
||||||
|
|
||||||
|
\subsubsection*{INTRODUCTION}
|
||||||
|
Time involved in sequence editing is extensive, and anything easing
|
||||||
|
that burden will improve the efficiency of any major sequencing
|
||||||
|
project. Having sequence and trace data available online in easily
|
||||||
|
manipulable form is invaluable. Ted (a Trace-EDitor) was developed to
|
||||||
|
fill this role in the C. elegans genome
|
||||||
|
sequencing project [1].
|
||||||
|
|
||||||
|
\subsubsection*{METHODS}
|
||||||
|
|
||||||
|
{\em Computing Design and Implementation.}
|
||||||
|
When designing ted, we had a number of specific computing goals
|
||||||
|
in mind including portability and adaptability. For portability, we
|
||||||
|
chose to write ted in ANSI C using the X windowing system and the
|
||||||
|
Xaw toolkit. X provides basic capabilities for the creation and use
|
||||||
|
of windows, and the toolkit contains a number of pre-packaged
|
||||||
|
components, such as the ``sliders'' used for scrolling. X also allows
|
||||||
|
site, user and per-run defaults to be set. Adaptability is also an
|
||||||
|
important goal since we are providing a new function to
|
||||||
|
research groups who are constantly adding new requirements.
|
||||||
|
|
||||||
|
Stylistically, we have followed an ``Abstract Data Type''
|
||||||
|
discipline. In this discipline, a program is split into a number of
|
||||||
|
modules which provide separate, well-defined functions. We
|
||||||
|
separate the interface of a module from its implementation. For
|
||||||
|
example, a unified internal sequence format is used. This can store
|
||||||
|
a varying amount of information. However, there is a clear and
|
||||||
|
simple interface by which the rest of the program accesses this
|
||||||
|
module. Such a style is not well supported by C, but its adoption has
|
||||||
|
been very successful. The addition of new sequencing machines, and
|
||||||
|
thus new external data formats, may cause some changes in the
|
||||||
|
internal representation of the sequence but should not affect
|
||||||
|
the rest of the program.
|
||||||
|
|
||||||
|
Ted accepts a large number of optional command line arguments,
|
||||||
|
many of which can also be specified as system defaults. This
|
||||||
|
supports a mode of working whereby ted is invoked not directly by the
|
||||||
|
user but instead by a script or another application which supplies
|
||||||
|
arguments appropriate to the editing task.
|
||||||
|
|
||||||
|
|
||||||
|
{\em Graphical Interface.}
|
||||||
|
Ted currently accepts data from two fluorescence based sequencing
|
||||||
|
machines, the Pharmacia A.L.F. and the ABI 373A.
|
||||||
|
The sequencing machine data consists of
|
||||||
|
four traces of fluorescence levels together with the machine's
|
||||||
|
interpretation, which is a sequence of bases.
|
||||||
|
Ted displays
|
||||||
|
the traces and the machine-generated base list.
|
||||||
|
A second, initially identical, list of bases is provided for correction
|
||||||
|
by the user.
|
||||||
|
|
||||||
|
Ted has an X windows based
|
||||||
|
graphical interface. The trace file
|
||||||
|
can either be input from the command line or by
|
||||||
|
clicking on the INPUT button after the program has been invoked.
|
||||||
|
Other parameters which the user may specify on the
|
||||||
|
command line include: the output
|
||||||
|
file name; a base position or sequence string on which the trace is
|
||||||
|
to be centered; a default trace magnification; a 5' vector sequence
|
||||||
|
for automated elimination of the sequence head (vector); top or
|
||||||
|
bottom strand orientation; or any of the usual X-window parameters (e.g.
|
||||||
|
display, geometry...).
|
||||||
|
|
||||||
|
The graphics display (Figure 1) consists of the control
|
||||||
|
panel, the base position information, the original and edited sequence
|
||||||
|
data, and the graphical representation of the trace. The user may
|
||||||
|
begin by using the control panel INPUT button to input a new trace
|
||||||
|
file at which time the user selects whether to view the sequence
|
||||||
|
and trace in top or bottom strand orientation.
|
||||||
|
The trace file is displayed and, if a 5' vector sequence has been
|
||||||
|
specified on the command line, the program attempts to select a
|
||||||
|
cutoff point corresponding to the vector sequence at the ``head'' of the
|
||||||
|
trace file. The bases beyond the ``cutoff'' point are
|
||||||
|
displayed on a shaded background. The user may modify the cutoff
|
||||||
|
position by clicking on the ``Adj left cut'' button and clicking on the
|
||||||
|
position of the desired cutoff. Similarly, the user may adjust the
|
||||||
|
right cutoff of the sequence (chosen by starting at the 5' end of the
|
||||||
|
sequence and looking for the first occurrence when 2 out of 5 bases
|
||||||
|
are 'N') by scrolling along the sequence to that point, clicking on the
|
||||||
|
``Adj right cut'' button, and clicking on the appropriate base.
|
||||||
|
Automation of the ``cutoff'' process is optional; the user may compile
|
||||||
|
the program with that feature turned ``off.''
|
||||||
|
|
||||||
|
Clicking on the ``Edit seq'' button allows the user to enter the edit
|
||||||
|
mode. The ``Search'' button can be used to skip from ``problem'' to
|
||||||
|
``problem'' (i.e., ambiguity to ambiguity) or to look for runs of
|
||||||
|
identical bases (e.g., TTTT) which are often mis-called by
|
||||||
|
the machine software.
|
||||||
|
|
||||||
|
Bases can be inserted, deleted, or replaced as with
|
||||||
|
any ordinary word-processor. In difficult-to-read areas,
|
||||||
|
the trace may be vertically or horizontally scaled by dragging or
|
||||||
|
clicking on the magnification scroll bar or by clicking on the
|
||||||
|
vertical scaling buttons (``Scale down'', ``Scale up''), respectively.
|
||||||
|
Finally, the edited sequence is saved to an ascii file using the
|
||||||
|
``Output'' button. A history of the editing session can also be saved
|
||||||
|
along with the sequence.
|
||||||
|
The ``Quit'' button is used
|
||||||
|
to exit the program. When reinvoking ted on an edited trace file the
|
||||||
|
edited base sequence, rather than the original sequence, is shown in
|
||||||
|
the edited base window. The user may invoke ted by calling in any one
|
||||||
|
of the previous editing sessions.
|
||||||
|
|
||||||
|
|
||||||
|
\subsubsection*{APPLICATIONS AND CONCLUSIONS}
|
||||||
|
|
||||||
|
In the C. elegans genome sequencing project, data from the ABI or
|
||||||
|
A.L.F. sequencing machines' computers are transferred to Sun
|
||||||
|
workstations.
|
||||||
|
The user invokes a Unix shell script that calls ted systematically
|
||||||
|
on each of the new set of trace files creating a set of sequence files.
|
||||||
|
The sequence files that are deemed to be of acceptable quality
|
||||||
|
are then entered into the sequence
|
||||||
|
assembly program xdap [2] where the sequences are assembled into
|
||||||
|
contigs. Portions of the ted trace-editor have been incorporated
|
||||||
|
into the xdap ``trace manager,'' which is used in
|
||||||
|
conjunction with the contig editor to view sets of aligned traces
|
||||||
|
at sites of discrepancies in the aligned sequences.
|
||||||
|
|
||||||
|
Ted is also used at the stage of choosing oligo primers for the
|
||||||
|
``walking'' stage of the sequencing project. It can be invoked directly
|
||||||
|
from the oligo selection program, osp [3], to allow examination
|
||||||
|
of the trace data in the region of the primers so that
|
||||||
|
integrity of the sequence data can be verified.
|
||||||
|
|
||||||
|
Currently, no other programs are known to be available
|
||||||
|
which support editing of the ABI trace data.
|
||||||
|
Further, the modular design of the program should allow
|
||||||
|
support for new types of sequencing machines, with new data
|
||||||
|
formats, to be implemented in a straightforward fashion.
|
||||||
|
|
||||||
|
|
||||||
|
\subsubsection*{AVAILABILITY}
|
||||||
|
Ted is freely available from the authors or from Rodger Staden and
|
||||||
|
Simon Dear (MRC Laboratory of Molecular Biology, Hills Road, Cambridge,
|
||||||
|
UK, CB2 2QH) for use on Sun workstations running X-windows (or OpenLook).
|
||||||
|
|
||||||
|
|
||||||
|
\subsubsection*{ACKNOWLEDGMENTS}
|
||||||
|
The authors would like to thank all members of the C. elegans
|
||||||
|
sequencing project with special thanks to the following people:
|
||||||
|
John Sulston, Bob Waterston,
|
||||||
|
Phil Green, Rick Wilson, Richard Durbin, Simon Dear, and Rodger Staden
|
||||||
|
for their helpful suggestions for improvements in the ted interface
|
||||||
|
and for their parts in the development of ted. This work was
|
||||||
|
supported by the Medical Research Council and NIH grant R01-HG00136.
|
||||||
|
|
||||||
|
\subsubsection*{REFERENCES}
|
||||||
|
|
||||||
|
1. Waterston, R., Sulston, J., et al. (1991), in preparation.
|
||||||
|
|
||||||
|
2. Dear, S. and Staden, R. (1991) Nuc. Acids Res., in press.
|
||||||
|
|
||||||
|
3. Hillier, L. and Green, P. (1991) submitted.
|
||||||
|
|
||||||
|
|
||||||
|
{\bf Figure 1 legend.}
|
||||||
|
|
||||||
|
Figure 1 shows a ``screen dump'' of the ted graphical interface.
|
||||||
|
The display consists of
|
||||||
|
the control panel and the synchronized view of the base position
|
||||||
|
information, original and edited sequence data,
|
||||||
|
and graphical representation of the trace (with each nucleotide's trace
|
||||||
|
being represented
|
||||||
|
by a different color). The control
|
||||||
|
panel allows the user to read in new trace files (in either
|
||||||
|
bottom or top strand orientation)
|
||||||
|
as well as to search for a string of nucleotides or a certain base position.
|
||||||
|
Scroll bars allow the user to adjust the magnification of or scroll through
|
||||||
|
the sequence and trace data. The user may also choose to change the vertical
|
||||||
|
magnification of the trace data. Further, sequence on the head (vector)
|
||||||
|
or tail (uncertain data) of the sequence may be ``cutoff''
|
||||||
|
using the adjust left and right cutoff buttons. Bases can be inserted,
|
||||||
|
deleted, or replaced as with
|
||||||
|
any ordinary word-processor in the sequence data window. Finally, the
|
||||||
|
sequence may be written to an ascii file using the output button on
|
||||||
|
the control panel.
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
|
||||||
|
|
2722
help/BAP.RNO
Normal file
2722
help/BAP.RNO
Normal file
File diff suppressed because it is too large
Load diff
2724
help/DAP.RNO
Normal file
2724
help/DAP.RNO
Normal file
File diff suppressed because it is too large
Load diff
205
help/GIP.RNO
Normal file
205
help/GIP.RNO
Normal file
|
@ -0,0 +1,205 @@
|
||||||
|
.NPA
|
||||||
|
.left margin1
|
||||||
|
.CENTER
|
||||||
|
GIP
|
||||||
|
.LEFT MARGIN1
|
||||||
|
.PARA
|
||||||
|
A digitizer is
|
||||||
|
a two dimensional surface
|
||||||
|
which is such that if a special pen is pressed onto it, the pen's
|
||||||
|
coordinates can be recorded by a computer.
|
||||||
|
These coordinates
|
||||||
|
can be interpreted by a program.
|
||||||
|
.para
|
||||||
|
The digitizing device we use works by the pen emitting a high frequency
|
||||||
|
sound which is picked up by two microphones positioned at the rear of the
|
||||||
|
working area. The pen position is determined by triangulation and the
|
||||||
|
digitizing device sends the coordinates to the computer. As no special
|
||||||
|
surface is required the device can conveniently be positioned on a light
|
||||||
|
box giving the sequencer an unobscured view of the autoradiographs.
|
||||||
|
.LEFT MARGIN1
|
||||||
|
The digitizer
|
||||||
|
is called a GRAPHBAR MODEL GP7 made by
|
||||||
|
Science Accessories Corp,
|
||||||
|
970 Kings Highway West,
|
||||||
|
Southport,
|
||||||
|
Connecticut 06490,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
.para
|
||||||
|
The program uses a menu to allow the user to select commands or
|
||||||
|
to enter the uncertainty codes for areas of the gel that are
|
||||||
|
difficult to interpret. A menu is simply a series of boxes drawn on
|
||||||
|
the digitizing surface that each contain a command or
|
||||||
|
uncertainty code. When the user puts the pen down in these special
|
||||||
|
regions the program interprets the coordinates as commands and acts
|
||||||
|
appropriately. A copy of the menu should have been sent to you.
|
||||||
|
It should be stuck down on the surface of the
|
||||||
|
light box in the digitizing area. For convenience it is best to position it
|
||||||
|
to the right of the digitizing area, but in practice as long as
|
||||||
|
its top
|
||||||
|
edge is parallel to the digitizer box, it can be put anywhere in the active
|
||||||
|
region.
|
||||||
|
.sk1
|
||||||
|
.left margin1
|
||||||
|
Entering gel readings using a digitizer
|
||||||
|
.left margin1
|
||||||
|
.para
|
||||||
|
The autoradiograph should be stuck down on the light box with the lanes
|
||||||
|
running, as near as is
|
||||||
|
possible, at right angles to the digitizer. To read
|
||||||
|
an autoradiograph placed on the light box
|
||||||
|
the user need only define the positions of
|
||||||
|
the four sequencing lanes and the bases
|
||||||
|
to which they correspond and then use the pen to point to each
|
||||||
|
successive band progressing up the gel. The program examines the
|
||||||
|
coordinates of each pen position to see in which of the four
|
||||||
|
lanes
|
||||||
|
it lies and assigns the corresponding base to be stored in the
|
||||||
|
computer. Each time the pen tip is depressed to point to a position
|
||||||
|
on the surface of the digitizer the program sounds the bell on the
|
||||||
|
terminal (a different sound for each of the four bases on the
|
||||||
|
microcomputer version of the program)
|
||||||
|
to indicate to the user that a point has been recorded. As
|
||||||
|
the sequence is read the program displays it on the screen.
|
||||||
|
|
||||||
|
|
||||||
|
.para
|
||||||
|
The program uses a menu
|
||||||
|
to allow the user to select commands or
|
||||||
|
to enter the uncertainty codes for areas of the gel that are
|
||||||
|
difficult to interpret. A menu is simply a series of boxes drawn on
|
||||||
|
the digitizing surface that each contain a command or
|
||||||
|
uncertainty code. When the user puts the pen down in these special
|
||||||
|
regions the program interprets the coordinates as commands and acts
|
||||||
|
appropriately. As well as the uncertainty codes
|
||||||
|
A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8 the following commands are
|
||||||
|
included in the menu: DELETE removes the last character from
|
||||||
|
the sequence;
|
||||||
|
RESET allows the lane centres to be redefined;
|
||||||
|
START means begin the next
|
||||||
|
stage of the procedure; STOP means stop the current stage in the
|
||||||
|
procedure; CONFIRM means confirm that the last command or set of
|
||||||
|
coordinates are correct.
|
||||||
|
.para
|
||||||
|
The digitizing device also has a menu of its own. This lies in a two inch wide
|
||||||
|
strip immediately in front of the digitizing box. Pen positions within this
|
||||||
|
two inch strip are interpreted as commands to the digitizer and are not
|
||||||
|
sent to the GIP program. In general the only time users will need to use
|
||||||
|
the device menu is when they tell GIP where the program menu lies in the
|
||||||
|
digitizing area. This is done by first hitting ORIGIN in the device menu
|
||||||
|
and then hitting the bottom left hand corner of the program menu. The
|
||||||
|
program menu can hence be positioned anywhere in the active region but
|
||||||
|
should be arranged parallel to the digitizer.
|
||||||
|
.para
|
||||||
|
The user should try to hit the bands as near as possible to the centre of
|
||||||
|
the lanes because the program tracks the lanes up the film using the pen
|
||||||
|
positions. By using this tracking strategy the user only has to define the
|
||||||
|
centres of the bottom of the lanes before starting to read the film. The
|
||||||
|
program can correctly follow quite curved lanes and constantly checks that
|
||||||
|
its lane centre coordinates look sensible. If the lane centres appear to be
|
||||||
|
getting too close the program stops responding to the pen positions of
|
||||||
|
bands and hence does not ring the bell. If this occurs users must hit the
|
||||||
|
reset box in the menu and the program will request them to redefine the
|
||||||
|
lane centres at the current reading position. Then they can continue
|
||||||
|
reading. As a further safeguard the program will only respond to pen
|
||||||
|
positions either in the menu or very close to the current reading position.
|
||||||
|
.sk1
|
||||||
|
.left margin1
|
||||||
|
Running the gel reading program
|
||||||
|
.left margin1
|
||||||
|
The autoradiograph should be firmly stuck down on the light box and the
|
||||||
|
program started by typing GIP. It will ask the first question.
|
||||||
|
.left margin2
|
||||||
|
" ? FILE OF FILE NAMES="
|
||||||
|
.left margin2
|
||||||
|
Type the name for the file of file names and then follow the instructions.
|
||||||
|
.left margin2
|
||||||
|
" HIT DIGITIZER MENU ORIGIN"
|
||||||
|
.left margin2
|
||||||
|
" THEN PROGRAM MENU ORIGIN"
|
||||||
|
.left margin2
|
||||||
|
" THEN HIT START IN PROGRAM MENU"
|
||||||
|
.left margin2
|
||||||
|
If the bell does not sound after you hit start try hitting metric in the
|
||||||
|
device menu (the program uses metric units, and some digitizers are set to
|
||||||
|
default to use inches; hitting metric switches between the two).
|
||||||
|
.left margin2
|
||||||
|
After the bell has sounded the program will give the default lane order.
|
||||||
|
.left margin2
|
||||||
|
" LANE ORDER IS T C A G"
|
||||||
|
.left margin2
|
||||||
|
" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
|
||||||
|
.left margin2
|
||||||
|
If the lane order, reading from left to right is correct hit confirm in the
|
||||||
|
program menu. If you are using a different order hit reset and you will be
|
||||||
|
asked to define the lane order from left to right using the program menu
|
||||||
|
(as follows).
|
||||||
|
.left margin2
|
||||||
|
" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
|
||||||
|
.left margin2
|
||||||
|
Hit the boxes in the menu that contain the symbols A,C,G,T in the
|
||||||
|
left-right order of the lanes. The program will respond with the lane order
|
||||||
|
as above and ask for confirmation. When this is received, the next task is
|
||||||
|
to define the start positions of the next four lanes.
|
||||||
|
.left margin2
|
||||||
|
" HIT START, THEN HIT (LEFT TO RIGHT)"
|
||||||
|
.left margin2
|
||||||
|
" THE START POSITIONS FOR THE NEXT FOUR LANES"
|
||||||
|
.left margin2
|
||||||
|
Hit the centres of the four lanes at a height level with the first band
|
||||||
|
that is going to be read. The program will report the mean lane separations
|
||||||
|
and ask for confirmation that they are correct.
|
||||||
|
.left margin2
|
||||||
|
" MEAN LANE SEPARATION IS XX"
|
||||||
|
.left margin2
|
||||||
|
" HIT CONFIRM TO CONTINUE"
|
||||||
|
.left margin2
|
||||||
|
Users will become familiar with the values from their films and will spot
|
||||||
|
any unusual numbers.
|
||||||
|
Asking for confirmation allows users to try again if they had made a
|
||||||
|
mistake, but generally the lane separation values can be ignored.
|
||||||
|
Hit confirm, and the program will give the message
|
||||||
|
.left margin2
|
||||||
|
" HIT START WHEN READY TO BEGIN READING"
|
||||||
|
.left margin2
|
||||||
|
Hit start and the program will give the message
|
||||||
|
.left margin2
|
||||||
|
" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
|
||||||
|
.left margin2
|
||||||
|
Hit the bands, interpreting the sequence progressing
|
||||||
|
up the film. If necessary use the uncertainty codes. If the pen stops
|
||||||
|
responding hit reset and follow the instructions as above. When the
|
||||||
|
sequence becomes unreadable hit stop and the program will ask for a file
|
||||||
|
name for the gel reading just read.
|
||||||
|
.left margin2
|
||||||
|
" ? FILE NAME FOR THIS GEL READING="
|
||||||
|
.left margin2
|
||||||
|
Type the file name observing the rules about legal gel reading names.
|
||||||
|
The program will ask if you wish
|
||||||
|
to read another sequence.
|
||||||
|
.left margin2
|
||||||
|
" TO ENTER ANOTHER GEL READING TYPE 1"
|
||||||
|
.left margin2
|
||||||
|
To enter another, type 1 and you will be back to the step of defining the
|
||||||
|
lane order. Typing anything else will stop the program.
|
||||||
|
.left margin1
|
||||||
|
.sk1
|
||||||
|
Running the microcomputer version of the gel reading program
|
||||||
|
.left margin1
|
||||||
|
The microcomputer version of GIP is slightly different and is called
|
||||||
|
GIPB. The BBC micro
|
||||||
|
does not have the capacity to process the gel readings beyond the reading
|
||||||
|
stage.
|
||||||
|
This means that users of this program
|
||||||
|
would need to transfer their gel readings from the micro to another machine
|
||||||
|
using a terminal emulator. Transferring many files is tedious and so the
|
||||||
|
microcomputer version of the gel reading program stores all the gel
|
||||||
|
readings for each run of the program in a single file. This special
|
||||||
|
file contains both sequences and file names and can be moved in a single
|
||||||
|
transfer to another machine. Once on the other machine the single file must
|
||||||
|
be split into separate gel reading files and a file of file names. This is
|
||||||
|
done using the program BSPLIT. As far as using the microcomputer version
|
||||||
|
of GIP, the only difference is that the first file name the program
|
||||||
|
requests is not a file of file names, but a name for the single file to
|
||||||
|
contain all the gel readings and their names.
|
859
help/MEP.RNO
Normal file
859
help/MEP.RNO
Normal file
|
@ -0,0 +1,859 @@
|
||||||
|
.NPA
|
||||||
|
.SP 1
|
||||||
|
.left margin1
|
||||||
|
@-1. TX 0 @General
|
||||||
|
.sp
|
||||||
|
@-2. T 0 @Screen control
|
||||||
|
.sp
|
||||||
|
@-2. X 0 @Screen
|
||||||
|
.sp
|
||||||
|
@-3. TX 0 @Dictionary analysis
|
||||||
|
.sp
|
||||||
|
@0. TX -1 @MEP
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
This is a program for analysing families of nucleotide sequences in order
|
||||||
|
to find common motifs and potential binding sites.
|
||||||
|
The ideas in this program were described in Staden, R. "Methods
|
||||||
|
for discovering novel motifs in nucleic acid sequences".
|
||||||
|
Computer Applications in the Biosciences, 5, 293-298, (1989).
|
||||||
|
.PARA
|
||||||
|
The program can read
|
||||||
|
sequences stored in either of two formats: 1) all sequences aligned in a
|
||||||
|
single file; 2) all sequences in separate files and accessed through a file
|
||||||
|
of file names.
|
||||||
|
.PARA
|
||||||
|
The program contains functions that can answer several questions
|
||||||
|
about a set of sequences:
|
||||||
|
.SK1
|
||||||
|
.left margin2
|
||||||
|
Which words are most common?
|
||||||
|
.left margin2
|
||||||
|
Which words occur in the most sequences?
|
||||||
|
.left margin2
|
||||||
|
Which words contain the most information?
|
||||||
|
.left margin2
|
||||||
|
Which words occur in equivalent positions in the sequences?
|
||||||
|
.left margin2
|
||||||
|
Which words are inverted repeats?
|
||||||
|
.left margin2
|
||||||
|
Which words occur on both strands of the sequences?
|
||||||
|
.left margin2
|
||||||
|
Where are the inverted repeats?
|
||||||
|
.left margin2
|
||||||
|
Where are the fuzzy words?
|
||||||
|
.para
|
||||||
|
Most of the program is
|
||||||
|
concerned with analysing
|
||||||
|
what it terms "fuzzy
|
||||||
|
words" within the set of sequences. The analysis is explained
|
||||||
|
below. Note that the standard version of the program is limited
|
||||||
|
to words of maximum length 8 letters, and a maximum fuzziness
|
||||||
|
of 2.
|
||||||
|
.para
|
||||||
|
The following analyses (preceded by their option numbers) are included:
|
||||||
|
.lit
|
||||||
|
? = Help
|
||||||
|
! = Quit
|
||||||
|
3 = Read new sequences
|
||||||
|
4 = Redefine active region
|
||||||
|
5 = List the sequences
|
||||||
|
6 = List text file
|
||||||
|
7 = Direct output to disk
|
||||||
|
10 = Clear graphics
|
||||||
|
11 = Clear text
|
||||||
|
12 = Draw ruler
|
||||||
|
13 = Use cross hair
|
||||||
|
14 = Reset margins
|
||||||
|
15 = Label diagram
|
||||||
|
16 = Draw map
|
||||||
|
17 = Search for strings
|
||||||
|
18 = Set strand
|
||||||
|
19 = Set composition
|
||||||
|
20 = Set word length
|
||||||
|
21 = Set number of mismatches
|
||||||
|
22 = Show settings
|
||||||
|
23 = Make dictionary Dw
|
||||||
|
24 = Make dictionary Ds
|
||||||
|
25 = Make fuzzy dictionary Dm from Dw
|
||||||
|
26 = Make fuzzy dictionary Dm from Ds
|
||||||
|
27 = Make fuzzy dictionary Dh from Dm
|
||||||
|
28 = Examine fuzzy dictionary Dm
|
||||||
|
29 = Examine fuzzy dictionary Dh
|
||||||
|
30 = Examine words in Dm
|
||||||
|
31 = Examine words in Dh
|
||||||
|
32 = Save or restore a dictionary
|
||||||
|
33 = Find inverted repeats
|
||||||
|
.end lit
|
||||||
|
.para
|
||||||
|
Some of these methods produce graphical
|
||||||
|
results
|
||||||
|
and so the
|
||||||
|
program is generally used from a graphics terminal (a vdu on which lines
|
||||||
|
and points can be drawn as well as characters).
|
||||||
|
.para
|
||||||
|
.LEFT MARGIN2
|
||||||
|
The position of each of the plots is defined relative to a user's drawing
|
||||||
|
board which has size 1-10,000 in x and 1-10,000 in y.
|
||||||
|
Plots for
|
||||||
|
each option are drawn in a window defined by x0,y0 and xlength,ylength.
|
||||||
|
Where x0,y0 is the position of the bottom left hand corner of the window,
|
||||||
|
and xlength is the width of the window and ylength the
|
||||||
|
height of the window.
|
||||||
|
.lit
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||||
|
The default window positions are read from a file "MEPMARG" when the
|
||||||
|
program is started. Users can have their own file if required.
|
||||||
|
.para
|
||||||
|
The options for the program are accessed from 3 main menus: general, screen
|
||||||
|
control and dictionary analysis.
|
||||||
|
Both menus and options are selected by number.
|
||||||
|
.para
|
||||||
|
The most important and novel part of the program is its use of "fuzzy
|
||||||
|
dictionaries" and an information theory measure, to help show the most
|
||||||
|
interesting motifs.
|
||||||
|
|
||||||
|
Central to the method is the idea of a fuzzy dictionary of word
|
||||||
|
frequencies. A dictionary of word frequencies is an ordered list of
|
||||||
|
all the words in the sequences and a count of the number of times
|
||||||
|
that they occur. A fuzzy dictionary is an equivalent list but which
|
||||||
|
contains instead, for each word, a count of the number of times
|
||||||
|
similar words occur in the sequences. We term words that are
|
||||||
|
similar "relations". The fuzziness is defined by the number of
|
||||||
|
letters in a word that are allowed to be different. So if we had a
|
||||||
|
fuzziness of 1 we allow 1 letter to be different. For example, with
|
||||||
|
a fuzziness of 1, the entry in the fuzzy dictionary for the word
|
||||||
|
TTTTTT would contain a count of the numbers of times TTTTTT
|
||||||
|
occurred plus the number of times all words differing by exactly
|
||||||
|
one letter from TTTTTT occurred.
|
||||||
|
.para
|
||||||
|
Once the fuzzy dictionary has been created we can examine it in
|
||||||
|
several ways to find candidate control sequences. The simplest
|
||||||
|
question we can ask is which word in the dictionary is the most
|
||||||
|
common. Sometimes this simple criterion of "most common" may
|
||||||
|
be adequate to discover a new motif but in general we would not
|
||||||
|
expect it to be sufficient. For example some words will be common
|
||||||
|
simply because of a base composition bias in the sequences being
|
||||||
|
analysed. In addition a word can be the most frequent and yet not
|
||||||
|
be "well defined". This last point is best explained by an example.
|
||||||
|
.para
|
||||||
|
Suppose we were looking at two letter words and allowing one
|
||||||
|
mismatch, and that there were 10 occurrences of TT and 5 of AC.
|
||||||
|
We could align the 10 words that were one letter different from TT
|
||||||
|
and the 5 that were related to AC. Then we could count the
|
||||||
|
number of times each base occurred in each position for each of
|
||||||
|
these two sets of words. Suppose we got the two base frequency
|
||||||
|
tables shown below.
|
||||||
|
.lit
|
||||||
|
TT AC
|
||||||
|
T 6 4 T 1 0
|
||||||
|
C 1 3 C 0 4
|
||||||
|
A 1 2 A 4 1
|
||||||
|
G 2 1 G 0 0
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
These tables show that although TT occurs (with one letter
|
||||||
|
mismatch) more often than AC, the ratio of base frequencies for
|
||||||
|
AC at 4/5, 4/5 is higher than those for TT at 6/10, 4/10. Hence we
|
||||||
|
would say that AC was better defined than TT.
|
||||||
|
Expressing this another way we would say that the definition of AC
|
||||||
|
contained more information than that for TT. The program
|
||||||
|
calculates the information content in a way that takes into account
|
||||||
|
both the sequence composition and the level of definition of the
|
||||||
|
motif.
|
||||||
|
.para
|
||||||
|
Definitions
|
||||||
|
|
||||||
|
.para
|
||||||
|
Here we deal only with the dictionary analysis.
|
||||||
|
Suppose we are dealing with a set of
|
||||||
|
sequences and are examining them for words that are six
|
||||||
|
characters in length.
|
||||||
|
|
||||||
|
.para
|
||||||
|
Dictionary Dw contains a count of the number of times each word
|
||||||
|
occurs in the set of sequences. For example the entry for TTTTTT
|
||||||
|
contains a value equal to the number of times the word TTTTTT
|
||||||
|
occurs in the set of sequences.
|
||||||
|
|
||||||
|
.para
|
||||||
|
Dictionary Ds contains a count of the number of different sequences in
|
||||||
|
which each word occurs. For example if the entry for word TTTTTT
|
||||||
|
contains the value 10, it denotes that the word TTTTTT occurs in ten
|
||||||
|
different sequences. Unlike Dw it only counts words once for each
|
||||||
|
sequence. For example if we had a set of 100 sequences, the maximum
|
||||||
|
possible value that Ds could take is 100, and this would only happen if
|
||||||
|
a word occurred in every sequence. However for the same set of
|
||||||
|
sequences, Dw could contain values greater than 100, and this would
|
||||||
|
show that a word had occurred more than once in at least one
|
||||||
|
sequence.
|
||||||
|
|
||||||
|
.para
|
||||||
|
From either of the two dictionaries Dw or Ds we can calculate a fuzzy
|
||||||
|
dictionary Dm. For each word, the entry in the fuzzy dictionary Dm
|
||||||
|
contains the sum of the dictionary values (taken from either Dw or Ds)
|
||||||
|
for all words that differ from it by up to m letters. For example if m=2
|
||||||
|
the entry for TTTTTT contains the number of times that TTTTTT
|
||||||
|
occurs in the dictionary, plus the counts for all words that differ from
|
||||||
|
TTTTTT by 1 or 2 letters.
|
||||||
|
Obviously the interpretation of the values in Dm depends on which of
|
||||||
|
the two dictionaries Dw or Ds they were derived from. When derived
|
||||||
|
from Dw the entry for any word in Dm gives the total number of
|
||||||
|
times it, and its relations, occur in the set of sequences. When derived
|
||||||
|
from Ds the entry for any word in Dm gives the total number of
|
||||||
|
different sequences that contain a word and each of its relations.
|
||||||
|
|
||||||
|
.para
|
||||||
|
Finally, from fuzzy dictionary Dm we can derive fuzzy dictionary Dh.
|
||||||
|
All entries in Dh are zero except for the word(s), within each set of
|
||||||
|
relations, that are most frequent. For example if TTTTTT occurred 20
|
||||||
|
times but had a relation that occurred more often, then the entry for
|
||||||
|
TTTTTT would be zero. However if TTTTTT did not have a more
|
||||||
|
frequently occurring relation, then the entry for TTTTTT would
|
||||||
|
contain the value 20.
|
||||||
|
|
||||||
|
.LEFT MARGIN1
|
||||||
|
@1. T 0 @Help
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
This option gives online help. The user should select option numbers and
|
||||||
|
the current documentation will be given. Note that option 0 gives an
|
||||||
|
introduction to the program, and that ? will get help from anywhere in
|
||||||
|
the
|
||||||
|
program.
|
||||||
|
The following analyses (preceded by their option numbers) are included:
|
||||||
|
.lit
|
||||||
|
? = Help
|
||||||
|
! = Quit
|
||||||
|
3 = Read new sequences
|
||||||
|
4 = Redefine active region
|
||||||
|
5 = List the sequences
|
||||||
|
6 = List text file
|
||||||
|
7 = Direct output to disk
|
||||||
|
10 = Clear graphics
|
||||||
|
11 = Clear text
|
||||||
|
12 = Draw ruler
|
||||||
|
13 = Use cross hair
|
||||||
|
14 = Reset margins
|
||||||
|
15 = Label diagram
|
||||||
|
16 = Draw map
|
||||||
|
17 = Search for strings
|
||||||
|
18 = Set strand
|
||||||
|
19 = Set composition
|
||||||
|
20 = Set word length
|
||||||
|
21 = Set number of mismatches
|
||||||
|
22 = Show settings
|
||||||
|
23 = Make dictionary Dw
|
||||||
|
24 = Make dictionary Ds
|
||||||
|
25 = Make fuzzy dictionary Dm from Dw
|
||||||
|
26 = Make fuzzy dictionary Dm from Ds
|
||||||
|
27 = Make fuzzy dictionary Dh from Dm
|
||||||
|
28 = Examine fuzzy dictionary Dm
|
||||||
|
29 = Examine fuzzy dictionary Dh
|
||||||
|
30 = Examine words in Dm
|
||||||
|
31 = Examine words in Dh
|
||||||
|
32 = Save or restore a dictionary
|
||||||
|
33 = Find inverted repeats
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@2. T 0 @Quit
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
This function stops the program.
|
||||||
|
.left margin1
|
||||||
|
@3. TX 1 @Read a new sequence
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
It can read
|
||||||
|
sequences stored in either of two formats: 1) all sequences aligned in a
|
||||||
|
single file; 2) all sequences in separate files and accessed through a file
|
||||||
|
of file names. Typical dialogue follows:
|
||||||
|
.lit
|
||||||
|
|
||||||
|
X 1 Read file of aligned sequences
|
||||||
|
2 Use file of file names
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? File of aligned sequences=F1
|
||||||
|
Number of files 88
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@4. TX 1 @Define active region
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
For its analytic functions
|
||||||
|
the program always works on a region of the sequence called the active
|
||||||
|
region. When new sequences are read into the program the active region is
|
||||||
|
automatically set to start at the beginning of the sequences and go
|
||||||
|
up to the end of the longest one.
|
||||||
|
.left margin1
|
||||||
|
@5. TX 1 @List a sequence
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
The sequence can be listed with line lengths of 50 bases with each sequence
|
||||||
|
numbered in the order in which they were read.
|
||||||
|
Output can be directed to a disk file by
|
||||||
|
first selecting disk output. Typical dialogue follows.
|
||||||
|
.lit
|
||||||
|
|
||||||
|
? Menu or option number=5
|
||||||
|
|
||||||
|
10 20 30 40 50
|
||||||
|
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||||
|
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||||
|
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||||
|
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||||
|
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||||
|
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||||
|
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||||
|
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||||
|
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||||
|
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||||
|
|
||||||
|
60
|
||||||
|
1 TACCCGTTTTT
|
||||||
|
2 GCGTTTTTGT
|
||||||
|
3 TCATACCATAAG
|
||||||
|
4 TTTCATACC
|
||||||
|
5 ATTGTGAGC
|
||||||
|
6 TTCCGGCTCG
|
||||||
|
7 GAAGAGAGT
|
||||||
|
8 TCAGGTGT
|
||||||
|
9 ATGAATG
|
||||||
|
10 TAATTACG
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@6. TX 1 @List a text file
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
Allows the user to have a text file displayed on the screen. It will appear
|
||||||
|
one page at a time.
|
||||||
|
.left margin1
|
||||||
|
@7. TX 1 @Direct output to disk
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
Used to direct output that would normally appear on the screen to a file.
|
||||||
|
.para
|
||||||
|
Select redirection of either text or graphics, and
|
||||||
|
supply the name of the file that the output should be written to.
|
||||||
|
.para
|
||||||
|
The results from the next options selected will not appear on the screen
|
||||||
|
but will be written to the file. When option 7 is selected again
|
||||||
|
the file will be
|
||||||
|
closed and output will again appear on the screen.
|
||||||
|
.left margin1
|
||||||
|
@10. TX 2 @Clear graphics
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
Clears the screen of both text and graphics.
|
||||||
|
.left margin1
|
||||||
|
@11. TX 2 @Clear text
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
Clears only text from the screen.
|
||||||
|
.left margin1
|
||||||
|
@12. TX 2 @Draw a ruler
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
This option
|
||||||
|
allows the user to draw a ruler or scale along the x axis of the screen to
|
||||||
|
help identify the coordinates of points of interest. The user can define
|
||||||
|
the position of the first amino acid to be marked (for example if the
|
||||||
|
active
|
||||||
|
region is 1501 to 8000, the user might wish to mark every 1000th amino
|
||||||
|
acid
|
||||||
|
starting at either 1501 or 2000 - it depends if the user wishes to treat
|
||||||
|
the active region as an independent unit with its own numbering starting
|
||||||
|
at
|
||||||
|
its left edge, or as part of the whole sequence). The user can also define
|
||||||
|
the separation of the ticks on the scale and their height. If required the
|
||||||
|
labelling routine can be used to add numbers to the ticks.
|
||||||
|
.left margin1
|
||||||
|
@13. TX 2 @Use crosshair
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
This function puts
|
||||||
|
a steerable cross on the screen that can be used to find the
|
||||||
|
coordinates of points in the sequence. The user can move the cross
|
||||||
|
around using the directional keys; when he hits the space bar the
|
||||||
|
program will print out the coordinates of the cross in sequence units and
|
||||||
|
the option will be exited.
|
||||||
|
.para
|
||||||
|
If instead,
|
||||||
|
you hit a , the position will be displayed but the cross will remain on
|
||||||
|
the screen.
|
||||||
|
.para
|
||||||
|
If a letter s is hit the sequence around the cross hair is displayed and
|
||||||
|
the cross remains on the screen.
|
||||||
|
.left margin1
|
||||||
|
@14. TX 2 @Reposition plots
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
The position of each of the plots is defined relative to a user's drawing
|
||||||
|
board which has size 1-10,000 in x and 1-10,000 in y.
|
||||||
|
Plots for
|
||||||
|
each option are drawn in a window defined by x0,y0 and xlength,ylength.
|
||||||
|
Where x0,y0 is the position of the bottom left hand corner of the window,
|
||||||
|
and xlength is the width of the window and ylength the
|
||||||
|
height of the window.
|
||||||
|
.lit
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||||
|
The default window positions are read from a file "MEPMARG" when the
|
||||||
|
program is started. Users can have their own file if required.
|
||||||
|
As all the plots start
|
||||||
|
at the same position in x and have the same width, x0 and xlength are the
|
||||||
|
same for all options. Generally users will only want to change the start
|
||||||
|
level of the window y0 and its height ylength.
|
||||||
|
This option
|
||||||
|
allows users to change window positions whilst running the program.
|
||||||
|
The routine prompts first for the number of the option that the user
|
||||||
|
wishes
|
||||||
|
to reposition; then for the y start and height; then for the x start and
|
||||||
|
length. Note that changes to the x values affect all options. If the user
|
||||||
|
types only carriage return for any value it will remain unchanged.
|
||||||
|
The cross-hair can be used to choose suitable heights.
|
||||||
|
.LEFT MARGIN1
|
||||||
|
@15. TX 2 @Label a diagram
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
This routine allows users to label any diagrams they have produced. They
|
||||||
|
are asked to type in a label. When the user types carriage return to finish
|
||||||
|
typing the label the cross-hair appears on the screen. The user can
|
||||||
|
position it anywhere on the screen. If the user types R (for right justify)
|
||||||
|
the label will be
|
||||||
|
written on the diagram with its right end at the cross-hair position.
|
||||||
|
If the user types L (for left justify) the label will be written on the
|
||||||
|
diagram with its left end at the cross hair position.
|
||||||
|
The
|
||||||
|
cross-hair will then immediately reappear. The user may put the same
|
||||||
|
label
|
||||||
|
on another part of the diagram as before or if he hits the space bar he
|
||||||
|
will be asked if he wishes to type in another label.
|
||||||
|
.left margin1
|
||||||
|
@16. TX 2 @Display a map
|
||||||
|
.LEFT MARGIN2
|
||||||
|
.para
|
||||||
|
It is often convenient to plot a map alongside graphed analysis in order
|
||||||
|
to
|
||||||
|
indicate features within the sequence. This function allows users to
|
||||||
|
draw
|
||||||
|
maps using files arranged in the form of EMBL feature tables. Of course
|
||||||
|
the
|
||||||
|
EMBL tables are usually only used for nucleic acid sequence annotation
|
||||||
|
but,
|
||||||
|
as long as the features are written in the correct format, they can be
|
||||||
|
employed by this routine. The map is composed of a line representing the
|
||||||
|
sequence and then further lines denoting the endpoints of each feature
|
||||||
|
the
|
||||||
|
user identifies. The user is asked to define the height at which the line
|
||||||
|
representing the sequence should be drawn; then for the feature height;
|
||||||
|
then for the features to plot.
|
||||||
|
.left margin1
|
||||||
|
@17. TX 1 @Search for strings
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Search for strings
|
||||||
|
performs searches of all the sequences for selected words and
|
||||||
|
shows which sequences they are found in. The user types in a word and
|
||||||
|
defines the allowed number of mismatches. The results are listed or
|
||||||
|
plotted. If listed the display includes the sequence number, the position
|
||||||
|
in the sequence and the matching string.
|
||||||
|
The results are plotted in the
|
||||||
|
following way. The x axis of the plot represents the length of the aligned
|
||||||
|
sequences and the y direction is divided into sufficient strips to accommodate
|
||||||
|
each sequence. So if a match is found in the 3rd sequence at a position
|
||||||
|
equivalent to halfway along the longest of the sequences then a short
|
||||||
|
vertical line will be drawn at the midpoint of the 3rd strip. If the sequences
|
||||||
|
are aligned it can be useful if the motifs happen to appear in
|
||||||
|
related positions. For example see the original publication. Typical
|
||||||
|
dialogue follows.
|
||||||
|
.lit
|
||||||
|
|
||||||
|
? Menu or option number=17
|
||||||
|
X 1 Plot match positions
|
||||||
|
2 Plot histogram of matches
|
||||||
|
? 0,1,2 =
|
||||||
|
? Word to search for=TTGACA
|
||||||
|
? Minimum match (0-6) (6) =5
|
||||||
|
? (y/n) (y) Plot results N
|
||||||
|
2 35 TAGACA
|
||||||
|
5 14 TTTACA
|
||||||
|
6 37 TTTACA
|
||||||
|
11 14 TAGACA
|
||||||
|
14 14 TTGACA
|
||||||
|
17 14 GTGACA
|
||||||
|
17 22 TTAACA
|
||||||
|
20 1 TTGACA
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@18. TX 3 @Set strand
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Set strand allows the user to define which strand(s) of the sequences to
|
||||||
|
analyse: input strand, complement of input, or both.
|
||||||
|
.left margin1
|
||||||
|
@19. TX 3 @Set composition
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Set composition gives the user three choices for setting the composition
|
||||||
|
of the sequences for use in the calculation of the information content of
|
||||||
|
words. The user can select the overall composition of the sequences as read,
|
||||||
|
an even composition, or can type in any other 4 values.
|
||||||
|
.left margin1
|
||||||
|
@20. TX 3 @Set word length
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Set word length sets the length of word for which dictionaries will be made.
|
||||||
|
.left margin1
|
||||||
|
@21. TX 3 @Set number of mismatches
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Set number of mismatches sets the level of fuzziness for the creation of
|
||||||
|
dictionary Dm.
|
||||||
|
.left margin1
|
||||||
|
@22. TX 3 @Show settings
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Show settings shows the current settings for all parameters associated with
|
||||||
|
dictionary analysis. A typical display follows:
|
||||||
|
.lit
|
||||||
|
? Menu or option number=22
|
||||||
|
Current word length = 6
|
||||||
|
Number of mismatches = 1
|
||||||
|
Start position = 1
|
||||||
|
End position = 63
|
||||||
|
Input strand only
|
||||||
|
Observed composition
|
||||||
|
Dictionary Dw unmade
|
||||||
|
Dictionary Ds unmade
|
||||||
|
Dictionary Dm unmade
|
||||||
|
Dictionary Dh unmade
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@23. TX 3 @Make dictionary Dw
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Make dictionary Dw creates a dictionary that contains a count of the
|
||||||
|
frequency of occurrence of each word in the collected sequences.
|
||||||
|
.left margin1
|
||||||
|
@24. TX 3 @Make dictionary Ds
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Make dictionary Ds creates a dictionary that contains a count of the
|
||||||
|
number of different sequences that contain each word.
|
||||||
|
.left margin1
|
||||||
|
@25. TX 3 @Make dictionary Dm from Dw
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Make dictionary Dm from Dw creates a dictionary from dictionary Dw that
|
||||||
|
contains the frequency of occurrence of each word (say X) in Dw plus the
|
||||||
|
frequency of occurrence of each word in Dw that differs from X by up to m
|
||||||
|
letters. Dm is called a fuzzy dictionary as it contains the frequencies of
|
||||||
|
occurrence of all words plus the frequencies of all the words that are
|
||||||
|
similar to them.
|
||||||
|
.left margin1
|
||||||
|
@26. TX 3 @Make dictionary Dm from Ds
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Make dictionary Dm from Ds creates a dictionary from dictionary Ds that
|
||||||
|
contains the frequency of occurrence of each word (say X) in Ds plus the
|
||||||
|
frequency of occurrence of each word in Ds that differs from X by up to m
|
||||||
|
letters. Dm is called a fuzzy dictionary as it contains the frequencies of
|
||||||
|
occurrence of all words plus the frequencies of all the words that are
|
||||||
|
similar to them.
|
||||||
|
.left margin1
|
||||||
|
@27. TX 3 @Make dictionary Dh from Dm
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Make dictionary Dh creates a dictionary from dictionary Dm and whose
|
||||||
|
entries are zero except for those words in any set of related words that
|
||||||
|
are most frequent. It finds the dominant words in each set of relations
|
||||||
|
and stores their counts.
|
||||||
|
.left margin1
|
||||||
|
@28. TX 3 @Examine fuzzy dictionary Dm
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Examine dictionary Dm allows users to analyse the contents of dictionary
|
||||||
|
Dm to find the most common words or those words that contain the most
|
||||||
|
information. The user supplies a frequency or information cutoff and chooses
|
||||||
|
to have the results sorted on either value. The program will find the top 100
|
||||||
|
words that achieve the cutoff values and present them to the user sorted
|
||||||
|
as selected. The information content will be calculated from either Dw or Ds
|
||||||
|
depending which was used to create Dm, and using the current composition
|
||||||
|
setting. Typical dialogue follows:
|
||||||
|
.lit
|
||||||
|
|
||||||
|
? Menu or option number=28
|
||||||
|
Looking for highest scoring words
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.62
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 9 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
AAAAAC 64 0.66460
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
GTTTTT 66 0.64300
|
||||||
|
TTTTTG 73 0.64070
|
||||||
|
TTTTGT 63 0.63820
|
||||||
|
TTTTTC 65 0.63810
|
||||||
|
AAAATA 63 0.62670
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.62
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =2
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 9 Maximum information= 0.7385326
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TTTTTG 73 0.64070
|
||||||
|
GTTTTT 66 0.64300
|
||||||
|
TTTTTC 65 0.63810
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
AAAAAC 64 0.66460
|
||||||
|
TTTTGT 63 0.63820
|
||||||
|
AAAATA 63 0.62670
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =!
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@29. TX 3 @Examine fuzzy dictionary Dh
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Examine dictionary Dh allows users to analyse the contents of dictionary Dh
|
||||||
|
to find the most common words or those words that contain the most
|
||||||
|
information. The user supplies a frequency or information cutoff and chooses
|
||||||
|
to have the results sorted on either value. The program will find the top 100
|
||||||
|
words that achieve the cutoff values and present them to the user sorted as
|
||||||
|
selected. The information content will be calculated from either Dw or Ds
|
||||||
|
depending which was used to create Dh and using the current composition
|
||||||
|
setting. Typical dialogue follows:
|
||||||
|
.lit
|
||||||
|
|
||||||
|
? Menu or option number=29
|
||||||
|
Looking for highest scoring words
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.6
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 4 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =50
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.5
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 8 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
TCTTGA 54 0.66080
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
ACTTTA 57 0.61960
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
AGTATA 51 0.60540
|
||||||
|
TTATAA 55 0.59300
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =50
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =
|
||||||
|
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 8 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
TCTTGA 54 0.66080
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
ACTTTA 57 0.61960
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
AGTATA 51 0.60540
|
||||||
|
TTATAA 55 0.59300
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =!
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@30. TX 3 @Examine words in Dm
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Examine words in Dm allows users to analyse the contents of dictionary Dm at the
|
||||||
|
level of individual words to find their frequency, information content, and to
|
||||||
|
see their base frequency table. The user types in a word to examine and the
|
||||||
|
program displays the values and table. The information content will be
|
||||||
|
calculated from either Dw or Ds depending which was used to create Dm,
|
||||||
|
and using the current composition setting. Typical dialogue follows:
|
||||||
|
.lit
|
||||||
|
? Menu or option number=30
|
||||||
|
? Word to examine=TTGACA
|
||||||
|
TtgacA 60 0.7385326
|
||||||
|
56 56 6 7 5 11
|
||||||
|
4 3 2 1 52 1
|
||||||
|
1 4 2 53 3 48
|
||||||
|
3 1 54 3 4 4
|
||||||
|
TTGACA
|
||||||
|
? Word to examine=TATAAT
|
||||||
|
taTAat 65 0.6251902
|
||||||
|
56 3 53 4 4 60
|
||||||
|
6 1 5 5 5 3
|
||||||
|
3 60 5 57 57 4
|
||||||
|
4 5 6 3 3 2
|
||||||
|
TATAAT
|
||||||
|
? Word to examine=
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@31. TX 3 @Examine words in Dh
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Examine words in Dh allows users to analyse the contents of dictionary Dh at the
|
||||||
|
level of individual words to find their frequency, information content, and to
|
||||||
|
see their base frequency table. The user types in a word to examine and the
|
||||||
|
program displays the values and table. The information content will be
|
||||||
|
calculated from either Dw or Ds depending which was used to create Dm,
|
||||||
|
and using the current composition setting. Typical dialogue follows:
|
||||||
|
.lit
|
||||||
|
|
||||||
|
? Menu or option number=31
|
||||||
|
? Word to examine=TTGACA
|
||||||
|
TtgacA 60 0.7385326
|
||||||
|
56 56 6 7 5 11
|
||||||
|
4 3 2 1 52 1
|
||||||
|
1 4 2 53 3 48
|
||||||
|
3 1 54 3 4 4
|
||||||
|
TTGACA
|
||||||
|
? Word to examine=TATAAT
|
||||||
|
taTAat 65 0.6251902
|
||||||
|
56 3 53 4 4 60
|
||||||
|
6 1 5 5 5 3
|
||||||
|
3 60 5 57 57 4
|
||||||
|
4 5 6 3 3 2
|
||||||
|
TATAAT
|
||||||
|
? Word to examine=GGGGGG
|
||||||
|
gggggg 0 0.6199890
|
||||||
|
3 1 1 2 3 4
|
||||||
|
1 3 1 2 2 1
|
||||||
|
2 1 1 1 1 1
|
||||||
|
11 12 14 12 11 11
|
||||||
|
GGGGGG
|
||||||
|
? Word to examine=
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@32. TX 3 @Save or restore a dictionary
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Save or restore dictionary allows users to write or read any dictionary to
|
||||||
|
and from disk files. The user is asked to define the dictionary and file. The
|
||||||
|
function is useful if the machine being used is very slow at calculating
|
||||||
|
because the files can be handled quickly. However note that the files
|
||||||
|
cannot be processed by any other program.
|
||||||
|
.left margin1
|
||||||
|
@33. TX 1 @Find inverted repeats
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Find inverted repeats performs searches for simple inverted repeat sequences
|
||||||
|
in each sequence. They are defined by a range of loop sizes and a minimum
|
||||||
|
number of potential basepairs. The results can be plotted or listed. The x
|
||||||
|
axis of the plot represents the length of the aligned sequences and the y
|
||||||
|
direction is divided into sufficient strips to accommodate each sequence.
|
||||||
|
So if an inverted repeat is found in the 3rd sequence at a position equivalent
|
||||||
|
to halfway along the longest of the sequences then a short vertical line will
|
||||||
|
be drawn at the midpoint of the 3rd strip. Alternatively, if the results are
|
||||||
|
listed, the potential hairpin loops are drawn out, with the sequence number
|
||||||
|
and the position of the loop. Typical dialogue follows.
|
||||||
|
.lit
|
||||||
|
|
||||||
|
? Menu or option number=33
|
||||||
|
Define the range of loop sizes
|
||||||
|
? Minimum loop size (0-10) (3) =0
|
||||||
|
? Maximum loop size (1-20) (3) =
|
||||||
|
? Minimum number of basepairs (1-20) (6) =
|
||||||
|
? (y/n) (y) Plot results N
|
||||||
|
Searching
|
||||||
|
|
||||||
|
Sequence 3 34
|
||||||
|
C
|
||||||
|
G.T
|
||||||
|
T-A
|
||||||
|
A-T
|
||||||
|
T.G
|
||||||
|
T.G
|
||||||
|
G.T
|
||||||
|
ATCTTT TATTTCA
|
||||||
|
33
|
||||||
|
|
||||||
|
Sequence 5 35
|
||||||
|
T
|
||||||
|
G.T
|
||||||
|
T.G
|
||||||
|
A-T
|
||||||
|
T.G
|
||||||
|
G.T
|
||||||
|
C-G
|
||||||
|
T.G
|
||||||
|
TCCGGC AATTGTG
|
||||||
|
34
|
||||||
|
.end lit
|
||||||
|
.left margin1
|
||||||
|
@ End of help
|
5116
help/NIP.RNO
Normal file
5116
help/NIP.RNO
Normal file
File diff suppressed because it is too large
Load diff
88
help/NIPF.RNO
Normal file
88
help/NIPF.RNO
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
.NPA
|
||||||
|
.SP 1
|
||||||
|
.left margin1
|
||||||
|
@-1. TX 0 @General
|
||||||
|
.sp
|
||||||
|
@-2. TX 0 @Screen control
|
||||||
|
.sp
|
||||||
|
@-3. TX 0 @Statistical analysis
|
||||||
|
.sp
|
||||||
|
@-1. TX 0 @General
|
||||||
|
.sp
|
||||||
|
@-2. TX 0 @Screen control
|
||||||
|
.sp
|
||||||
|
@-3. TX 0 @Statistical analysis
|
||||||
|
.sp
|
||||||
|
@0. TX -1 @NIPF
|
||||||
|
.sp
|
||||||
|
@1. TX 1 @ Help
|
||||||
|
.sp
|
||||||
|
@2. TX 1 @ Quit
|
||||||
|
.sp
|
||||||
|
@3. TX 1 @ Read new sequence
|
||||||
|
.sp
|
||||||
|
@4. TX 1 @ Redefine active region
|
||||||
|
.sp
|
||||||
|
@5. TX 1 @ List the sequence
|
||||||
|
.sp
|
||||||
|
@6. TX 1 @ List a text file
|
||||||
|
.sp
|
||||||
|
@7. TX 1 @ Direct output to disk
|
||||||
|
.sp
|
||||||
|
@8. TX 1 @ Write active sequence to disk
|
||||||
|
.sp
|
||||||
|
@9. TX 1 @ List a translation
|
||||||
|
.sp
|
||||||
|
@32. TX 1 @ List showing base differences
|
||||||
|
.sp
|
||||||
|
@37. TX 1 @ List showing translation
|
||||||
|
.sp
|
||||||
|
@33. TX 1 @ List showing amino acid differences
|
||||||
|
.sp
|
||||||
|
@10. TX 2 @ Clear graphics
|
||||||
|
.sp
|
||||||
|
@11. TX 2 @ Clear text
|
||||||
|
.sp
|
||||||
|
@12. TX 2 @ Draw a ruler
|
||||||
|
.sp
|
||||||
|
@13. TX 2 @ Use cross hair
|
||||||
|
.sp
|
||||||
|
@14. TX 2 @ Reset margins
|
||||||
|
.sp
|
||||||
|
@15. TX 2 @ Label diagram
|
||||||
|
.sp
|
||||||
|
@16. TX 2 @ Display a map
|
||||||
|
.sp
|
||||||
|
@17. TX 3 @ Set comparison mode
|
||||||
|
.sp
|
||||||
|
@18. TX 3 @ Set sort mode
|
||||||
|
.sp
|
||||||
|
@21. TX 3 @ Count base changes
|
||||||
|
.sp
|
||||||
|
@22. TX 3 @ Count codon changes
|
||||||
|
.sp
|
||||||
|
@23. TX 3 @ Count genetic events
|
||||||
|
.sp
|
||||||
|
@24. TX 3 @ Show table of base changes
|
||||||
|
.sp
|
||||||
|
@36. TX 3 @ Show table of expressed base changes
|
||||||
|
.sp
|
||||||
|
@39. TX 3 @ Show table of silent base changes
|
||||||
|
.sp
|
||||||
|
@38. TX 3 @ Estimate mutation rate
|
||||||
|
.sp
|
||||||
|
@25. TX 3 @ Plot base changes
|
||||||
|
.sp
|
||||||
|
@26. TX 3 @ Plot expressed changes per base
|
||||||
|
.sp
|
||||||
|
@27. TX 3 @ Plot silent changes per base
|
||||||
|
.sp
|
||||||
|
@28. TX 3 @ Count expressed changes per base
|
||||||
|
.sp
|
||||||
|
@29. TX 3 @ Count silent changes per base
|
||||||
|
.sp
|
||||||
|
@30. TX 3 @ Count changed amino acids
|
||||||
|
.sp
|
||||||
|
@31. TX 3 @ Plot amino acid variability
|
||||||
|
.sp
|
||||||
|
@ end of help
|
2469
help/PIP.RNO
Normal file
2469
help/PIP.RNO
Normal file
File diff suppressed because it is too large
Load diff
38
help/README
Normal file
38
help/README
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
README file for help directory of staden package
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
|
Should contain (at least) ProgramName_help where ProgramName is each of
|
||||||
|
bap, dap, gip, mem, mep, nip, nipf, pip, sap, sip and also staden_help
|
||||||
|
and stadenp_help.
|
||||||
|
|
||||||
|
There are 3 main formats of file in this directory:
|
||||||
|
|
||||||
|
PROGRAM.RNO:
|
||||||
|
This is the unformatted (runoff/nroff style) help for PROGRAM.
|
||||||
|
Any changes to the help should be performed on this file.
|
||||||
|
|
||||||
|
program_help:
|
||||||
|
This is the online formatted help used by PROGRAM. It can also
|
||||||
|
be printed to produce hardcopy documentation.
|
||||||
|
|
||||||
|
program_menu:
|
||||||
|
This is a file that describes the menus used in PROGRAM,
|
||||||
|
together with an index into the program_help file for the
|
||||||
|
online help. The format for each line is:
|
||||||
|
|
||||||
|
<option number> <menu number> <program_help offset> <no. of
|
||||||
|
lines of help> <program type T(ext) or (X)windows> <option name>
|
||||||
|
|
||||||
|
|
||||||
|
Exceptions to these are for the staden_help, stadenp_help, and
|
||||||
|
splitp_help which do not have the relevant .RNO or _menu files. The
|
||||||
|
file staden_help gives an introduction to the xterm user interface
|
||||||
|
(written for vax and vms and so is out of date with the Unix
|
||||||
|
versions).
|
||||||
|
|
||||||
|
See the file splitp_help for information about the reformatting of the
|
||||||
|
PROSITE motif library.
|
||||||
|
|
||||||
|
Rebuild help files with the Unix command "make all". Ensure that the utility
|
||||||
|
program sethelp is compiled and in the executables search path. The sources
|
||||||
|
for the program sethelp are found in $STADENROOT/staden.
|
2523
help/SAP.RNO
Normal file
2523
help/SAP.RNO
Normal file
File diff suppressed because it is too large
Load diff
1431
help/SIP.RNO
Normal file
1431
help/SIP.RNO
Normal file
File diff suppressed because it is too large
Load diff
125
help/SPLITP.RNO
Normal file
125
help/SPLITP.RNO
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
.para
|
||||||
|
Preparing the PROSITE protein motif library for use by the Staden programs
|
||||||
|
.para
|
||||||
|
Introduction
|
||||||
|
.para
|
||||||
|
A library of protein motifs (in our terminology, because they include
|
||||||
|
variable gaps, some would be called patterns) has recently become available
|
||||||
|
from Amos Bairoch, Departement de Biochimie Medicale, University of Geneva.
|
||||||
|
Currently it contains 317 patterns/motifs and arrives on tape or cdrom
|
||||||
|
in two files:
|
||||||
|
a .dat file and a .doc file. There is also a user documentation file
|
||||||
|
prosite.usr. Here I outline what is required to prepare the PROSITE library for
|
||||||
|
use by our programs.
|
||||||
|
.para
|
||||||
|
Three programs need to be run SPLITP1, SPLITP2, and SPLITP3.
|
||||||
|
.PARA
|
||||||
|
Outline of the PROSITE files
|
||||||
|
.para
|
||||||
|
A typical entry in the .dat file is shown below.
|
||||||
|
.lit
|
||||||
|
|
||||||
|
ID 2FE2S_FERREDOXIN; PATTERN.
|
||||||
|
AC PS00197;
|
||||||
|
DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
|
||||||
|
DE 2Fe-2S ferredoxins, iron-sulfur binding region signature.
|
||||||
|
PA C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
|
||||||
|
NR /RELEASE=14,15409;
|
||||||
|
NR /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
|
||||||
|
NR /FALSE_NEG=5(5);
|
||||||
|
CC /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
|
||||||
|
CC /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
|
||||||
|
DR P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
|
||||||
|
DR P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
|
||||||
|
DR P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
|
||||||
|
DO PDOC00175;
|
||||||
|
//
|
||||||
|
.end lit
|
||||||
|
.para
|
||||||
|
Each entry has an accession number (here PS00197), a pattern definition
|
||||||
|
(here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a documentation file
|
||||||
|
cross reference (here PDOC00175).
|
||||||
|
This pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of STA,
|
||||||
|
not P, C.
|
||||||
|
.para
|
||||||
|
We need to convert all of these patterns into our pattern definitions
|
||||||
|
(as membership of a set, with the appropriate gap ranges) and write each
|
||||||
|
into a separate pattern file with corresponding "membership of a set"
|
||||||
|
weight matrices. Each
|
||||||
|
pattern file is named accession_number.pat (here PS00197.PAT). The
|
||||||
|
corresponding matrix files are accession_number.wtsa,
|
||||||
|
accession_number.wtsb, etc for however many are needed (here PS00197.WTSA
|
||||||
|
and PS00197.WTSB): two are needed because of the variable gap.
|
||||||
|
.para
|
||||||
|
In addition we can optionally
|
||||||
|
split the .dat and .doc files into separate files, one for each
|
||||||
|
entry, with names accession_number.dat and accession_number.doc. Also we
|
||||||
|
create an index for the library prosite.lis, which
|
||||||
|
gives a one line description of each pattern, and ends with the pattern
|
||||||
|
file and documentation file numbers. The start of the file is shown below.
|
||||||
|
.lit
|
||||||
|
|
||||||
|
N-glycosylation site. 00001,00001
|
||||||
|
Glycosaminoglycan attachment site. 00002,00002
|
||||||
|
Tyrosine sulfatation site. 00003,00003
|
||||||
|
cAMP- and cGMP-dependent protein kinase phosphorylation site. 00004,00004
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
So the name of the pattern file for Glycosaminoglycan attachment site is
|
||||||
|
PS00002.PAT, and for the documentation file PDOC00002.DOC
|
||||||
|
.para
|
||||||
|
Finally we
|
||||||
|
create a file of file names for all the patterns in the library.
|
||||||
|
.para
|
||||||
|
To use the complete PROSITE library from program pip, select "pattern searcher"
|
||||||
|
and choose the
|
||||||
|
option "use file of pattern file names", and give the file name
|
||||||
|
prosite.nam. For any matches found, the accession number and pattern title
|
||||||
|
will be
|
||||||
|
displayed.
|
||||||
|
|
||||||
|
.para
|
||||||
|
Running the conversion programs
|
||||||
|
.para
|
||||||
|
|
||||||
|
Only SPLITP3 is necessary for using the library. The other programs
|
||||||
|
only make the
|
||||||
|
original files marginally easier to browse through and produce an index.
|
||||||
|
.para
|
||||||
|
SPLITP1 splits the prosite.dat file to create a separate file for each
|
||||||
|
entry. Each file is automatically named PSentry_number.dat. In addition it
|
||||||
|
creates an index for the library (see above).
|
||||||
|
.para
|
||||||
|
SPLITP2 performs the same operation for the Prosite.doc file, except that
|
||||||
|
no index is created. Files are named PSentry_number.doc.
|
||||||
|
.para
|
||||||
|
SPLITP3 creates a separate pattern file and weight matrix files for each
|
||||||
|
prosite entry from the file prosite.dat. Pattern files are named
|
||||||
|
PSentry_number.pat, weight matrix files PSentry_number.wtsa,
|
||||||
|
PSentry_number.wtsb, etc. The pattern title is the one line description
|
||||||
|
of the motif. SPLITP3 also creates a file of file names. Notice that it
|
||||||
|
will ask for a path name so that the path can be included in the file of
|
||||||
|
file names. This is the path to the directory in which the pattern files
|
||||||
|
are stored.
|
||||||
|
.para
|
||||||
|
Notes
|
||||||
|
.para
|
||||||
|
Obviously the use of files of file names is a general solution, and anybody
|
||||||
|
could now create their own set of interesting patterns for screening, or a
|
||||||
|
subset of prosite.nam, etc.
|
||||||
|
.para
|
||||||
|
Note that 5 of the Bairoch motifs contained the symbols > or < which
|
||||||
|
means that the motifs must appear exactly at the N or C termini of the
|
||||||
|
sequences. Currently our methods have no mechanism for such definitions and,
|
||||||
|
for example KDEL motifs, will be permitted to occur anywhere throughout
|
||||||
|
a sequence.
|
||||||
|
|
||||||
|
.para
|
||||||
|
Also, of course, the library does not have to be used solely for performing
|
||||||
|
mass screenings: each individual entry can be used as a single pattern by
|
||||||
|
giving the name of its .pat file - eg pathname/ps00002.pat
|
||||||
|
In addition more sophisticated users will wish to copy pattern files and
|
||||||
|
weight matrices into their own directories and modify them. For example the
|
||||||
|
cutoff scores are probably chosen to be quite high in order to reduce the
|
||||||
|
number of false positives, and some users might wish to lower them.
|
||||||
|
|
354
help/STADEN.RNO
Normal file
354
help/STADEN.RNO
Normal file
|
@ -0,0 +1,354 @@
|
||||||
|
.npa
|
||||||
|
.left margin2
|
||||||
|
.para
|
||||||
|
Introduction to the Staden sequence analysis package and its user interface
|
||||||
|
.PARA
|
||||||
|
The package contains the following programs:
|
||||||
|
.lit
|
||||||
|
|
||||||
|
GIP Gel input program
|
||||||
|
SAP Sequence assemble program
|
||||||
|
NIP Nucleotide interpretation program
|
||||||
|
PIP Protein interpretation program
|
||||||
|
SIP Similarity investigation program
|
||||||
|
MEP Motif exploration program
|
||||||
|
NIPL Nucleotide interpretation program (library)
|
||||||
|
PIPL Protein interpretation program (library)
|
||||||
|
SIPL Similarity investigation program (library)
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.left margin2
|
||||||
|
GIP uses a digitiser for entry of DNA sequences from
|
||||||
|
autoradiographs.
|
||||||
|
.left margin2
|
||||||
|
SAP handles everything relating to assembling gel
|
||||||
|
readings in order to produce a consensus sequence. It can also deal with
|
||||||
|
families of protein sequences.
|
||||||
|
.left margin2
|
||||||
|
NIP provides functions for analysing and interpreting
|
||||||
|
individual nucleotide sequences.
|
||||||
|
.left margin2
|
||||||
|
PIP provides functions for analysing and interpreting
|
||||||
|
individual protein sequences.
|
||||||
|
.left margin2
|
||||||
|
MEP analyses families of nucleotide sequences to help discover new motifs.
|
||||||
|
.left margin2
|
||||||
|
NIPL performs pattern searches on nucleotide sequence libraries.
|
||||||
|
.left margin2
|
||||||
|
PIPL performs pattern searches on protein sequence libraries.
|
||||||
|
.left margin2
|
||||||
|
SIP provides functions for comparing and aligning
|
||||||
|
pairs of protein or nucleotide sequences.
|
||||||
|
.left margin2
|
||||||
|
SIPL searches nucleotide and protein sequence
|
||||||
|
libraries for entries similar to probe sequences.
|
||||||
|
.left margin2
|
||||||
|
.sk1
|
||||||
|
.para
|
||||||
|
Documentation
|
||||||
|
.para
|
||||||
|
As is explained below, the
|
||||||
|
programs SAP, NIP, PIP, SIP and MEP have online help,
|
||||||
|
and the help files have the names: HELPSAP, HELPNIP, HELPPIP, HELPSIP,
|
||||||
|
HELPMEP. These
|
||||||
|
files can be displayed on the screen or printed using the appropriate
|
||||||
|
commands. Currently the help for the other programs is also contained in
|
||||||
|
these files. For example help for NIPL is in HELPNIP. This file is called
|
||||||
|
HELPSTADEN.
|
||||||
|
.para
|
||||||
|
Sequence formats
|
||||||
|
.para
|
||||||
|
The shotgun sequencing program SAP deals only with simple
|
||||||
|
text files for gel readings, and is a self-contained system.
|
||||||
|
However as there is still no single agreed format
|
||||||
|
for finished sequences or for libraries of sequences,
|
||||||
|
the other programs in the package can read data that is stored in several ways.
|
||||||
|
.para
|
||||||
|
The analytical programs can read individual sequences stored in the following
|
||||||
|
formats:
|
||||||
|
Staden, EMBL, Genbank, PIR (also known as NBRF), and GCG, but for storing whole
|
||||||
|
libraries we use only PIR format. In addition
|
||||||
|
these programs can perform a number of
|
||||||
|
simple operations using libraries stored in this format. They can extract
|
||||||
|
entries by entry name, can search titles for keywords, can search the whole
|
||||||
|
of the annotation files for keywords, and can extract annotations for any
|
||||||
|
named entry.
|
||||||
|
We reformat all sequence libraries into PIR format. Currently we
|
||||||
|
have NBRF, EMBL, SWISSPROT and VECBASE libraries in PIR format.
|
||||||
|
.para
|
||||||
|
The library searching programs operate only
|
||||||
|
on sequences stored in PIR format.
|
||||||
|
.para
|
||||||
|
The analytical programs
|
||||||
|
will operate with uppercase or lowercase sequence
|
||||||
|
characters. In addition T and U are equivalent. SAP uses uppercase letters
|
||||||
|
for original gel readings and lowercase letters for characters that are
|
||||||
|
corrected by the automatic editor.
|
||||||
|
Programs NIP and PIP use IUB symbols for redundancy in back translations
|
||||||
|
and for sequence searches.
|
||||||
|
The symbols are shown below.
|
||||||
|
.LIT
|
||||||
|
|
||||||
|
|
||||||
|
NC-IUB SYMBOLS
|
||||||
|
|
||||||
|
A,C,G,T
|
||||||
|
R (A,G) 'puRine'
|
||||||
|
Y (T,C) 'pYrimidine'
|
||||||
|
W (A,T) 'Weak'
|
||||||
|
S (C,G) 'Strong'
|
||||||
|
M (A,C) 'aMino'
|
||||||
|
K (G,T) 'Keto'
|
||||||
|
H (A,T,C) 'not G'
|
||||||
|
B (G,C,T) 'not A'
|
||||||
|
V (G,A,C) 'not T'
|
||||||
|
D (G,A,T) 'not C'
|
||||||
|
N (G,A,C,T) 'aNy'
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.PARA
|
||||||
|
The user interface
|
||||||
|
.PARA
|
||||||
|
The user interface is common to all programs.
|
||||||
|
It consists of a set of menus and a uniform way
|
||||||
|
of presenting choices and obtaining input
|
||||||
|
from the user. This section describes: the
|
||||||
|
menu system; how options are selected and other choices made; how values
|
||||||
|
are supplied to the program; how help is obtained, and
|
||||||
|
how to escape from any part of a program. In addition it gives information
|
||||||
|
about saving results in files and the use of graphics for presenting
|
||||||
|
results.
|
||||||
|
.para
|
||||||
|
Menus
|
||||||
|
.para
|
||||||
|
Each program has several menus and numerous options.
|
||||||
|
Each menu or option has a unique number that is used to
|
||||||
|
identify it. Menu numbers are distinguished from
|
||||||
|
option numbers by being preceded by the letter
|
||||||
|
m (or M, all programs make no distinction between
|
||||||
|
upper and lower case letters). With the exception of
|
||||||
|
some parts of program SAP, the menus are not hierarchical,
|
||||||
|
rather the options they each contain are simply lists of
|
||||||
|
related functions and their identifying numbers.
|
||||||
|
Therefore options can be selected independently
|
||||||
|
of the menu that is currently being shown on the
|
||||||
|
screen, and the menus are simply memory aids.
|
||||||
|
All options and menus are selected by typing their
|
||||||
|
option number when the programs present the prompt
|
||||||
|
.para
|
||||||
|
"? Menu or option number =".
|
||||||
|
.para
|
||||||
|
To select a menu type its number preceded by
|
||||||
|
the letter M. To select an option type its number.
|
||||||
|
If you type only "return" you will get menu m0
|
||||||
|
which is simply a list of menus. If you select an
|
||||||
|
option you will return to the current menu after the function is completed.
|
||||||
|
.para
|
||||||
|
When you select an option, in many cases the
|
||||||
|
program will immediately perform the operation
|
||||||
|
selected without further dialogue. If you precede an option
|
||||||
|
number by the letter d (e.g. D17), you
|
||||||
|
will force the program to offer dialogue about the selected option
|
||||||
|
before the function operates,
|
||||||
|
hence allowing you to change the value of any of its parameters. If
|
||||||
|
you precede an option number by the symbol ? (e.g. ?17),
|
||||||
|
you will be given help on the option (here 17).
|
||||||
|
.para
|
||||||
|
Where possible, equivalent or identical options have been given the same
|
||||||
|
numbers in all programs, and so users quickly learn the numbers for
|
||||||
|
the functions they employ most often.
|
||||||
|
.para
|
||||||
|
Help
|
||||||
|
.para
|
||||||
|
As mentioned above, help about each option can be obtained by
|
||||||
|
preceding the option number by the symbol ? when you are presented
|
||||||
|
with the prompt "? Menu or option number", but there are two further
|
||||||
|
ways of obtaining help. Whenever the program asks a question
|
||||||
|
you can respond by typing the symbol ? and you will receive information
|
||||||
|
about the current option. In addition, option number 1
|
||||||
|
in all the programs will give help on all of a program's functions.
|
||||||
|
.para
|
||||||
|
Quitting
|
||||||
|
.para
|
||||||
|
To exit from any point in a program you type ! for quit.
|
||||||
|
If a menu is on the screen this will stop the program, otherwise
|
||||||
|
you will be returned to the last menu.
|
||||||
|
.Para
|
||||||
|
Other interactions
|
||||||
|
.para
|
||||||
|
Questions are presented in a few restricted ways.
|
||||||
|
In all cases typing only "return" in response to a question means
|
||||||
|
yes, and typing N or n means no.
|
||||||
|
.para
|
||||||
|
Obvious opposites such as "clear screen" and "keep picture"
|
||||||
|
are presented with only the default shown. For example
|
||||||
|
in this case the default is generally "keep picture" so the
|
||||||
|
program will display:
|
||||||
|
.para
|
||||||
|
"(y/n) (y) Keep picture"
|
||||||
|
.para
|
||||||
|
and the picture will be retained if the user types anything other than N or
|
||||||
|
n, (in which case the screen will be cleared).
|
||||||
|
.para
|
||||||
|
Where there are choices that are not obvious opposites, or
|
||||||
|
there are more than two choices, two further conventions are used:
|
||||||
|
"radio buttons" and "check boxes".
|
||||||
|
.para
|
||||||
|
|
||||||
|
Radio buttons are used when only one of a number of choices can be
|
||||||
|
made at any one time. The choices are presented arranged one above the
|
||||||
|
other, each choice with a number for its selection, and the default
|
||||||
|
choice marked with an X. For example in the restriction
|
||||||
|
enzyme search routine the following choices are offered:
|
||||||
|
.para
|
||||||
|
.lit
|
||||||
|
|
||||||
|
Select output mode
|
||||||
|
1 order results enzyme by enzyme
|
||||||
|
2 order results by positon
|
||||||
|
X 3 show only infrequent cutters
|
||||||
|
4 show names above the sequence
|
||||||
|
? Selection (1-4) (3) =
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
Any single option can be selected by typing the option number,
|
||||||
|
and the default option, (here shown as 3), is also obtained by
|
||||||
|
typing only "return". Again help can be obtained by typing ? and
|
||||||
|
you can quit by typing !.
|
||||||
|
.para
|
||||||
|
Check boxes are used when any number of a set of choices can be
|
||||||
|
made (i.e. the choices are not exclusive). Choices are
|
||||||
|
made by typing choice numbers. Each choice can be considered
|
||||||
|
as a switch whose setting is reversed when it is selected. Choices that are
|
||||||
|
currently switched on are marked with an X.
|
||||||
|
The user quits from making selections by typing only
|
||||||
|
"return". For example in the routine that plots base composition
|
||||||
|
you can plot the frequencies of any combination of bases, e.g. only
|
||||||
|
A, or A+T, or A+T+G etc.
|
||||||
|
The following check box is offered to the user:
|
||||||
|
.lit
|
||||||
|
|
||||||
|
X 1 T
|
||||||
|
2 C
|
||||||
|
X 3 A
|
||||||
|
4 G
|
||||||
|
? Selection (1-4) () =
|
||||||
|
|
||||||
|
.END LIT
|
||||||
|
As shown this will plot the A+T composition. To switch off T
|
||||||
|
you select 1, to switch on C you select 2, etc, to quit,
|
||||||
|
having set the bases required you type only "return".
|
||||||
|
.para
|
||||||
|
Input of numerical values
|
||||||
|
.para
|
||||||
|
All input of integer or decimal numbers is presented in a
|
||||||
|
standard way with the allowed range shown in brackets and the default
|
||||||
|
value also in brackets. For example:
|
||||||
|
.para
|
||||||
|
? span (5-31) (11) =
|
||||||
|
.para
|
||||||
|
In this example you could type any number between 5 and 31,
|
||||||
|
or "return" only, or ! or ? (see above). Any other input will cause the
|
||||||
|
program to ask the question again. Typing only "return" gives the default
|
||||||
|
value (here 11).
|
||||||
|
.para
|
||||||
|
Use of the bell
|
||||||
|
.para
|
||||||
|
The programs use the bell to indicate that a task is completed.
|
||||||
|
This allows users to read textual results before they are scrolled up off
|
||||||
|
the screen, or to look at a plot before it is scrolled over by the menus.
|
||||||
|
When the bell sounds, the programs will wait
|
||||||
|
until return is typed. You can quit from these points by typing ! but
|
||||||
|
no help is available.
|
||||||
|
.para
|
||||||
|
Printing and saving results in files
|
||||||
|
.para
|
||||||
|
A few of the functions in the programs automatically write their textual
|
||||||
|
results
|
||||||
|
to disk files, but for most functions you can choose whether results
|
||||||
|
appear on the terminal screen or go to a file. This applies to both text
|
||||||
|
and graphical results.
|
||||||
|
For these functions
|
||||||
|
the normal, or default, place for results to
|
||||||
|
appear is on the screen, and users need to decide before the
|
||||||
|
function is selected if they want to redirect the results to a file.
|
||||||
|
In all programs, option number 7, "Direct output to disk" gives control
|
||||||
|
over whether results appear on the screen or go to a file. When a program
|
||||||
|
is started results will be sent to the screen. If option 7 is selected
|
||||||
|
users will be given the choice of redirecting either text or graphics to a
|
||||||
|
file. The program will then ask users to supply a file name. From that
|
||||||
|
point on all results will be sent to the file until option 7 is selected again,
|
||||||
|
in which case the "redirection file" will be closed, and results will start
|
||||||
|
to appear on the screen.
|
||||||
|
.para
|
||||||
|
If these files contain textual results they can be looked at
|
||||||
|
from within the programs
|
||||||
|
by using option 6, "List a text file". Once you leave the program
|
||||||
|
you can use an appropriate system command to print the files.
|
||||||
|
There is no function within the programs to direct files to a printer.
|
||||||
|
.para
|
||||||
|
The converse of the above is also possible. That
|
||||||
|
is, it is possible to redirect results that would normally go to file,
|
||||||
|
so that they appear instead on the screen. This is often useful as a way
|
||||||
|
of checking results before saving them in a file. On a VAX using
|
||||||
|
VMS you do this by typing TT: for the name of the file that the
|
||||||
|
program would create. TT: is what VMS calls the screen.
|
||||||
|
.para
|
||||||
|
Use of graphics
|
||||||
|
.para
|
||||||
|
The analytical programs including NIP, PIP and SIP present the results of
|
||||||
|
many of their analyses graphically. The position at which the results for
|
||||||
|
any function appear on the screen is defined relative to a notional user's
|
||||||
|
"drawing board" of dimension 10,000 by 10,000. This drawing board fills the
|
||||||
|
screen and results are drawn in windows defined using symbols x0,y0 and
|
||||||
|
xlength,ylength,
|
||||||
|
where x0,y0 is the position of the bottom left hand corner of the window,
|
||||||
|
and xlength is the width of the window and ylength the
|
||||||
|
height of the window.
|
||||||
|
.lit
|
||||||
|
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
.end lit
|
||||||
|
.para
|
||||||
|
The window positions for each option are read from a file
|
||||||
|
when a program is started. If required individual users could have their
|
||||||
|
own set of plot positions, and also the positions
|
||||||
|
can be redefined from within the
|
||||||
|
programs using option number 14.
|
||||||
|
.para
|
||||||
|
For those analyses that draw continuous lines to represent results
|
||||||
|
(for example a plot of base composition) the user is asked to supply the
|
||||||
|
"Plot interval". All the analyses produce a value for every point along the
|
||||||
|
sequence but often it is unnecessary to actually plot the
|
||||||
|
values for all the points.
|
||||||
|
The plot interval is simply the distance between the points
|
||||||
|
shown on the screen. If the user selects a plot interval of 1, every point
|
||||||
|
will be plotted; a plot interval of 3 will show every third point. It is a
|
||||||
|
way of speeding up the analyses.
|
||||||
|
.para
|
||||||
|
Saving graphics
|
||||||
|
.para
|
||||||
|
Many terminals are not capable of dumping their screen contents to a
|
||||||
|
file for subsequent printing. One convenient way of obtaining hard copy
|
||||||
|
of graphical results is to use a micro computer as a terminal. On
|
||||||
|
the Macintosh we use the terminal emulator versa
|
||||||
|
termPro. This allows graphics to be saved as
|
||||||
|
Macintosh files that can be annotated and printed using
|
||||||
|
Macdraw and other painting programs.
|
||||||
|
.para
|
||||||
|
Alternatively graphics can be redirected to a file and printed using a
|
||||||
|
laser printer with tektronix capability (see
|
||||||
|
"Printing and saving results in files").
|
2112
help/bap_help
Normal file
2112
help/bap_help
Normal file
File diff suppressed because it is too large
Load diff
84
help/bap_menu
Normal file
84
help/bap_menu
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
-1 0 21 2 T General
|
||||||
|
-1 0 21 2 X General
|
||||||
|
-2 0 50 2 T Screen control
|
||||||
|
-2 0 71 2 X Screen
|
||||||
|
-3 0 98 2 T Modification
|
||||||
|
-3 0 98 2 X Modification
|
||||||
|
0 -1 116 332 T BAP
|
||||||
|
0 -1 116 332 X BAP
|
||||||
|
17 1 17434 18 T Screen against enzymes
|
||||||
|
17 1 17434 18 X Screen against enzymes
|
||||||
|
18 1 18477 23 T Screen against vector
|
||||||
|
18 1 18477 23 X Screen against vector
|
||||||
|
20 3 19859 121 T Auto assemble
|
||||||
|
20 3 19859 121 X Auto assemble
|
||||||
|
28 1 26426 43 T Highlight disagreements
|
||||||
|
28 1 26426 43 X Highlight disagreements
|
||||||
|
32 3 28846 17 T Extract gel readings
|
||||||
|
32 3 28846 17 X Extract gel readings
|
||||||
|
1 0 29607 3 T Help
|
||||||
|
1 0 29607 3 X Help
|
||||||
|
2 0 29676 5 T Quit
|
||||||
|
2 0 29676 5 X Quit
|
||||||
|
3 1 29869 230 T Open a database
|
||||||
|
3 1 29869 230 X Open a database
|
||||||
|
4 3 41499 320 T Edit contig
|
||||||
|
4 3 41499 320 X Edit contig
|
||||||
|
5 1 56688 43 T Display a contig
|
||||||
|
5 1 56688 43 X Display a contig
|
||||||
|
6 1 58990 6 T List a text file
|
||||||
|
6 1 58990 6 X List a text file
|
||||||
|
8 1 59248 93 T Calculate a consensus
|
||||||
|
8 1 59248 93 X Calculate a consensus
|
||||||
|
25 1 63707 41 T Show relationships
|
||||||
|
25 1 63707 41 X Show relationships
|
||||||
|
23 3 65650 11 T Complement a contig
|
||||||
|
23 3 65650 11 X Complement a contig
|
||||||
|
22 3 66173 59 T Join contigs
|
||||||
|
22 3 66173 59 X Join contigs
|
||||||
|
24 1 69194 11 T Copy the database
|
||||||
|
24 1 69194 11 X Copy the database
|
||||||
|
19 1 69740 43 T Check database
|
||||||
|
19 1 69740 43 X Check database
|
||||||
|
29 1 71898 82 T Examine quality
|
||||||
|
29 1 71898 82 X Examine quality
|
||||||
|
26 3 75715 84 T Alter relationships
|
||||||
|
26 3 75715 84 X Alter relationships
|
||||||
|
27 1 79641 17 T Set display parameters
|
||||||
|
27 1 79641 17 X Set display parameters
|
||||||
|
30 3 80503 7 T Shuffle pads
|
||||||
|
30 3 80503 7 X Shuffle pads
|
||||||
|
10 2 80866 3 T Clear graphics
|
||||||
|
10 2 80866 3 X Clear graphics
|
||||||
|
11 2 80931 3 T Clear text
|
||||||
|
11 2 80931 3 X Clear text
|
||||||
|
12 2 80996 12 T Draw a ruler.
|
||||||
|
12 2 80996 12 X Draw a ruler.
|
||||||
|
14 2 81730 38 T Reposition plots
|
||||||
|
14 2 81730 38 X Reposition plots
|
||||||
|
15 2 84069 28 T Label a diagram
|
||||||
|
15 2 84069 28 X Label a diagram
|
||||||
|
16 2 85174 3 T Display a map
|
||||||
|
16 2 85174 3 X Display a map
|
||||||
|
7 1 85228 12 T Redirect output
|
||||||
|
7 1 85228 12 X Redirect output
|
||||||
|
13 2 85731 43 T Use crosshair
|
||||||
|
13 2 85731 43 X Use crosshair
|
||||||
|
33 2 87876 12 T Plot single contig
|
||||||
|
33 2 87876 12 X Plot single contig
|
||||||
|
34 2 88578 10 T Plot all contigs
|
||||||
|
34 2 88578 10 X Plot all contigs
|
||||||
|
31 3 89160 21 T Disassemble readings
|
||||||
|
31 3 89160 21 X Disassemble readings
|
||||||
|
35 3 90372 94 T Find internal joins
|
||||||
|
35 1 90372 94 T Find internal joins
|
||||||
|
35 3 90372 94 X Find internal joins
|
||||||
|
35 1 90372 94 X Find internal joins
|
||||||
|
36 3 96201 30 T Double strand
|
||||||
|
36 3 96201 30 X Double strand
|
||||||
|
37 3 97555 64 T Auto-select oligos
|
||||||
|
37 3 97555 64 X Auto-select oligos
|
||||||
|
38 1 100421 30 T Check assembly
|
||||||
|
38 1 100421 30 X Check assembly
|
||||||
|
39 1 102178 90 T Find read pairs
|
||||||
|
39 1 102178 90 X Find read pairs
|
2112
help/dap_help
Normal file
2112
help/dap_help
Normal file
File diff suppressed because it is too large
Load diff
79
help/dap_menu
Normal file
79
help/dap_menu
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
-1 0 21 2 T General
|
||||||
|
-1 0 21 2 X General
|
||||||
|
-2 0 50 2 T Screen control
|
||||||
|
-2 0 71 2 X Screen
|
||||||
|
-3 0 98 2 T Modification
|
||||||
|
-3 0 98 2 X Modification
|
||||||
|
0 -1 116 351 T SAP
|
||||||
|
0 -1 116 351 X SAP
|
||||||
|
17 1 18801 18 T Screen against enzymes
|
||||||
|
17 1 18801 18 X Screen against enzymes
|
||||||
|
18 1 19844 22 T Screen against vector
|
||||||
|
18 1 19844 22 X Screen against vector
|
||||||
|
20 3 21171 113 T Auto assemble
|
||||||
|
20 3 21171 113 X Auto assemble
|
||||||
|
28 1 27332 42 T Highlight disagreements
|
||||||
|
28 1 27332 42 X Highlight disagreements
|
||||||
|
32 3 29694 22 T Extract gel readings
|
||||||
|
32 3 29694 22 X Extract gel readings
|
||||||
|
1 0 30797 3 T Help
|
||||||
|
1 0 30797 3 X Help
|
||||||
|
2 0 30866 5 T Quit
|
||||||
|
2 0 30866 5 X Quit
|
||||||
|
3 1 31059 237 T Open a database
|
||||||
|
3 1 31059 237 X Open a database
|
||||||
|
4 3 43258 239 T Edit contig
|
||||||
|
4 3 43258 239 X Edit contig
|
||||||
|
9 3 54180 42 T Screen edit
|
||||||
|
5 1 56376 45 T Display a contig
|
||||||
|
5 1 56376 45 X Display a contig
|
||||||
|
6 1 58862 6 T List a text file
|
||||||
|
6 1 58862 6 X List a text file
|
||||||
|
8 1 59120 93 T Calculate a consensus
|
||||||
|
8 1 59120 93 X Calculate a consensus
|
||||||
|
25 1 63651 41 T Show relationships
|
||||||
|
25 1 63651 41 X Show relationships
|
||||||
|
21 3 65587 101 T Enter new gel reading
|
||||||
|
21 3 65587 101 X Enter new gel reading
|
||||||
|
23 3 70677 11 T Complement a contig
|
||||||
|
23 3 70677 11 X Complement a contig
|
||||||
|
22 3 71200 63 T Join contigs
|
||||||
|
22 3 71200 63 X Join contigs
|
||||||
|
24 1 74467 11 T Copy the database
|
||||||
|
24 1 74467 11 X Copy the database
|
||||||
|
19 1 75013 41 T Check database
|
||||||
|
19 1 75013 41 X Check database
|
||||||
|
29 1 77032 82 T Examine quality
|
||||||
|
29 1 77032 82 X Examine quality
|
||||||
|
26 3 80849 101 T Alter relationships
|
||||||
|
26 3 80849 101 X Alter relationships
|
||||||
|
27 1 86065 17 T Set display parameters
|
||||||
|
27 1 86065 17 X Set display parameters
|
||||||
|
30 3 86933 48 T Auto edit a contig
|
||||||
|
30 3 86933 48 X Auto edit a contig
|
||||||
|
10 2 89409 3 T Clear graphics
|
||||||
|
10 2 89409 3 X Clear graphics
|
||||||
|
11 2 89474 3 T Clear text
|
||||||
|
11 2 89474 3 X Clear text
|
||||||
|
12 2 89539 12 T Draw a ruler.
|
||||||
|
12 2 89539 12 X Draw a ruler.
|
||||||
|
14 2 90273 38 T Reposition plots
|
||||||
|
14 2 90273 38 X Reposition plots
|
||||||
|
15 2 92612 28 T Label a diagram
|
||||||
|
15 2 92612 28 X Label a diagram
|
||||||
|
16 2 93717 27 T Display a map
|
||||||
|
16 2 93717 27 X Display a map
|
||||||
|
7 1 94692 12 T Redirect output
|
||||||
|
7 1 94692 12 X Redirect output
|
||||||
|
13 2 95163 43 T Use crosshair
|
||||||
|
13 2 95163 43 X Use crosshair
|
||||||
|
33 2 97308 12 T Plot single contig
|
||||||
|
33 2 97308 12 X Plot single contig
|
||||||
|
34 2 98010 10 T Plot all contigs
|
||||||
|
34 2 98010 10 X Plot all contigs
|
||||||
|
31 3 98592 12 T Type in gel readings
|
||||||
|
31 3 98592 12 X Type in gel readings
|
||||||
|
35 3 99223 92 T Find internal joins
|
||||||
|
35 1 99223 92 T Find internal joins
|
||||||
|
35 3 99223 92 X Find internal joins
|
||||||
|
35 1 99223 92 X Find internal joins
|
198
help/gip_help
Normal file
198
help/gip_help
Normal file
|
@ -0,0 +1,198 @@
|
||||||
|
GIP
|
||||||
|
|
||||||
|
A digitizer is a two dimensional surface which is such that
|
||||||
|
if a special pen is pressed onto it, the pen's coordinates can be
|
||||||
|
recorded by a computer. These coordinates can be interpreted by a
|
||||||
|
program.
|
||||||
|
|
||||||
|
The digitizing device we use works by the pen emitting a high
|
||||||
|
frequency sound which is picked up by two microphones positioned at
|
||||||
|
the rear of the working area. The pen position is determined by
|
||||||
|
triangulation and the digitizing device sends the coordinates to the
|
||||||
|
computer. As no special surface is required the device can
|
||||||
|
conveniently be positioned on a light box giving the sequencer an
|
||||||
|
unobscured view of the autoradiographs.
|
||||||
|
The digitizer is called a GRAPHBAR MODEL GP7 made by Science
|
||||||
|
Accessories Corp, 970 Kings Highway West, Southport, Connecticut
|
||||||
|
06490, USA.
|
||||||
|
|
||||||
|
The program uses a menu to allow the user to select commands
|
||||||
|
or to enter the uncertainty codes for areas of the gel that
|
||||||
|
are difficult to interpret. A menu is simply a series of boxes drawn
|
||||||
|
on the digitizing surface that each contain a command or
|
||||||
|
uncertainty code. When the user puts the pen down in these special
|
||||||
|
regions the program interprets the coordinates as commands and acts
|
||||||
|
appropriately. A copy of the menu should have been sent to you. It
|
||||||
|
should be stuck down on the surface of the light box in the
|
||||||
|
digitizing area. For convenience it is best to position it to the
|
||||||
|
right of the digitizing area, but in practice as long as its top edge
|
||||||
|
is parallel to the digitizer box, it can be put anywhere in the
|
||||||
|
active region.
|
||||||
|
|
||||||
|
Entering gel readings using a digitizer
|
||||||
|
|
||||||
|
The autoradiograph should be stuck down on the light box with
|
||||||
|
the lanes running, as nearly as possible, at right angles to the
|
||||||
|
digitizer. To read an autoradiograph placed on the light box the user
|
||||||
|
need only define the positions of the four sequencing lanes and the
|
||||||
|
bases to which they correspond and then use the pen to point to
|
||||||
|
each successive band progressing up the gel. The program examines
|
||||||
|
the coordinates of each pen position to see in which of the four
|
||||||
|
lanes it lies and assigns the corresponding base to be stored
|
||||||
|
in the computer. Each time the pen tip is depressed to point to a
|
||||||
|
position on the surface of the digitizer the program sounds the
|
||||||
|
bell on the terminal (a different sound for each of the four bases on
|
||||||
|
the microcomputer version of the program) to indicate to the user
|
||||||
|
that a point has been recorded. As the sequence is read the
|
||||||
|
program displays it on the screen.
|
||||||
|
|
||||||
|
The program uses a menu to allow the user to select commands
|
||||||
|
or to enter the uncertainty codes for areas of the gel that
|
||||||
|
are difficult to interpret. A menu is simply a series of boxes drawn
|
||||||
|
on the digitizing surface that each contain a command or
|
||||||
|
uncertainty code. When the user puts the pen down in these special
|
||||||
|
regions the program interprets the coordinates as commands and acts
|
||||||
|
appropriately. As well as the uncertainty codes
|
||||||
|
A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8 the following commands are
|
||||||
|
included in the menu: DELETE removes the last character from the
|
||||||
|
sequence; RESET allows the lane centres to be redefined; START means
|
||||||
|
begin the next stage of the procedure; STOP means stop the
|
||||||
|
current stage in the procedure; CONFIRM means confirm that the last
|
||||||
|
command or set of coordinates is correct.
|
||||||
|
|
||||||
|
The digitizing device also has a menu of its own. This lies in
|
||||||
|
a two inch wide strip immediately in front of the digitizing box. Pen
|
||||||
|
positions within this two inch strip are interpreted as commands to
|
||||||
|
the digitizer and are not sent to the GIP program. In general the
|
||||||
|
only time users will need to use the device menu is when they tell
|
||||||
|
GIP where the program menu lies in the digitizing area. This is done
|
||||||
|
by first hitting ORIGIN in the device menu and then hitting the
|
||||||
|
bottom left hand corner of the program menu. The program menu can
|
||||||
|
hence be positioned anywhere in the active region but should be
|
||||||
|
arranged parallel to the digitizer.
|
||||||
|
|
||||||
|
The user should try to hit the bands as near as possible to
|
||||||
|
the centre of the lanes because the program tracks the lanes up the
|
||||||
|
film using the pen positions. By using this tracking strategy the
|
||||||
|
user only has to define the centres of the bottom of the lanes before
|
||||||
|
starting to read the film. The program can correctly follow quite
|
||||||
|
curved lanes and constantly checks that its lane centre coordinates
|
||||||
|
look sensible. If the lane centres appear to be getting too close the
|
||||||
|
program stops responding to the pen positions of bands and hence does
|
||||||
|
not ring the bell. If this occurs users must hit the reset box in the
|
||||||
|
menu and the program will request them to redefine the lane centres
|
||||||
|
at the current reading position. Then they can continue reading. As a
|
||||||
|
further safeguard the program will only respond to pen positions
|
||||||
|
either in the menu or very close to the current reading position.
|
||||||
|
|
||||||
|
Running the gel reading program
|
||||||
|
The autoradiograph should be firmly stuck down on the light box and
|
||||||
|
the program started by typing GIP. It will ask the first question.
|
||||||
|
" ? FILE OF FILE NAMES="
|
||||||
|
Type the name for the file of file names and then follow the
|
||||||
|
instructions.
|
||||||
|
" HIT DIGITIZER MENU ORIGIN"
|
||||||
|
" THEN PROGRAM MENU ORIGIN"
|
||||||
|
" THEN HIT START IN PROGRAM MENU"
|
||||||
|
If the bell does not sound after you hit start try hitting metric in
|
||||||
|
the device menu (the program uses metric units, and some digitizers
|
||||||
|
are set to default to use inches; hitting metric switches between
|
||||||
|
the two).
|
||||||
|
After the bell has sounded the program will give the default lane
|
||||||
|
order.
|
||||||
|
" LANE ORDER IS T C A G"
|
||||||
|
" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
|
||||||
|
If the lane order, reading from left to right is correct hit confirm
|
||||||
|
in the program menu. If you are using a different order hit reset
|
||||||
|
and you will be asked to define the lane order from left to right
|
||||||
|
using the program menu (as follows).
|
||||||
|
" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
|
||||||
|
Hit the boxes in the menu that contain the symbols A,C,G,T in the
|
||||||
|
left-right order of the lanes. The program will respond with the
|
||||||
|
lane order as above and ask for confirmation. When this is received,
|
||||||
|
the next task is to define the start positions of the next four
|
||||||
|
lanes.
|
||||||
|
" HIT START, THEN HIT (LEFT TO RIGHT)"
|
||||||
|
" THE START POSITIONS FOR THE NEXT FOUR LANES"
|
||||||
|
Hit the centres of the four lanes at a height level with the first
|
||||||
|
band that is going to be read. The program will report the mean lane
|
||||||
|
separations and ask for confirmation that they are correct.
|
||||||
|
" MEAN LANE SEPARATION IS XX"
|
||||||
|
" HIT CONFIRM TO CONTINUE"
|
||||||
|
Users will become familiar with the values from their films and will
|
||||||
|
spot any unusual numbers. Asking for confirmation allows users to
|
||||||
|
try again if they had made a mistake, but generally the lane
|
||||||
|
separation values can be ignored. Hit confirm, and the program will
|
||||||
|
give the message
|
||||||
|
" HIT START WHEN READY TO BEGIN READING"
|
||||||
|
Hit start and the program will give the message
|
||||||
|
" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
|
||||||
|
Hit the bands, interpreting the sequence progressing up the film.
|
||||||
|
If necessary use the uncertainty codes. If the pen stops responding
|
||||||
|
hit reset and follow the instructions as above. When the sequence
|
||||||
|
becomes unreadable hit stop and the program will ask for a file name
|
||||||
|
for the gel reading just read.
|
||||||
|
" ? FILE NAME FOR THIS GEL READING="
|
||||||
|
Type the file name observing the rules about legal gel reading
|
||||||
|
names. The program will ask if you wish to read another sequence.
|
||||||
|
" TO ENTER ANOTHER GEL READING TYPE 1"
|
||||||
|
To enter another, type 1 and you will be back to the step of defining
|
||||||
|
the lane order. Typing anything else will stop the program.
|
||||||
|
|
||||||
|
Running the microcomputer version of the gel reading program
|
||||||
|
The microcomputer version of GIP is slightly different and is called
|
||||||
|
GIPB. The BBC micro does not have the capacity to process the gel
|
||||||
|
readings beyond the reading stage. This means that users of this
|
||||||
|
program would need to transfer their gel readings from the micro to
|
||||||
|
another machine using a terminal emulator. Transferring many files
|
||||||
|
is tedious and so the microcomputer version of the gel reading
|
||||||
|
program stores all the gel readings for each run of the program in a
|
||||||
|
single file. This special file contains both sequences and file names
|
||||||
|
and can be moved in a single transfer to another machine. Once on the
|
||||||
|
other machine the single file must be split into separate gel reading
|
||||||
|
files and a file of file names. This is done using the program
|
||||||
|
BSPLIT. As far as using the microcomputer version of GIP, the only
|
||||||
|
difference is that the first file name the program requests is not a
|
||||||
|
file of file names, but a name for the single file to contain all the
|
||||||
|
gel readings and their names.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
0
help/gip_menu
Normal file
0
help/gip_menu
Normal file
48
help/makefile
Normal file
48
help/makefile
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
#
|
||||||
|
# Make file for help files - this requires gmake on some systems.
|
||||||
|
#
|
||||||
|
PROGS = bap dap gip mep nip \
|
||||||
|
nipf pip sap sip #mem
|
||||||
|
|
||||||
|
HELPS = bap_help dap_help gip_help mep_help nip_help \
|
||||||
|
nipf_help pip_help sap_help sip_help #mem_help
|
||||||
|
|
||||||
|
MENUS = bap_menu dap_menu gip_menu mep_menu nip_menu \
|
||||||
|
nipf_menu pip_menu sap_menu sip_menu #mem_menu
|
||||||
|
|
||||||
|
all: $(PROGS)
|
||||||
|
|
||||||
|
DOIT = rm -f $@_help $@_menu; ./runoff $?
|
||||||
|
|
||||||
|
bap: BAP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
dap: DAP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
gip: GIP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
#mem: MEM.RNO
|
||||||
|
# $(DOIT)
|
||||||
|
|
||||||
|
mep: MEP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
nip: NIP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
nipf: NIPF.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
pip: PIP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
sap: SAP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
sip: SIP.RNO
|
||||||
|
$(DOIT)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f $(HELPS) $(MENUS)
|
698
help/mem_help
Normal file
698
help/mem_help
Normal file
|
@ -0,0 +1,698 @@
|
||||||
|
|
||||||
|
@0. B 1 @MEP
|
||||||
|
This is a program for analysing families of nucleotide sequences in
|
||||||
|
order to find common motifs and potential binding sites. The ideas
|
||||||
|
in this program were described in Staden, R. "Methods for
|
||||||
|
discovering novel motifs in nucleic acid sequences". Computer
|
||||||
|
Applications in the Biosciences, 5, 293-298, (1989).
|
||||||
|
|
||||||
|
The program can read sequences stored in either of two
|
||||||
|
formats: 1) all sequences aligned in a single file; 2) all sequences
|
||||||
|
in separate files and accessed through a file of file names.
|
||||||
|
|
||||||
|
The program contains functions that can answer several
|
||||||
|
questions about a set of sequences:
|
||||||
|
|
||||||
|
Which words are most common?
|
||||||
|
Which words occur in the most sequences?
|
||||||
|
Which words contain the most information?
|
||||||
|
Which words occur in equivalent positions in the sequences?
|
||||||
|
Which words are inverted repeats?
|
||||||
|
Which words occur on both strands of the sequences?
|
||||||
|
Where are the inverted repeats?
|
||||||
|
Where are the fuzzy words?
|
||||||
|
|
||||||
|
Most of the program is concerned with analysing what it terms
|
||||||
|
"fuzzy words" within the set of sequences. The analysis is explained
|
||||||
|
below. Note that the standard version of the program is limited to
|
||||||
|
words of maximum length 8 letters, and a maximum fuzziness of 2.
|
||||||
|
|
||||||
|
The following analyses (preceded by their option numbers) are
|
||||||
|
included:
|
||||||
|
? = Help
|
||||||
|
! = Quit
|
||||||
|
3 = Read new sequences
|
||||||
|
4 = Redefine active region
|
||||||
|
5 = List the sequences
|
||||||
|
6 = List text file
|
||||||
|
7 = Direct output to disk
|
||||||
|
10 = Clear graphics
|
||||||
|
11 = Clear text
|
||||||
|
12 = Draw ruler
|
||||||
|
13 = Use cross hair
|
||||||
|
14 = Reset margins
|
||||||
|
15 = Label diagram
|
||||||
|
16 = Draw map
|
||||||
|
17 = Search for strings
|
||||||
|
18 = Set strand
|
||||||
|
19 = Set composition
|
||||||
|
20 = Set word length
|
||||||
|
21 = Set number of mismatches
|
||||||
|
22 = Show settings
|
||||||
|
23 = Make dictionary Dw
|
||||||
|
24 = Make dictionary Ds
|
||||||
|
25 = Make fuzzy dictionary Dm from Dw
|
||||||
|
26 = Make fuzzy dictionary Dm from Ds
|
||||||
|
27 = Make fuzzy dictionary Dh from Dm
|
||||||
|
28 = Examine fuzzy dictionary Dm
|
||||||
|
29 = Examine fuzzy dictionary Dh
|
||||||
|
30 = Examine words in Dm
|
||||||
|
31 = Examine words in Dh
|
||||||
|
32 = Save or restore a dictionary
|
||||||
|
33 = Find inverted repeats
|
||||||
|
|
||||||
|
Some of these methods produce graphical results and so the
|
||||||
|
program is generally used from a graphics terminal (a vdu on which
|
||||||
|
lines and points can be drawn as well as characters).
|
||||||
|
|
||||||
|
The position of each of the plots is defined relative to a user's
|
||||||
|
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||||
|
for each option are drawn in a window defined by x0,y0 and
|
||||||
|
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||||
|
corner of the window, and xlength is the width of the window and
|
||||||
|
ylength the height of the window.
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||||
|
The default window positions are read from a file "MEPMARG" when the
|
||||||
|
program is started. Users can have their own file if required.
|
||||||
|
|
||||||
|
The options for the program are accessed from 3 main menus:
|
||||||
|
general, screen control and dictionary analysis. Both menus and
|
||||||
|
options are selected by number.
|
||||||
|
|
||||||
|
The most important and novel part of the program is its use of
|
||||||
|
"fuzzy dictionaries" and an information theory measure, to help show
|
||||||
|
the most interesting motifs. Central to the method is the idea of a
|
||||||
|
fuzzy dictionary of word frequencies. A dictionary of word
|
||||||
|
frequencies is an ordered list of all the words in the sequences and
|
||||||
|
a count of the number of times that they occur. A fuzzy dictionary
|
||||||
|
is an equivalent list but which contains instead, for each word, a
|
||||||
|
count of the number of times similar words occur in the sequences.
|
||||||
|
We term words that are similar "relations". The fuzziness is defined
|
||||||
|
by the number of letters in a word that are allowed to be different.
|
||||||
|
So if we had a fuzziness of 1 we allow 1 letter to be different. For
|
||||||
|
example, with a fuzziness of 1, the entry in the fuzzy dictionary
|
||||||
|
for the word TTTTTT would contain a count of the numbers of times
|
||||||
|
TTTTTT occurred plus the number of times all words differing by
|
||||||
|
exactly one letter from TTTTTT occurred.
|
||||||
|
|
||||||
|
Once the fuzzy dictionary has been created we can examine it
|
||||||
|
in several ways to find candidate control sequences. The simplest
|
||||||
|
question we can ask is which word in the dictionary is the most
|
||||||
|
common. Sometimes this simple criterion of "most common" may be
|
||||||
|
adequate to discover a new motif but in general we would not expect
|
||||||
|
it to be sufficient. For example some words will be common simply
|
||||||
|
because of a base composition bias in the sequences being analysed.
|
||||||
|
In addition a word can be the most frequent and yet not be "well
|
||||||
|
defined". This last point is best explained by an example.
|
||||||
|
|
||||||
|
Suppose we were looking at two letter words and allowing one
|
||||||
|
mismatch, and that there were 10 occurrences of TT and 5 of AC. We
|
||||||
|
could align the 10 words that were one letter different from TT and
|
||||||
|
the 5 that were related to AC. Then we could count the number of
|
||||||
|
times each base occurred in each position for each of these two sets
|
||||||
|
of words. Suppose we got the two base frequency tables shown below.
|
||||||
|
TT AC
|
||||||
|
T 6 4 T 1 0
|
||||||
|
C 1 3 C 0 4
|
||||||
|
A 1 2 A 4 1
|
||||||
|
G 2 1 G 0 0
|
||||||
|
|
||||||
|
These tables show that although TT occurs (with one letter mismatch)
|
||||||
|
more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
|
||||||
|
is higher than those for TT at 6/10, 4/10. Hence we would say that
|
||||||
|
AC was better defined than TT. Expressing this another way we would
|
||||||
|
say that the definition of AC contained more information than that
|
||||||
|
for TT. The program calculates the information content in a way that
|
||||||
|
takes into account both the sequence composition and the level of
|
||||||
|
definition of the motif.
|
||||||
|
|
||||||
|
Definitions
|
||||||
|
|
||||||
|
Here we deal only with the dictionary analysis. Suppose we
|
||||||
|
are dealing with a set of sequences and are examining them for words
|
||||||
|
that are six characters in length.
|
||||||
|
|
||||||
|
Dictionary Dw contains a count of the number of times each
|
||||||
|
word occurs in the set of sequences. For example the entry for
|
||||||
|
TTTTTT contains a value equal to the number of times the word TTTTTT
|
||||||
|
occurs in the set of sequences.
|
||||||
|
|
||||||
|
Dictionary Ds contains a count of the number of different
|
||||||
|
sequences in which each word occurs. For example if the entry for
|
||||||
|
word TTTTTT contains the value 10, it denotes that the word TTTTTT
|
||||||
|
occurs in ten different sequences. Unlike Dw it only counts words
|
||||||
|
once for each sequence. For example if we had a set of 100
|
||||||
|
sequences, the maximum possible value that Ds could take is 100, and
|
||||||
|
this would only happen if a word occurred in every sequence. However
|
||||||
|
for the same set of sequences, Dw could contain values greater than
|
||||||
|
100, and this would show that a word had occurred more than once in
|
||||||
|
at least one sequence.
|
||||||
|
|
||||||
|
From either of the two dictionaries Dw or Ds we can calculate
|
||||||
|
a fuzzy dictionary Dm. For each word, the entry in the fuzzy
|
||||||
|
dictionary Dm contains the sum of the dictionary values (taken from
|
||||||
|
either Dw or Ds) for all words that differ from it by up to m
|
||||||
|
letters. For example if m=2 the entry for TTTTTT contains the number
|
||||||
|
of times that TTTTTT occurs in the dictionary, plus the counts for
|
||||||
|
all words that differ from TTTTTT by 1 or 2 letters. Obviously the
|
||||||
|
interpretation of the values in Dm depends on which of the two
|
||||||
|
dictionaries Dw or Ds they were derived from. When derived from Dw
|
||||||
|
the entry for any word in Dm gives the total number of times it, and
|
||||||
|
its relations, occur in the set of sequences. When derived from Ds
|
||||||
|
the entry for any word in Dm gives the total number of different
|
||||||
|
sequences that contain a word and each of its relations.
|
||||||
|
|
||||||
|
Finally, from fuzzy dictionary Dm we can derive fuzzy
|
||||||
|
dictionary Dh. All entries in Dh are zero except for the word(s),
|
||||||
|
within each set of relations, that are most frequent. For example if
|
||||||
|
TTTTTT occurred 20 times but had a relation that occurred more
|
||||||
|
often, then the entry for TTTTTT would be zero. However if TTTTTT
|
||||||
|
did not have a more frequently occurring relation, then the entry
|
||||||
|
for TTTTTT would contain the value 20.
|
||||||
|
@1. B 1 @Help
|
||||||
|
This option gives online help. The user should select option numbers
|
||||||
|
and the current documentation will be given. Note that option 0
|
||||||
|
gives an introduction to the program, and that ? will get help from
|
||||||
|
anywhere in the program. The following analyses (preceded by their
|
||||||
|
option numbers) are included:
|
||||||
|
? = Help
|
||||||
|
! = Quit
|
||||||
|
3 = Read new sequences
|
||||||
|
4 = Redefine active region
|
||||||
|
5 = List the sequences
|
||||||
|
6 = List text file
|
||||||
|
7 = Direct output to disk
|
||||||
|
10 = Clear graphics
|
||||||
|
11 = Clear text
|
||||||
|
12 = Draw ruler
|
||||||
|
13 = Use cross hair
|
||||||
|
14 = Reset margins
|
||||||
|
15 = Label diagram
|
||||||
|
16 = Draw map
|
||||||
|
17 = Search for strings
|
||||||
|
18 = Set strand
|
||||||
|
19 = Set composition
|
||||||
|
20 = Set word length
|
||||||
|
21 = Set number of mismatches
|
||||||
|
22 = Show settings
|
||||||
|
23 = Make dictionary Dw
|
||||||
|
24 = Make dictionary Ds
|
||||||
|
25 = Make fuzzy dictionary Dm from Dw
|
||||||
|
26 = Make fuzzy dictionary Dm from Ds
|
||||||
|
27 = Make fuzzy dictionary Dh from Dm
|
||||||
|
28 = Examine fuzzy dictionary Dm
|
||||||
|
29 = Examine fuzzy dictionary Dh
|
||||||
|
30 = Examine words in Dm
|
||||||
|
31 = Examine words in Dh
|
||||||
|
32 = Save or restore a dictionary
|
||||||
|
33 = Find inverted repeats
|
||||||
|
@2. B 1 @Quit
|
||||||
|
This function stops the program.
|
||||||
|
@3. B 1 @Read a new sequence.
|
||||||
|
|
||||||
|
It can read sequences stored in either of two formats: 1) all
|
||||||
|
sequences aligned in a single file; 2) all sequences in separate
|
||||||
|
files and accessed through a file of file names. Typical dialogue
|
||||||
|
follows:
|
||||||
|
|
||||||
|
X 1 Read file of aligned sequences
|
||||||
|
2 Use file of file names
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? File of aligned sequences=F1
|
||||||
|
Number of files 88
|
||||||
|
|
||||||
|
@4. B 1 @Define active region
|
||||||
|
For its analytic functions the program always works on a region of
|
||||||
|
the sequence called the active region. When new sequences are read
|
||||||
|
into the program the active region is automatically set to start at
|
||||||
|
the beginning of the sequences and go up to the end of the longest
|
||||||
|
one.
|
||||||
|
@5. B 1 @List a sequence.
|
||||||
|
The sequence can be listed with line lengths of 50 bases with each
|
||||||
|
sequence numbered in the order in which they were read. Output can
|
||||||
|
be directed to a disk file by first selecting disk output. Typical
|
||||||
|
dialogue follows.
|
||||||
|
|
||||||
|
? Menu or option number=5
|
||||||
|
|
||||||
|
10 20 30 40 50
|
||||||
|
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||||
|
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||||
|
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||||
|
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||||
|
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||||
|
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||||
|
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||||
|
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||||
|
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||||
|
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||||
|
|
||||||
|
60
|
||||||
|
1 TACCCGTTTTT
|
||||||
|
2 GCGTTTTTGT
|
||||||
|
3 TCATACCATAAG
|
||||||
|
4 TTTCATACC
|
||||||
|
5 ATTGTGAGC
|
||||||
|
6 TTCCGGCTCG
|
||||||
|
7 GAAGAGAGT
|
||||||
|
8 TCAGGTGT
|
||||||
|
9 ATGAATG
|
||||||
|
10 TAATTACG
|
||||||
|
@6. B 1 @List a text file.
|
||||||
|
Allows the user to have a text file displayed on the screen. It will
|
||||||
|
appear one page at a time.
|
||||||
|
@7. B 1 @Direct output to disk
|
||||||
|
|
||||||
|
Used to direct output that would normally appear on the screen
|
||||||
|
to a file.
|
||||||
|
|
||||||
|
Select redirection of either text or graphics, and supply the
|
||||||
|
name of the file that the output should be written to.
|
||||||
|
|
||||||
|
The results from the next options selected will not appear on
|
||||||
|
the screen but will be written to the file. When option 7 is
|
||||||
|
selected again the file will be closed and output will again appear
|
||||||
|
on the screen.
|
||||||
|
@10. B 1 @Clear graphics
|
||||||
|
Clears the screen of both text and graphics.
|
||||||
|
@11. B 1 @Clear text
|
||||||
|
Clears only text from the screen.
|
||||||
|
@12. B 1 @Draw a ruler.
|
||||||
|
This option allows the user to draw a ruler or scale along the x
|
||||||
|
axis of the screen to help identify the coordinates of points of
|
||||||
|
interest. The user can define the position of the first base
|
||||||
|
to be marked (for example if the active region is 1501 to 8000, the
|
||||||
|
user might wish to mark every 1000th base starting at either
|
||||||
|
1501 or 2000 - it depends if the user wishes to treat the active
|
||||||
|
region as an independent unit with its own numbering starting at its
|
||||||
|
left edge, or as part of the whole sequence). The user can also
|
||||||
|
define the separation of the ticks on the scale and their height. If
|
||||||
|
required the labelling routine can be used to add numbers to the
|
||||||
|
ticks.
|
||||||
|
@13. B 1 @Use crosshair.
|
||||||
|
This function puts a steerable cross on the screen that can be used
|
||||||
|
to find the coordinates of points in the sequence. The user can move
|
||||||
|
the cross around using the directional keys; when he hits the space
|
||||||
|
bar the program will print out the coordinates of the cross in
|
||||||
|
sequence units and the option will be exited.
|
||||||
|
|
||||||
|
If, instead, you hit a comma (,) the position will be displayed but the
|
||||||
|
cross will remain on the screen.
|
||||||
|
|
||||||
|
If a letter s is hit the sequence around the cross hair is
|
||||||
|
displayed and the cross remains on the screen.
|
||||||
|
@14. B 1 @Reposition plots
|
||||||
|
The position of each of the plots is defined relative to a user's
|
||||||
|
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||||
|
for each option are drawn in a window defined by x0,y0 and
|
||||||
|
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||||
|
corner of the window, and xlength is the width of the window and
|
||||||
|
ylength the height of the window.
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||||
|
The default window positions are read from a file "MEPMARG" when the
|
||||||
|
program is started. Users can have their own file if required. As
|
||||||
|
all the plots start at the same position in x and have the same
|
||||||
|
width, x0 and xlength are the same for all options. Generally users
|
||||||
|
will only want to change the start level of the window y0 and its
|
||||||
|
height ylength. This option allows users to change window positions
|
||||||
|
whilst running the program. The routine prompts first for the
|
||||||
|
number of the option that the user wishes to reposition; then for
|
||||||
|
the y start and height; then for the x start and length. Note that
|
||||||
|
changes to the x values affect all options. If the user types only
|
||||||
|
carriage return for any value it will remain unchanged. The cross-
|
||||||
|
hair can be used to choose suitable heights.
|
||||||
|
@15. B 1 @Label a diagram
|
||||||
|
This routine allows users to label any diagrams they have produced.
|
||||||
|
They are asked to type in a label. When the user types carriage
|
||||||
|
return to finish typing the label the cross-hair appears on the
|
||||||
|
screen. The user can position it anywhere on the screen. If the user
|
||||||
|
types R (for right justify) the label will be written on the diagram
|
||||||
|
with its right end at the cross-hair position. If the user types L
|
||||||
|
(for left justify) the label will be written on the diagram with its
|
||||||
|
left end at the cross hair position. The cross-hair will then
|
||||||
|
immediately reappear. The user may put the same label on another
|
||||||
|
part of the diagram as before or if he hits the space bar he will be
|
||||||
|
asked if he wishes to type in another label.
|
||||||
|
@16. B 1 @Display a map.
|
||||||
|
It is often convenient to plot a map alongside graphed analysis in
|
||||||
|
order to indicate features within the sequence. This function allows
|
||||||
|
users to draw maps using files arranged in the form of EMBL feature
|
||||||
|
tables. Of course the EMBL tables are usually only used for nucleic
|
||||||
|
acid sequence annotation but, as long as the features are written in
|
||||||
|
the correct format, they can be employed by this routine. The map is
|
||||||
|
composed of a line representing the sequence and then further lines
|
||||||
|
denoting the endpoints of each feature the user identifies. The user
|
||||||
|
is asked to define the height at which the line representing the
|
||||||
|
sequence should be drawn; then for the feature height; then for the
|
||||||
|
features to plot.
|
||||||
|
@17. B 1 @Search for strings
|
||||||
|
Search for strings performs searches of all the sequences for
|
||||||
|
selected words and shows which sequences they are found in. The user
|
||||||
|
types in a word and defines the allowed number of mismatches. The
|
||||||
|
results are listed or plotted. If listed the display includes the
|
||||||
|
sequence number, the position in the sequence and the matching
|
||||||
|
string. The results are plotted in the following way. The x axis of
|
||||||
|
the plot represents the length of the aligned sequences and the y
|
||||||
|
direction is divided into sufficient strips to accommodate each
|
||||||
|
sequence. So if a match is found in the 3rd sequence at a position
|
||||||
|
equivalent to halfway along the longest of the sequences then a
|
||||||
|
short vertical line will be drawn at the midpoint of the 3rd strip.
|
||||||
|
If the sequences are aligned it can be useful if the motifs happen
|
||||||
|
to appear in related positions. For example see the original
|
||||||
|
publication. Typical dialogue follows.
|
||||||
|
|
||||||
|
? Menu or option number=17
|
||||||
|
X 1 Plot match positions
|
||||||
|
2 Plot histogram of matches
|
||||||
|
? 0,1,2 =
|
||||||
|
? Word to search for=TTGACA
|
||||||
|
? Minimum match (0-6) (6) =5
|
||||||
|
? (y/n) (y) Plot results N
|
||||||
|
2 35 TAGACA
|
||||||
|
5 14 TTTACA
|
||||||
|
6 37 TTTACA
|
||||||
|
11 14 TAGACA
|
||||||
|
14 14 TTGACA
|
||||||
|
17 14 GTGACA
|
||||||
|
17 22 TTAACA
|
||||||
|
20 1 TTGACA
|
||||||
|
@18. B 1 @Set strand
|
||||||
|
Set strand allows the user to define which strand(s) of the
|
||||||
|
sequences to analyse: input strand, complement of input, or both.
|
||||||
|
@19. B 1 @Set composition
|
||||||
|
Set composition gives the user three choices for setting the
|
||||||
|
composition of the sequences for use in the calculation of the
|
||||||
|
information content of words. The user can select the overall
|
||||||
|
composition of the sequences as read, an even composition, or can
|
||||||
|
type in any other 4 values.
|
||||||
|
@20. B 1 @Set word length
|
||||||
|
Set word length sets the length of word for which dictionaries will
|
||||||
|
be made.
|
||||||
|
@21. B 1 @Set number of mismatches
|
||||||
|
Set number of mismatches sets the level of fuzziness for the
|
||||||
|
creation of dictionary Dm.
|
||||||
|
@22. B 1 @Show settings
|
||||||
|
Show settings shows the current settings for all parameters
|
||||||
|
associated with dictionary analysis. A typical display follows:
|
||||||
|
? Menu or option number=22
|
||||||
|
Current word length = 6
|
||||||
|
Number of mismatches = 1
|
||||||
|
Start position = 1
|
||||||
|
End position = 63
|
||||||
|
Input strand only
|
||||||
|
Observed composition
|
||||||
|
Dictionary Dw unmade
|
||||||
|
Dictionary Ds unmade
|
||||||
|
Dictionary Dm unmade
|
||||||
|
Dictionary Dh unmade
|
||||||
|
@23. B 1 @Make dictionary Dw
|
||||||
|
Make dictionary Dw creates a dictionary that contains a count of
|
||||||
|
the frequency of occurrence of each word in the collected sequences.
|
||||||
|
@24. B 1 @Make dictionary Ds
|
||||||
|
Make dictionary Ds creates a dictionary that contains a count of the
|
||||||
|
number of different sequences that contain each word.
|
||||||
|
@25. B 1 @Make dictionary Dm from Dw
|
||||||
|
Make dictionary Dm from Dw creates a dictionary from dictionary Dw
|
||||||
|
that contains the frequency of occurrence of each word (say X) in Dw
|
||||||
|
plus the frequency of occurrence of each word in Dw that differs
|
||||||
|
from X by up to m letters. Dm is called a fuzzy dictionary as it
|
||||||
|
contains the frequencies of occurrence of all words plus the
|
||||||
|
frequencies of all the words that are similar to them.
|
||||||
|
@26. B 1 @Make dictionary Dm from Ds
|
||||||
|
Make dictionary Dm from Ds creates a dictionary from dictionary Ds
|
||||||
|
that contains the frequency of occurrence of each word (say X) in Ds
|
||||||
|
plus the frequency of occurrence of each word in Ds that differs
|
||||||
|
from X by up to m letters. Dm is called a fuzzy dictionary as it
|
||||||
|
contains the frequencies of occurrence of all words plus the
|
||||||
|
frequencies of all the words that are similar to them.
|
||||||
|
@27. B 1 @Make dictionary Dh from Dm
|
||||||
|
Make dictionary Dh creates a dictionary from dictionary Dm and
|
||||||
|
whose entries are zero except for those words in any set of related
|
||||||
|
words that are most frequent. It finds the dominant words in each
|
||||||
|
set of relations and stores their counts.
|
||||||
|
@28. B 1 @Examine dictionary Dm
|
||||||
|
Examine dictionary Dm allows users to analyse the contents of
|
||||||
|
dictionary Dm to find the most common words or those words that
|
||||||
|
contain the most information. The user supplies a frequency or
|
||||||
|
information cutoff and chooses to have the results sorted on either
|
||||||
|
value. The program will find the top 100 words that achieve the
|
||||||
|
cutoff values and present them to the user sorted as selected. The
|
||||||
|
information content will be calculated from either Dw or Ds
|
||||||
|
depending which was used to create Dm, and using the current
|
||||||
|
composition setting. Typical dialogue follows:
|
||||||
|
|
||||||
|
? Menu or option number=28
|
||||||
|
Looking for highest scoring words
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.62
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 9 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
AAAAAC 64 0.66460
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
GTTTTT 66 0.64300
|
||||||
|
TTTTTG 73 0.64070
|
||||||
|
TTTTGT 63 0.63820
|
||||||
|
TTTTTC 65 0.63810
|
||||||
|
AAAATA 63 0.62670
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.62
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =2
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 9 Maximum information= 0.7385326
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TTTTTG 73 0.64070
|
||||||
|
GTTTTT 66 0.64300
|
||||||
|
TTTTTC 65 0.63810
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
AAAAAC 64 0.66460
|
||||||
|
TTTTGT 63 0.63820
|
||||||
|
AAAATA 63 0.62670
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =!
|
||||||
|
|
||||||
|
@29. B 1 @Examine dictionary Dh
|
||||||
|
Examine dictionary Dh allows users to analyse the contents of
|
||||||
|
dictionary Dh to find the most common words or those words that
|
||||||
|
contain the most information. The user supplies a frequency or
|
||||||
|
information cutoff and chooses to have the results sorted on either
|
||||||
|
value. The program will find the top 100 words that achieve the
|
||||||
|
cutoff values and present them to the user sorted as selected. The
|
||||||
|
information content will be calculated from either Dw or Ds
|
||||||
|
depending which was used to create Dh and using the current
|
||||||
|
composition setting. Typical dialogue follows:
|
||||||
|
|
||||||
|
? Menu or option number=29
|
||||||
|
Looking for highest scoring words
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.6
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 4 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =50
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.5
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 8 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
TCTTGA 54 0.66080
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
ACTTTA 57 0.61960
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
AGTATA 51 0.60540
|
||||||
|
TTATAA 55 0.59300
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =50
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =
|
||||||
|
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 8 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
TCTTGA 54 0.66080
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
ACTTTA 57 0.61960
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
AGTATA 51 0.60540
|
||||||
|
TTATAA 55 0.59300
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =!
|
||||||
|
|
||||||
|
@30. B 1 @Examine words in Dm
|
||||||
|
Examine words in Dm allows users to analyse the contents of
|
||||||
|
dictionary Dm at the level of individual words to find their
|
||||||
|
frequency, information content, and to see their base frequency
|
||||||
|
table. The user types in a word to examine and the program displays
|
||||||
|
the values and table. The information content will be calculated
|
||||||
|
from either Dw or Ds depending which was used to create Dm, and
|
||||||
|
using the current composition setting. Typical dialogue follows:
|
||||||
|
? Menu or option number=30
|
||||||
|
? Word to examine=TTGACA
|
||||||
|
TtgacA 60 0.7385326
|
||||||
|
56 56 6 7 5 11
|
||||||
|
4 3 2 1 52 1
|
||||||
|
1 4 2 53 3 48
|
||||||
|
3 1 54 3 4 4
|
||||||
|
TTGACA
|
||||||
|
? Word to examine=TATAAT
|
||||||
|
taTAat 65 0.6251902
|
||||||
|
56 3 53 4 4 60
|
||||||
|
6 1 5 5 5 3
|
||||||
|
3 60 5 57 57 4
|
||||||
|
4 5 6 3 3 2
|
||||||
|
TATAAT
|
||||||
|
? Word to examine=
|
||||||
|
|
||||||
|
@31. B 1 @Examine words in Dh
|
||||||
|
Examine words in Dh allows users to analyse the contents of
|
||||||
|
dictionary Dh at the level of individual words to find their
|
||||||
|
frequency, information content, and to see their base frequency
|
||||||
|
table. The user types in a word to examine and the program displays
|
||||||
|
the values and table. The information content will be calculated
|
||||||
|
from either Dw or Ds depending which was used to create Dm, and
|
||||||
|
using the current composition setting. Typical dialogue follows:
|
||||||
|
|
||||||
|
? Menu or option number=31
|
||||||
|
? Word to examine=TTGACA
|
||||||
|
TtgacA 60 0.7385326
|
||||||
|
56 56 6 7 5 11
|
||||||
|
4 3 2 1 52 1
|
||||||
|
1 4 2 53 3 48
|
||||||
|
3 1 54 3 4 4
|
||||||
|
TTGACA
|
||||||
|
? Word to examine=TATAAT
|
||||||
|
taTAat 65 0.6251902
|
||||||
|
56 3 53 4 4 60
|
||||||
|
6 1 5 5 5 3
|
||||||
|
3 60 5 57 57 4
|
||||||
|
4 5 6 3 3 2
|
||||||
|
TATAAT
|
||||||
|
? Word to examine=GGGGGG
|
||||||
|
gggggg 0 0.6199890
|
||||||
|
3 1 1 2 3 4
|
||||||
|
1 3 1 2 2 1
|
||||||
|
2 1 1 1 1 1
|
||||||
|
11 12 14 12 11 11
|
||||||
|
GGGGGG
|
||||||
|
? Word to examine=
|
||||||
|
|
||||||
|
@32. B 1 @Save or restore a dictionary
|
||||||
|
Save or restore dictionary allows users to write or read any
|
||||||
|
dictionary to and from disk files. The user is asked to define the
|
||||||
|
dictionary and file. The function is useful if the machine being
|
||||||
|
used is very slow at calculating because the files can be handled
|
||||||
|
quickly. However note that the files cannot be processed by any
|
||||||
|
other program.
|
||||||
|
@33. B 1 @Find inverted repeats
|
||||||
|
Find inverted repeats performs searches for simple inverted repeat
|
||||||
|
sequences in each sequence. They are defined by a range of loop
|
||||||
|
sizes and a minimum number of potential basepairs. The results can
|
||||||
|
be plotted or listed. The x axis of the plot represents the length
|
||||||
|
of the aligned sequences and the y direction is divided into
|
||||||
|
sufficient strips to accommodate each sequence. So if an inverted
|
||||||
|
repeat is found in the 3rd sequence at a position equivalent to
|
||||||
|
halfway along the longest of the sequences then a short vertical
|
||||||
|
line will be drawn at the midpoint of the 3rd strip. Alternatively,
|
||||||
|
if the results are listed, the potential hairpin loops are drawn
|
||||||
|
out, with the sequence number and the position of the loop. Typical
|
||||||
|
dialogue follows.
|
||||||
|
|
||||||
|
? Menu or option number=33
|
||||||
|
Define the range of loop sizes
|
||||||
|
? Minimum loop size (0-10) (3) =0
|
||||||
|
? Maximum loop size (1-20) (3) =
|
||||||
|
? Minimum number of basepairs (1-20) (6) =
|
||||||
|
? (y/n) (y) Plot results N
|
||||||
|
Searching
|
||||||
|
|
||||||
|
Sequence 3 34
|
||||||
|
C
|
||||||
|
G.T
|
||||||
|
T-A
|
||||||
|
A-T
|
||||||
|
T.G
|
||||||
|
T.G
|
||||||
|
G.T
|
||||||
|
ATCTTT TATTTCA
|
||||||
|
33
|
||||||
|
|
||||||
|
Sequence 5 35
|
||||||
|
T
|
||||||
|
G.T
|
||||||
|
T.G
|
||||||
|
A-T
|
||||||
|
T.G
|
||||||
|
G.T
|
||||||
|
C-G
|
||||||
|
T.G
|
||||||
|
TCCGGC AATTGTG
|
||||||
|
34
|
||||||
|
|
||||||
|
|
||||||
|
@ End of help
|
32
help/mem_menu
Normal file
32
help/mem_menu
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
0 1 15 184 B MEP
|
||||||
|
1 1 9304 37 B Help
|
||||||
|
2 1 10465 2 B Quit
|
||||||
|
3 1 10531 14 B Read a new sequence.
|
||||||
|
4 1 10932 6 B Define active region
|
||||||
|
5 1 11250 31 B List a sequence.
|
||||||
|
6 1 12393 3 B List a text file.
|
||||||
|
7 1 12525 12 B Direct output to disk
|
||||||
|
10 1 12996 2 B Clear graphics
|
||||||
|
11 1 13065 2 B Clear text
|
||||||
|
12 1 13126 12 B Draw a ruler.
|
||||||
|
13 1 13871 12 B Use crosshair.
|
||||||
|
14 1 14459 34 B Reposition plots
|
||||||
|
15 1 16611 12 B Label a diagram
|
||||||
|
16 1 17394 12 B Display a map.
|
||||||
|
17 1 18154 31 B Search for strings
|
||||||
|
18 1 19507 3 B Set strand
|
||||||
|
19 1 19672 6 B Set composition
|
||||||
|
20 1 20013 3 B Set word length
|
||||||
|
21 1 20131 3 B Set number of mismatches
|
||||||
|
22 1 20256 14 B Show settings
|
||||||
|
23 1 20718 3 B Make dictionary Dw
|
||||||
|
24 1 20890 3 B Make dictionary Ds
|
||||||
|
25 1 21055 7 B Make dictionary Dm from Dw
|
||||||
|
26 1 21505 7 B Make dictionary Dm from Ds
|
||||||
|
27 1 21955 5 B Make dictionary Dh from Dm
|
||||||
|
28 1 22245 55 B Examine dictionary Dm
|
||||||
|
29 1 24148 70 B Examine dictionary Dh
|
||||||
|
30 1 26410 25 B Examine words in Dm
|
||||||
|
31 1 27437 33 B Examine words in Dh
|
||||||
|
32 1 28701 7 B Save or restore a dictionary
|
||||||
|
33 1 29106 46 B Find inverted repeats
|
792
help/mep_help
Normal file
792
help/mep_help
Normal file
|
@ -0,0 +1,792 @@
|
||||||
|
|
||||||
|
@-1. TX 0 @General
|
||||||
|
|
||||||
|
@-2. T 0 @Screen control
|
||||||
|
|
||||||
|
@-2. X 0 @Screen
|
||||||
|
|
||||||
|
@-3. TX 0 @Dictionary analysis
|
||||||
|
|
||||||
|
@0. TX -1 @MEP
|
||||||
|
|
||||||
|
This is a program for analysing families of nucleotide
|
||||||
|
sequences in order to find common motifs and potential binding
|
||||||
|
sites. The ideas in this program were described in Staden, R.
|
||||||
|
"Methods for discovering novel motifs in nucleic acid sequences".
|
||||||
|
Computer Applications in the Biosciences, 5, 293-298, (1989).
|
||||||
|
|
||||||
|
The program can read sequences stored in either of two
|
||||||
|
formats: 1) all sequences aligned in a single file; 2) all sequences
|
||||||
|
in separate files and accessed through a file of file names.
|
||||||
|
|
||||||
|
The program contains functions that can answer several
|
||||||
|
questions about a set of sequences:
|
||||||
|
|
||||||
|
Which words are most common?
|
||||||
|
Which words occur in the most sequences?
|
||||||
|
Which words contain the most information?
|
||||||
|
Which words occur in equivalent positions in the sequences?
|
||||||
|
Which words are inverted repeats?
|
||||||
|
Which words occur on both strands of the sequences?
|
||||||
|
Where are the inverted repeats?
|
||||||
|
Where are the fuzzy words?
|
||||||
|
|
||||||
|
Most of the program is concerned with analysing what it terms
|
||||||
|
"fuzzy words" within the set of sequences. The analysis is explained
|
||||||
|
below. Note that the standard version of the programs is limited to
|
||||||
|
words of maximum length 8 letters, and a maximum fuzziness of 2.
|
||||||
|
|
||||||
|
The following analyses (preceded by their option numbers) are
|
||||||
|
included:
|
||||||
|
? = Help
|
||||||
|
! = Quit
|
||||||
|
3 = Read new sequences
|
||||||
|
4 = Redefine active region
|
||||||
|
5 = List the sequences
|
||||||
|
6 = List text file
|
||||||
|
7 = Direct output to disk
|
||||||
|
10 = Clear graphics
|
||||||
|
11 = Clear text
|
||||||
|
12 = Draw ruler
|
||||||
|
13 = Use cross hair
|
||||||
|
14 = Reset margins
|
||||||
|
15 = Label diagram
|
||||||
|
16 = Draw map
|
||||||
|
17 = Search for strings
|
||||||
|
18 = Set strand
|
||||||
|
19 = Set composition
|
||||||
|
20 = Set word length
|
||||||
|
21 = Set number of mismatches
|
||||||
|
22 = Show settings
|
||||||
|
23 = Make dictionary Dw
|
||||||
|
24 = Make dictionary Ds
|
||||||
|
25 = Make fuzzy dictionary Dm from Dw
|
||||||
|
26 = Make fuzzy dictionary Dm from Ds
|
||||||
|
27 = Make fuzzy dictionary Dh from Dm
|
||||||
|
28 = Examine fuzzy dictionary Dm
|
||||||
|
29 = Examine fuzzy dictionary Dh
|
||||||
|
30 = Examine words in Dm
|
||||||
|
31 = Examine words in Dh
|
||||||
|
32 = Save or restore a dictionary
|
||||||
|
33 = Find inverted repeats
|
||||||
|
|
||||||
|
Some of these methods produce graphical results and so the
|
||||||
|
program is generally used from a graphics terminal (a vdu on which
|
||||||
|
lines and points can be drawn as well as characters).
|
||||||
|
|
||||||
|
The position of each of the plots is defined relative to a user's
|
||||||
|
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||||
|
for each option are drawn in a window defined by x0,y0 and
|
||||||
|
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||||
|
corner of the window, and xlength is the width of the window and
|
||||||
|
ylength the height of the window.
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||||
|
The default window positions are read from a file "MEPMARG" when the
|
||||||
|
program is started. Users can have their own file if required.
|
||||||
|
|
||||||
|
The options for the program are accessed from 3 main menus:
|
||||||
|
general, screen control and dictionary analysis. Both menus and
|
||||||
|
options are selected by number.
|
||||||
|
|
||||||
|
The most important and novel part of the program is its use of
|
||||||
|
"fuzzy dictionaries" and an information theory measure, to help show
|
||||||
|
the most interesting motifs. Central to the method is the idea of a
|
||||||
|
fuzzy dictionary of word frequencies. A dictionary of word
|
||||||
|
frequencies is an ordered list of all the words in the sequences and
|
||||||
|
a count of the number of times that they occur. A fuzzy dictionary
|
||||||
|
is an equivalent list but which contains instead, for each word, a
|
||||||
|
count of the number of times similar words occur in the sequences.
|
||||||
|
We term words that are similar "relations". The fuzziness is defined
|
||||||
|
by the number of letters in a word that are allowed to be different.
|
||||||
|
So if we had a fuzziness of 1 we allow 1 letter to be different. For
|
||||||
|
example, with a fuzziness of 1, the entry in the fuzzy dictionary
|
||||||
|
for the word TTTTTT would contain a count of the numbers of times
|
||||||
|
TTTTTT occurred plus the number of times all words differing by
|
||||||
|
exactly one letter from TTTTTT occurred.
|
||||||
|
|
||||||
|
Once the fuzzy dictionary has been created we can examine it
|
||||||
|
in several ways to find candidate control sequences. The simplest
|
||||||
|
question we can ask is which word in the dictionary is the most
|
||||||
|
common. Sometimes this simple criterion of "most common" may be
|
||||||
|
adequate to discover a new motif but in general we would not expect
|
||||||
|
it to be sufficient. For example some words will be common simply
|
||||||
|
because of a base composition bias in the sequences being analysed.
|
||||||
|
In addition a word can be the most frequent and yet not be "well
|
||||||
|
defined". This last point is best explained by an example.
|
||||||
|
|
||||||
|
Suppose we were looking at two letter words and allowing one
|
||||||
|
mismatch, and that there were 10 occurrences of TT and 5 of AC. We
|
||||||
|
could align the 10 words that were one letter different from TT and
|
||||||
|
the 5 that were related to AC. Then we could count the number of
|
||||||
|
times each base occurred in each position for each of these two sets
|
||||||
|
of words. Suppose we got the two base frequency tables shown below.
|
||||||
|
TT AC
|
||||||
|
T 6 4 T 1 0
|
||||||
|
C 1 3 C 0 4
|
||||||
|
A 1 2 A 4 1
|
||||||
|
G 2 1 G 0 0
|
||||||
|
|
||||||
|
These tables show that although TT occurs (with one letter mismatch)
|
||||||
|
more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
|
||||||
|
is higher than those for TT at 6/10, 4/10. Hence we would say that
|
||||||
|
AC was better defined than TT. Expressing this another way we would
|
||||||
|
say that the definition of AC contained more information than that
|
||||||
|
for TT. The program calculates the information content in a way that
|
||||||
|
takes into account both the sequence composition and the level of
|
||||||
|
definition of the motif.
|
||||||
|
|
||||||
|
Definitions
|
||||||
|
|
||||||
|
Here we deal only with the dictionary analysis. Suppose we
|
||||||
|
are dealing with a set of sequences and are examining them for words
|
||||||
|
that are six characters in length.
|
||||||
|
|
||||||
|
Dictionary Dw contains a count of the number of times each
|
||||||
|
word occurs in the set of sequences. For example the entry for
|
||||||
|
TTTTTT contains a value equal to the number of times the word TTTTTT
|
||||||
|
occurs in the set of sequences.
|
||||||
|
|
||||||
|
Dictionary Ds contains a count of the number of different
|
||||||
|
sequences in which each word occurs. For example if the entry for
|
||||||
|
word TTTTTT contains the value 10, it denotes that the word TTTTTT
|
||||||
|
occurs in ten different sequences. Unlike Dw it only counts words
|
||||||
|
once for each sequence. For example if we had a set of 100
|
||||||
|
sequences, the maximum possible value that Ds could take is 100, and
|
||||||
|
this would only happen if a word occurred in every sequence. However
|
||||||
|
for the same set of sequences, Dw could contain values greater than
|
||||||
|
100, and this would show that a word had occurred more than once in
|
||||||
|
at least one sequence.
|
||||||
|
|
||||||
|
From either of the two dictionaries Dw or Ds we can calculate
|
||||||
|
a fuzzy dictionary Dm. For each word, the entry in the fuzzy
|
||||||
|
dictionary Dm contains the sum of the dictionary values (taken from
|
||||||
|
either Dw or Ds) for all words that differ from it by up to m
|
||||||
|
letters. For example if m=2 the entry for TTTTTT contains the number
|
||||||
|
of times that TTTTTT occurs in the dictionary, plus the counts for
|
||||||
|
all words that differ from TTTTTT by 1 or 2 letters. Obviously the
|
||||||
|
interpretation of the values in Dm depends on which of the two
|
||||||
|
dictionaries Dw or Ds they were derived from. When derived from Dw
|
||||||
|
the entry for any word in Dm gives the total number of times it, and
|
||||||
|
its relations, occur in the set of sequences. When derived from Ds
|
||||||
|
the entry for any word in Dm gives the total number of different
|
||||||
|
sequences that contain a word and each of its relations.
|
||||||
|
|
||||||
|
Finally, from fuzzy dictionary Dm we can derive fuzzy
|
||||||
|
dictionary Dh. All entries in Dh are zero except for the word(s),
|
||||||
|
within each set of relations, that are most frequent. For example if
|
||||||
|
TTTTTT occurred 20 times but had a relation that occurred more
|
||||||
|
often, then the entry for TTTTTT would be zero. However if TTTTTT
|
||||||
|
did not have a more frequently occurring relation, then the entry
|
||||||
|
for TTTTTT would contain the value 20.
|
||||||
|
@1. T 0 @Help
|
||||||
|
|
||||||
|
This option gives online help. The user should select option
|
||||||
|
numbers and the current documentation will be given. Note that
|
||||||
|
option 0 gives an introduction to the program, and that ? will get
|
||||||
|
help from anywhere in the program. The following analyses (preceded
|
||||||
|
by their option numbers) are included:
|
||||||
|
? = Help
|
||||||
|
! = Quit
|
||||||
|
3 = Read new sequences
|
||||||
|
4 = Redefine active region
|
||||||
|
5 = List the sequences
|
||||||
|
6 = List text file
|
||||||
|
7 = Direct output to disk
|
||||||
|
10 = Clear graphics
|
||||||
|
11 = Clear text
|
||||||
|
12 = Draw ruler
|
||||||
|
13 = Use cross hair
|
||||||
|
14 = Reset margins
|
||||||
|
15 = Label diagram
|
||||||
|
16 = Draw map
|
||||||
|
17 = Search for strings
|
||||||
|
18 = Set strand
|
||||||
|
19 = Set composition
|
||||||
|
20 = Set word length
|
||||||
|
21 = Set number of mismatches
|
||||||
|
22 = Show settings
|
||||||
|
23 = Make dictionary Dw
|
||||||
|
24 = Make dictionary Ds
|
||||||
|
25 = Make fuzzy dictionary Dm from Dw
|
||||||
|
26 = Make fuzzy dictionary Dm from Ds
|
||||||
|
27 = Make fuzzy dictionary Dh from Dm
|
||||||
|
28 = Examine fuzzy dictionary Dm
|
||||||
|
29 = Examine fuzzy dictionary Dh
|
||||||
|
30 = Examine words in Dm
|
||||||
|
31 = Examine words in Dh
|
||||||
|
32 = Save or restore a dictionary
|
||||||
|
33 = Find inverted repeats
|
||||||
|
@2. T 0 @Quit
|
||||||
|
|
||||||
|
This function stops the program.
|
||||||
|
@3. TX 1 @Read a new sequence
|
||||||
|
|
||||||
|
It can read sequences stored in either of two formats: 1) all
|
||||||
|
sequences aligned in a single file; 2) all sequences in separate
|
||||||
|
files and accessed through a file of file names. Typical dialogue
|
||||||
|
follows:
|
||||||
|
|
||||||
|
X 1 Read file of aligned sequences
|
||||||
|
2 Use file of file names
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? File of aligned sequences=F1
|
||||||
|
Number of files 88
|
||||||
|
|
||||||
|
@4. TX 1 @Define active region
|
||||||
|
|
||||||
|
For its analytic functions the program always works on a
|
||||||
|
region of the sequence called the active region. When new sequences
|
||||||
|
are read into the program the active region is automatically set to
|
||||||
|
start at the beginning of the sequences and go up to the end of the
|
||||||
|
longest one.
|
||||||
|
@5. TX 1 @List a sequence
|
||||||
|
|
||||||
|
The sequence can be listed with line lengths of 50 bases with
|
||||||
|
each sequence numbered in the order in which they were read. Output
|
||||||
|
can be directed to a disk file by first selecting disk output.
|
||||||
|
Typical dialogue follows.
|
||||||
|
|
||||||
|
? Menu or option number=5
|
||||||
|
|
||||||
|
10 20 30 40 50
|
||||||
|
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||||
|
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||||
|
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||||
|
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||||
|
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||||
|
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||||
|
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||||
|
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||||
|
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||||
|
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||||
|
|
||||||
|
60
|
||||||
|
1 TACCCGTTTTT
|
||||||
|
2 GCGTTTTTGT
|
||||||
|
3 TCATACCATAAG
|
||||||
|
4 TTTCATACC
|
||||||
|
5 ATTGTGAGC
|
||||||
|
6 TTCCGGCTCG
|
||||||
|
7 GAAGAGAGT
|
||||||
|
8 TCAGGTGT
|
||||||
|
9 ATGAATG
|
||||||
|
10 TAATTACG
|
||||||
|
@6. TX 1 @List a text file
|
||||||
|
|
||||||
|
Allows the user to have a text file displayed on the screen.
|
||||||
|
It will appear one page at a time.
|
||||||
|
@7. TX 1 @Direct output to disk
|
||||||
|
|
||||||
|
Used to direct output that would normally appear on the screen
|
||||||
|
to a file.
|
||||||
|
|
||||||
|
Select redirection of either text or graphics, and supply the
|
||||||
|
name of the file that the output should be written to.
|
||||||
|
|
||||||
|
The results from the next options selected will not appear on
|
||||||
|
the screen but will be written to the file. When option 7 is
|
||||||
|
selected again the file will be closed and output will again appear
|
||||||
|
on the screen.
|
||||||
|
@10. TX 2 @Clear graphics
|
||||||
|
|
||||||
|
Clears the screen of both text and graphics.
|
||||||
|
@11. TX 2 @Clear text
|
||||||
|
|
||||||
|
Clears only text from the screen.
|
||||||
|
@12. TX 2 @Draw a ruler
|
||||||
|
|
||||||
|
This option allows the user to draw a ruler or scale along the
|
||||||
|
x axis of the screen to help identify the coordinates of points of
|
||||||
|
interest. The user can define the position of the first base
|
||||||
|
to be marked (for example if the active region is 1501 to 8000, the
|
||||||
|
user might wish to mark every 1000th base starting at either
|
||||||
|
1501 or 2000 - it depends if the user wishes to treat the active
|
||||||
|
region as an independent unit with its own numbering starting at its
|
||||||
|
left edge, or as part of the whole sequence). The user can also
|
||||||
|
define the separation of the ticks on the scale and their height. If
|
||||||
|
required the labelling routine can be used to add numbers to the
|
||||||
|
ticks.
|
||||||
|
@13. TX 2 @Use crosshair
|
||||||
|
|
||||||
|
This function puts a steerable cross on the screen that can be
|
||||||
|
used to find the coordinates of points in the sequence. The user can
|
||||||
|
move the cross around using the directional keys; when he hits the
|
||||||
|
space bar the program will print out the coordinates of the cross in
|
||||||
|
sequence units and the option will be exited.
|
||||||
|
|
||||||
|
If instead, you hit a , the position will be displayed but the
|
||||||
|
cross will remain on the screen.
|
||||||
|
|
||||||
|
If a letter s is hit the sequence around the cross hair is
|
||||||
|
displayed and the cross remains on the screen.
|
||||||
|
@14. TX 2 @Reposition plots
|
||||||
|
|
||||||
|
The position of each of the plots is defined relative to a
|
||||||
|
user's drawing board which has size 1-10,000 in x and 1-10,000 in y.
|
||||||
|
Plots for each option are drawn in a window defined by x0,y0 and
|
||||||
|
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||||
|
corner of the window, and xlength is the width of the window and
|
||||||
|
ylength the height of the window.
|
||||||
|
--------------------------------------------------------- 10,000
|
||||||
|
1 1
|
||||||
|
1 -------------------------------------- ^ 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 ylength 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 1 1 1 1
|
||||||
|
1 -------------------------------------- v 1
|
||||||
|
1 x0,y0^ 1
|
||||||
|
1 <---------------xlength--------------> 1
|
||||||
|
--------------------------------------------------------- 1
|
||||||
|
1 10,000
|
||||||
|
|
||||||
|
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||||
|
The default window positions are read from a file "MEPMARG" when the
|
||||||
|
program is started. Users can have their own file if required. As
|
||||||
|
all the plots start at the same position in x and have the same
|
||||||
|
width, x0 and xlength are the same for all options. Generally users
|
||||||
|
will only want to change the start level of the window y0 and its
|
||||||
|
height ylength. This option allows users to change window positions
|
||||||
|
whilst running the program. The routine prompts first for the
|
||||||
|
number of the option that the user wishes to reposition; then for
|
||||||
|
the y start and height; then for the x start and length. Note that
|
||||||
|
changes to the x values affect all options. If the user types only
|
||||||
|
carriage return for any value it will remain unchanged. The cross-
|
||||||
|
hair can be used to choose suitable heights.
|
||||||
|
@15. TX 2 @Label a diagram
|
||||||
|
|
||||||
|
This routine allows users to label any diagrams they have
|
||||||
|
produced. They are asked to type in a label. When the user types
|
||||||
|
carriage return to finish typing the label the cross-hair appears on
|
||||||
|
the screen. The user can position it anywhere on the screen. If the
|
||||||
|
user types R (for right justify) the label will be written on the
|
||||||
|
diagram with its right end at the cross-hair position. If the user
|
||||||
|
types L (for left justify) the label will be written on the diagram
|
||||||
|
with its left end at the cross hair position. The cross-hair will
|
||||||
|
then immediately reappear. The user may put the same label on
|
||||||
|
another part of the diagram as before or if he hits the space bar he
|
||||||
|
will be asked if he wishes to type in another label.
|
||||||
|
@16. TX 2 @Display a map
|
||||||
|
|
||||||
|
It is often convenient to plot a map alongside graphed
|
||||||
|
analysis in order to indicate features within the sequence. This
|
||||||
|
function allows users to draw maps using files arranged in the form
|
||||||
|
of EMBL feature tables. Of course the EMBL tables are usually only
|
||||||
|
used for nucleic acid sequence annotation but, as long as the
|
||||||
|
features are written in the correct format, they can be employed by
|
||||||
|
this routine. The map is composed of a line representing the
|
||||||
|
sequence and then further lines denoting the endpoints of each
|
||||||
|
feature the user identifies. The user is asked to define the height at
|
||||||
|
which the line representing the sequence should be drawn; then for
|
||||||
|
the feature height; then for the features to plot.
|
||||||
|
@17. TX 1 @Search for strings
|
||||||
|
|
||||||
|
Search for strings performs searches of all the sequences for
|
||||||
|
selected words and shows which sequences they are found in. The user
|
||||||
|
types in a word and defines the allowed number of mismatches. The
|
||||||
|
results are listed or plotted. If listed the display includes the
|
||||||
|
sequence number, the position in the sequence and the matching
|
||||||
|
string. The results are plotted in the following way. The x axis of
|
||||||
|
the plot represents the length of the aligned sequences and the y
|
||||||
|
direction is divided into sufficient strips to accommodate each
|
||||||
|
sequence. So if a match is found in the 3rd sequence at a position
|
||||||
|
equivalent to halfway along the longest of the sequences then a
|
||||||
|
short vertical line will be drawn at the midpoint of the 3rd strip.
|
||||||
|
If the sequences are aligned it can be useful if the motifs happen
|
||||||
|
to appear in related positions. For example see the original
|
||||||
|
publication. Typical dialogue follows.
|
||||||
|
|
||||||
|
? Menu or option number=17
|
||||||
|
X 1 Plot match positions
|
||||||
|
2 Plot histogram of matches
|
||||||
|
? 0,1,2 =
|
||||||
|
? Word to search for=TTGACA
|
||||||
|
? Minimum match (0-6) (6) =5
|
||||||
|
? (y/n) (y) Plot results N
|
||||||
|
2 35 TAGACA
|
||||||
|
5 14 TTTACA
|
||||||
|
6 37 TTTACA
|
||||||
|
11 14 TAGACA
|
||||||
|
14 14 TTGACA
|
||||||
|
17 14 GTGACA
|
||||||
|
17 22 TTAACA
|
||||||
|
20 1 TTGACA
|
||||||
|
@18. TX 3 @Set strand
|
||||||
|
|
||||||
|
Set strand allows the user to define which strand(s) of the
|
||||||
|
sequences to analyse: input strand, complement of input, or both.
|
||||||
|
@19. TX 3 @Set composition
|
||||||
|
|
||||||
|
Set composition gives the user three choices for setting the
|
||||||
|
composition of the sequences for use in the calculation of the
|
||||||
|
information content of words. The user can select the overall
|
||||||
|
composition of the sequences as read, an even composition, or can
|
||||||
|
type in any other 4 values.
|
||||||
|
@20. TX 3 @Set word length
|
||||||
|
|
||||||
|
Set word length sets the length of word for which dictionaries
|
||||||
|
will be made.
|
||||||
|
@21. TX 3 @Set number of mismatches
|
||||||
|
|
||||||
|
Set number of mismatches sets the level of fuzziness for the
|
||||||
|
creation of dictionary Dm.
|
||||||
|
@22. TX 3 @Show settings
|
||||||
|
|
||||||
|
Show settings shows the current settings for all parameters
|
||||||
|
associated with dictionary analysis. A typical display follows:
|
||||||
|
? Menu or option number=22
|
||||||
|
Current word length = 6
|
||||||
|
Number of mismatches = 1
|
||||||
|
Start position = 1
|
||||||
|
End position = 63
|
||||||
|
Input strand only
|
||||||
|
Observed composition
|
||||||
|
Dictionary Dw unmade
|
||||||
|
Dictionary Ds unmade
|
||||||
|
Dictionary Dm unmade
|
||||||
|
Dictionary Dh unmade
|
||||||
|
@23. TX 3 @Make dictionary Dw
|
||||||
|
|
||||||
|
Make dictionary Dw creates a dictionary that contains a count
|
||||||
|
of the frequency of occurrence of each word in the collected
|
||||||
|
sequences.
|
||||||
|
@24. TX 3 @Make dictionary Ds
|
||||||
|
|
||||||
|
Make dictionary Ds creates a dictionary that contains a count
|
||||||
|
of the number of different sequences that contain each word.
|
||||||
|
@25. TX 3 @Make dictionary Dm from Dw
|
||||||
|
|
||||||
|
Make dictionary Dm from Dw creates a dictionary from
|
||||||
|
dictionary Dw that contains the frequency of occurrence of each word
|
||||||
|
(say X) in Dw plus the frequency of occurrence of each word in Dw
|
||||||
|
that differs from X by up to m letters. Dm is called a fuzzy
|
||||||
|
dictionary as it contains the frequencies of occurrence of all words
|
||||||
|
plus the frequencies of all the words that are similar to them.
|
||||||
|
@26. TX 3 @Make dictionary Dm from Ds
|
||||||
|
|
||||||
|
Make dictionary Dm from Ds creates a dictionary from
|
||||||
|
dictionary Ds that contains the frequency of occurrence of each word
|
||||||
|
(say X) in Ds plus the frequency of occurrence of each word in Ds
|
||||||
|
that differs from X by up to m letters. Dm is called a fuzzy
|
||||||
|
dictionary as it contains the frequencies of occurrence of all words
|
||||||
|
plus the frequencies of all the words that are similar to them.
|
||||||
|
@27. TX 3 @Make dictionary Dh from Dm
|
||||||
|
|
||||||
|
Make dictionary Dh creates a dictionary from dictionary Dm
|
||||||
|
and whose entries are zero except for those words in any set of
|
||||||
|
related words that are most frequent. It finds the dominant words in
|
||||||
|
each set of relations and stores their counts.
|
||||||
|
@28. TX 3 @Examine fuzzy dictionary Dm
|
||||||
|
|
||||||
|
Examine dictionary Dm allows users to analyse the contents of
|
||||||
|
dictionary Dm to find the most common words or those words that
|
||||||
|
contain the most information. The user supplies a frequency or
|
||||||
|
information cutoff and chooses to have the results sorted on either
|
||||||
|
value. The program will find the top 100 words that achieve the
|
||||||
|
cutoff values and present them to the user sorted as selected. The
|
||||||
|
information content will be calculated from either Dw or Ds
|
||||||
|
depending which was used to create Dm, and using the current
|
||||||
|
composition setting. Typical dialogue follows:
|
||||||
|
|
||||||
|
? Menu or option number=28
|
||||||
|
Looking for highest scoring words
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.62
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 9 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
AAAAAC 64 0.66460
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
GTTTTT 66 0.64300
|
||||||
|
TTTTTG 73 0.64070
|
||||||
|
TTTTGT 63 0.63820
|
||||||
|
TTTTTC 65 0.63810
|
||||||
|
AAAATA 63 0.62670
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.62
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =2
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 9 Maximum information= 0.7385326
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TTTTTG 73 0.64070
|
||||||
|
GTTTTT 66 0.64300
|
||||||
|
TTTTTC 65 0.63810
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
AAAAAC 64 0.66460
|
||||||
|
TTTTGT 63 0.63820
|
||||||
|
AAAATA 63 0.62670
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =!
|
||||||
|
|
||||||
|
@29. TX 3 @Examine fuzzy dictionary Dh
|
||||||
|
|
||||||
|
Examine dictionary Dh allows users to analyse the contents of
|
||||||
|
dictionary Dh to find the most common words or those words that
|
||||||
|
contain the most information. The user supplies a frequency or
|
||||||
|
information cutoff and chooses to have the results sorted on either
|
||||||
|
value. The program will find the top 100 words that achieve the
|
||||||
|
cutoff values and present them to the user sorted as selected. The
|
||||||
|
information content will be calculated from either Dw or Ds
|
||||||
|
depending which was used to create Dh and using the current
|
||||||
|
composition setting. Typical dialogue follows:
|
||||||
|
|
||||||
|
? Menu or option number=29
|
||||||
|
Looking for highest scoring words
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =60
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.6
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 4 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =50
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =.5
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 8 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
TCTTGA 54 0.66080
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
ACTTTA 57 0.61960
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
AGTATA 51 0.60540
|
||||||
|
TTATAA 55 0.59300
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =50
|
||||||
|
? Minimum information (0.00-1.00) (0.00) =
|
||||||
|
|
||||||
|
X 1 Sort on information
|
||||||
|
2 Sort on word score
|
||||||
|
? 0,1,2 =
|
||||||
|
|
||||||
|
? Maximum number to list (0-100) (100) =
|
||||||
|
|
||||||
|
The words are
|
||||||
|
Total words= 8 Maximum information= 0.7385326
|
||||||
|
TTGACA 60 0.73850
|
||||||
|
TCTTGA 54 0.66080
|
||||||
|
AAAAAA 90 0.64880
|
||||||
|
TATAAT 65 0.62510
|
||||||
|
ACTTTA 57 0.61960
|
||||||
|
TTTTTT 115 0.60630
|
||||||
|
AGTATA 51 0.60540
|
||||||
|
TTATAA 55 0.59300
|
||||||
|
The highest word score = 115
|
||||||
|
? Minimum word score (0-115) (0) =!
|
||||||
|
|
||||||
|
@30. TX 3 @Examine words in Dm
|
||||||
|
|
||||||
|
Examine words in Dm allows users to analyse the contents of
|
||||||
|
dictionary Dm at the level of individual words to find their
|
||||||
|
frequency, information content, and to see their base frequency
|
||||||
|
table. The user types in a word to examine and the program displays
|
||||||
|
the values and table. The information content will be calculated
|
||||||
|
from either Dw or Ds depending which was used to create Dm, and
|
||||||
|
using the current composition setting. Typical dialogue follows:
|
||||||
|
? Menu or option number=30
|
||||||
|
? Word to examine=TTGACA
|
||||||
|
TtgacA 60 0.7385326
|
||||||
|
56 56 6 7 5 11
|
||||||
|
4 3 2 1 52 1
|
||||||
|
1 4 2 53 3 48
|
||||||
|
3 1 54 3 4 4
|
||||||
|
TTGACA
|
||||||
|
? Word to examine=TATAAT
|
||||||
|
taTAat 65 0.6251902
|
||||||
|
56 3 53 4 4 60
|
||||||
|
6 1 5 5 5 3
|
||||||
|
3 60 5 57 57 4
|
||||||
|
4 5 6 3 3 2
|
||||||
|
TATAAT
|
||||||
|
? Word to examine=
|
||||||
|
|
||||||
|
@31. TX 3 @Examine words in Dh
|
||||||
|
|
||||||
|
Examine words in Dh allows users to analyse the contents of
|
||||||
|
dictionary Dh at the level of individual words to find their
|
||||||
|
frequency, information content, and to see their base frequency
|
||||||
|
table. The user types in a word to examine and the program displays
|
||||||
|
the values and table. The information content will be calculated
|
||||||
|
from either Dw or Ds depending which was used to create Dm, and
|
||||||
|
using the current composition setting. Typical dialogue follows:
|
||||||
|
|
||||||
|
? Menu or option number=31
|
||||||
|
? Word to examine=TTGACA
|
||||||
|
TtgacA 60 0.7385326
|
||||||
|
56 56 6 7 5 11
|
||||||
|
4 3 2 1 52 1
|
||||||
|
1 4 2 53 3 48
|
||||||
|
3 1 54 3 4 4
|
||||||
|
TTGACA
|
||||||
|
? Word to examine=TATAAT
|
||||||
|
taTAat 65 0.6251902
|
||||||
|
56 3 53 4 4 60
|
||||||
|
6 1 5 5 5 3
|
||||||
|
3 60 5 57 57 4
|
||||||
|
4 5 6 3 3 2
|
||||||
|
TATAAT
|
||||||
|
? Word to examine=GGGGGG
|
||||||
|
gggggg 0 0.6199890
|
||||||
|
3 1 1 2 3 4
|
||||||
|
1 3 1 2 2 1
|
||||||
|
2 1 1 1 1 1
|
||||||
|
11 12 14 12 11 11
|
||||||
|
GGGGGG
|
||||||
|
? Word to examine=
|
||||||
|
|
||||||
|
@32. TX 3 @Save or restore a dictionary
|
||||||
|
|
||||||
|
Save or restore dictionary allows users to write or read any
|
||||||
|
dictionary to and from disk files. The user is asked to define the
|
||||||
|
dictionary and file. The function is useful if the machine being
|
||||||
|
used is very slow at calculating because the files can be handled
|
||||||
|
quickly. However note that the files cannot be processed by any
|
||||||
|
other program.
|
||||||
|
@33. TX 1 @Find inverted repeats
|
||||||
|
|
||||||
|
Find inverted repeats performs searches for simple inverted
|
||||||
|
repeat sequences in each sequence. They are defined by a range of
|
||||||
|
loop sizes and a minimum number of potential basepairs. The results
|
||||||
|
can be plotted or listed. The x axis of the plot represents the
|
||||||
|
length of the aligned sequences and the y direction is divided into
|
||||||
|
sufficient strips to accommodate each sequence. So if an inverted
|
||||||
|
repeat is found in the 3rd sequence at a position equivalent to
|
||||||
|
halfway along the longest of the sequences then a short vertical
|
||||||
|
line will be drawn at the midpoint of the 3rd strip. Alternatively,
|
||||||
|
if the results are listed, the potential hairpin loops are drawn
|
||||||
|
out, with the sequence number and the position of the loop. Typical
|
||||||
|
dialogue follows.
|
||||||
|
|
||||||
|
? Menu or option number=33
|
||||||
|
Define the range of loop sizes
|
||||||
|
? Minimum loop size (0-10) (3) =0
|
||||||
|
? Maximum loop size (1-20) (3) =
|
||||||
|
? Minimum number of basepairs (1-20) (6) =
|
||||||
|
? (y/n) (y) Plot results N
|
||||||
|
Searching
|
||||||
|
|
||||||
|
Sequence 3 34
|
||||||
|
C
|
||||||
|
G.T
|
||||||
|
T-A
|
||||||
|
A-T
|
||||||
|
T.G
|
||||||
|
T.G
|
||||||
|
G.T
|
||||||
|
ATCTTT TATTTCA
|
||||||
|
33
|
||||||
|
|
||||||
|
Sequence 5 35
|
||||||
|
T
|
||||||
|
G.T
|
||||||
|
T.G
|
||||||
|
A-T
|
||||||
|
T.G
|
||||||
|
G.T
|
||||||
|
C-G
|
||||||
|
T.G
|
||||||
|
TCCGGC AATTGTG
|
||||||
|
34
|
||||||
|
@ End of help
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
68
help/mep_menu
Normal file
68
help/mep_menu
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
-1 0 22 2 T General
|
||||||
|
-1 0 22 2 X General
|
||||||
|
-2 0 51 2 T Screen control
|
||||||
|
-2 0 72 2 X Screen
|
||||||
|
-3 0 106 2 T Dictionary analysis
|
||||||
|
-3 0 106 2 X Dictionary analysis
|
||||||
|
0 -1 124 185 T MEP
|
||||||
|
0 -1 124 185 X MEP
|
||||||
|
1 0 9423 38 T Help
|
||||||
|
2 0 10594 3 T Quit
|
||||||
|
3 1 10667 14 T Read a new sequence
|
||||||
|
3 1 10667 14 X Read a new sequence
|
||||||
|
4 1 11069 7 T Define active region
|
||||||
|
4 1 11069 7 X Define active region
|
||||||
|
5 1 11396 32 T List a sequence
|
||||||
|
5 1 11396 32 X List a sequence
|
||||||
|
6 1 12548 4 T List a text file
|
||||||
|
6 1 12548 4 X List a text file
|
||||||
|
7 1 12690 12 T Direct output to disk
|
||||||
|
7 1 12690 12 X Direct output to disk
|
||||||
|
10 2 13162 3 T Clear graphics
|
||||||
|
10 2 13162 3 X Clear graphics
|
||||||
|
11 2 13239 3 T Clear text
|
||||||
|
11 2 13239 3 X Clear text
|
||||||
|
12 2 13307 13 T Draw a ruler
|
||||||
|
12 2 13307 13 X Draw a ruler
|
||||||
|
13 2 14053 13 T Use crosshair
|
||||||
|
13 2 14053 13 X Use crosshair
|
||||||
|
14 2 14643 35 T Reposition plots
|
||||||
|
14 2 14643 35 X Reposition plots
|
||||||
|
15 2 16797 13 T Label a diagram
|
||||||
|
15 2 16797 13 X Label a diagram
|
||||||
|
16 2 17589 13 T Display a map
|
||||||
|
16 2 17589 13 X Display a map
|
||||||
|
17 1 18384 32 T Search for strings
|
||||||
|
17 1 18384 32 X Search for strings
|
||||||
|
18 3 19739 4 T Set strand
|
||||||
|
18 3 19739 4 X Set strand
|
||||||
|
19 3 19906 7 T Set composition
|
||||||
|
19 3 19906 7 X Set composition
|
||||||
|
20 3 20249 4 T Set word length
|
||||||
|
20 3 20249 4 X Set word length
|
||||||
|
21 3 20374 4 T Set number of mismatches
|
||||||
|
21 3 20374 4 X Set number of mismatches
|
||||||
|
22 3 20501 15 T Show settings
|
||||||
|
22 3 20501 15 X Show settings
|
||||||
|
23 3 20965 5 T Make dictionary Dw
|
||||||
|
23 3 20965 5 X Make dictionary Dw
|
||||||
|
24 3 21152 4 T Make dictionary Ds
|
||||||
|
24 3 21152 4 X Make dictionary Ds
|
||||||
|
25 3 21326 8 T Make dictionary Dm from Dw
|
||||||
|
25 3 21326 8 X Make dictionary Dm from Dw
|
||||||
|
26 3 21787 8 T Make dictionary Dm from Ds
|
||||||
|
26 3 21787 8 X Make dictionary Dm from Ds
|
||||||
|
27 3 22248 6 T Make dictionary Dh from Dm
|
||||||
|
27 3 22248 6 X Make dictionary Dh from Dm
|
||||||
|
28 3 22551 56 T Examine fuzzy dictionary Dm
|
||||||
|
28 3 22551 56 X Examine fuzzy dictionary Dm
|
||||||
|
29 3 24462 71 T Examine fuzzy dictionary Dh
|
||||||
|
29 3 24462 71 X Examine fuzzy dictionary Dh
|
||||||
|
30 3 26726 26 T Examine words in Dm
|
||||||
|
30 3 26726 26 X Examine words in Dm
|
||||||
|
31 3 27755 34 T Examine words in Dh
|
||||||
|
31 3 27755 34 X Examine words in Dh
|
||||||
|
32 3 29021 8 T Save or restore a dictionary
|
||||||
|
32 3 29021 8 X Save or restore a dictionary
|
||||||
|
33 1 29428 45 T Find inverted repeats
|
||||||
|
33 1 29428 45 X Find inverted repeats
|
4620
help/nip_help
Normal file
4620
help/nip_help
Normal file
File diff suppressed because it is too large
Load diff
156
help/nip_menu
Normal file
156
help/nip_menu
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
-1 0 22 2 T General
|
||||||
|
-1 0 22 2 X General
|
||||||
|
-2 0 51 2 T Screen control
|
||||||
|
-2 0 72 2 X Screen
|
||||||
|
-3 0 118 2 T Statistical analysis of content
|
||||||
|
-3 0 143 2 X Statistics
|
||||||
|
-4 0 180 2 T Structures and repeats
|
||||||
|
-4 0 205 2 X Structures
|
||||||
|
-5 0 242 2 T Translation and codons
|
||||||
|
-5 0 242 2 X Translation and codons
|
||||||
|
-6 0 279 2 T Gene search by content
|
||||||
|
-6 0 279 2 X Gene search by content
|
||||||
|
-7 0 309 2 T General signals
|
||||||
|
-7 0 309 2 X General signals
|
||||||
|
-8 0 340 2 T Specific signals
|
||||||
|
-8 0 340 2 X Specific signals
|
||||||
|
0 -1 359 16 T NIP
|
||||||
|
0 -1 359 16 X NIP
|
||||||
|
1 0 1155 7 T Help
|
||||||
|
1 0 1155 7 X Help
|
||||||
|
2 0 1469 3 T Quit
|
||||||
|
2 0 1469 3 X Quit
|
||||||
|
3 1 1543 220 T Read a new sequence
|
||||||
|
3 1 1543 220 X Read a new sequence
|
||||||
|
4 1 11372 15 T Define active region
|
||||||
|
4 1 11372 15 X Define active region
|
||||||
|
5 1 12100 24 T List a sequence
|
||||||
|
5 1 12100 24 X List a sequence
|
||||||
|
6 1 13103 6 T List a text file.
|
||||||
|
6 1 13103 6 X List a text file.
|
||||||
|
7 1 13300 12 T Direct output to disk
|
||||||
|
7 1 13300 12 X Direct output to disk
|
||||||
|
8 1 13785 10 T Write active region to disk
|
||||||
|
8 1 13785 10 X Write active region to disk
|
||||||
|
9 1 14128 31 T Edit the sequence
|
||||||
|
9 1 14128 31 X Edit the sequence
|
||||||
|
10 2 15970 3 T Clear graphics
|
||||||
|
10 2 15970 3 X Clear graphics
|
||||||
|
11 2 16036 3 T Clear text
|
||||||
|
11 2 16036 3 X Clear text
|
||||||
|
12 2 16101 12 T Draw a ruler
|
||||||
|
12 2 16101 12 X Draw a ruler
|
||||||
|
13 2 16833 13 T Use crosshair
|
||||||
|
13 2 16833 13 X Use crosshair
|
||||||
|
14 2 17443 35 T Reposition plots
|
||||||
|
14 2 17443 35 X Reposition plots
|
||||||
|
15 2 19598 28 T Label a diagram
|
||||||
|
15 2 19598 28 X Label a diagram
|
||||||
|
16 2 20703 34 T Display a map
|
||||||
|
16 2 20703 34 X Display a map
|
||||||
|
17 1 22073 599 T Search for restriction enzymes
|
||||||
|
17 1 22073 599 X Search for restriction enzymes
|
||||||
|
18 7 46675 105 T Compare a short sequence
|
||||||
|
18 1 46675 105 T Compare a short sequence
|
||||||
|
18 7 46675 105 X Compare a short sequence
|
||||||
|
18 1 46675 105 X Compare a short sequence
|
||||||
|
19 7 49650 106 T Compare a short sequence using a score matrix
|
||||||
|
19 7 49650 106 X Compare a short sequence using a score matrix
|
||||||
|
20 7 53349 230 T Search for a motif using a weight matrix
|
||||||
|
20 7 53349 230 X Search for a motif using a weight matrix
|
||||||
|
21 3 63267 4 T Count base composition
|
||||||
|
21 3 63267 4 X Count base composition
|
||||||
|
22 3 63440 14 T Count dinucleotide frequencies
|
||||||
|
22 3 63440 14 X Count dinucleotide frequencies
|
||||||
|
23 5 64100 179 T Count codons and amino acids
|
||||||
|
23 3 64100 179 T Count codons and amino acids
|
||||||
|
23 5 64100 179 X Count codons and amino acids
|
||||||
|
23 3 64100 179 X Count codons and amino acids
|
||||||
|
24 3 72137 57 T Plot base composition
|
||||||
|
24 3 72137 57 X Plot base composition
|
||||||
|
25 3 73213 23 T Plot local deviations in base composition
|
||||||
|
25 3 73213 23 X Plot local deviations in base composition
|
||||||
|
26 3 74495 23 T Plot local deviations from dinucleotide composition
|
||||||
|
26 3 74495 23 X Plot local deviations from dinucleotide composition
|
||||||
|
27 3 75793 23 T Plot local deviations from trinucleotide composition
|
||||||
|
27 3 75793 23 X Plot local deviations from trinucleotide composition
|
||||||
|
28 5 77065 18 T Calculate codon constraint
|
||||||
|
28 5 77065 18 X Calculate codon constraint
|
||||||
|
59 3 77869 12 T Plot negentropy
|
||||||
|
59 3 77869 12 X Plot negentropy
|
||||||
|
30 4 78454 74 T Search for hairpin loops
|
||||||
|
30 4 78454 74 X Search for hairpin loops
|
||||||
|
31 4 80321 23 T Search for long range inverted repeats
|
||||||
|
31 4 80321 23 X Search for long range inverted repeats
|
||||||
|
32 4 81157 37 T Search for repeats
|
||||||
|
32 4 81157 37 X Search for repeats
|
||||||
|
33 4 82467 12 T Search for z dna (total ry, yr)
|
||||||
|
33 4 82467 12 X Search for z dna (total ry, yr)
|
||||||
|
34 4 82984 12 T Search for z dna (runs of ry, yr)
|
||||||
|
34 4 82984 12 X Search for z dna (runs of ry, yr)
|
||||||
|
35 4 83623 15 T Search for z dna (best phased value)
|
||||||
|
35 4 83623 15 X Search for z dna (best phased value)
|
||||||
|
36 4 84350 92 T Local similarity or complementarity search
|
||||||
|
36 4 84350 92 X Local similarity or complementarity search
|
||||||
|
37 5 87778 39 T Set genetic code
|
||||||
|
37 5 87778 39 X Set genetic code
|
||||||
|
38 4 89050 74 T Examine repeats
|
||||||
|
38 3 89050 74 T Examine repeats
|
||||||
|
39 5 91670 286 T Translate and list in upto six phases
|
||||||
|
39 5 91670 286 X Translate and list in upto six phases
|
||||||
|
40 5 103780 134 T Translate and write the protein sequence to disk
|
||||||
|
40 5 103780 134 X Translate and write the protein sequence to disk
|
||||||
|
41 5 108198 71 T Calculate and write codon table to disk
|
||||||
|
41 5 108198 71 X Calculate and write codon table to disk
|
||||||
|
42 6 111525 132 T Codon usage method
|
||||||
|
42 6 111525 132 X Codon usage method
|
||||||
|
43 6 118508 182 T Positional base preference method.
|
||||||
|
43 6 118508 182 X Positional base preference method.
|
||||||
|
44 6 127924 39 T Uneven positional base frequencies.
|
||||||
|
44 6 127924 39 X Uneven positional base frequencies.
|
||||||
|
45 6 130287 33 T Codon improbability on base composition
|
||||||
|
45 6 130287 33 X Codon improbability on base composition
|
||||||
|
46 6 132146 28 T Codon improbability on amino acid composition
|
||||||
|
46 6 132146 28 X Codon improbability on amino acid composition
|
||||||
|
47 6 133744 14 T Shepherd RNY preference method
|
||||||
|
47 6 133744 14 X Shepherd RNY preference method
|
||||||
|
48 6 134410 30 T Ficketts method
|
||||||
|
48 6 134410 30 X Ficketts method
|
||||||
|
49 6 136094 139 T tRNA gene search.
|
||||||
|
49 6 136094 139 X tRNA gene search.
|
||||||
|
50 7 141894 4 T Plot start codons
|
||||||
|
50 7 141894 4 X Plot start codons
|
||||||
|
51 7 142027 4 T Plot stop codons
|
||||||
|
51 7 142027 4 X Plot stop codons
|
||||||
|
52 7 142188 4 T Plot stop codons on the complementary strand
|
||||||
|
52 7 142188 4 X Plot stop codons on the complementary strand
|
||||||
|
53 7 142365 4 T Plot stop codons on both strands
|
||||||
|
53 7 142365 4 X Plot stop codons on both strands
|
||||||
|
54 5 142536 45 T Search for longest open reading frames
|
||||||
|
54 5 142536 45 X Search for longest open reading frames
|
||||||
|
55 8 144437 67 T Search for E. coli promoter (general)
|
||||||
|
55 8 144437 67 X Search for E. coli promoter (general)
|
||||||
|
56 8 148004 4 T Search for E. coli promoter (general) strand
|
||||||
|
56 8 148004 4 X Search for E. coli promoter (general) strand
|
||||||
|
57 8 148210 4 T Search for E. coli promoter sequences. (-35 and -10)
|
||||||
|
57 8 148210 4 X Search for E. coli promoter sequences. (-35 and -10)
|
||||||
|
58 8 148405 44 T Search for procaryotic ribosome binding sites
|
||||||
|
58 8 148405 44 X Search for procaryotic ribosome binding sites
|
||||||
|
29 1 150862 4 T Reverse and complement the sequence
|
||||||
|
29 1 150862 4 X Reverse and complement the sequence
|
||||||
|
60 7 151001 142 T Search using a dinucleotide weight matrix
|
||||||
|
60 7 151001 142 X Search using a dinucleotide weight matrix
|
||||||
|
61 8 157292 31 T Search for eukaryotic ribosome binding sites
|
||||||
|
61 8 157292 31 X Search for eukaryotic ribosome binding sites
|
||||||
|
62 8 158730 56 T Search for splice junctions
|
||||||
|
62 8 158730 56 X Search for splice junctions
|
||||||
|
63 7 162089 7 T Search using a weight matrix (complementary)
|
||||||
|
63 7 162089 7 X Search using a weight matrix (complementary)
|
||||||
|
64 3 162471 36 T Plot observed-expected word frequencies
|
||||||
|
64 3 162471 36 X Plot observed-expected word frequencies
|
||||||
|
65 9 164175 5 T Search for polya sites
|
||||||
|
65 9 164175 5 X Search for polya sites
|
||||||
|
66 1 164369 4 T Interconvert t and u
|
||||||
|
66 1 164369 4 X Interconvert t and u
|
||||||
|
67 7 164520 797 T Search for patterns of motifs
|
||||||
|
67 7 164520 797 X Search for patterns of motifs
|
132
help/nipf_help
Normal file
132
help/nipf_help
Normal file
|
@ -0,0 +1,132 @@
|
||||||
|
|
||||||
|
@-1. TX 0 @General
|
||||||
|
|
||||||
|
@-2. TX 0 @Screen control
|
||||||
|
|
||||||
|
@-3. TX 0 @Statistical analysis
|
||||||
|
|
||||||
|
@-1. TX 0 @General
|
||||||
|
|
||||||
|
@-2. TX 0 @Screen control
|
||||||
|
|
||||||
|
@-3. TX 0 @Statistical analysis
|
||||||
|
|
||||||
|
@0. TX -1 @NIPF
|
||||||
|
|
||||||
|
@1. TX 1 @ Help
|
||||||
|
|
||||||
|
@2. TX 1 @ Quit
|
||||||
|
|
||||||
|
@3. TX 1 @ Read new sequence
|
||||||
|
|
||||||
|
@4. TX 1 @ Redefine active region
|
||||||
|
|
||||||
|
@5. TX 1 @ List the sequence
|
||||||
|
|
||||||
|
@6. TX 1 @ List a text file
|
||||||
|
|
||||||
|
@7. TX 1 @ Direct output to disk
|
||||||
|
|
||||||
|
@8. TX 1 @ Write active sequence to disk
|
||||||
|
|
||||||
|
@9. TX 1 @ List a translation
|
||||||
|
|
||||||
|
@32. TX 1 @ List showing base differences
|
||||||
|
|
||||||
|
@37. TX 1 @ List showing translation
|
||||||
|
|
||||||
|
@33. TX 1 @ List showing amino acid differences
|
||||||
|
|
||||||
|
@10. TX 2 @ Clear graphics
|
||||||
|
|
||||||
|
@11. TX 2 @ Clear text
|
||||||
|
|
||||||
|
@12. TX 2 @ Draw a ruler
|
||||||
|
|
||||||
|
@13. TX 2 @ Use cross hair
|
||||||
|
|
||||||
|
@14. TX 2 @ Reset margins
|
||||||
|
|
||||||
|
@15. TX 2 @ Label diagram
|
||||||
|
|
||||||
|
@16. TX 2 @ Display a map
|
||||||
|
|
||||||
|
@17. TX 3 @ Set comparison mode
|
||||||
|
|
||||||
|
@18. TX 3 @ Set sort mode
|
||||||
|
|
||||||
|
@21. TX 3 @ Count base changes
|
||||||
|
|
||||||
|
@22. TX 3 @ Count codon changes
|
||||||
|
|
||||||
|
@23. TX 3 @ Count genetic events
|
||||||
|
|
||||||
|
@24. TX 3 @ Show table of base changes
|
||||||
|
|
||||||
|
@36. TX 3 @ Show table of expressed base changes
|
||||||
|
|
||||||
|
@39. TX 3 @ Show table of silent base changes
|
||||||
|
|
||||||
|
@38. TX 3 @ Estimate mutation rate
|
||||||
|
|
||||||
|
@25. TX 3 @ Plot base changes
|
||||||
|
|
||||||
|
@26. TX 3 @ Plot expressed changes per base
|
||||||
|
|
||||||
|
@27. TX 3 @ Plot silent changes per base
|
||||||
|
|
||||||
|
@28. TX 3 @ Count expressed changes per base
|
||||||
|
|
||||||
|
@29. TX 3 @ Count silent changes per base
|
||||||
|
|
||||||
|
@30. TX 3 @ Count changed amino acids
|
||||||
|
|
||||||
|
@31. TX 3 @ Plot amino acid variability
|
||||||
|
|
||||||
|
@ end of help
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
84
help/nipf_menu
Normal file
84
help/nipf_menu
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
-1 0 23 2 T General
|
||||||
|
-1 0 23 2 X General
|
||||||
|
-2 0 53 2 T Screen control
|
||||||
|
-2 0 53 2 X Screen control
|
||||||
|
-3 0 89 2 T Statistical analysis
|
||||||
|
-3 0 89 2 X Statistical analysis
|
||||||
|
-1 0 112 2 T General
|
||||||
|
-1 0 112 2 X General
|
||||||
|
-2 0 142 2 T Screen control
|
||||||
|
-2 0 142 2 X Screen control
|
||||||
|
-3 0 178 2 T Statistical analysis
|
||||||
|
-3 0 178 2 X Statistical analysis
|
||||||
|
0 -1 198 2 T NIPF
|
||||||
|
0 -1 198 2 X NIPF
|
||||||
|
1 1 217 2 T Help
|
||||||
|
1 1 217 2 X Help
|
||||||
|
2 1 236 2 T Quit
|
||||||
|
2 1 236 2 X Quit
|
||||||
|
3 1 268 2 T Read new sequence
|
||||||
|
3 1 268 2 X Read new sequence
|
||||||
|
4 1 305 2 T Redefine active region
|
||||||
|
4 1 305 2 X Redefine active region
|
||||||
|
5 1 337 2 T List the sequence
|
||||||
|
5 1 337 2 X List the sequence
|
||||||
|
6 1 368 2 T List a text file
|
||||||
|
6 1 368 2 X List a text file
|
||||||
|
7 1 404 2 T Direct output to disk
|
||||||
|
7 1 404 2 X Direct output to disk
|
||||||
|
8 1 448 2 T Write active sequence to disk
|
||||||
|
8 1 448 2 X Write active sequence to disk
|
||||||
|
9 1 481 2 T List a translation
|
||||||
|
9 1 481 2 X List a translation
|
||||||
|
32 1 525 2 T List showing base differences
|
||||||
|
32 1 525 2 X List showing base differences
|
||||||
|
37 1 564 2 T List showing translation
|
||||||
|
37 1 564 2 X List showing translation
|
||||||
|
33 1 614 2 T List showing amino acid differences
|
||||||
|
33 1 614 2 X List showing amino acid differences
|
||||||
|
10 2 643 2 T Clear graphics
|
||||||
|
10 2 643 2 X Clear graphics
|
||||||
|
11 2 668 2 T Clear text
|
||||||
|
11 2 668 2 X Clear text
|
||||||
|
12 2 695 2 T Draw a ruler
|
||||||
|
12 2 695 2 X Draw a ruler
|
||||||
|
13 2 724 2 T Use cross hair
|
||||||
|
13 2 724 2 X Use cross hair
|
||||||
|
14 2 752 2 T Reset margins
|
||||||
|
14 2 752 2 X Reset margins
|
||||||
|
15 2 780 2 T Label diagram
|
||||||
|
15 2 780 2 X Label diagram
|
||||||
|
16 2 808 2 T Display a map
|
||||||
|
16 2 808 2 X Display a map
|
||||||
|
17 3 842 2 T Set comparison mode
|
||||||
|
17 3 842 2 X Set comparison mode
|
||||||
|
18 3 870 2 T Set sort mode
|
||||||
|
18 3 870 2 X Set sort mode
|
||||||
|
21 3 903 2 T Count base changes
|
||||||
|
21 3 903 2 X Count base changes
|
||||||
|
22 3 937 2 T Count codon changes
|
||||||
|
22 3 937 2 X Count codon changes
|
||||||
|
23 3 972 2 T Count genetic events
|
||||||
|
23 3 972 2 X Count genetic events
|
||||||
|
24 3 1013 2 T Show table of base changes
|
||||||
|
24 3 1013 2 X Show table of base changes
|
||||||
|
36 3 1064 2 T Show table of expressed base changes
|
||||||
|
36 3 1064 2 X Show table of expressed base changes
|
||||||
|
39 3 1112 2 T Show table of silent base changes
|
||||||
|
39 3 1112 2 X Show table of silent base changes
|
||||||
|
38 3 1149 2 T Estimate mutation rate
|
||||||
|
38 3 1149 2 X Estimate mutation rate
|
||||||
|
25 3 1181 2 T Plot base changes
|
||||||
|
25 3 1181 2 X Plot base changes
|
||||||
|
26 3 1227 2 T Plot expressed changes per base
|
||||||
|
26 3 1227 2 X Plot expressed changes per base
|
||||||
|
27 3 1270 2 T Plot silent changes per base
|
||||||
|
27 3 1270 2 X Plot silent changes per base
|
||||||
|
28 3 1317 2 T Count expressed changes per base
|
||||||
|
28 3 1317 2 X Count expressed changes per base
|
||||||
|
29 3 1361 2 T Count silent changes per base
|
||||||
|
29 3 1361 2 X Count silent changes per base
|
||||||
|
30 3 1401 2 T Count changed amino acids
|
||||||
|
30 3 1401 2 X Count changed amino acids
|
||||||
|
31 3 1443 2 T Plot amino acid variability
|
||||||
|
31 3 1443 2 X Plot amino acid variability
|
2244
help/pip_help
Normal file
2244
help/pip_help
Normal file
File diff suppressed because it is too large
Load diff
80
help/pip_menu
Normal file
80
help/pip_menu
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
-1 0 21 2 T General
|
||||||
|
-1 0 21 2 X General
|
||||||
|
-2 0 50 2 T Screen control
|
||||||
|
-2 0 71 2 X Screen
|
||||||
|
-3 0 117 2 T Statistical analysis of content
|
||||||
|
-3 0 142 2 X Statistics
|
||||||
|
-4 0 179 2 T Structures and repeats
|
||||||
|
-4 0 204 2 X Structures
|
||||||
|
-5 0 225 2 T Search
|
||||||
|
-5 0 225 2 X Search
|
||||||
|
0 -1 243 76 T PIP
|
||||||
|
0 -1 243 76 X PIP
|
||||||
|
1 0 3546 8 T Help
|
||||||
|
1 0 3546 8 X Help
|
||||||
|
2 0 3889 3 T Quit
|
||||||
|
2 0 3889 3 X Quit
|
||||||
|
3 1 3962 220 T Read a new sequence
|
||||||
|
3 1 3962 220 X Read a new sequence
|
||||||
|
4 1 13792 12 T Redefine active region
|
||||||
|
4 1 13792 12 X Redefine active region
|
||||||
|
5 1 14480 33 T List a sequence
|
||||||
|
5 1 14480 33 X List a sequence
|
||||||
|
6 1 15941 4 T List a text file
|
||||||
|
6 1 15941 4 X List a text file
|
||||||
|
7 1 16083 12 T Direct output to disk
|
||||||
|
7 1 16083 12 X Direct output to disk
|
||||||
|
8 1 16567 7 T Write active region to disk
|
||||||
|
8 1 16567 7 X Write active region to disk
|
||||||
|
9 1 16922 26 T Edit the sequence
|
||||||
|
9 1 16922 26 X Edit the sequence
|
||||||
|
10 2 18386 3 T Clear graphics
|
||||||
|
10 2 18386 3 X Clear graphics
|
||||||
|
11 2 18463 3 T Clear text
|
||||||
|
11 2 18463 3 X Clear text
|
||||||
|
12 2 18531 13 T Draw a ruler
|
||||||
|
12 2 18531 13 X Draw a ruler
|
||||||
|
13 2 19278 13 T Use cross hair
|
||||||
|
13 2 19278 13 X Use cross hair
|
||||||
|
14 2 19865 35 T Reset margins
|
||||||
|
14 2 19865 35 X Reset margins
|
||||||
|
15 2 22019 13 T Label a diagram
|
||||||
|
15 2 22019 13 X Label a diagram
|
||||||
|
16 2 22811 13 T Display a map
|
||||||
|
16 2 22811 13 X Display a map
|
||||||
|
17 5 23611 254 T Short sequence search
|
||||||
|
17 1 23611 254 T Short sequence search
|
||||||
|
17 5 23611 254 X Short sequence search
|
||||||
|
17 1 23611 254 X Short sequence search
|
||||||
|
18 5 34012 57 T Compare a sequence
|
||||||
|
18 1 34012 57 T Compare a sequence
|
||||||
|
18 5 34012 57 X Compare a sequence
|
||||||
|
18 1 34012 57 X Compare a sequence
|
||||||
|
19 5 35654 69 T Compare a sequence using a score matrix
|
||||||
|
19 1 35654 69 T Compare a sequence using a score matrix
|
||||||
|
19 5 35654 69 X Compare a sequence using a score matrix
|
||||||
|
19 1 35654 69 X Compare a sequence using a score matrix
|
||||||
|
20 5 37587 214 T Search for a motif using a weight matrix
|
||||||
|
20 5 37587 214 X Search for a motif using a weight matrix
|
||||||
|
21 3 46771 20 T Calculate amino acid composition
|
||||||
|
21 3 46771 20 X Calculate amino acid composition
|
||||||
|
22 4 47655 20 T Plot hydrophobicity
|
||||||
|
22 3 47655 20 T Plot hydrophobicity
|
||||||
|
22 4 47655 20 X Plot hydrophobicity
|
||||||
|
22 3 47655 20 X Plot hydrophobicity
|
||||||
|
23 4 48439 19 T Plot charge
|
||||||
|
23 3 48439 19 T Plot charge
|
||||||
|
23 4 48439 19 X Plot charge
|
||||||
|
23 3 48439 19 X Plot charge
|
||||||
|
24 4 48953 72 T Plot robson prediction
|
||||||
|
24 4 48953 72 X Plot robson prediction
|
||||||
|
26 4 51912 32 T Draw a helix wheel
|
||||||
|
26 4 51912 32 X Draw a helix wheel
|
||||||
|
25 4 53561 36 T Plot hydrophobic moment
|
||||||
|
25 3 53561 36 T Plot hydrophobic moment
|
||||||
|
25 4 53561 36 X Plot hydrophobic moment
|
||||||
|
25 3 53561 36 X Plot hydrophobic moment
|
||||||
|
27 1 55101 87 T Back translate to dna
|
||||||
|
27 1 55101 87 X Back translate to dna
|
||||||
|
28 5 59337 809 T Search for patterns of motifs
|
||||||
|
28 5 59337 809 X Search for patterns of motifs
|
1848
help/sap_help
Normal file
1848
help/sap_help
Normal file
File diff suppressed because it is too large
Load diff
76
help/sap_menu
Normal file
76
help/sap_menu
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
-1 0 21 2 T General
|
||||||
|
-1 0 21 2 X General
|
||||||
|
-2 0 50 2 T Screen control
|
||||||
|
-2 0 71 2 X Screen
|
||||||
|
-3 0 98 2 T Modification
|
||||||
|
-3 0 98 2 X Modification
|
||||||
|
0 -1 116 379 T SAP
|
||||||
|
0 -1 116 379 X SAP
|
||||||
|
17 1 19213 18 T Screen against restriction enzymes
|
||||||
|
17 1 19213 18 X Screen against restriction enzymes
|
||||||
|
18 1 20256 22 T Screen against vector
|
||||||
|
18 1 20256 22 X Screen against vector
|
||||||
|
20 2 21583 113 T Auto assemble
|
||||||
|
20 2 21583 113 X Auto assemble
|
||||||
|
28 1 27744 42 T Highlight disagreements
|
||||||
|
28 1 27744 42 X Highlight disagreements
|
||||||
|
32 3 30106 22 T Extract gel readings
|
||||||
|
32 3 30106 22 X Extract gel readings
|
||||||
|
1 0 31209 3 T Help
|
||||||
|
1 0 31209 3 X Help
|
||||||
|
2 0 31277 5 T Help
|
||||||
|
2 0 31277 5 X Help
|
||||||
|
3 1 31470 175 T Open a database
|
||||||
|
3 1 31470 175 X Open a database
|
||||||
|
4 3 40550 64 T Edit
|
||||||
|
4 3 40550 64 X Edit
|
||||||
|
9 3 43796 40 T Screen edit
|
||||||
|
9 3 43796 40 X Screen edit
|
||||||
|
5 1 45923 45 T Display a contig
|
||||||
|
5 1 45923 45 X Display a contig
|
||||||
|
6 1 48409 6 T List a text file
|
||||||
|
6 1 48409 6 X List a text file
|
||||||
|
8 1 48667 94 T Calculate a consensus
|
||||||
|
8 1 48667 94 X Calculate a consensus
|
||||||
|
25 1 53186 41 T Show relationships
|
||||||
|
25 1 53186 41 X Show relationships
|
||||||
|
21 3 55121 99 T Enter new gel reading
|
||||||
|
21 3 55121 99 X Enter new gel reading
|
||||||
|
23 3 60131 11 T Complement a contig
|
||||||
|
23 3 60131 11 X Complement a contig
|
||||||
|
22 3 60644 70 T Join contigs
|
||||||
|
22 3 60644 70 X Join contigs
|
||||||
|
24 1 64235 11 T Copy the database
|
||||||
|
24 1 64235 11 X Copy the database
|
||||||
|
19 1 64781 41 T Check database
|
||||||
|
19 1 64781 41 X Check database
|
||||||
|
29 1 66799 82 T Examine quality
|
||||||
|
29 1 66799 82 X Examine quality
|
||||||
|
26 3 70617 92 T Alter relationships
|
||||||
|
26 3 70617 92 X Alter relationships
|
||||||
|
27 1 75377 17 T Set display parameters
|
||||||
|
27 1 75377 17 X Set display parameters
|
||||||
|
30 3 76245 48 T Auto edit a contig
|
||||||
|
30 3 76245 48 X Auto edit a contig
|
||||||
|
10 2 78721 3 T Clear graphics
|
||||||
|
10 2 78721 3 X Clear graphics
|
||||||
|
11 2 78786 3 T Clear text
|
||||||
|
11 2 78786 3 X Clear text
|
||||||
|
12 2 78851 12 T Draw a ruler.
|
||||||
|
12 2 78851 12 X Draw a ruler.
|
||||||
|
14 2 79585 38 T Reposition plots
|
||||||
|
14 2 79585 38 X Reposition plots
|
||||||
|
15 2 81933 28 T Label a diagram
|
||||||
|
15 2 81933 28 X Label a diagram
|
||||||
|
16 2 83039 27 T Display a map.
|
||||||
|
16 2 83039 27 X Display a map.
|
||||||
|
7 1 84014 12 T Redirect output
|
||||||
|
7 1 84014 12 X Redirect output
|
||||||
|
13 2 84485 41 T Use crosshair
|
||||||
|
13 2 84485 41 X Use crosshair
|
||||||
|
33 2 86611 11 T Plot single contig
|
||||||
|
33 2 86611 11 X Plot single contig
|
||||||
|
34 2 87312 9 T Plot all contigs
|
||||||
|
34 2 87312 9 X Plot all contigs
|
||||||
|
31 3 87884 9 T Type in gel readings
|
||||||
|
31 3 87884 9 X Type in gel readings
|
1254
help/sip_help
Normal file
1254
help/sip_help
Normal file
File diff suppressed because it is too large
Load diff
78
help/sip_menu
Normal file
78
help/sip_menu
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
-1 0 22 2 T General
|
||||||
|
-1 0 22 2 X General
|
||||||
|
-2 0 51 2 T Screen control
|
||||||
|
-2 0 72 2 X Screen
|
||||||
|
-3 0 101 2 T Set parameters
|
||||||
|
-3 0 101 2 X Set parameters
|
||||||
|
-4 0 126 2 T Comparison
|
||||||
|
-4 0 126 2 X Comparison
|
||||||
|
0 -1 144 208 T SIP
|
||||||
|
0 -1 144 208 X SIP
|
||||||
|
1 0 12690 39 T Help
|
||||||
|
1 0 12690 39 X Help
|
||||||
|
2 0 13755 3 T Quit
|
||||||
|
2 0 13755 3 X Quit
|
||||||
|
3 1 13828 220 T Read a new sequence
|
||||||
|
3 1 13828 220 X Read a new sequence
|
||||||
|
4 1 23656 10 T Define active region
|
||||||
|
4 1 23656 10 X Define active region
|
||||||
|
5 1 24191 16 T List a sequence
|
||||||
|
5 1 24191 16 X List a sequence
|
||||||
|
6 1 25001 4 T List a text file
|
||||||
|
6 1 25001 4 X List a text file
|
||||||
|
7 1 25143 12 T Direct output to disk
|
||||||
|
7 1 25143 12 X Direct output to disk
|
||||||
|
8 1 25627 4 T Write active region to disk
|
||||||
|
8 1 25627 4 X Write active region to disk
|
||||||
|
9 1 25764 5 T Edit the sequences
|
||||||
|
9 1 25764 5 X Edit the sequences
|
||||||
|
10 2 25944 3 T Clear graphics
|
||||||
|
10 2 25944 3 X Clear graphics
|
||||||
|
11 2 26021 3 T Clear text
|
||||||
|
11 2 26021 3 X Clear text
|
||||||
|
12 2 26089 15 T Draw a ruler
|
||||||
|
12 2 26089 15 X Draw a ruler
|
||||||
|
13 2 26869 54 T Use cross hair
|
||||||
|
13 2 26869 54 X Use cross hair
|
||||||
|
14 2 28754 29 T Reposition plots
|
||||||
|
14 2 28754 29 X Reposition plots
|
||||||
|
15 2 30429 13 T Label a diagram
|
||||||
|
15 2 30429 13 X Label a diagram
|
||||||
|
16 2 31213 7 T Display a map
|
||||||
|
16 2 31213 7 X Display a map
|
||||||
|
17 4 31596 19 T Apply identities algorithm
|
||||||
|
17 4 31596 19 X Apply identities algorithm
|
||||||
|
18 4 32260 81 T Apply proportional algorithm
|
||||||
|
18 4 32260 81 X Apply proportional algorithm
|
||||||
|
19 4 36686 42 T List matching spans
|
||||||
|
19 4 36686 42 X List matching spans
|
||||||
|
20 3 37569 16 T Set span length
|
||||||
|
20 3 37569 16 X Set span length
|
||||||
|
21 3 38560 13 T Set proportional score
|
||||||
|
21 3 38560 13 X Set proportional score
|
||||||
|
22 3 39251 6 T Set identities score
|
||||||
|
22 3 39251 6 X Set identities score
|
||||||
|
23 3 39544 79 T Calculate expected scores
|
||||||
|
23 3 39544 79 X Calculate expected scores
|
||||||
|
24 3 43148 90 T Calculate observed scores
|
||||||
|
24 3 43148 90 X Calculate observed scores
|
||||||
|
25 3 46152 26 T Show current parameter settings
|
||||||
|
25 3 46152 26 X Show current parameter settings
|
||||||
|
27 2 46802 5 T Draw a /
|
||||||
|
27 2 46802 5 X Draw a /
|
||||||
|
26 4 46991 57 T Quick scan
|
||||||
|
26 4 46991 57 X Quick scan
|
||||||
|
28 4 49883 90 T Align sequences
|
||||||
|
28 4 49883 90 X Align sequences
|
||||||
|
29 1 55133 4 T Complement the sequences
|
||||||
|
29 1 55133 4 X Complement the sequences
|
||||||
|
30 3 55256 9 T Switch main diagonal
|
||||||
|
30 3 55256 9 X Switch main diagonal
|
||||||
|
31 3 55755 8 T Switch identities
|
||||||
|
31 3 55755 8 X Switch identities
|
||||||
|
32 3 56202 17 T change score matrix
|
||||||
|
32 3 56202 17 X change score matrix
|
||||||
|
33 3 56884 16 T Set number of sd's for Quickscan
|
||||||
|
33 3 56884 16 X Set number of sd's for Quickscan
|
||||||
|
34 3 57767 13 T Set gap penalities
|
||||||
|
34 3 57767 13 X Set gap penalities
|
132
help/splitp_help
Normal file
132
help/splitp_help
Normal file
|
@ -0,0 +1,132 @@
|
||||||
|
|
||||||
|
Preparing the PROSITE protein motif library for use by
|
||||||
|
the Staden programs
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
|
||||||
|
A library of protein motifs (in our terminology, because
|
||||||
|
they include variable gaps, some would be called patterns) has
|
||||||
|
recently become available from Amos Bairoch, Departement de
|
||||||
|
Biochimie Medicale, University of Geneva. Currently it contains 317
|
||||||
|
patterns/motifs and arrives on tape or cdrom in two files: a .dat
|
||||||
|
file and a .doc file. There is also a user documentation file
|
||||||
|
prosite.usr. Here I outline what is required to prepare the
|
||||||
|
PROSITE library for use by our programs.
|
||||||
|
|
||||||
|
Three programs need to be run SPLITP1, SPLITP2, and
|
||||||
|
SPLITP3.
|
||||||
|
|
||||||
|
Outline of the PROSITE files
|
||||||
|
|
||||||
|
A typical entry in the .dat file is shown below.
|
||||||
|
|
||||||
|
ID 2FE2S_FERREDOXIN; PATTERN.
|
||||||
|
AC PS00197;
|
||||||
|
DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
|
||||||
|
DE 2Fe-2S ferredoxins, iron-sulfur binding region signature.
|
||||||
|
PA C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
|
||||||
|
NR /RELEASE=14,15409;
|
||||||
|
NR /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
|
||||||
|
NR /FALSE_NEG=5(5);
|
||||||
|
CC /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
|
||||||
|
CC /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
|
||||||
|
DR P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
|
||||||
|
DR P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
|
||||||
|
DR P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
|
||||||
|
DO PDOC00175;
|
||||||
|
//
|
||||||
|
|
||||||
|
Each entry has an accession number (here PS00197), a
|
||||||
|
pattern definition (here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a
|
||||||
|
documentation file cross reference (here PDOC00175). This
|
||||||
|
pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of
|
||||||
|
STA, not P, C.
|
||||||
|
|
||||||
|
We need to convert all of these patterns into our pattern
|
||||||
|
definitions (as membership of a set, with the appropriate gap
|
||||||
|
ranges) and write each into a separate pattern file with
|
||||||
|
corresponding "membership of a set" weight matrices. Each pattern
|
||||||
|
file is named accession_number.pat (here PS00197.PAT). The
|
||||||
|
corresponding matrix files are accession_number.wtsa,
|
||||||
|
accession_number.wtsb, etc for however many are needed (here
|
||||||
|
PS00197.WTSA and PS00197.WTSB): two are needed because of the
|
||||||
|
variable gap.
|
||||||
|
|
||||||
|
In addition we can optionally split the .dat and .doc
|
||||||
|
files into separate files, one for each entry, with names
|
||||||
|
accession_number.dat and accession_number.doc. Also we create an
|
||||||
|
index for the library prosite.lis, which gives a one line
|
||||||
|
description of each pattern, and ends with the pattern file and
|
||||||
|
documentation file numbers. The start of the file is shown below.
|
||||||
|
|
||||||
|
N-glycosylation site. 00001,00001
|
||||||
|
Glycosaminoglycan attachment site. 00002,00002
|
||||||
|
Tyrosine sulfatation site. 00003,00003
|
||||||
|
cAMP- and cGMP-dependent protein kinase phosphorylation site. 00004,00004
|
||||||
|
|
||||||
|
So the name of the pattern file for Glycosaminoglycan attachment
|
||||||
|
site is PS00002.PAT, and for the documentation file PDOC00002.DOC
|
||||||
|
|
||||||
|
Finally we create a file of file names for all the
|
||||||
|
patterns in the library.
|
||||||
|
|
||||||
|
To use the complete PROSITE library from program pip,
|
||||||
|
select "pattern searcher" and choose the option "use file of
|
||||||
|
pattern file names", and give the file name prosite.nam. For any
|
||||||
|
matches found, the accession number and pattern title will be
|
||||||
|
displayed.
|
||||||
|
|
||||||
|
Running the conversion programs
|
||||||
|
|
||||||
|
Only SPLITP3 is necessary for using the library. The
|
||||||
|
other programs only make the original files marginally easier to
|
||||||
|
browse through and produce an index.
|
||||||
|
|
||||||
|
SPLITP1 splits the prosite.dat file to create a separate
|
||||||
|
file for each entry. Each file is automatically named
|
||||||
|
PSentry_number.dat. In addition it creates an index for the
|
||||||
|
library (see above).
|
||||||
|
|
||||||
|
SPLITP2 performs the same operation for the prosite.doc
|
||||||
|
file, except that no index is created. Files are named
|
||||||
|
PSentry_number.doc.
|
||||||
|
|
||||||
|
SPLITP3 creates a separate pattern file and weight matrix
|
||||||
|
files for each prosite entry from the file prosite.dat. Pattern
|
||||||
|
files are named PSentry_number.pat, weight matrix files
|
||||||
|
PSentry_number.wtsa, PSentry_number.wtsb, etc. The pattern title
|
||||||
|
is the one line description of the motif. SPLITP3 also creates a
|
||||||
|
file of file names. Notice that it will ask for a path name so
|
||||||
|
that the path can be included in the file of file names. This is
|
||||||
|
the path to the directory in which the pattern files are stored.
|
||||||
|
|
||||||
|
Notes
|
||||||
|
|
||||||
|
Obviously the use of files of file names is a general
|
||||||
|
solution, and anybody could now create their own set of
|
||||||
|
interesting patterns for screening, or a subset of prosite.nam,
|
||||||
|
etc.
|
||||||
|
|
||||||
|
Note that 5 of the Bairoch motifs contained the symbols >
|
||||||
|
or < which means that the motifs must appear exactly at the N or
|
||||||
|
C termini of the sequences. Currently our methods have no
|
||||||
|
mechanism for such definitions and, for example KDEL motifs, will
|
||||||
|
be permitted to occur anywhere throughout a sequence.
|
||||||
|
|
||||||
|
Also, of course, the library does not have to be used
|
||||||
|
solely for performing mass screenings: each individual entry can
|
||||||
|
be used as a single pattern by giving the name of its .pat file -
|
||||||
|
e.g. pathname/ps00002.pat. In addition more sophisticated users will
|
||||||
|
wish to copy pattern files and weight matrices into their own
|
||||||
|
directories and modify them. For example the cutoff scores are
|
||||||
|
probably chosen to be quite high in order to reduce the number of
|
||||||
|
false positives, and some users might wish to lower them.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
81
help/staden.references
Normal file
81
help/staden.references
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
|
||||||
|
References with further information about the methods
|
||||||
|
|
||||||
|
Staden, R. Nucl. Acid Res. 8, 817-825 (1980)
|
||||||
|
A computer program to search for tRNA genes. (NIP)
|
||||||
|
Staden, R. Nucl. Acid Res. 8, 3673-3694 (1980)
|
||||||
|
A new computer method for the storage and manipulation
|
||||||
|
of DNA gel reading data. (SAP).
|
||||||
|
Staden, R. Nucl. Acid Res. 10, 2951-2961 (1982)
|
||||||
|
An interactive graphics program for comparing and
|
||||||
|
aligning nucleic acid and amino acid sequences.
|
||||||
|
(SIP).
|
||||||
|
Staden, R. Nucl. Acid Res. 10, 4731-4751 (1982)
|
||||||
|
Automation of the computer handling of gel reading data
|
||||||
|
produced by the shotgun method of DNA sequencing.(SAP)
|
||||||
|
Staden, R. and McLachlan, A.,D. Nucl. Acid Res. 10
|
||||||
|
141-156 (1982)
|
||||||
|
Codon preference and its use in identifying protein
|
||||||
|
coding regions in long DNA sequences. (NIP)
|
||||||
|
Staden, R. Nucl. Acid Res. 12, 499-503 (1984)
|
||||||
|
A computer program to enter DNA gel reading data into a
|
||||||
|
computer. (GIP)
|
||||||
|
Staden, R. Nucl. Acid Res. 12, 551-567 (1984)
|
||||||
|
Measurements of the effects that coding for a protein
|
||||||
|
has on a DNA sequence and their use for finding
|
||||||
|
genes. (NIP: positional base preferences, uneven
|
||||||
|
positional base frequencies)
|
||||||
|
Staden, R. Nucl. Acid Res. 12, 505-519 (1984)
|
||||||
|
Computer methods to locate signals in nucleic acid
|
||||||
|
sequences. NIP: promoters, ribosome binding
|
||||||
|
sites, intron/exon junctions.
|
||||||
|
McLachlan A D, Staden R and Boswell D R, Nucl. Acid Res.
|
||||||
|
12, 9567-9575 (1984)
|
||||||
|
Measure of strength of codon preference. (NIP)
|
||||||
|
Staden R, Computer methods to locate genes and signals in
|
||||||
|
nucleic acid sequences, Genetic Engineering: Principles
|
||||||
|
and Methods Vol. 7, Edited by J. K. Setlow and A.
|
||||||
|
Hollaender, Plenum Publishing Corp. 1985. (NIP)
|
||||||
|
Staden R Nucl. Acid. Res. 14, 217-231 (1986)
|
||||||
|
The current status and portability of our sequence
|
||||||
|
handling software. Summary for May 1985.
|
||||||
|
Staden R "Computer Handling of DNA sequencing projects" in
|
||||||
|
Nucleic acid and protein sequence analysis, A practical
|
||||||
|
approach, 173-217. Edited by M.J.Bishop and C.J.Rawlings,
|
||||||
|
IRL press (1987). (SAP)
|
||||||
|
Staden R, Methods to define and locate patterns of motifs in
|
||||||
|
sequences. CABIOS 4 53-60 (1988). (NIP, PIP,
|
||||||
|
NIPL, PIPL)
|
||||||
|
Staden R, Methods for calculating the probabilities of finding
|
||||||
|
patterns in sequences. CABIOS 5 89-96 (1989). (NIP, PIP,
|
||||||
|
NIPL, PIPL)
|
||||||
|
Staden R, "Methods for discovering novel motifs in nucleic acid
|
||||||
|
sequences". CABIOS 5, 293-298, (1989). (MEP)
|
||||||
|
Staden R, Methods to search for patterns in protein and nucleic
|
||||||
|
acid sequences. In Doolittle, R.F. (ed), Methods in
|
||||||
|
Enzymology, 183, Academic Press, San Diego, CA, 193-211.
|
||||||
|
(1990) (NIP, NIPL, PIP, PIPL)
|
||||||
|
Staden R, Finding protein coding regions in genomic sequences.
|
||||||
|
In Doolittle, R.F. (ed), Methods in Enzymology, 183,
|
||||||
|
Academic Press, San Diego, CA, 163-180. (1990) (NIP)
|
||||||
|
Gleeson T J and Staden R, An X windows and UNIX implementation
|
||||||
|
of our sequence analysis package. CABIOS 7 398 (1991)
|
||||||
|
Staden R, Screening protein and nucleic acid sequences against
|
||||||
|
libraries of patterns. DNA Sequence, in press (NIP, PIP,
|
||||||
|
SPLITP1, SPLITP2, SPLITP3, PROSITE)
|
||||||
|
Dear S and Staden R, A sequence assembly and editing program for
|
||||||
|
efficient management of large projects. Nucleic Acids
|
||||||
|
Research 19 3907-3911 (1991) (XDAP)
|
||||||
|
Staden R and Dear S, Indexing the sequence libraries: Software
|
||||||
|
providing a common indexing system for all the standard
|
||||||
|
sequence libraries. DNA Sequence 3, 99-105 (1992).
|
||||||
|
Dear S and Staden R, A standard file format for data from DNA
|
||||||
|
sequencing instruments. DNA Sequence 3, 107-110 (1992)
|
||||||
|
Gleeson T and Hillier L, A trace display and editing program
|
||||||
|
for data from fluorescence based sequencing machines.
|
||||||
|
Nucleic Acids Research 19 6481-6483 (1991) (TED)
|
||||||
|
Staden R, Staden package update. Genome News 13 12-13 (1993)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
184
help/staden_help
Normal file
184
help/staden_help
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
|
||||||
|
Introduction to the Staden sequence analysis package and its
|
||||||
|
user interface
|
||||||
|
|
||||||
|
The package contains the following programs:
|
||||||
|
|
||||||
|
GIP Gel input program
|
||||||
|
SAP Sequence assembly program
|
||||||
|
NIP Nucleotide interpretation program
|
||||||
|
PIP Protein interpretation program
|
||||||
|
SIP Similarity investigation program
|
||||||
|
MEP Motif exploration program
|
||||||
|
NIPL Nucleotide interpretation program (library)
|
||||||
|
PIPL Protein interpretation program (library)
|
||||||
|
SIPL Similarity investigation program (library)
|
||||||
|
|
||||||
|
GIP uses a digitiser for entry of DNA sequences from
|
||||||
|
autoradiographs.
|
||||||
|
SAP handles everything relating to assembling gel readings in order
|
||||||
|
to produce a consensus sequence. It can also deal with families of
|
||||||
|
protein sequences.
|
||||||
|
NIP provides functions for analysing and interpreting individual
|
||||||
|
nucleotide sequences.
|
||||||
|
PIP provides functions for analysing and interpreting individual
|
||||||
|
protein sequences.
|
||||||
|
MEP analyses families of nucleotide sequences to help discover new
|
||||||
|
motifs.
|
||||||
|
NIPL performs pattern searches on nucleotide sequence libraries.
|
||||||
|
PIPL performs pattern searches on protein sequence libraries.
|
||||||
|
SIP provides functions for comparing and aligning pairs of protein
|
||||||
|
or nucleotide sequences.
|
||||||
|
SIPL searches nucleotide and protein sequence libraries for entries
|
||||||
|
similar to probe sequences.
|
||||||
|
|
||||||
|
|
||||||
|
Documentation
|
||||||
|
|
||||||
|
As is explained below, the programs SAP, NIP, PIP, SIP and MEP
|
||||||
|
have online help, and the help files have the names: HELPSAP,
|
||||||
|
HELPNIP, HELPPIP, HELPSIP, HELPMEP. These files can be displayed on
|
||||||
|
the screen or printed using the appropriate commands. Currently the
|
||||||
|
help for the other programs is also contained in these files. For
|
||||||
|
example help for NIPL is in HELPNIP. This file is called HELPSTADEN.
|
||||||
|
|
||||||
|
Sequence formats
|
||||||
|
|
||||||
|
The shotgun sequencing program SAP deals only with simple text
|
||||||
|
files for gel readings, and is a self-contained system. However as
|
||||||
|
there is still no single agreed format for finished sequences or for
|
||||||
|
libraries of sequences, the other programs in the package can read
|
||||||
|
data that is stored in several ways.
|
||||||
|
|
||||||
|
The analytical programs can read individual sequences stored
|
||||||
|
in the following formats: Staden, EMBL, Genbank, PIR (also known as
|
||||||
|
NBRF), and GCG, but for storing whole libraries we use only PIR
|
||||||
|
format. In addition these programs can perform a number of simple
|
||||||
|
operations using libraries stored in this format. They can extract
|
||||||
|
entries by entry name, can search titles for keywords, can search
|
||||||
|
the whole of the annotation files for keywords, and can extract
|
||||||
|
annotations for any named entry. We reformat all sequence libraries
|
||||||
|
into PIR format. Currently we have NBRF, EMBL, SWISSPROT and VECBASE
|
||||||
|
libraries in PIR format.
|
||||||
|
|
||||||
|
The library searching programs operate only on sequences
|
||||||
|
stored in PIR format.
|
||||||
|
|
||||||
|
The analytical programs will operate with uppercase or
|
||||||
|
lowercase sequence characters. In addition T and U are equivalent.
|
||||||
|
SAP uses uppercase letters for original gel readings and lowercase
|
||||||
|
letters for characters that are corrected by the automatic editor.
|
||||||
|
Programs NIP and PIP use IUB symbols for redundancy in back
|
||||||
|
translations and for sequence searches. The symbols are shown
|
||||||
|
below.
|
||||||
|
|
||||||
|
|
||||||
|
NC-IUB SYMBOLS
|
||||||
|
|
||||||
|
A,C,G,T
|
||||||
|
R (A,G) 'puRine'
|
||||||
|
Y (T,C) 'pYrimidine'
|
||||||
|
W (A,T) 'Weak'
|
||||||
|
S (C,G) 'Strong'
|
||||||
|
M (A,C) 'aMino'
|
||||||
|
K (G,T) 'Keto'
|
||||||
|
H (A,T,C) 'not G'
|
||||||
|
B (G,C,T) 'not A'
|
||||||
|
V (G,A,C) 'not T'
|
||||||
|
D (G,A,T) 'not C'
|
||||||
|
N (G,A,C,T) 'aNy'
|
||||||
|
|
||||||
|
|
||||||
|
The user interface
|
||||||
|
|
||||||
|
The user interface is common to all programs. It consists of a
|
||||||
|
set of menus and a uniform way of presenting choices and obtaining
|
||||||
|
input from the user. This section describes: the menu system; how
|
||||||
|
options are selected and other choices made; how values are
|
||||||
|
supplied to the program; how help is obtained, and how to escape
|
||||||
|
from any part of a program. In addition it gives information about
|
||||||
|
saving results in files and the use of graphics for presenting
|
||||||
|
results.
|
||||||
|
|
||||||
|
Menus
|
||||||
|
|
||||||
|
Each program has several menus and numerous options. Each menu
|
||||||
|
or option has a unique number that is used to identify it. Menu
|
||||||
|
numbers are distinguished from option numbers by being preceded by
|
||||||
|
the letter m (or M, all programs make no distinction between upper
|
||||||
|
and lower case letters). With the exception of some parts of program
|
||||||
|
SAP, the menus are not hierarchical, rather the options they each
|
||||||
|
contain are simply lists of related functions and their identifying
|
||||||
|
numbers. Therefore options can be selected independently of the menu
|
||||||
|
that is currently being shown on the screen, and the menus are
|
||||||
|
simply memory aids. All options and menus are selected by typing
|
||||||
|
their option number when the programs present the prompt
|
||||||
|
|
||||||
|
"? Menu or option number =".
|
||||||
|
|
||||||
|
To select a menu type its number preceded by the letter M. To
|
||||||
|
select an option type its number. If you type only "return" you
|
||||||
|
will get menu m0 which is simply a list of menus. If you select an
|
||||||
|
option you will return to the current menu after the function is
|
||||||
|
completed.
|
||||||
|
|
||||||
|
When you select an option, in many cases the program will
|
||||||
|
immediately perform the operation selected without further dialogue.
|
||||||
|
If you precede an option number by the letter d (e.g. D17), you will
|
||||||
|
force the program to offer dialogue about the selected option before
|
||||||
|
the function operates, hence allowing you to change the value of any
|
||||||
|
of its parameters. If you precede an option number by the symbol ?
|
||||||
|
(e.g. ?17), you will be given help on the option (here 17).
|
||||||
|
|
||||||
|
Where possible, equivalent or identical options have been
|
||||||
|
given the same numbers in all programs, and so users quickly learn
|
||||||
|
the numbers for the functions they employ most often.
|
||||||
|
|
||||||
|
Help
|
||||||
|
|
||||||
|
As mentioned above, help about each option can be obtained by
|
||||||
|
preceding the option number by the symbol ? when you are presented
|
||||||
|
with the prompt "? Menu or option number", but there are two further
|
||||||
|
ways of obtaining help. Whenever the program asks a question you can
|
||||||
|
respond by typing the symbol ? and you will receive information
|
||||||
|
about the current option. In addition, option number 1 in all the
|
||||||
|
programs will give help on all of a program's functions.
|
||||||
|
|
||||||
|
Quitting
|
||||||
|
|
||||||
|
To exit from any point in a program you type ! for quit. If a
|
||||||
|
menu is on the screen this will stop the program, otherwise you will
|
||||||
|
be returned to the last menu.
|
||||||
|
|
||||||
|
Other interactions
|
||||||
|
|
||||||
|
Questions are presented in a few restricted ways. In all
|
||||||
|
cases typing only "return" in response to a question means yes, and
|
||||||
|
typing N or n means no.
|
||||||
|
|
||||||
|
Obvious opposites such as "clear screen" and "keep picture"
|
||||||
|
are presented with only the default shown. For example in this case
|
||||||
|
the default is generally "keep picture" so the program will display:
|
||||||
|
|
||||||
|
"(y/n) (y) Keep picture"
|
||||||
|
|
||||||
|
and the picture will be retained if the user types anything
|
||||||
|
other than N or n, (in which case the screen will be cleared).
|
||||||
|
|
||||||
|
Where there are choices that are not obvious opposites, or
|
||||||
|
there are more than two choices, two further conventions are used:
|
||||||
|
"radio buttons" and "check boxes".
|
||||||
|
|
||||||
|
Radio buttons are used when only one of a number of choices
|
||||||
|
can be made at any one time. The choices are presented arranged one
|
||||||
|
above the other, each choice with a number for its selection, and
|
||||||
|
the default choice marked with an X. For example in the restriction
|
||||||
|
enzyme search routine the following choices are offered:
|
||||||
|
|
||||||
|
|
||||||
|
Select output mode
|
||||||
|
1 order results enzyme by enzyme
|
||||||
|
2 order results by positon
|
||||||
|
X 3 show only infrequent cutters
|
||||||
|
4 show names above the sequence
|
||||||
|
|
26
help/stadenp_help
Normal file
26
help/stadenp_help
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
Standard Staden Programs
|
||||||
|
|
||||||
|
gip Gel input program
|
||||||
|
sap Sequence assembly program
|
||||||
|
(x)dap Sequence assembly program
|
||||||
|
(x)nip Nucleotide interpretation program
|
||||||
|
(x)pip Protein interpretation program
|
||||||
|
(x)sip Similarity investigation program
|
||||||
|
(x)mep Motif exploration program
|
||||||
|
nipl Nucleotide interpretation program (library)
|
||||||
|
pipl Protein interpretation program (library)
|
||||||
|
sipl Similarity investigation program (library)
|
||||||
|
Those with (x) have both tektronix (say nip) and x (say xnip) versions.
|
||||||
|
Environment variables for help files
|
||||||
|
HELPSAP sap
|
||||||
|
HELPDAP dap
|
||||||
|
HELPGIP gip
|
||||||
|
HELPNIP nip
|
||||||
|
HELPPIP pip
|
||||||
|
HELPSIP sip
|
||||||
|
HELPMEP mep
|
||||||
|
HELPSTADEN Introduction and user interface
|
||||||
|
e.g. to read HELPSTADEN type 'more $HELPSTADEN'
|
||||||
|
|
||||||
|
|
||||||
|
|
168
help/ted.help
Normal file
168
help/ted.help
Normal file
|
@ -0,0 +1,168 @@
|
||||||
|
Trace Editor Help
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
The ted trace editor is a prototype to allow the display and editing
|
||||||
|
of traces from sequencing machines, and the simple editing of plain
|
||||||
|
sequences. It runs under the X window system. It provides simultaneous
|
||||||
|
display of traces and bases. The editing allows individual bases to be
|
||||||
|
removed and new ones added, and also a range of bases at either end to
|
||||||
|
be cut off. Currently, only ABI result files and plain sequences are
|
||||||
|
accepted.
|
||||||
|
|
||||||
|
Only one trace can be edited at a time.
|
||||||
|
|
||||||
|
|
||||||
|
Invocation
|
||||||
|
----------
|
||||||
|
|
||||||
|
ted can be run from the command line by simply typing:
|
||||||
|
|
||||||
|
ted
|
||||||
|
|
||||||
|
It will come up with no sequence initially displayed. If provided with
|
||||||
|
any arguments it does not understand, or invalid combinations of
|
||||||
|
arguments, ted will exit with a message indicating its intended usage.
|
||||||
|
ted accepts the standard X arguments allowing, for example, background
|
||||||
|
colour or geometry to be specified. ted can accept an argument
|
||||||
|
specifying an initial file to display. The key for this is the format
|
||||||
|
of the file, for example:
|
||||||
|
|
||||||
|
ted -ABI {ABI format filename}
|
||||||
|
ted -plain {plain format filename}
|
||||||
|
|
||||||
|
The file is then displayed at 50% magnification, with the caret
|
||||||
|
initially positioned at the first base.
|
||||||
|
|
||||||
|
When an initial file is given, a base number of interest and/or a
|
||||||
|
magnification can also be given, for example:
|
||||||
|
|
||||||
|
ted -ABI {ABI format file} -baseNum 280 -mag 30
|
||||||
|
|
||||||
|
or the bottom strand may be specified:
|
||||||
|
|
||||||
|
ted -ABI {ABI format file} -baseNum 280 -mag 30 -bottom 1
|
||||||
|
or
|
||||||
|
ted -ABI {ABI format file} -bottom 1
|
||||||
|
|
||||||
|
or a string of nucleotides on which to center the window:
|
||||||
|
|
||||||
|
ted -ABI {ABI format file} -astring 1
|
||||||
|
or
|
||||||
|
ted -ABI {ABI format file} -astring 1 -mag 30 -bottom 1
|
||||||
|
|
||||||
|
Options can be specified in any order.
|
||||||
|
|
||||||
|
An output filename can be specified in a similar manner:
|
||||||
|
|
||||||
|
ted -ABI inputfilename -output outputfilename
|
||||||
|
|
||||||
|
The default output filename is inputfilename.seq
|
||||||
|
|
||||||
|
If you are running the program on a remote machine, you must
|
||||||
|
specify a display parameter:
|
||||||
|
|
||||||
|
ted -display machine_name:0.2
|
||||||
|
|
||||||
|
You can also specify the size of the opening window or
|
||||||
|
other screen parameters by the following:
|
||||||
|
|
||||||
|
ted -geometry [{width}][x{height}][{+-}{xoff}[{+-}{yoff}]]
|
||||||
|
[-fg {color}] [-bg {color}] [-bd {color}] [-bw {pixels}]
|
||||||
|
|
||||||
|
Displays
|
||||||
|
--------
|
||||||
|
|
||||||
|
When running, ted displays the name of the file it is currently
|
||||||
|
operating on (if any) and the original number of bases.
|
||||||
|
|
||||||
|
A so-called viewport presents four different synchronised views of
|
||||||
|
part of the trace. The top one indicates the sequence indices - the
|
||||||
|
first digit of the number is positioned over the base to which that
|
||||||
|
number corresponds. Below this is a list of the bases as originally
|
||||||
|
found in the file (this is the interpretation of the trace as made by
|
||||||
|
the sequencing machine). Below this is the list of bases as edited by
|
||||||
|
the user --- initially, if this file has not been edited in the past,
|
||||||
|
this is identical to the list of original bases. However, if in a
|
||||||
|
previous session the user has edited this sequence, the edited
|
||||||
|
version of the sequence will appear in the edit window.
|
||||||
|
The final display is of the traces produced by the sequencing
|
||||||
|
machine for the four respective bases.
|
||||||
|
|
||||||
|
Two controls allow the view presented to be adjusted: both are
|
||||||
|
horizontal sliders or scrollbars. The first affects the magnification
|
||||||
|
at which the trace is viewed. The minimum magnification is such that
|
||||||
|
the whole of the trace is visible within the viewport; when a trace is
|
||||||
|
first input, this is the magnification used. The maximum magnification
|
||||||
|
is such that bases are spaced out with several characters of space
|
||||||
|
between them --- this should allow more than enough room for base
|
||||||
|
insertions to be clearly visible. The second scrollbar is immediately
|
||||||
|
above the viewport and allows the user to select which part of the
|
||||||
|
trace is viewed. Both the sliders work in a similar way: the middle
|
||||||
|
mouse button can be used to drag the thumb to any desired position,
|
||||||
|
the left and right mouse buttons can be clicked within the scrollbar
|
||||||
|
to indicate that paging up or down is desired. In the case of the
|
||||||
|
viewport scrollbar, the amount of paging is determined by how far up
|
||||||
|
the scrollbar the pointer is.
|
||||||
|
|
||||||
|
The whole ted window can be expanded and contracted (to an extent) by
|
||||||
|
dragging the "grow-region" provided by whatever window manager is
|
||||||
|
running. The viewport takes up all of this change in size.
|
||||||
|
|
||||||
|
Controls
|
||||||
|
--------
|
||||||
|
|
||||||
|
ted has four buttons. "Quit" exits the program after first checking
|
||||||
|
whether there is a sequence which has been edited and not saved.
|
||||||
|
|
||||||
|
"Help" pops up this window which has a scrollbar on the left allowing
|
||||||
|
all the text to be viewed.
|
||||||
|
|
||||||
|
"Input" presents a dialogue which asks for the format and name of a
|
||||||
|
file to be processed. The bases and (if this is not a plain format
|
||||||
|
file) traces are read in and displayed for editing. The only
|
||||||
|
conversion performed on bases is from 'N' to '-'.
|
||||||
|
|
||||||
|
"Output" presents a dialogue which asks for a filename into which the
|
||||||
|
edited and clipped bases can be saved. The default value can be set
|
||||||
|
on the command line using the "-output" keyword. No conversion of bases
|
||||||
|
is performed on output.
|
||||||
|
|
||||||
|
ted operates in one of three editing modes, one of which is selected
|
||||||
|
from three "radio buttons". The currently selected mode is
|
||||||
|
highlighted.
|
||||||
|
|
||||||
|
Editing
|
||||||
|
-------
|
||||||
|
|
||||||
|
In "Edit sequence" mode, the (lower) list of editable bases can be
|
||||||
|
edited in much the same way as a text editor operates. A "caret" which
|
||||||
|
is visible in the display of edited bases can be moved left and right
|
||||||
|
with the cursor keys (these are sometimes called arrow keys and often
|
||||||
|
appear on numeric keypads). It can also be positioned by clicking any
|
||||||
|
button while the pointer is pointing into either of the list of bases
|
||||||
|
or the traces. The DELETE key deletes the base immediately to the left
|
||||||
|
of the caret. Any printing character can be inserted to the right of
|
||||||
|
the caret by simply typing it. Inserted characters are placed halfway
|
||||||
|
between their neighbours, or if a space is left by the deletion of a
|
||||||
|
base originally there, its position is used. A base can thus be
|
||||||
|
changed by deleting it and entering the new base.
|
||||||
|
|
||||||
|
Note that in the current version of ted the caret is not constrained
|
||||||
|
to remain within the viewed part of the display and that editing can
|
||||||
|
still continue while it is thus invisible. Such editing would probably
|
||||||
|
only occur by accident.
|
||||||
|
|
||||||
|
ted provides a facility to define a cutoff at either end of the trace.
|
||||||
|
A number of the leftmost bases (corresponding to the vector) and the
|
||||||
|
rightmost bases (corresponding to the point where the data become
|
||||||
|
unreliable) can be defined by setting the editor into "Adjust left
|
||||||
|
cutoff" or "Adjust right cutoff" mode. In either of these modes, the
|
||||||
|
pointer and mouse buttons can be used to indicate the cutoff point,
|
||||||
|
and the cursor keys can be used to adjust this leftwards or
|
||||||
|
rightwards. Initially, the cutoff regions are both empty. The cutoff
|
||||||
|
regions are clearly indicated on the list of edited bases display and
|
||||||
|
on the traces display by being drawn with a dimmed background.
|
||||||
|
|
||||||
|
When the sequence is written out, the list of edited bases, with both
|
||||||
|
cutoff regions removed, is written. The output contains newlines
|
||||||
|
for convenient formatting and always ends with one.
|
102
manl/staden.l
Normal file
102
manl/staden.l
Normal file
|
@ -0,0 +1,102 @@
|
||||||
|
.TH staden 1L "November 1991" "MRC LMB" "LOCAL"
|
||||||
|
.SH NAME
|
||||||
|
staden, xstaden \- sequence analysis suite
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.I staden
|
||||||
|
is a suite of programs for sequence analysis. Currently available are
|
||||||
|
.I mep,
|
||||||
|
.I nip,
|
||||||
|
.I pip,
|
||||||
|
.I sap,
|
||||||
|
.I sip,
|
||||||
|
.I nipl,
|
||||||
|
.I pipl,
|
||||||
|
.I and sipl.
|
||||||
|
These all run under the SUN X11
|
||||||
|
.I xterm
|
||||||
|
Tektronix terminal emulator, but also work with the VT640 terminal
|
||||||
|
and the VersaTermPro and MS-Kermit emulators if they login to a SUN.
|
||||||
|
.PP
|
||||||
|
.I xstaden
|
||||||
|
is the same set of programs, named
|
||||||
|
.I xmep,
|
||||||
|
.I xnip,
|
||||||
|
.I xpip,
|
||||||
|
.I xsap,
|
||||||
|
.I xdap,
|
||||||
|
and
|
||||||
|
.I xsip,
|
||||||
|
which run directly under X providing a convenient user interface,
|
||||||
|
including resizable output and pull-down menus. All these programs
|
||||||
|
accept the standard X arguments. The library searching programs
|
||||||
|
nipl, pipl and sipl are only available in xterm form.
|
||||||
|
.PP
|
||||||
|
Sequence library access is provided for the format as distributed
|
||||||
|
on CDROM by EMBL. The CDROM contains the EMBL nucleotide library and
|
||||||
|
the SWISSPROT protein library. The libraries can be left on the
|
||||||
|
CDROM or transferred to hard disk.
|
||||||
|
.PP
|
||||||
|
The programs also provide an interface to the PROSITE protein motif
|
||||||
|
library.
|
||||||
|
.PP
|
||||||
|
Some initialisation is required in order to use the package. csh users
|
||||||
|
should insert the following in their .login files:
|
||||||
|
.IP
|
||||||
|
setenv STADENROOT /home/BioSW/staden
|
||||||
|
.IP
|
||||||
|
source $STADENROOT/staden.login
|
||||||
|
.LP
|
||||||
|
Users of the Bourne shell, sh, should insert the following in
|
||||||
|
their .profile:
|
||||||
|
.IP
|
||||||
|
STADENROOT=/home/BioSW/staden
|
||||||
|
.IP
|
||||||
|
export STADENROOT
|
||||||
|
.IP
|
||||||
|
. $STADENROOT/staden.profile
|
||||||
|
.LP
|
||||||
|
These initialisations will alter your shell's search path so
|
||||||
|
that it can find the program binaries, and other files that are
|
||||||
|
required.
|
||||||
|
.SH ENVIRONMENT
|
||||||
|
The following environment variables may be set in the
|
||||||
|
user's \fI .login\fP or \fI .profile\fP file:
|
||||||
|
.TP 20
|
||||||
|
.BI STADENROOT= /home/BioSW/staden
|
||||||
|
This must be set in the user's initialisation.
|
||||||
|
.TP 20
|
||||||
|
.BI SEQEDT= editor
|
||||||
|
Set the editor to be used by the package. The default is
|
||||||
|
\fIemacs\fP.
|
||||||
|
.SH FILES
|
||||||
|
.PD 0
|
||||||
|
.TP 30
|
||||||
|
$STADENROOT/staden.login
|
||||||
|
csh initialisation
|
||||||
|
.TP 30
|
||||||
|
$STADENROOT/staden.profile
|
||||||
|
sh initialisation
|
||||||
|
.TP 30
|
||||||
|
$STADENROOT/tables
|
||||||
|
Tables used by the programs
|
||||||
|
.TP 30
|
||||||
|
$STADENROOT/help
|
||||||
|
Helpfiles used by the programs, documentation of the user interface
|
||||||
|
and of each of the programs.
|
||||||
|
.TP 30
|
||||||
|
$STADENROOT/tables/SEQUENCELIBRARIES
|
||||||
|
Defines the sequence libraries available, their file descriptors
|
||||||
|
and the prompts to appear on the user's screen.
|
||||||
|
.SH AUTHOR
|
||||||
|
Rodger Staden, MRC Laboratory of Molecular Biology, Hills Rd., Cambridge,
|
||||||
|
CB2 2QH, UK.
|
||||||
|
.SH BUGS
|
||||||
|
.PP
|
||||||
|
When using the xterm programs and in graphics input mode,
|
||||||
|
a carriage return should not be
|
||||||
|
entered on its own but should be preceded by some other character,
|
||||||
|
such as SPACE, COMMA or K. If a carriage return is entered on its
|
||||||
|
own, some garbage will (relatively) harmlessly appear on the plot.
|
||||||
|
.PP
|
||||||
|
General comments on the package can be sent to
|
||||||
|
\fI<rs@uk.ac.cam.mrc-lmb>\fP
|
107
manl/ted.l
Normal file
107
manl/ted.l
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
.TH ted 1L "July 1991" "MRC LMB" "LOCAL"
|
||||||
|
.SH NAME
|
||||||
|
ted \- trace editor
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B ted
|
||||||
|
[(
|
||||||
|
.B -ABI\||\|-ALF\||\|-plain
|
||||||
|
)
|
||||||
|
.I tracefilename
|
||||||
|
[
|
||||||
|
.B -baseNum
|
||||||
|
.I number
|
||||||
|
]
|
||||||
|
.B [
|
||||||
|
.B -mag
|
||||||
|
.I number
|
||||||
|
( 1 to 100 )
|
||||||
|
]
|
||||||
|
.B [
|
||||||
|
.B -bottom
|
||||||
|
.I number
|
||||||
|
(1(true) or 0(false))
|
||||||
|
.B ]
|
||||||
|
.B [
|
||||||
|
.B -astring
|
||||||
|
.I nucleotide-string
|
||||||
|
]]
|
||||||
|
.B [
|
||||||
|
.B -enzyme
|
||||||
|
.I 5' cutting sequence
|
||||||
|
]
|
||||||
|
.B [
|
||||||
|
.B -raw
|
||||||
|
.I filename
|
||||||
|
(to be placed at head of xdap compatible .seq file)
|
||||||
|
.B ]
|
||||||
|
[
|
||||||
|
.B -output
|
||||||
|
.I outputfilename
|
||||||
|
]
|
||||||
|
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.B ted
|
||||||
|
is a simple prototype editor for traces produced from automatic
|
||||||
|
sequencing machines. It allows the traces (from the ABI
|
||||||
|
or ALF sequencing machines) produced to be
|
||||||
|
displayed along with the machine's interpretation of these into
|
||||||
|
bases and an initially identical sequence which can be edited
|
||||||
|
by the user. A cutoff region can be defined at both ends. The
|
||||||
|
edited and clipped list of bases can then be written out.
|
||||||
|
.LP
|
||||||
|
When initially run,
|
||||||
|
.B ted
|
||||||
|
displays the trace file
|
||||||
|
.I tracefilename
|
||||||
|
(if given) of the specified format centered on the base number
|
||||||
|
.I baseNum
|
||||||
|
(if given). If no file is provided,
|
||||||
|
.B ted
|
||||||
|
initially displays nothing.
|
||||||
|
.LP
|
||||||
|
The display consists of
|
||||||
|
the control panel and the synchronized view of the base position
|
||||||
|
information, original and edited sequence data,
|
||||||
|
and graphical representation of the trace (with each nucleotide's trace
|
||||||
|
being represented by a different color). The control
|
||||||
|
panel allows the user to read in new trace files (in either
|
||||||
|
bottom or top strand orientation)
|
||||||
|
as well as to search for a string of nucleotides or a certain base position.
|
||||||
|
The information button brings up signal strength and average spacing for
|
||||||
|
ABI files.
|
||||||
|
Scroll bars allow the user to adjust the magnification of or scroll through
|
||||||
|
the sequence and trace data. The user may also choose to change the vertical
|
||||||
|
magnification of the trace data. Further, sequence on the head (vector)
|
||||||
|
or tail (uncertain data) of the sequence may be ``cutoff''
|
||||||
|
using the adjust left and right cutoff buttons. Bases can be inserted,
|
||||||
|
deleted, or replaced as with
|
||||||
|
any ordinary word-processor in the sequence data window. Finally, the
|
||||||
|
sequence may be written to an ascii file using the output button on
|
||||||
|
the control panel. The output filename is specified in a dialogue,
|
||||||
|
but a default value of inputfilename.seq is provided or the default value
|
||||||
|
can be given with the
|
||||||
|
.I outputfilename
|
||||||
|
argument.
|
||||||
|
.LP
|
||||||
|
A simple help system is provided.
|
||||||
|
.SH FILES
|
||||||
|
.PD 0
|
||||||
|
.TP 20
|
||||||
|
.B ted.help
|
||||||
|
Text provided in the help window.
|
||||||
|
.TP
|
||||||
|
.B /usr/lib/X11/app-defaults/Xted
|
||||||
|
Default application resources.
|
||||||
|
.SH ENVIRONMENT
|
||||||
|
.TP 20
|
||||||
|
.SB XFILESEARCHPATH
|
||||||
|
Specifies the locations where
|
||||||
|
.B ted.help
|
||||||
|
is sought.
|
||||||
|
If this is not defined,
|
||||||
|
.B ted.help
|
||||||
|
must be in the
|
||||||
|
.B /usr/lib/X11/app-defaults
|
||||||
|
directory.
|
||||||
|
.SH AUTHORS
|
||||||
|
Tim Gleeson, LaDeana Hillier, Simon Dear.
|
7
src/Misc/README
Normal file
7
src/Misc/README
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
Miscellaneous Routines Simon Dear, 14 April 1992
|
||||||
|
---------------------------------------------------------------
|
||||||
|
|
||||||
|
The source modules in this directory are for commonly used
|
||||||
|
routines. The archive misc.a should be made before any
|
||||||
|
other programs supplied on this tape.
|
||||||
|
|
15
src/Misc/crash.c
Normal file
15
src/Misc/crash.c
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
#include "misc.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdarg.h> /* varargs needed for v*printf() prototypes */
|
||||||
|
|
||||||
|
/*
** Report a fatal error and terminate the program.
**
** `format` and the arguments after it are handed to vfprintf(), so they
** follow the usual printf conventions. The message goes to stderr and the
** process then exits with status 1; control never returns to the caller.
*/
void crash (char* format,...)
{
    va_list ap ;

    va_start (ap,format) ;
    vfprintf (stderr,format,ap) ;
    va_end (ap) ;

    exit (1) ;
}
|
14
src/Misc/date.c
Normal file
14
src/Misc/date.c
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
#include "misc.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/*
|
||||||
|
** Time and date calculations
|
||||||
|
*/
|
||||||
|
#include <time.h>
|
||||||
|
/*
** Return the current date and time as text.
**
** The string is exactly what ctime() yields for the present moment, so it
** ends with a newline and is owned by the C library: do not free it, and
** expect a later ctime()/asctime() call to overwrite it.
*/
char *date_str()
{
    time_t now = time(NULL);

    return ctime(&now);
}
|
39
src/Misc/filenames.c
Normal file
39
src/Misc/filenames.c
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
#include "misc.h"
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
char *fn_tail(char *fn)
/*
** Return file part (:t) of
** directory path
**
** The result points into `fn` itself: just past the last '/', or at the
** start of `fn` when it contains no '/'. A path ending in '/' therefore
** yields the empty string. Nothing is copied or modified.
*/
{
    /*
    ** strrchr() does the backward search without ever forming a pointer
    ** in front of the string, which a hand-written backward scan must do
    ** when there is no '/' in the name.
    */
    char *slash = strrchr(fn, '/');

    return (slash != NULL) ? slash + 1 : fn;
}
|
||||||
|
|
||||||
|
|
||||||
|
void fn_toupper (char *s)
/*
** Upper-case the file-name part of path `s`.
** The directory head (everything up to the last '/') is not
** handed to the conversion.
*/
{
    char *name = fn_tail(s);

    str_toupper(name);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void fn_tolower (char *s)
/*
** Lower-case the file-name part of path `s`.
** The directory head (everything up to the last '/') is not
** handed to the conversion.
*/
{
    char *name = fn_tail(s);

    str_tolower(name);
}
|
41
src/Misc/files.c
Normal file
41
src/Misc/files.c
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
#include "misc.h"
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
/* Alliant's Concentrix <sys/stat.h> is hugely deficient */
|
||||||
|
/* Define things we require in this program */
|
||||||
|
/* Methinks S_IFMT and S_IFDIR aren't defined in POSIX */
|
||||||
|
#ifndef S_ISDIR
|
||||||
|
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
|
||||||
|
#endif /*!S_ISDIR*/
|
||||||
|
#ifndef S_ISREG
|
||||||
|
#define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
|
||||||
|
#endif /*!S_ISREG*/
|
||||||
|
|
||||||
|
/*
** Test whether `fn` names a directory.
** Returns the S_ISDIR() result for the path, or 0 when stat() fails
** (for instance because the path does not exist).
*/
int is_directory(char * fn)
{
    struct stat info;

    return (stat(fn, &info) == 0) ? S_ISDIR(info.st_mode) : 0;
}
|
||||||
|
|
||||||
|
/*
** Test whether `fn` names a regular file.
** Returns the S_ISREG() result for the path, or 0 when stat() fails
** (for instance because the path does not exist).
*/
int is_file(char * fn)
{
    struct stat info;

    return (stat(fn, &info) == 0) ? S_ISREG(info.st_mode) : 0;
}
|
||||||
|
|
||||||
|
/*
** Test whether `fn` names anything that stat() can see.
** Returns 1 when stat() succeeds and 0 otherwise; the kind of
** object (file, directory, ...) is not examined.
*/
int file_exists(char * fn)
{
    struct stat info;
    int status = stat(fn, &info);

    return status == 0;
}
|
||||||
|
|
||||||
|
/*
** Return the size recorded by stat() for `fn` (its st_size field),
** converted to int. A path that cannot be stat()ed reports 0, so a
** missing file and an empty one are indistinguishable here.
*/
int file_size(char * fn)
{
    struct stat info;
    int status = stat(fn, &info);

    if (status == 0) return info.st_size;

    return 0;
}
|
||||||
|
|
39
src/Misc/find.c
Normal file
39
src/Misc/find.c
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
#include "misc.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/*
** Locate `file`, either exactly as given or beneath one of the
** directories listed in `searchpath`.
**
**   file        name to look for
**   searchpath  colon-separated directory list, or NULL for "no search";
**               empty entries are ignored
**   found       predicate called with each candidate path; a non-zero
**               result means "this is the one"
**
** `file` itself is tried first. If the predicate rejects it, each
** directory is tried in order as "directory/file". The first accepted
** candidate is returned; NULL is returned when nothing is accepted.
**
** The result lives in a static buffer that the next call overwrites.
** A candidate that would not fit in that buffer is never built and never
** offered to `found`; in particular, if `file` alone is accepted but too
** long to store, NULL is returned. `searchpath` is only read, never
** modified, and strtok() is not used, so the caller's and the
** predicate's own tokenising is left undisturbed.
*/
char *myfind(char *file, char* searchpath, int (*found) (char *) )
{
    static char wholePath[1024];
    char *cursor;
    size_t fileLen;
    size_t dirLen;

    if (file == NULL || found == NULL) return NULL;

    fileLen = strlen(file);

    if (found(file)) {
        if (fileLen >= sizeof wholePath) return NULL;
        memcpy(wholePath, file, fileLen + 1);
        return wholePath;
    }

    if (searchpath == NULL) return NULL;

    for (cursor = searchpath; *cursor != '\0'; ) {
        dirLen = strcspn(cursor, ":");

        /* need room for directory, '/', file and the terminating NUL */
        if (dirLen > 0 && dirLen + 1 + fileLen < sizeof wholePath) {
            memcpy(wholePath, cursor, dirLen);
            wholePath[dirLen] = '/';
            memcpy(wholePath + dirLen + 1, file, fileLen + 1);
            if (found(wholePath)) return wholePath;
        }

        cursor += dirLen;
        if (*cursor == ':') cursor++;
    }

    return NULL;
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue