init
This commit is contained in:
commit
f8b7bfff1b
|
@ -0,0 +1,296 @@
|
|||
General Information
|
||||
(Not for the faint hearted)
|
||||
|
||||
30 September 1992
|
||||
|
||||
|
||||
0. Introduction
|
||||
---------------
|
||||
|
||||
This document contains information on the following subjects:
|
||||
|
||||
1. Installing the Staden Package on SPARCstations and DECstations
|
||||
2. Installing the Staden Package on Other Machines
|
||||
3. A Quick Guide to What's on the Release Tape
|
||||
4. Overview of Data Flow During Sequence Assembly
|
||||
5. Acknowledgements
|
||||
|
||||
|
||||
|
||||
1. Installing the Staden Package on SPARCstations and DECstations
|
||||
-----------------------------------------------------------------
|
||||
|
||||
We are endeavouring to make the installation of the Staden Package as
|
||||
quick and as easy as possible. In this current release we provide
|
||||
statically linked sparc and mips executables as well as all sources.
|
||||
|
||||
To install the package:
|
||||
|
||||
1) Create a new directory for the software. You may have to log on as
|
||||
superuser to do this.
|
||||
|
||||
% mkdir -p /home/BioSW/staden
|
||||
|
||||
2) Place the distribution tape in the drive and download the package:
|
||||
|
||||
-sun-
|
||||
% tar xvf /dev/rst0
|
||||
...system messages...
|
||||
|
||||
-dec-
|
||||
% tar xvf /dev/rmt0h
|
||||
...system messages...
|
||||
|
||||
3) Users of the C Shell should add the following to their .login
|
||||
file:
|
||||
|
||||
setenv STADENROOT /home/BioSW/staden
|
||||
source $STADENROOT/staden.login
|
||||
|
||||
Users of the Bourne shell should add the following to their .profile
|
||||
file:
|
||||
|
||||
STADENROOT=/home/BioSW/staden
|
||||
export STADENROOT
|
||||
. $STADENROOT/staden.profile
|
||||
|
||||
|
||||
4) When the user next logs onto the work station the required
|
||||
initialisation will automatically be performed, and the programs in
|
||||
the Staden package can be run. Refer to the help/*.MEM files for
|
||||
information on the various programs. (eg help on xdap is in
|
||||
help/DAP.MEM)
|
||||
|
||||
|
||||
2. Installing the Staden Package on Other Machines
|
||||
--------------------------------------------------
|
||||
|
||||
This is a little more difficult as you will need to remake all the
|
||||
executables. Your system configuration may also mean that some changes
|
||||
will need to be made, though hopefully only to makefiles. We provide
|
||||
a script to aid installation (we hope!), but you may prefer to make
|
||||
all the components manually.
|
||||
|
||||
To remake the Staden package you will require the following:
|
||||
1) A Fortran77 compiler
|
||||
2) An ANSI C compiler
|
||||
3) X11 Release 4, including the Athena Widget libraries.
|
||||
|
||||
Start by following steps 1 through 3 above, to unload the sources and
|
||||
perform initialisations. Read the rest of this document and the other
|
||||
help files. Look at the make files. Follow your nose!
|
||||
|
||||
If you have any problems or successes porting our software to other
|
||||
platforms we would love to hear from you. We would also appreciate
|
||||
receiving your general comments on the package.
|
||||
|
||||
Rodger Staden (principal author)
|
||||
phone: +44 223 402389 email: rs@mrc-lmba.cam.ac.uk
|
||||
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||
Simon Dear:
|
||||
phone: +44 223 402266 email: sd@mrc-lmba.cam.ac.uk
|
||||
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||
James Bonfield:
|
||||
phone: +44 223 402499 email: jkb@mrc-lmba.cam.ac.uk
|
||||
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||
|
||||
|
||||
|
||||
3. A Quick Guide to What's on the Release Tape
|
||||
----------------------------------------------
|
||||
|
||||
The directory structure on this tape is very important. Once set up, the Staden
|
||||
package expects things to be in a predefined place. The root directory
|
||||
of the structure is referred to by the environment variable
|
||||
STADENROOT. Below this there should be at least the following:
|
||||
|
||||
1) bin/
|
||||
All executable files and scripts should be in this directory.
|
||||
$STADENROOT/bin is added to the search path by the script staden.login
|
||||
(or staden.profile if you are using the Bourne Shell). Though you are
|
||||
not forced to keep programs here, we find it is the simplest place to
|
||||
keep them.
|
||||
|
||||
2) help/
|
||||
All on-line help files are in this directory. Files of the form *.MEM
|
||||
or *.mem are formatted ascii files and can be printed for personal
|
||||
reference. The script staden.login sets up many environment variables
|
||||
that refer to files in this directory, as well as modifying
|
||||
XFILESEARCHPATH, which is used by X programs.
|
||||
|
||||
3) manl/
|
||||
Local manual pages for ted and the staden package are in this directory. The
|
||||
environment variable MANPATH is modified in staden.login to search
|
||||
here too.
|
||||
|
||||
4) staden.login and staden.profile
|
||||
These two files are scripts to set up environment variables required
|
||||
by the Staden package. C Shell users should source staden.login from
|
||||
their .login file, and Bourne Shell users should "source" staden.profile
|
||||
from their .profile file. See "Installing the Staden Package on
|
||||
SPARCstations and DECstations", Part 3.
|
||||
|
||||
5) tables/
|
||||
Configuration files for the Staden package are in this directory.
|
||||
Various environment variables are set in staden.login to refer to
|
||||
files in this directory.
|
||||
|
||||
Also of use are the following:
|
||||
|
||||
doc/ - Miscellaneous documentation.
|
||||
userdata/ - Sample databases
|
||||
src/ - program sources
|
||||
ReleaseNotes - Notes on this and future releases
|
||||
Staden_install - Installation script
|
||||
SequenceLibraries - Notes on the use and installation of sequence libraries
|
||||
|
||||
|
||||
Program Sources
|
||||
---------------
|
||||
|
||||
All the program sources are found in the directories in $STADENROOT/src:
|
||||
|
||||
0) Misc/
|
||||
Sources for a library of useful routines used by the staden package.
|
||||
** Should be made before the programs in staden/ **
|
||||
|
||||
1) staden/
|
||||
Sources for the Staden suite: mep, xmep, nip, xnip, nipl, pip, xpip,
|
||||
pipl, sap (now superseded by dap), xsap (now superseded by xdap), sip,
|
||||
xsip, sipl, dap, xdap, splitp1, splitp2, splitp3, gip and convert_project.
|
||||
|
||||
2) ted/
|
||||
Sources for the trace display and sequence editing program ted.
|
||||
|
||||
3) abi/
|
||||
Sample scripts and programs for handling ABI 373A data files.
|
||||
|
||||
4) alf/
|
||||
Sample scripts and programs for handling Pharmacia A.L.F. data files.
|
||||
|
||||
Each directory has appropriate makefiles and README files.
|
||||
|
||||
|
||||
|
||||
4. Overview of Data Flow During Sequence Assembly
|
||||
-------------------------------------------------
|
||||
|
||||
During a sequence assembly project the data can enter the sequence
|
||||
assembly program from various routes (See Figure below).
|
||||
|
||||
|
||||
|
||||
Fluorescent Based
|
||||
Sequencing Machine
|
||||
Chromatogram Autoradiogram
|
||||
|
||||
ABI 373A Pharmacia A.L.F. |
|
||||
| | |
|
||||
| | |
|
||||
| alfsplit |
|
||||
| | |
|
||||
+--------+--------+ |
|
||||
| |
|
||||
| |
|
||||
ted (gip)
|
||||
| |
|
||||
+----------------+----------------+
|
||||
|
|
||||
|
|
||||
xdap
|
||||
|
||||
|
||||
Figure 1: Data Flow Through The Staden Suite
|
||||
|
||||
|
||||
The Pharmacia A.L.F. data files in their original format consist of
|
||||
one file for the (up to 10) samples that were on the gel. The program
|
||||
alfsplit divides the file up so that each sample is in a file of
|
||||
its own. From then on each gel reading can be handled individually.
|
||||
Whether these files can be transferred back to the Compaq for
|
||||
reprocessing is unknown.
|
||||
|
||||
All data from fluorescent based sequencing machines must pass through
|
||||
the trace editing program ted. Ted allows vector sequence at the
|
||||
5' end and unreliable data at the 3' end to be clipped. The sequence
|
||||
can be edited if desired, though we should stress that this is NOT
|
||||
RECOMMENDED when used in conjunction with xdap. Ted translates all
|
||||
Pharmacia A.L.F. uncertainty codes to a hyphen ("-") and outputs the
|
||||
clipped sequence, along with additional information on the position
|
||||
and content of cutoffs, to a file.
|
||||
|
||||
People wanting to use xdap with ABI and Pharmacia files, but who have
|
||||
written their own trace clipping software should be aware that xdap
|
||||
requires information to be passed in the sequence file so that
|
||||
traces can be displayed. You may want to modify your software to be
|
||||
compatible with our file format. The file consists of five parts:
|
||||
|
||||
1) Cut off information (Optional).
|
||||
Format is ";%6d%6d%6d%-4s%-16s", where
|
||||
field 1 = total number of bases called
|
||||
2 = number of bases in the clipped sequence at the 5' end
|
||||
3 = number of bases in the sequence in this file
|
||||
4 = type of trace file.
|
||||
"ALF " - Pharmacia A.L.F.
|
||||
"ABI " - ABI 373A
|
||||
"SCF " - SCF
|
||||
"PLN " - Text only
|
||||
5 = name of trace file.
|
||||
|
||||
2) Content of the clipped sequence at the 5' end (Optional).
|
||||
The sequence can extend over several lines. Each line must
|
||||
begin with ";<" and should be less than 80 characters in
|
||||
length.
|
||||
|
||||
3) Content of the clipped sequence at the 3' end (Optional).
|
||||
The sequence can extend over several lines. Each line must
|
||||
begin with ";>" and should be less than 80 characters in
|
||||
length.
|
||||
|
||||
4) Initial tags for the sequence (Optional)
|
||||
Format is: ";;%4s %6d %6d %s\n", where
|
||||
field 1 = type of tag to be created (see $STADTABL/TAGDB)
|
||||
2 = position of tag
|
||||
3 = length of tag
|
||||
4 = annotation for tag (optional)
|
||||
This feature is only available in the program xbap, which
|
||||
at the time of writing is not yet being distributed with
|
||||
the package.
|
||||
|
||||
5) The sequence, which can extend over several lines. Each
|
||||
line should be less than 80 characters in length.
|
||||
|
||||
Here is a sample file:
|
||||
|
||||
; 660 55 450ABI a21d12.s1RES
|
||||
;<AGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCGGTTCCTTCTGG
|
||||
;<ATATC
|
||||
;>-GATAAGCTGATTTG-TTT-CCATTATGGC-GGTTTGAGCCTC-G-GGTC
|
||||
;>GACCACTCGGTGTGCCAGGAAGGGGTCTGAAATTGAATGGGTTATCACTA
|
||||
;>GGCGACGTTT--TTTTCAAATTCCGGGCTAAATTTTACGGC-GGA-CGGT
|
||||
;>TCCG-
|
||||
;;COMM 1 10 M13mp18 subclone
|
||||
CAAGACATTTTGAAATACTTGGAATACTGAATCCAAGATGTGGAACATTA
|
||||
GACATATCCGTGTGCTCAACAATCGACATTTGATCCACTGATGAAAATGT
|
||||
TCTTCGTTTAGAATTTCTCATAGCATCAGCCACTTTTGCATAATACTCGA
|
||||
TTGAAGGTTCATGGAAAAAGCTGCGTAGAAGGCATGTCATTGTGCTTACG
|
||||
AGCCATTTCGGATATCTTGTGAATTTAGCAGGAAGTTCTGTAACTGGTTG
|
||||
GAATTCAAATATATCAGTTCTTCTTCCTGGATCTCGTCCTTTTTGCACTA
|
||||
AAACCATTGCGATTGCATCCGGATTCTGAGTAAGAGCCACTACAGCTTTA
|
||||
TGATACAGGCTCTTGTTATTCCTTTCGTGCTCGAATGGGAACTTTCCAGT
|
||||
GGCACAAAAATATAGTGTACATCCCAGAGCCCATAGATCACATGTTCCGA
|
||||
|
||||
|
||||
|
||||
5. Acknowledgements
|
||||
|
||||
We would like to thank Applied Biosystems, Inc. and Pharmacia LKB
|
||||
Biotechnology for their cooperation in agreeing to our routines
|
||||
accessing the data files of their fluorescent sequencing machines.
|
||||
|
||||
373A sequence data file formats are the exclusive property of Applied
|
||||
Biosystems, Inc.
|
||||
|
||||
ALF sequence data file formats are the exclusive property of Pharmacia
|
||||
LKB Biotechnology, Inc.
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
Release Notes for Staden Package 1992.3
|
||||
---------------------------------------
|
||||
|
||||
|
||||
Installation guide
|
||||
------------------
|
||||
|
||||
The file doc/install.PS contains installation instructions.
|
||||
|
||||
|
||||
Manual for the Staden Package
|
||||
-----------------------------
|
||||
|
||||
There is now a 135 page manual on the Staden Package. It is currently
|
||||
being distributed as a Word4 document on a Macintosh floppy disk.
|
||||
|
||||
|
||||
Feedback and bug reports
|
||||
------------------------
|
||||
|
||||
We welcome comments and suggestions on all aspects of the package and are
|
||||
best contacted by email: rs@uk.ac.cam.mrc-lmb and sd@uk.ac.cam.mrc-lmb.
|
||||
All abnormal terminations are bugs and we would like to be told of them
|
||||
so they can be fixed. We recommend that you request an update at least once
|
||||
a year as the package is evolving very rapidly.
|
||||
|
||||
Note: due to popular demand we have decided to release new routines earlier
|
||||
than in the past so please report bugs. The documentation for additions may
|
||||
be sparser than before, or non-existent, but if there is something with which
|
||||
you need help, email us.
|
||||
|
||||
|
||||
Changes this release
|
||||
--------------------
|
||||
|
||||
|
||||
The assembly programs bap and xbap have several new functions:
|
||||
1. Find single stranded regions and try to fill them with "hidden"
|
||||
data from the adjacent readings.
|
||||
2. Find single stranded regions (includes ends of contigs) and
|
||||
select primers and templates for double stranding them (joining
|
||||
them).
|
||||
3. Pre assembly screening for readings to find those that align
|
||||
best. Optionally the hidden data can also be included in the
|
||||
comparison (part of assembly function).
|
||||
4. Find pairs of readings taken from opposite ends of the same
|
||||
template (ie forward and reverse read pairs). List or plot their
|
||||
positions.
|
||||
5. A new function to check that readings have been assembled into
|
||||
the correct positions. It aligns the hidden (previously termed "unused")
|
||||
parts of readings with the consensus they overlap to see how well
|
||||
they align. Poor alignments are reported.
|
||||
6. During assembly each reading is now allowed to match up to 100
|
||||
different places.
|
||||
|
||||
It might be guessed from the above that we are trying to improve our
|
||||
ability to deal with the assembly of human data. Hence, also the next
|
||||
addition.
|
||||
|
||||
A new experimental program (rep) for screening readings for Alu
|
||||
sequences prior to assembly. The Alu containing segments are tagged
|
||||
so they can be seen in the contig editor. A library of Alu sequences
|
||||
is included in /tables/alus. The program is quite slow as it compares
|
||||
each reading in both orientations with all of the Alu sequences (126
|
||||
of them) in order to find the best match. Only time and more data will
|
||||
tell how sensitive it is, and whether the current default score of 0.6
|
||||
is "correct". BEWARE rep modifies the original reading files to include
|
||||
the tag information. The only information is in /help/alu.help
|
||||
|
||||
A new program for extracting sets of sequences and their annotations
|
||||
from the sequence libraries (lip). The only information is in
|
||||
/help/lip.help
|
||||
|
||||
Changes to the xterm user interface. These routines have been completely
|
||||
rewritten. One addition is that now ?? in response to a question will
|
||||
allow the user to get help on any function in a program. Help is also
|
||||
improved in the x version.
|
||||
|
||||
|
||||
Changes last release
|
||||
--------------------
|
||||
|
||||
|
||||
DAP, XDAP have been replaced by BAP and XBAP (see below)
|
||||
|
||||
A new function for examining repeats has been added to NIP
|
||||
|
||||
A new repeat search has been added to SIP
|
||||
|
||||
Some outputs have been changed to produce FASTA format files
|
||||
instead of PIR.
|
||||
|
||||
MEP now allows searches for motifs in which any 8 out of a string
|
||||
of 20 can be switched on.
|
||||
|
||||
The manual has been updated.
|
||||
|
||||
Keyword and author searches on sequence libraries
|
||||
|
||||
All programs that use the libraries can now perform author
|
||||
and keyword searches on all libraries (only nip did so before).
|
||||
|
||||
Postscript output
|
||||
|
||||
All graphics can now be saved to disk in postscript form by
|
||||
use of a sub-option in "Redirect output".
|
||||
|
||||
|
||||
|
||||
Sequence assembly
|
||||
|
||||
BAP, XBAP replace DAP and XDAP. A program to convert DAP databases to BAP
|
||||
databases (convert) is included. BAP databases can contain up to 8000 readings
|
||||
and a consensus of 500,000 bases. A minor edit and recompilation will allow
|
||||
up to 99,999 readings. The space is used more efficiently now as the databases
|
||||
grow as the number of readings increases. Reading names can be 16 characters
|
||||
in length. In addition:
|
||||
|
||||
1) Assembly is 4 times as fast as in the DAP.
|
||||
|
||||
2) Find internal joins is 5 times as fast and now brings up the join editor
|
||||
with the two contigs in the correct orientation and aligned.
|
||||
|
||||
3) The assembly routines align pads better, plus a new automatic function can
|
||||
also be used to align them prior to editing.
|
||||
|
||||
4) The contig editor has been greatly speeded up and its functionality
|
||||
has been enhanced.
|
||||
|
||||
5) A routine for selecting oligos for primer walking is included.
|
||||
|
||||
6) A new routine allows batches of readings to be removed from a database.
|
||||
|
||||
7) We have also included routines for making SCF files, for getting the
|
||||
sequence from SCF files, and one for marking the poor quality data in
|
||||
readings. See the manual.
|
||||
|
||||
Sequence library formats
|
||||
|
||||
The standard sequence library indexing method is now that used on the
|
||||
EMBL CD-ROM. The libraries (EMBL nucleotide and SWISSPROT protein) can be
|
||||
left on the CD-ROM or copied to disk. We include in the package programs
|
||||
for creating this type of index for EMBL updates, PIR in codata format,
|
||||
NRL3D and GenBank. If the indexes are created all programs can read all
|
||||
these libraries. Programs and scripts for this task are contained in the
|
||||
directory indexseqlibs.
|
||||
The keyword and author searches are particularly fast and the
|
||||
keyword index is based on ALL text in the files - not just the keywords.
|
||||
|
||||
Feature table formats
|
||||
|
||||
The programs now use the new feature table format common to EMBL
|
||||
and GenBank, but retain the old format for SWISSPROT which has not yet
|
||||
changed.
|
||||
|
||||
For details of the above see file SequenceLibraries.
|
||||
|
||||
Pattern searches
|
||||
|
||||
Pipl and Nipl now have the facility to find only the best scoring
|
||||
match for each sequence. The prompt is "? report all matches", so typing
|
||||
only return means all matches will be shown and typing n means only the
|
||||
highest scoring will be reported. It is particularly useful when employed
|
||||
to create alignments. The corresponding help file has not been updated.
|
||||
Also to incorporate long unix file names the pattern files no longer include
|
||||
the annotation "filename".
|
||||
|
||||
|
||||
Nip
|
||||
|
||||
Option 38 in nip "translate and list" has been removed as the
|
||||
more flexible routines of option 39 incorporate all its functionality. Many
|
||||
options that relate to feature tables have been modified but their help files
|
||||
are not yet up to date.
|
||||
|
||||
|
||||
Vep
|
||||
|
||||
A program (vep) for automatic excising of vector (either
|
||||
sequencing vector or cosmid vector) sequences from readings is now
|
||||
included in the package.
|
||||
|
||||
|
||||
|
||||
|
||||
Rodger Staden, Simon Dear, James Bonfield
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,420 @@
|
|||
Notes on library handling
|
||||
-------------------------
|
||||
|
||||
Contents of this document:
|
||||
|
||||
I) Introduction
|
||||
II) Details of file organisation and use
|
||||
III) Options currently available
|
||||
IV) Installation guide
|
||||
V) New feature table handling facilities
|
||||
VI) Indexing the sequence libraries
|
||||
|
||||
|
||||
Section I Introduction
|
||||
----------------------
|
||||
|
||||
Available sequence libraries
|
||||
|
||||
There are a number of different sequence libraries for nucleotide and protein:
|
||||
PIR, GenBank, EMBL, Swissprot, and the Japanese Databank. Even after all the
|
||||
years of their existence they still use different formats for their data. This
|
||||
provides tedious and unrewarding work for software developers. Recently EMBL
|
||||
and GenBank agreed a new and common way of writing their feature tables, which
|
||||
is a great help, although the rest of their format is different. Swissprot still
|
||||
uses the old embl style feature table format and PIR yet another.
|
||||
|
||||
All the libraries distribute their data on magnetic tapes and EMBL and GenBank
|
||||
have started to distribute on cdrom. The EMBL cdrom also contains Swissprot.
|
||||
The GenBank and EMBL cdroms use different formats and have different contents.
|
||||
The EMBL cdrom has useful indexes sorted alphabetically: those for entry name
|
||||
and accession number, brief descriptions, keywords and freetext indexes are
|
||||
already available and others are expected. These indexes point to the data for
|
||||
each entry, and can be used to extract the data for any entry quickly.
|
||||
|
||||
Moving to unix
|
||||
|
||||
The VAX version of our package used PIR format which meant reformatting all
|
||||
libraries other than PIR into that format. This required, at least
|
||||
temporarily, having space for two copies of the libraries, and quite a lot of
|
||||
cpu time. The software for doing this was provided by PIR, and is very VAX
|
||||
specific and hence will not run under unix. For the unix version of our package
|
||||
I have decided to use the EMBL cdrom format and its indexes as the primary
|
||||
format. The current programs also support the use of PIR format libraries
|
||||
without indexes - ie just the sequence and annotation files.
|
||||
|
||||
Indexing GenBank, EMBL updates, PIR and NRL3D
|
||||
|
||||
We include programs to create indexes for the above libraries. See below and
|
||||
the README file in indexseqlibs. The programs can read all the above libraries
|
||||
once the indexes are created. The indexing programs index the data in its
|
||||
distributed form: WE DO NOT REFORMAT OR COPY THE LIBRARIES but simply create
|
||||
indexes to the original files. Obviously this saves a lot of disk space, and
|
||||
for those content to use only embl and swissprot from the cdrom, almost no disk
|
||||
space is required. We haven't tried it yet, but for genbank on cdrom, the only
|
||||
extra disk space required would be for the indexes.
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
Section II Details of file organisation and use
|
||||
-----------------------------------------------
|
||||
|
||||
The following strategy has been used to try to deal with alternate
|
||||
and changing sequence library formats.
|
||||
|
||||
1) libraries are described at several levels:
|
||||
|
||||
a) the top level file is a list of available libraries which contains:
|
||||
the library type, the name of the file containing the name of
|
||||
each library's individual files, and the prompt to appear on
|
||||
the user's screen: LTYPE LOGNAM PROMPT
|
||||
|
||||
b) the file containing the names of the library's individual files
|
||||
contains flags to define the file types: FTYPE LOGNAM
|
||||
|
||||
c) the individual library files
|
||||
|
||||
|
||||
|
||||
2) library types handled:
|
||||
|
||||
a) EMBL/SWISSPROT in distributed format with cdrom index format
|
||||
LTYPE = 'A'
|
||||
b) GenBank in distributed format with cdrom index format LTYPE = 'C'
|
||||
c) PIR/NRL3D in CODATA format with cdrom index format LTYPE = 'B'
|
||||
d) PIR/NBRF .seq files can be read sequentially as "personal files
|
||||
in PIR format" and do not appear in the list of available libraries.
|
||||
e) FASTA format files can be read sequentially as "personal files
|
||||
in FASTA format" and do not appear in the list of available
|
||||
libraries.
|
||||
|
||||
3) EMBL, SWISSPROT and other libraries for which EMBL-style indexes have been
|
||||
created
|
||||
|
||||
current file types:
|
||||
|
||||
A division.lookup
|
||||
B entryname.index
|
||||
C accession.target
|
||||
D accession.hits
|
||||
E brief description
|
||||
F freetext.target
|
||||
G freetext.hits
|
||||
H author.target
|
||||
I author.hits
|
||||
|
||||
|
||||
Library list
|
||||
level 1
|
||||
|
|
||||
|
|
||||
-----------------------------------------------------------
|
||||
| | |
|
||||
lib 1 file list lib 2 file list lib 3 file list
|
||||
level 2
|
||||
| |
|
||||
-------- ---------
|
||||
level 3
|
||||
file 1 file 1
|
||||
file 2 file 2
|
||||
. .
|
||||
file n file n
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
Level 1
|
||||
|
||||
File name: sequence.libs
|
||||
Environment variable: SEQUENCELIBRARIES
|
||||
Contents:
|
||||
|
||||
A EMBLFILES EMBL nucleotide library ! in cdrom format
|
||||
C GENBFILES GenBank nucleotide library!
|
||||
A SWISSFILES SWISSPROT protein library! in cdrom format
|
||||
B PIRFILES PIR protein library!
|
||||
B NRL3DFILES NRL3D protein library!
|
||||
|
||||
Notes:
|
||||
|
||||
The libraries have types A,B,C. The logical names are EMBLFILES and
|
||||
SWISSFILES, etc and the prompts are 'EMBL nucleotide library' and
|
||||
'SWISSPROT protein library', etc. Anything to the right of a ! is a comment.
|
||||
|
||||
Level 2: the list of library files (using embl as an example)
|
||||
|
||||
File name: embl.files
|
||||
Environment variable: EMBLFILES
|
||||
Contents:
|
||||
|
||||
A EMBLDIVPATH/embl_div.lkp
|
||||
B EMBLINDPATH/entrynam.idx
|
||||
C EMBLINDPATH/acnum.trg
|
||||
D EMBLINDPATH/acnum.hit
|
||||
E EMBLINDPATH/brief.idx
|
||||
F EMBLINDPATH/freetext.trg
|
||||
G EMBLINDPATH/freetext.hit
|
||||
H EMBLINDPATH/author.trg
|
||||
I EMBLINDPATH/author.hit
|
||||
|
||||
|
||||
Level 3: the sequence and annotation files (eg 15 for embl, 1 for swissprot).
|
||||
|
||||
Paths and file names:
|
||||
|
||||
EMBLPATH/bb.dat
|
||||
EMBLPATH/fun.dat
|
||||
EMBLPATH/inv.dat
|
||||
EMBLPATH/mam.dat
|
||||
EMBLPATH/org.dat
|
||||
EMBLPATH/patent.dat
|
||||
EMBLPATH/phg.dat
|
||||
EMBLPATH/pln.dat
|
||||
EMBLPATH/pri.dat
|
||||
EMBLPATH/pro.dat
|
||||
EMBLPATH/rod.dat
|
||||
EMBLPATH/syn.dat
|
||||
EMBLPATH/una.dat
|
||||
EMBLPATH/vrl.dat
|
||||
EMBLPATH/vrt.dat
|
||||
|
||||
All files from the division lookup file down are exactly as they appear on the
|
||||
cdrom. The division lookup file relates numbers stored in the indexes to
|
||||
actual division (or data) files stored on the disk. We rewrite it so the
|
||||
directory structure and file names can be chosen locally. Its format is
|
||||
I6,1x,A. An example is given below.
|
||||
|
||||
Division lookup file
|
||||
|
||||
File name: STADTABL/embl_div.lkp
|
||||
Environment variable path EMBLDIVPATH
|
||||
Contents:
|
||||
|
||||
1 EMBLPATH/bb.dat
|
||||
2 EMBLPATH/fun.dat
|
||||
3 EMBLPATH/inv.dat
|
||||
4 EMBLPATH/mam.dat
|
||||
5 EMBLPATH/org.dat
|
||||
6 EMBLPATH/patent.dat
|
||||
7 EMBLPATH/phg.dat
|
||||
8 EMBLPATH/pln.dat
|
||||
9 EMBLPATH/pri.dat
|
||||
10 EMBLPATH/pro.dat
|
||||
11 EMBLPATH/rod.dat
|
||||
12 EMBLPATH/syn.dat
|
||||
13 EMBLPATH/una.dat
|
||||
14 EMBLPATH/vrl.dat
|
||||
15 EMBLPATH/vrt.dat
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
|
||||
Section III Options currently available
|
||||
---------------------------------------
|
||||
|
||||
Facilities currently offered in nip,pip,sip,nipl,pipl,sipl:
|
||||
|
||||
Get a sequence by knowing its entry name
|
||||
Get a sequence's annotation by knowing its entry name
|
||||
Get an entry name by knowing its accession number
|
||||
Search the freetext index
|
||||
Search the author index
|
||||
|
||||
Facilities currently offered in nipl,pipl,sipl:
|
||||
|
||||
Search whole library
|
||||
Search only a list of entry names
|
||||
Search all but a list of entry names
|
||||
|
||||
Outline of each type of operation
|
||||
|
||||
Looking for an entry by name: the programs will open the library description
|
||||
file and read the names of its files and their file types. Then they will open
|
||||
the entrynam.idx file, and find the sequence offset, annotation offset and
|
||||
division number. Then open the division lookup file, find the file name for the
|
||||
division required, open that file, seek to the required byte and get the data.
|
||||
|
||||
Looking for an entry by accession number: the programs will open the library
|
||||
description file and read the names of its files and their file types. Then
|
||||
they open the acnum.trg and acnum.hit files. The acnum.trg file is read to find
|
||||
the accession number and a pointer to the acnum.hit file and the number of
|
||||
hits. That file is read and the corresponding entry names displayed. At
|
||||
present no further action is performed, although I expect to list out the
|
||||
titles for the entries found.
|
||||
|
||||
Searching the whole of a library: the programs will open the library
|
||||
description file and read the names of its files and their file types. Then
|
||||
they open the division lookup file, read the names and numbers of the sequence
|
||||
files, open all of them, then open the entryname file. Then the library is
|
||||
processed sequentially by reading the entry names, their sequence offsets and
|
||||
division numbers from the entry names file, and then the sequence from the
|
||||
appropriate data file.
|
||||
|
||||
Searching the whole of a library using a list of entry names to include: the
|
||||
programs will open the library description file and read the names of its files
|
||||
and their file types. Then they open the division lookup file, read the names
|
||||
and numbers of the sequence files, open all of them, then open the entryname
|
||||
file. Then the library is processed by reading the list of entry names and
|
||||
finding the names in the entry names file to get their sequence offsets and
|
||||
division numbers, and then the sequence from the appropriate data file. It will
|
||||
stop when it reaches the end of the list of entry names. The list of entry
|
||||
names can be in any order.
|
||||
|
||||
Searching the whole of a library using a list of entry names to exclude: the
|
||||
programs will open the library description file and read the names of its files
|
||||
and their file types. Then they open the division lookup file, read the names
|
||||
and numbers of the sequence files, open all of them, then open the entryname
|
||||
file. Then the library is processed sequentially by reading the list of entry
|
||||
names, reading the next entry in the entry names file to make sure it does not
|
||||
match, then getting the sequence offsets and division numbers, and then the
|
||||
sequence from the appropriate data file. If the next name matches the name on
|
||||
the list of entry names, it will be skipped, and the next name to exclude read.
|
||||
If the list of excluded names is finished the rest of the library is searched
|
||||
sequentially. The list of entry names must be in the same order as those in the
|
||||
library (ie sorted alphabetically).
|
||||
|
||||
Searching a whole library using a PIR format file is performed by reading it
|
||||
sequentially. If a list of entry names is used it must be in the same order as
|
||||
the entries in the library file.
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
Section IV Installation guide
|
||||
-----------------------------
|
||||
|
||||
EMBL CDROM
|
||||
|
||||
The data can be left on the cdrom or copied to hard disk. The files
|
||||
staden.login and staden.profile source the files $STADTABL/libraries.config.csh
|
||||
and $STADTABL/libraries.config.sh respectively. Refer to these files to see what
|
||||
is required to install, add or move a sequence library that you want to be used
|
||||
by the programs.
|
||||
|
||||
Other libraries (PIR, Genbank, EMBL updates)
|
||||
|
||||
Create the indexes then edit the files that tell the programs where the data is
|
||||
stored. The files staden.login and staden.profile source the file
|
||||
$STADTABL/libraries.config. Refer to this file to see what is required to
|
||||
install, add or move a sequence library that you want to be used by the
|
||||
programs.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
Section V New feature table handling facilities
|
||||
-----------------------------------------------
|
||||
|
||||
As mentioned above EMBL and GenBank have recently introduced new feature tables
|
||||
for annotating the sequences. They are a great improvement on the previous ones
|
||||
and, among other things, now permit correct translation of spliced genes.
|
||||
Various options within nip have been added or modified to take advantage of
|
||||
these changes. The routine to translate DNA to protein and write the protein
|
||||
to disk now gives correct results for spliced genes. The routine to translate
|
||||
DNA to protein and display the two together now gives correct translations
|
||||
except for the amino acids spanning intron/exon junctions. The routine to plot
|
||||
maps from feature tables can use the new style. The open reading frame finding
|
||||
routine writes out its results in the new style. The routine that finds open
|
||||
reading frames and writes their translations to disk also writes a title in the
|
||||
form of a new style feature table entry. The feature table format output from
|
||||
the pattern searches in nip also uses the new style.
|
||||
|
||||
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
Section VI Indexing the sequence libraries
|
||||
--------------------------------------------
|
||||
|
||||
We handle EMBL, SwissProt, and GenBank in their distributed format, plus
|
||||
PIR and NRL3D in codata format. All programs and scripts are in directory
|
||||
indexseqlibs.
|
||||
|
||||
Currently we produce an entryname index, accession number index, freetext index,
|
||||
and brief index (the brief index contains the entry name, the primary accession
|
||||
number, the sequence length and an 80 character description).
|
||||
|
||||
To produce any of the indexes requires the creation of several intermediate
|
||||
files and the indexing programs are written so that the intermediate files
|
||||
are the same for all libraries. This means that only the programs that read
|
||||
the distributed form of each library need to be unique to that library, and
|
||||
all the other processing programs can be used for all libraries.
|
||||
|
||||
|
||||
However, even though the indexes have the same format, programs (like nip)
|
||||
that read the libraries need to treat each library separately because their
|
||||
actual contents are written differently.
|
||||
|
||||
Making the entry name index
|
||||
---------------------------
|
||||
|
||||
Common program entryname2
|
||||
|
||||
EMBL emblentryname1
|
||||
SwissProt emblentryname1
|
||||
|
||||
GenBank genbentryname1
|
||||
|
||||
PIR pirentryname1
|
||||
NRL3D pirentryname1
|
||||
|
||||
|
||||
Making the accession number index
|
||||
---------------------------------
|
||||
|
||||
Common programs access2 access3 access4
|
||||
|
||||
EMBL emblaccess1
|
||||
SwissProt emblaccess1
|
||||
|
||||
GenBank genbaccess1
|
||||
|
||||
PIR piraccess1 piraccess2
|
||||
NRL3D No accession numbers
|
||||
|
||||
Making the brief index
|
||||
----------------------
|
||||
|
||||
Common program title2
|
||||
|
||||
EMBL embltitle1
|
||||
SwissProt embltitle1
|
||||
|
||||
GenBank genbtitle1
|
||||
|
||||
PIR pirtitle1 pirtitle2 (pir3 has no accession numbers)
|
||||
NRL3D pirtitle2
|
||||
|
||||
Scripts
|
||||
-------
|
||||
|
||||
emblentryname.script
|
||||
emblaccession.script
|
||||
embltitle.script
|
||||
|
||||
swissentryname.script
|
||||
swissaccession.script
|
||||
swisstitle.script
|
||||
|
||||
genbentryname.script
|
||||
genbaccession.script
|
||||
genbtitle.script
|
||||
|
||||
pirentryname.script
|
||||
piraccession.script
|
||||
pirtitle.script
|
||||
|
||||
nrl3dentryname.script
|
||||
nrl3dtitle.script
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = alpha
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronics terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
# Ask whether the X programs should be compiled.
|
||||
# Result is left in ANS_REQ_X ($YES or $NO). An empty reply selects
|
||||
# $DEF_REQ_X, "?" prints help, and any other unrecognised reply repeats
|
||||
# this question.
|
||||
ask_require_x_progs:
|
||||
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||
set ANS_REQ_X = $<
|
||||
if ("$ANS_REQ_X" == "?") then
|
||||
echo "* These are the programs that require X windows."
|
||||
|
||||
echo " If you do not require all of the X programs, you should abort"
|
||||
|
||||
echo " and manually make the ones you require."
|
||||
|
||||
|
||||
echo ""
|
||||
goto ask_require_x_progs
|
||||
else if ("$ANS_REQ_X" != "") then
|
||||
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||
set ANS_REQ_X=$YES
|
||||
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||
set ANS_REQ_X=$NO
|
||||
else
|
||||
# Unrecognised reply: re-ask this question, not the earlier non-X one,
|
||||
# so the answer already given for the non-X programs is kept.
|
||||
goto ask_require_x_progs
|
||||
endif
|
||||
else
|
||||
# Empty reply: fall back to the default.
|
||||
set ANS_REQ_X=$DEF_REQ_X
|
||||
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
|
||||
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||
set ANS_REQ_MISC = $<
|
||||
if ("$ANS_REQ_MISC" == "?") then
|
||||
echo "* Other programs include:"
|
||||
echo " alfsplit"
|
||||
echo " getABISampleName"
|
||||
echo ""
|
||||
goto ask_require_misc
|
||||
else if ("$ANS_REQ_MISC" != "") then
|
||||
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||
set ANS_REQ_MISC=$YES
|
||||
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||
set ANS_REQ_MISC=$NO
|
||||
else
|
||||
goto ask_require_misc
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||
endif
|
||||
|
||||
|
||||
|
||||
# Warn the user how long a full build may take before asking for the
|
||||
# final go-ahead.
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all executables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = dec
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronics terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
# Ask whether the X programs should be compiled.
|
||||
# Result is left in ANS_REQ_X ($YES or $NO). An empty reply selects
|
||||
# $DEF_REQ_X, "?" prints help, and any other unrecognised reply repeats
|
||||
# this question.
|
||||
ask_require_x_progs:
|
||||
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||
set ANS_REQ_X = $<
|
||||
if ("$ANS_REQ_X" == "?") then
|
||||
echo "* These are the programs that require X windows."
|
||||
|
||||
echo " If you do not require all of the X programs, you should abort"
|
||||
|
||||
echo " and manually make the ones you require."
|
||||
|
||||
|
||||
echo ""
|
||||
goto ask_require_x_progs
|
||||
else if ("$ANS_REQ_X" != "") then
|
||||
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||
set ANS_REQ_X=$YES
|
||||
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||
set ANS_REQ_X=$NO
|
||||
else
|
||||
# Unrecognised reply: re-ask this question, not the earlier non-X one,
|
||||
# so the answer already given for the non-X programs is kept.
|
||||
goto ask_require_x_progs
|
||||
endif
|
||||
else
|
||||
# Empty reply: fall back to the default.
|
||||
set ANS_REQ_X=$DEF_REQ_X
|
||||
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
|
||||
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||
set ANS_REQ_MISC = $<
|
||||
if ("$ANS_REQ_MISC" == "?") then
|
||||
echo "* Other programs include:"
|
||||
echo " alfsplit"
|
||||
echo " getABISampleName"
|
||||
echo ""
|
||||
goto ask_require_misc
|
||||
else if ("$ANS_REQ_MISC" != "") then
|
||||
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||
set ANS_REQ_MISC=$YES
|
||||
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||
set ANS_REQ_MISC=$NO
|
||||
else
|
||||
goto ask_require_misc
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||
endif
|
||||
|
||||
|
||||
|
||||
# Warn the user how long a full build may take before asking for the
|
||||
# final go-ahead.
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all executables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = sgi
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronix terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
ask_require_x_progs:
|
||||
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||
set ANS_REQ_X = $<
|
||||
if ("$ANS_REQ_X" == "?") then
|
||||
echo "* These are the programs that require X windows."
|
||||
echo " If you do not require all of the X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
|
||||
echo ""
|
||||
goto ask_require_x_progs
|
||||
else if ("$ANS_REQ_X" != "") then
|
||||
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||
set ANS_REQ_X=$YES
|
||||
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||
set ANS_REQ_X=$NO
|
||||
else
|
||||
# Unrecognised reply: ask the X-programs question again, not the non-X one.
goto ask_require_x_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_X=$DEF_REQ_X
|
||||
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
|
||||
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||
set ANS_REQ_MISC = $<
|
||||
if ("$ANS_REQ_MISC" == "?") then
|
||||
echo "* Other programs include:"
|
||||
echo " alfsplit"
|
||||
echo " getABISampleName"
|
||||
echo ""
|
||||
goto ask_require_misc
|
||||
else if ("$ANS_REQ_MISC" != "") then
|
||||
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||
set ANS_REQ_MISC=$YES
|
||||
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||
set ANS_REQ_MISC=$NO
|
||||
else
|
||||
goto ask_require_misc
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||
endif
|
||||
|
||||
|
||||
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all executables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = solaris
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronix terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
ask_require_x_progs:
|
||||
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||
set ANS_REQ_X = $<
|
||||
if ("$ANS_REQ_X" == "?") then
|
||||
echo "* These are the programs that require X windows."
|
||||
echo " If you do not require all of the X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
|
||||
echo ""
|
||||
goto ask_require_x_progs
|
||||
else if ("$ANS_REQ_X" != "") then
|
||||
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||
set ANS_REQ_X=$YES
|
||||
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||
set ANS_REQ_X=$NO
|
||||
else
|
||||
# Unrecognised reply: ask the X-programs question again, not the non-X one.
goto ask_require_x_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_X=$DEF_REQ_X
|
||||
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
|
||||
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||
set ANS_REQ_MISC = $<
|
||||
if ("$ANS_REQ_MISC" == "?") then
|
||||
echo "* Other programs include:"
|
||||
echo " alfsplit"
|
||||
echo " getABISampleName"
|
||||
echo ""
|
||||
goto ask_require_misc
|
||||
else if ("$ANS_REQ_MISC" != "") then
|
||||
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||
set ANS_REQ_MISC=$YES
|
||||
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||
set ANS_REQ_MISC=$NO
|
||||
else
|
||||
goto ask_require_misc
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||
endif
|
||||
|
||||
|
||||
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all executables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = sun
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronix terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
ask_require_x_progs:
|
||||
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
|
||||
set ANS_REQ_X = $<
|
||||
if ("$ANS_REQ_X" == "?") then
|
||||
echo "* These are the programs that require X windows."
|
||||
echo " If you do not require all of the X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
|
||||
echo ""
|
||||
goto ask_require_x_progs
|
||||
else if ("$ANS_REQ_X" != "") then
|
||||
if ("$ANS_REQ_X" =~ [yY]*) then
|
||||
set ANS_REQ_X=$YES
|
||||
else if ("$ANS_REQ_X" =~ [nN]*) then
|
||||
set ANS_REQ_X=$NO
|
||||
else
|
||||
# Unrecognised reply: ask the X-programs question again, not the non-X one.
goto ask_require_x_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_X=$DEF_REQ_X
|
||||
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
|
||||
echo -n "Compile other programs [$DEF_REQ_MISC]? "
|
||||
set ANS_REQ_MISC = $<
|
||||
if ("$ANS_REQ_MISC" == "?") then
|
||||
echo "* Other programs include:"
|
||||
echo " alfsplit"
|
||||
echo " getABISampleName"
|
||||
echo ""
|
||||
goto ask_require_misc
|
||||
else if ("$ANS_REQ_MISC" != "") then
|
||||
if ("$ANS_REQ_MISC" =~ [yY]*) then
|
||||
set ANS_REQ_MISC=$YES
|
||||
else if ("$ANS_REQ_MISC" =~ [nN]*) then
|
||||
set ANS_REQ_MISC=$NO
|
||||
else
|
||||
goto ask_require_misc
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_MISC=$DEF_REQ_MISC
|
||||
endif
|
||||
|
||||
|
||||
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all executables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
Wed Jul 7
|
||||
*Version-1993.0.7*
|
||||
New xbap and ted.
|
||||
Can use Ctrl as well as Meta to shift cutoffs in contig editor.
|
||||
Code to read in ABI traces now robust to ABI problem files, where
|
||||
called base order is not base position order.
|
||||
|
||||
Thu Jul 1
|
||||
*Version-1993.0.6*
|
||||
New xbap and bap, to fix bugs.
|
||||
Break Contig was sometimes not recalculating consensus length correctly.
|
||||
Contig Edit was truncating reading name lengths at 10 characters.
|
||||
|
||||
Thu Jun 16
|
||||
*Version-1993.0.5*
|
||||
New xbap and bap executables. RS changed assembly in bap so that
|
||||
when entry is not permitted the program asks for the percentage
|
||||
mismatch - this allows display of alignments for all levels of
|
||||
mismatch.
|
||||
|
||||
Mon Jun 14 14:54:43 BST 1993
|
||||
*Version-1993.0.4*
|
||||
Bug in xdap. It was compiled with xbap's edUtils.h by mistake.
|
||||
|
||||
Fri Jun 11 17:50:13 BST 1993
|
||||
*Version-1993.0.3*
|
||||
Bugs in bap/xbap fixed. New executables included.
|
||||
|
||||
Thu Jun 3 13:53:38 BST 1993
|
||||
*Version-1993.0.2*
|
||||
Bugs in bap/xbap fixed. New executables included.
|
||||
|
||||
Thu May 20 14:45:38 BST 1993
|
||||
*Version-1993.0.1*
|
||||
Changes to makefiles and Staden_install
|
||||
|
||||
Fri Mar 5 11:27:22 GMT 1993
|
||||
*Version-1993.0*
|
||||
Now for DEC Alpha and Solaris
|
||||
bap/xbap now includes double stranding and auto-creation of oligos
|
||||
|
||||
Tue Jan 26 11:54:36 GMT 1993
|
||||
*Version-1992.3.1*
|
||||
Bug fixes
|
||||
1. indexseqlibs/genbentryname1.c
|
||||
2. convert bugs + new programs
|
||||
|
||||
Mon Nov 23 13:50:39 WET 1992
|
||||
*Version-1992.3*
|
||||
Includes bap/xbap and utility programs
|
||||
|
||||
|
||||
Wed Sep 30 11:18:09 BST 1992
|
||||
*Version-1992.2.1*
|
||||
Source changes since last release
|
||||
bug fixes to postscript output, sequence library programs
|
||||
New sun and dec executables
|
||||
|
||||
|
||||
Thu Aug 27 15:27:05 BST 1992
|
||||
|
||||
*Version-1992.2*
|
||||
|
||||
|
||||
Mon Jul 27 13:01:37 WET 1992
|
||||
|
||||
*Version-1992.1.3*
|
||||
Miscellaneous bug fixes and enhancements
|
||||
New sun and dec executables
|
||||
|
||||
|
||||
Tue Jun 16 16:07:41 BST 1992
|
||||
|
||||
*Version-1992.1.2*
|
||||
Sun sparc executables now linked with cc and not gcc.
|
||||
New makefile-sun files
|
||||
New sources for hitNtrg.c and freetext4.c (indexseqlibs), and
|
||||
tagU2.c (staden)
|
||||
|
||||
|
||||
Wed May 27 17:12:36 BST 1992
|
||||
|
||||
*Version-1992.1.1*
|
||||
Inclusion of vep (vector excision program), plus minor changes and bug fixes
|
||||
|
||||
|
||||
Tue May 26 11:10:28 WET 1992
|
||||
|
||||
*Version-1992.1*
|
||||
This version includes the port to DEC Ultrix (mips)
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,32 @@
|
|||
Converting Sap Databases To Be Used With XDAP SD 10 July 1991
|
||||
=======================================================================
|
||||
|
||||
The sequence assembly programmes dap and xdap are based on the programs
|
||||
sap and xsap, with major modifications. For a concise summary of the
|
||||
new features I refer you to Rodger and my paper, "A sequence assembly
|
||||
and editing program for efficient management of large projects"
|
||||
(Nucleic Acids Research, in press)
|
||||
|
||||
The need for storing extra information in project databases has
|
||||
resulted in the creation of two files. For users who wish to use old
|
||||
(sap) databases with xdap, additional files must be created to use all
|
||||
the new features. The program 'convert_project' does this. It is
|
||||
interactive, and asks you for names of relevant files, version numbers
|
||||
etc. Here is a sample program dialogue:
|
||||
|
||||
|
||||
% convert_project
|
||||
Database conversion program
|
||||
Converts *.RD? file to *.TG? and *.CC? files
|
||||
|
||||
Project name ? test
|
||||
Version ? 0
|
||||
Conversion completed.
|
||||
|
||||
|
||||
Further, please ensure that the file TAGDB is in your project
|
||||
directory. Copies can be found in $STADTABL. Alternatively ensure that
|
||||
the environment variable TAGDB is set to $STADTABL/TAGDB
|
||||
|
||||
setenv TAGDB $STADTABL/TAGDB
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
Processing and printing LaTeX sources
|
||||
-------------------------------------
|
||||
|
||||
Given a source file src.tex, run LaTeX to generate the bibliographic
|
||||
references:
|
||||
|
||||
latex src
|
||||
|
||||
Now run BibTeX to search the bibliography for them:
|
||||
|
||||
bibtex src
|
||||
|
||||
Now run LaTeX twice, first to pick up the references, second to bind
|
||||
forward references:
|
||||
|
||||
latex src
|
||||
latex src
|
||||
|
||||
This will have generated a src.dvi output file. Now we convert this
|
||||
to PostScript:
|
||||
|
||||
dvi2ps src.dvi >src.ps
|
||||
|
||||
Now we can print this out:
|
||||
|
||||
lpr src.ps
|
||||
|
||||
Most of the above is only necessary if you are building something from
|
||||
scratch, but it's best to go through it anyway until you fully
|
||||
understand how LaTeX works.
|
|
@ -0,0 +1,131 @@
|
|||
%!
|
||||
/cm {28.2 mul} def
|
||||
/BOXSIZE 2 cm def
|
||||
|
||||
/boxcen
|
||||
{
|
||||
% move to centre of box
|
||||
BOXSIZE mul 2 div BOXSIZE 2 div rmoveto
|
||||
exch
|
||||
% move back by correct amount to ensure letter is in centre of box
|
||||
dup stringwidth
|
||||
pop 2 div neg % halve & neg x offset
|
||||
% y offset appears to be zero! - so use constant 'square' char (eg X)
|
||||
(X) stringwidth pop 2 div neg
|
||||
} def
|
||||
|
||||
/letter
|
||||
{
|
||||
dup BOXSIZE mul 0 rlineto
|
||||
0 BOXSIZE rlineto
|
||||
dup BOXSIZE mul neg 0 rlineto
|
||||
0 BOXSIZE neg rlineto
|
||||
closepath
|
||||
gsave
|
||||
dup boxcen rmoveto
|
||||
show
|
||||
stroke
|
||||
grestore
|
||||
BOXSIZE mul 0 rmoveto
|
||||
} def
|
||||
|
||||
/nextline {0 BOXSIZE neg rmoveto} def
|
||||
|
||||
/line
|
||||
{
|
||||
gsave
|
||||
1 letter
|
||||
1 letter
|
||||
1 letter
|
||||
1 letter
|
||||
grestore
|
||||
nextline
|
||||
} def
|
||||
|
||||
/Times-Roman findfont 50 scalefont setfont
|
||||
newpath
|
||||
5 setlinewidth
|
||||
200 650 translate
|
||||
0 0 moveto
|
||||
%2 setlinecap
|
||||
|
||||
gsave
|
||||
(A) (G) (C) (T) line
|
||||
(3) (4) (1) (2) line
|
||||
(B) (H) (D) (V) line
|
||||
(M) (N) (K) (L) line
|
||||
(-) (X) (Y) (R) line
|
||||
(8) (7) (6) (5) line
|
||||
/Times-Roman findfont 25 scalefont setfont
|
||||
gsave
|
||||
(DELETE) 2 letter
|
||||
(RESET) 2 letter
|
||||
grestore
|
||||
nextline
|
||||
/Times-Roman findfont 35 scalefont setfont
|
||||
gsave
|
||||
(STOP) 4 letter
|
||||
grestore
|
||||
nextline
|
||||
gsave
|
||||
(START) 4 letter
|
||||
grestore
|
||||
nextline
|
||||
gsave
|
||||
(CONFIRM) 4 letter
|
||||
grestore
|
||||
nextline
|
||||
% yukky from here on
|
||||
gsave
|
||||
0 BOXSIZE rmoveto
|
||||
1 cm 0 rlineto stroke
|
||||
grestore
|
||||
(ORIGIN) dup 4 boxcen rmoveto show pop
|
||||
(ORIGIN) stringwidth neg exch neg exch rmoveto
|
||||
(X) stringwidth exch 2 div rmoveto
|
||||
-5 0 rmoveto
|
||||
2 setlinewidth
|
||||
-45 21 rlineto
|
||||
6 0 rlineto
|
||||
-6 0 rmoveto
|
||||
0 -6 rlineto
|
||||
stroke
|
||||
grestore
|
||||
2 setlinewidth
|
||||
0 BOXSIZE 1.4 mul rmoveto
|
||||
6 6 rlineto
|
||||
-6 -6 rmoveto
|
||||
6 -6 rlineto
|
||||
-6 6 rmoveto
|
||||
80 0 rlineto
|
||||
5 -6 rmoveto
|
||||
/Times-Roman findfont 30 scalefont setfont
|
||||
(8 cm) show
|
||||
5 6 rmoveto
|
||||
76 0 rlineto
|
||||
-6 6 rlineto
|
||||
6 -6 rmoveto
|
||||
-6 -6 rlineto
|
||||
stroke
|
||||
0 0 moveto
|
||||
BOXSIZE .4 mul neg BOXSIZE rmoveto
|
||||
currentpoint translate
|
||||
newpath
|
||||
0 0 moveto
|
||||
90 rotate
|
||||
-6 6 rlineto
|
||||
6 -6 rmoveto
|
||||
-6 -6 rlineto
|
||||
6 6 rmoveto
|
||||
-244 0 rlineto
|
||||
-84 0 rmoveto
|
||||
0 -6 rmoveto
|
||||
(20 cm) show
|
||||
0 6 rmoveto
|
||||
-84 0 rmoveto
|
||||
-227 0 rlineto
|
||||
6 6 rlineto
|
||||
-6 -6 rmoveto
|
||||
6 -6 rlineto
|
||||
stroke
|
||||
showpage
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,172 @@
|
|||
\documentstyle[a4,11pt]{article}
|
||||
|
||||
\title{Installing the Staden Package}
|
||||
\author{Simon Dear}
|
||||
\date{21 May 1993}
|
||||
|
||||
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
|
||||
|
||||
\section{Introduction}
|
||||
|
||||
On the accompanying tape you will find executables for
|
||||
one of SunOS 4.x, Sun
|
||||
Solaris 2.x, DEC Ultrix, DEC OSF/1 and Silicon Graphics SGI operating systems.
|
||||
Also there are sources for all the programs in the Staden package.
|
||||
Programs in the package are:
|
||||
\begin{description}
|
||||
|
||||
\item[mep and xmep] Motif exploration program.
|
||||
\item[nip and xnip] Nucleotide interpretation program.
|
||||
\item[nipl] Nucleotide interpretation program (library).
|
||||
Searches nucleotide libraries for patterns of motifs.
|
||||
\item[pip and xpip] Protein interpretation program.
|
||||
\item[pipl] Protein interpretation program (library).
|
||||
Searches protein libraries for patterns of motifs.
|
||||
\item[sip and xsip] Similarity investigation program.
|
||||
\item[sipl] Similarity investigation program (library).
|
||||
Compares a probe protein or nucleic acid sequence against
|
||||
a library of sequences.
|
||||
\item[sap and xsap] The original sequence assembly program.
|
||||
\item[bap and xbap] Our latest, most advanced sequence assembly program.
|
||||
\item[dap and xdap] An obsolete assembly program, superseded by {\em bap}.
|
||||
\item[lip] Library interface program.
|
||||
\item[rep] Repeat examination program.
|
||||
\item[ted] X windows utility for displaying and editing
|
||||
fluorescent sequencing machine traces.
|
||||
\item[splitp1, splitp2 and splitp3] Refer to help/SPLITP.MEM.
|
||||
\item[sethelp] Builds online help files.
|
||||
\item[gip] Gel input program.
|
||||
\item[convert] Converts between {\em xdap\/} and {\em xbap\/} databases.
|
||||
\item[cop and cop-bap] Checks completed {\em xdap\/} and {\em xbap\/}
|
||||
databases for editing errors.
|
||||
\item[trace2seq] Extracts sequence from trace files.
|
||||
\item[getABISampleName] Extracts sample names from ABI trace files.
|
||||
\item[makeSCF] Converts existing trace files to the compact
|
||||
SCF format.
|
||||
\item[alfsplit] Splits the Pharmacia A.L.F. gel
|
||||
file into multiple files, one for each sample.
|
||||
\item[frog] Relabels lanes in ABI trace files.
|
||||
\item[+ numerous scripts (including {\em squirrel (v1.4)\/})]
|
||||
|
||||
\end{description}
|
||||
|
||||
|
||||
\section{Requirements}
|
||||
|
||||
You will need a tape drive to read the software off the distribution
|
||||
tape (QIC-150, TK50, or Exabyte). You will also need a large amount of
|
||||
disk storage to accommodate the whole package. For release
|
||||
version-1993.0, requirements were
|
||||
31Mb (SunOS 4.x),
|
||||
36Mb (Sun Solaris 2.x)
|
||||
30Mb (DEC Ultrix)
|
||||
37Mb (DEC OSF/1)
|
||||
and
|
||||
27Mb (Silicon Graphics SGI.)
|
||||
|
||||
|
||||
To compile the Staden package you will require:
|
||||
\begin{itemize}
|
||||
\item An ANSI C compiler.
|
||||
\item A FORTRAN-77 compiler.
|
||||
\item X11 (Release 4 or 5).
|
||||
\item GNU make (except with SunOS and Solaris 2.x.)
|
||||
\end{itemize}
|
||||
|
||||
\section{Installation}
|
||||
|
||||
To install the package,
|
||||
\begin{enumerate}
|
||||
\item Create a directory for where you would like the software to be
|
||||
placed. You may have to be superuser to do this.
|
||||
\begin{verbatim} mkdir /home/Staden\end{verbatim}
|
||||
\item Change to this directory.
|
||||
\begin{verbatim} cd /home/Staden\end{verbatim}
|
||||
\item Place the tape into the tape unit.
|
||||
\item Extract the software off the distribution tape (NOTE: the device name may be
|
||||
different on your machine):
|
||||
\begin{verbatim} tar xvf /dev/rst0\end{verbatim}
|
||||
\item C shell users should set the environment variable {\bf STADENROOT}
|
||||
to be the directory where the package is installed and source the file
|
||||
{\em staden.login} found there. This is best done by adding lines to their
|
||||
{\em .login} file:
|
||||
\begin{verbatim}
|
||||
setenv STADENROOT /home/Staden
|
||||
source $STADENROOT/staden.login
|
||||
\end{verbatim}
|
||||
Users of the Bourne shell, sh, should similarly add lines to their {\em .profile} file:
|
||||
\begin{verbatim}
|
||||
STADENROOT=/home/Staden
|
||||
export STADENROOT
|
||||
. $STADENROOT/staden.profile
|
||||
\end{verbatim}
|
||||
|
||||
The startup routines set environment variables and modify the shell's
|
||||
search path so that it can find the programs in the Staden Package.
|
||||
When users next log on to the system, they will be able to use the
|
||||
programs.
|
||||
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
\section {Installation on Unsupported Platforms}
|
||||
|
||||
Install the software as you would for a supported machine. You will
|
||||
need to remake all executables. The script {\em Staden\_install} can
|
||||
be used to help recompile the package. A large number of
|
||||
assumptions have been made, and you may need to change the makefiles
|
||||
to suit your system.
|
||||
|
||||
The sources have been organised into subdirectories of the directory
|
||||
{\bf src}. In {\bf Misc} are routines common to many programs. They
|
||||
should be made first. In {\bf staden} are all the programs of the
|
||||
Staden suite ({\em mep}, {\em nip}, {\em pip}, {\em sap}, {\em sip},
|
||||
{\em dap}, {\em gip}, {\em vep}, {\em lip} and {\em rep}) with the
|
||||
exception of {\em bap}. Code for our latest sequence assembly program
|
||||
{\em bap} is in directories {\bf bap} and {\bf bap/osp-bits}. Make
|
||||
the objects in {\bf staden} first, then the ones in {\bf
|
||||
bap/osp-bits}, and finally the ones in {\bf bap}. In {\bf ted} is the
|
||||
trace editing program.
|
||||
|
||||
|
||||
\section {Other Software Provided}
|
||||
|
||||
Other software and scripts can be found in the {\bf alf\/}, {\bf
|
||||
abi\/}, {\bf cop\/}, {\bf getMCH\/}, {\bf scf\/}, {\bf frog\/} and {\bf
|
||||
scripts}
|
||||
directories.
|
||||
Each directory contains documentation describing the programs
|
||||
contained.
|
||||
|
||||
Since release version-1993.0 we have distributed the {\em squirrel (v1.4)}
|
||||
package. Please read the disclaimer that accompanies this software.
|
||||
Additional sources and scripts can be found in {\bf expGetSeq}, {\bf vepe},
|
||||
{\bf newted} and {\bf squirrel-1.4} directories.
|
||||
|
||||
Many scripts (including {\em squirrel}) and filters were developed at the MRC-LMB for
|
||||
{\bf INTERNAL USE ONLY}.
|
||||
We are aware that people elsewhere will want to develop
|
||||
similar software.
|
||||
We include them in the Staden Package merely as {\bf EXAMPLES} of
|
||||
what has been achieved elsewhere.
|
||||
{\bf THESE SCRIPTS WILL NOT WORK ON YOUR SYSTEM WITHOUT MODIFICATION.}
|
||||
|
||||
\section {When All Else Fails...}
|
||||
If you have any problems please contact the authors,
|
||||
\mbox{Rodger Staden}
|
||||
\mbox{(\em rs@mrc-lmba.cam.ac.uk\/)},
|
||||
\mbox{Simon Dear}
|
||||
\mbox{(\em sd@mrc-lmba.cam.ac.uk\/)}
|
||||
and
|
||||
\mbox{James Bonfield}
|
||||
\mbox{(\em jkb@mrc-lmba.cam.ac.uk\/)},
|
||||
by email or by writing to us at:
|
||||
MRC Laboratory of Molecular Biology, Hills Road, Cambridge, \mbox{CB2 2QH}, U.K.
|
||||
We also welcome general comments on the package.
|
||||
|
||||
\end{document}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,213 @@
|
|||
\documentstyle[12pt]{article}
|
||||
|
||||
\title{A trace display and editing program for data from fluorescence based
|
||||
sequencing machines}
|
||||
\author{Timothy Gleeson \and LaDeana Hillier}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
\section*{}
|
||||
\subsection*{}
|
||||
\subsubsection*{ABSTRACT}
|
||||
|
||||
``Ted'' ({\em T}race {\em ed}itor)
|
||||
is a graphical editor for sequence and trace data from automated
|
||||
fluorescence sequencing machines. It provides facilities
|
||||
for viewing sequence and trace data (in top or bottom strand
|
||||
orientation), for editing the base sequence, for
|
||||
automated or manual trimming of the head (vector) and tail
|
||||
(uncertain data) from the sequence, for vertical and horizontal trace
|
||||
scaling, for keeping a history of sequence editing, and for output of
|
||||
the edited sequence. Ted has been used extensively in the C.
|
||||
elegans genome sequencing project,
|
||||
both as a stand-alone program and integrated into
|
||||
the Staden sequence assembly package, and has
|
||||
greatly aided in the efficiency
|
||||
and accuracy of sequence editing. It runs in the X
|
||||
windows environment on Sun workstations and is available from the
|
||||
authors. Ted currently supports sequence and trace data from the ABI
|
||||
373A and Pharmacia A.L.F. sequencers.
|
||||
|
||||
\subsubsection*{INTRODUCTION}
|
||||
Time involved in sequence editing is extensive, and anything easing
|
||||
that burden will improve the efficiency of any major sequencing
|
||||
project. Having sequence and trace data available online in easily-
|
||||
manipulable form is invaluable. Ted (a Trace-EDitor) was developed to
|
||||
fill this role in the C. elegans genome
|
||||
sequencing project [1].
|
||||
|
||||
\subsubsection*{METHODS}
|
||||
|
||||
{\em Computing Design and Implementation.}
|
||||
When designing ted, we had a number of specific computing goals
|
||||
in mind including portability and adaptability. For portability, we
|
||||
chose to write ted in ANSI C using the X windowing system and the
|
||||
Xaw toolkit. X provides basic capabilities for the creation and use
|
||||
of windows, and the toolkit contains a number of pre-packaged
|
||||
components, such as the ``sliders'' used for scrolling. X also allows
|
||||
site, user and per-run defaults to be set. Adaptability is also an
|
||||
important goal since we are providing a new function to
|
||||
research groups who are constantly adding new requirements.
|
||||
|
||||
Stylistically, we have followed an ``Abstract Data Type''
|
||||
discipline. In this discipline, a program is split into a number of
|
||||
modules which provide separate, well-defined functions. We
|
||||
separate the interface of a module from its implementation. For
|
||||
example, a unified internal sequence format is used. This can store
|
||||
a varying amount of information. However, there is a clear and
|
||||
simple interface by which the rest of the program accesses this
|
||||
module. Such a style is not well supported by C, but its adoption has
|
||||
been very successful. The addition of new sequencing machines, and
|
||||
thus new external data formats, may cause some changes in the
|
||||
internal representation of the sequence but should not affect
|
||||
the rest of the program.
|
||||
|
||||
Ted accepts a large number of optional command line arguments,
|
||||
many of which can also be specified as system defaults. This
|
||||
supports a mode of working whereby ted is invoked not directly by the
|
||||
user but instead by a script or another application which supplies
|
||||
arguments appropriate to the editing task.
|
||||
|
||||
|
||||
{\em Graphical Interface.}
|
||||
Ted currently accepts data from two fluorescence based sequencing
|
||||
machines, the Pharmacia A.L.F. and the ABI 373A.
|
||||
The sequencing machine data consists of
|
||||
four traces of fluorescence levels together with the machine's
|
||||
interpretation, which is a sequence of bases.
|
||||
Ted displays
|
||||
the traces and the machine-generated base list.
|
||||
A second, initially identical, list of bases is provided for correction
|
||||
by the user.
|
||||
|
||||
Ted has an X windows based
|
||||
graphical interface. The trace file
|
||||
can either be input from the command line or by
|
||||
clicking on the INPUT button after the program has been invoked.
|
||||
Other parameters which the user may specify on the
|
||||
command line include: the output
|
||||
file name; a base position or sequence string on which the trace is
|
||||
to be centered; a default trace magnification; a 5' vector sequence
|
||||
for automated elimination of the sequence head (vector); top or
|
||||
bottom strand orientation; or any of the usual X-window parameters (e.g.
|
||||
display, geometry...).
|
||||
|
||||
The graphics display (Figure 1) consists of the control
|
||||
panel, the base position information, the original and edited sequence
|
||||
data, and the graphical representation of the trace. The user may
|
||||
begin by using the control panel INPUT button to input a new trace
|
||||
file at which time the user selects whether to view the sequence
|
||||
and trace in top or bottom strand orientation.
|
||||
The trace file is displayed and, if a 5' vector sequence has been
|
||||
specified on the command line, the program attempts to select a
|
||||
cutoff point corresponding to the vector sequence at the ``head'' of the
|
||||
trace file. The bases beyond the ``cutoff'' point are
|
||||
displayed on a shaded background. The user may modify the cutoff
|
||||
position by clicking on the ``Adj left cut'' button and clicking on the
|
||||
position of the desired cutoff. Similarly, the user may adjust the
|
||||
right cutoff of the sequence (chosen by starting at the 5' end of the
|
||||
sequence and looking for the first occurrence when 2 out of 5 bases
|
||||
are 'N') by scrolling along the sequence to that point, clicking on the
|
||||
``Adj right cut'' button, and clicking on the appropriate base.
|
||||
Automation of the ``cutoff'' process is optional; the user may compile
|
||||
the program with that feature turned ``off.''
|
||||
|
||||
Clicking on the ``Edit seq'' button allows the user to enter the edit
|
||||
mode. The ``Search'' button can be used to skip from ``problem'' to
|
||||
``problem'' (i.e., ambiguity to ambiguity) or to look for runs of
|
||||
identical bases (e.g., TTTT) which are often mis-called by
|
||||
the machine software.
|
||||
|
||||
Bases can be inserted, deleted, or replaced as with
|
||||
any ordinary word-processor. In difficult-to-read areas,
|
||||
the trace may be vertically or horizontally scaled by dragging or
|
||||
clicking on the magnification scroll bar or by clicking on the
|
||||
vertical scaling buttons (``Scale down'', ``Scale up''), respectively.
|
||||
Finally, the edited sequence is saved to an ascii file using the
|
||||
``Output'' button. A history of the editing session can also be saved
|
||||
along with the sequence.
|
||||
The ``Quit'' button is used
|
||||
to exit the program. When reinvoking ted on an edited trace file the
|
||||
edited base sequence, rather than the original sequence, is shown in
|
||||
the edited base window. The user may invoke ted by calling in any one
|
||||
of the previous editing sessions.
|
||||
|
||||
|
||||
\subsubsection*{APPLICATIONS AND CONCLUSIONS}
|
||||
|
||||
In the C. elegans genome sequencing project, data from the ABI or
|
||||
A.L.F. sequencing machines' computers are transferred to Sun
|
||||
workstations.
|
||||
The user invokes a Unix shell script that calls ted systematically
|
||||
on each of the new set of trace files creating a set of sequence files.
|
||||
The sequence files that are deemed to be of acceptable quality
|
||||
are then entered into the sequence
|
||||
assembly program xdap [2] where the sequences are assembled into
|
||||
contigs. Portions of the ted trace-editor have been incorporated
|
||||
into the xdap ``trace manager,'' which is used in
|
||||
conjunction with the contig editor to view sets of aligned traces
|
||||
at sites of discrepancies in the aligned sequences.
|
||||
|
||||
Ted is also used at the stage of choosing oligo primers for the
|
||||
``walking'' stage of the sequencing project. It can be invoked directly
|
||||
from the oligo selection program, osp [3], to allow examination
|
||||
of the trace data in the region of the primers so that
|
||||
integrity of the sequence data can be verified.
|
||||
|
||||
Currently, no other programs are known to be available
|
||||
which support editing of the ABI trace data.
|
||||
Further, the modular design of the program should allow
|
||||
support for new types of sequencing machines, with new data
|
||||
formats, to be implemented in a straightforward fashion.
|
||||
|
||||
|
||||
\subsubsection*{AVAILABILITY}
|
||||
Ted is freely available from the authors or from Rodger Staden and
|
||||
Simon Dear (MRC Laboratory of Molecular Biology, Hills Road, Cambridge,
|
||||
UK, CB2 2QH) for use on Sun workstations running X-windows (or OpenLook).
|
||||
|
||||
|
||||
\subsubsection*{ACKNOWLEDGMENTS}
|
||||
The authors would like to thank all members of the C. elegans
|
||||
sequencing project with special thanks to the following people:
|
||||
John Sulston, Bob Waterston,
|
||||
Phil Green, Rick Wilson, Richard Durbin, Simon Dear, and Rodger Staden
|
||||
for their helpful suggestions for improvements in the ted interface
|
||||
and for their parts in the development of ted. This work was
|
||||
supported by the Medical Research Council and NIH grant R01-HG00136.
|
||||
|
||||
\subsubsection*{REFERENCES}
|
||||
|
||||
1. Waterston, R., Sulston, J., et al. (1991), in preparation.
|
||||
|
||||
2. Dear, S. and Staden, R. (1991) Nuc. Acids Res., in press.
|
||||
|
||||
3. Hillier, L. and Green, P. (1991) submitted.
|
||||
|
||||
|
||||
{\bf Figure 1 legend.}
|
||||
|
||||
Figure 1 shows a ``screen dump'' of the ted graphical interface.
|
||||
The display consists of
|
||||
the control panel and the synchronized view of the base position
|
||||
information, original and edited sequence data,
|
||||
and graphical representation of the trace (with each nucleotide's trace
|
||||
being represented
|
||||
by a different color). The control
|
||||
panel allows the user to read in new trace files (in either
|
||||
bottom or top strand orientation)
|
||||
as well as to search for a string of nucleotides or a certain base position.
|
||||
Scroll bars allow the user to adjust the magnification of or scroll through
|
||||
the sequence and trace data. The user may also choose to change the vertical
|
||||
magnification of the trace data. Further, sequence on the head (vector)
|
||||
or tail (uncertain data) of the sequence may be ``cutoff''
|
||||
using the adjust left and right cutoff buttons. Bases can be inserted,
|
||||
deleted, or replaced as with
|
||||
any ordinary word-processor in the sequence data window. Finally, the
|
||||
sequence may be written to an ascii file using the output button on
|
||||
the control panel.
|
||||
|
||||
\end{document}
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,205 @@
|
|||
.NPA
|
||||
.left margin1
|
||||
.CENTER
|
||||
GIP
|
||||
.LEFT MARGIN1
|
||||
.PARA
|
||||
A digitizer is
|
||||
a two dimensional surface
|
||||
which is such that if a special pen is pressed onto it, the pen's
|
||||
coordinates can be recorded by a computer.
|
||||
These coordinates
|
||||
can be interpreted by a program.
|
||||
.para
|
||||
The digitizing device we use works by the pen emitting a high frequency
|
||||
sound which is picked up by two microphones positioned at the rear of the
|
||||
working area. The pen position is determined by triangulation and the
|
||||
digitizing device sends the coordinates to the computer. As no special
|
||||
surface is required the device can conveniently be positioned on a light
|
||||
box giving the sequencer an unobscured view of the autoradiographs.
|
||||
.LEFT MARGIN1
|
||||
The digitizer
|
||||
is called a GRAPHBAR MODEL GP7 made by
|
||||
Science Accessories Corp,
|
||||
970 Kings Highway West,
|
||||
Southport,
|
||||
Connecticut 06490,
|
||||
USA.
|
||||
|
||||
.para
|
||||
The program uses a menu to allow the user to select commands or
|
||||
to enter the uncertainty codes for areas of the gel that are
|
||||
difficult to interpret. A menu is simply a series of boxes drawn on
|
||||
the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. A copy of the menu should have been sent to you.
|
||||
It should be stuck down on the surface of the
|
||||
light box in the digitizing area. For convenience it is best to position it
|
||||
to the right of the digitizing area, but in practice as long as
|
||||
its top
|
||||
edge is parallel to the digitizer box, it can be put anywhere in the active
|
||||
region.
|
||||
.sk1
|
||||
.left margin1
|
||||
Entering gel readings using a digitizer
|
||||
.left margin1
|
||||
.para
|
||||
The autoradiograph should be stuck down on the light box with the lanes
|
||||
running, as near as is
|
||||
possible, at right angles to the digitizer. To read
|
||||
an autoradiograph placed on the light box
|
||||
the user need only define the positions of
|
||||
the four sequencing lanes and the bases
|
||||
to which they correspond and then use the pen to point to each
|
||||
successive band progressing up the gel. The program examines the
|
||||
coordinates of each pen position to see in which of the four
|
||||
lanes
|
||||
it lies and assigns the corresponding base to be stored in the
|
||||
computer. Each time the pen tip is depressed to point to a position
|
||||
on the surface of the digitizer the program sounds the bell on the
|
||||
terminal (a different sound for each of the four bases on the
|
||||
microcomputer version of the program)
|
||||
to indicate to the user that a point has been recorded. As
|
||||
the sequence is read the program displays it on the screen.
|
||||
|
||||
|
||||
.para
|
||||
The program uses a menu
|
||||
to allow the user to select commands or
|
||||
to enter the uncertainty codes for areas of the gel that are
|
||||
difficult to interpret. A menu is simply a series of boxes drawn on
|
||||
the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. As well as the uncertainty codes
|
||||
A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8 the following commands are
|
||||
included in the menu: DELETE removes the last character from
|
||||
the sequence;
|
||||
RESET allows the lane centres to be redefined;
|
||||
START means begin the next
|
||||
stage of the procedure; STOP means stop the current stage in the
|
||||
procedure; CONFIRM means confirm that the last command or set of
|
||||
coordinates are correct.
|
||||
.para
|
||||
The digitizing device also has a menu of its own. This lies in a two inch wide
|
||||
strip immediately in front of the digitizing box. Pen positions within this
|
||||
two inch strip are interpreted as commands to the digitizer and are not
|
||||
sent to the GIP program. In general the only time users will need to use
|
||||
the device menu is when they tell GIP where the program menu lies in the
|
||||
digitizing area. This is done by first hitting ORIGIN in the device menu
|
||||
and then hitting the bottom left hand corner of the program menu. The
|
||||
program menu can hence be positioned anywhere in the active region but
|
||||
should be arranged parallel to the digitizer.
|
||||
.para
|
||||
The user should try to hit the bands as near as possible to the centre of
|
||||
the lanes because the program tracks the lanes up the film using the pen
|
||||
positions. By using this tracking strategy the user only has to define the
|
||||
centres of the bottom of the lanes before starting to read the film. The
|
||||
program can correctly follow quite curved lanes and constantly checks that
|
||||
its lane centre coordinates look sensible. If the lane centres appear to be
|
||||
getting too close the program stops responding to the pen positions of
|
||||
bands and hence does not ring the bell. If this occurs users must hit the
|
||||
reset box in the menu and the program will request them to redefine the
|
||||
lane centres at the current reading position. Then they can continue
|
||||
reading. As a further safeguard the program will only respond to pen
|
||||
positions either in the menu or very close to the current reading position.
|
||||
.sk1
|
||||
.left margin1
|
||||
Running the gel reading program
|
||||
.left margin1
|
||||
The autoradiograph should be firmly stuck down on the light box and the
|
||||
program started by typing GIP. It will ask the first question.
|
||||
.left margin2
|
||||
" ? FILE OF FILE NAMES="
|
||||
.left margin2
|
||||
Type the name for the file of file names and then follow the instructions.
|
||||
.left margin2
|
||||
" HIT DIGITIZER MENU ORIGIN"
|
||||
.left margin2
|
||||
" THEN PROGRAM MENU ORIGIN"
|
||||
.left margin2
|
||||
" THEN HIT START IN PROGRAM MENU"
|
||||
.left margin2
|
||||
If the bell does not sound after you hit start try hitting metric in the
|
||||
device menu (the program uses metric units, and some digitizers are set to
|
||||
default to use inches; hitting metric switches between the two).
|
||||
.left margin2
|
||||
After the bell has sounded the program will give the default lane order.
|
||||
.left margin2
|
||||
" LANE ORDER IS T C A G"
|
||||
.left margin2
|
||||
" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
|
||||
.left margin2
|
||||
If the lane order, reading from left to right is correct hit confirm in the
|
||||
program menu. If you are using a different order hit reset and you will be
|
||||
asked to define the lane order from left to right using the program menu
|
||||
(as follows).
|
||||
.left margin2
|
||||
" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
|
||||
.left margin2
|
||||
Hit the boxes in the menu that contain the symbols A,C,G,T in the
|
||||
left-right order of the lanes. The program will respond with the lane order
|
||||
as above and ask for confirmation. When this is received, the next task is
|
||||
to define the start positions of the next four lanes.
|
||||
.left margin2
|
||||
" HIT START, THEN HIT (LEFT TO RIGHT)"
|
||||
.left margin2
|
||||
" THE START POSITIONS FOR THE NEXT FOUR LANES"
|
||||
.left margin2
|
||||
Hit the centres of the four lanes at a height level with the first band
|
||||
that is going to be read. The program will report the mean lane separations
|
||||
and ask for confirmation that they are correct.
|
||||
.left margin2
|
||||
" MEAN LANE SEPARATION IS XX"
|
||||
.left margin2
|
||||
" HIT CONFIRM TO CONTINUE"
|
||||
.left margin2
|
||||
Users will become familiar with the values from their films and will spot
|
||||
any unusual numbers.
|
||||
Asking for confirmation allows users to try again if they had made a
|
||||
mistake, but generally the lane separation values can be ignored.
|
||||
Hit confirm, and the program will give the message
|
||||
.left margin2
|
||||
" HIT START WHEN READY TO BEGIN READING"
|
||||
.left margin2
|
||||
Hit start and the program will give the message
|
||||
.left margin2
|
||||
" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
|
||||
.left margin2
|
||||
Hit the bands, interpreting the sequence progressing
|
||||
up the film. If necessary use the uncertainty codes. If the pen stops
|
||||
responding hit reset and follow the instructions as above. When the
|
||||
sequence becomes unreadable hit stop and the program will ask for a file
|
||||
name for the gel reading just read.
|
||||
.left margin2
|
||||
" ? FILE NAME FOR THIS GEL READING="
|
||||
.left margin2
|
||||
Type the file name observing the rules about legal gel reading names.
|
||||
The program will ask if you wish
|
||||
to read another sequence.
|
||||
.left margin2
|
||||
" TO ENTER ANOTHER GEL READING TYPE 1"
|
||||
.left margin2
|
||||
To enter another type 1 and you will be back to the step of defining the
|
||||
lane order. Typing anything else will stop the program.
|
||||
.left margin1
|
||||
.sk1
|
||||
Running the microcomputer version of the gel reading program
|
||||
.left margin1
|
||||
The microcomputer version of GIP is slightly different and is called
|
||||
GIPB. The BBC micro
|
||||
does not have the capacity to process the gel readings beyond the reading
|
||||
stage.
|
||||
This means that users of this program
|
||||
would need to transfer their gel readings from the micro to another machine
|
||||
using a terminal emulator. Transferring many files is tedious and so the
|
||||
microcomputer version of the gel reading program stores all the gel
|
||||
readings for each run of the program in a single file. This special
|
||||
file contains both sequences and file names and can be moved in a single
|
||||
transfer to another machine. Once on the other machine the single file must
|
||||
be split into separate gel reading files and a file of file names. This is
|
||||
done using the program BSPLIT. As far as using the microcomputer version
|
||||
of GIP, the only difference is that the first file name the program
|
||||
requests is not a file of file names, but a name for the single file to
|
||||
contain all the gel readings and their names.
|
|
@ -0,0 +1,859 @@
|
|||
.NPA
|
||||
.SP 1
|
||||
.left margin1
|
||||
@-1. TX 0 @General
|
||||
.sp
|
||||
@-2. T 0 @Screen control
|
||||
.sp
|
||||
@-2. X 0 @Screen
|
||||
.sp
|
||||
@-3. TX 0 @Dictionary analysis
|
||||
.sp
|
||||
@0. TX -1 @MEP
|
||||
.left margin2
|
||||
.para
|
||||
This is a program for analysing families of nucleotide sequences in order
|
||||
to find common motifs and potential binding sites.
|
||||
The ideas in this program were described in Staden, R. "Methods
|
||||
for discovering novel motifs in nucleic acid sequences".
|
||||
Computer Applications in the Biosciences, 5, 293-298, (1989).
|
||||
.PARA
|
||||
The program can read
|
||||
sequences stored in either of two formats: 1) all sequences aligned in a
|
||||
single file; 2) all sequences in separate files and accessed through a file
|
||||
of file names.
|
||||
.PARA
|
||||
The program contains functions that can answer several questions
|
||||
about a set of sequences:
|
||||
.SK1
|
||||
.left margin2
|
||||
Which words are most common?
|
||||
.left margin2
|
||||
Which words occur in the most sequences?
|
||||
.left margin2
|
||||
Which words contain the most information?
|
||||
.left margin2
|
||||
Which words occur in equivalent positions in the sequences?
|
||||
.left margin2
|
||||
Which words are inverted repeats?
|
||||
.left margin2
|
||||
Which words occur on both strands of the sequences?
|
||||
.left margin2
|
||||
Where are the inverted repeats?
|
||||
.left margin2
|
||||
Where are the fuzzy words?
|
||||
.para
|
||||
Most of the program is
|
||||
concerned with analysing
|
||||
what it terms "fuzzy
|
||||
words" within the set of sequences. The analysis is explained
|
||||
below. Note that the standard version of the programs is limited
|
||||
to words of maximum length 8 letters, and a maximum fuzziness
|
||||
of 2.
|
||||
.para
|
||||
The following analyses (preceded by their option numbers) are included:
|
||||
.lit
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
.end lit
|
||||
.para
|
||||
Some of these methods produce graphical
|
||||
results
|
||||
and so the
|
||||
program is generally used from a graphics terminal (a vdu on which lines
|
||||
and points can be drawn as well as characters).
|
||||
.para
|
||||
.LEFT MARGIN2
|
||||
The position of each of the plots is defined relative to a user's drawing
|
||||
board which has size 1-10,000 in x and 1-10,000 in y.
|
||||
Plots for
|
||||
each option are drawn in a window defined by x0,y0 and xlength,ylength.
|
||||
Where x0,y0 is the position of the bottom left hand corner of the window,
|
||||
and xlength is the width of the window and ylength the
|
||||
height of the window.
|
||||
.lit
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
.end lit
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
.para
|
||||
The options for the program are accessed from 3 main menus: general, screen
|
||||
control and dictionary analysis.
|
||||
Both menus and options are selected by number.
|
||||
.para
|
||||
The most important and novel part of the program is its use of "fuzzy
|
||||
dictionaries" and an information theory measure, to help show the most
|
||||
interesting motifs.
|
||||
|
||||
Central to the method is the idea of a fuzzy dictionary of word
|
||||
frequencies. A dictionary of word frequencies is an ordered list of
|
||||
all the words in the sequences and a count of the number of times
|
||||
that they occur. A fuzzy dictionary is an equivalent list but which
|
||||
contains instead, for each word, a count of the number of times
|
||||
similar words occur in the sequences. We term words that are
|
||||
similar "relations". The fuzziness is defined by the number of
|
||||
letters in a word that are allowed to be different. So if we had a
|
||||
fuzziness of 1 we allow 1 letter to be different. For example, with
|
||||
a fuzziness of 1, the entry in the fuzzy dictionary for the word
|
||||
TTTTTT would contain a count of the numbers of times TTTTTT
|
||||
occurred plus the number of times all words differing by exactly
|
||||
one letter from TTTTTT occurred.
|
||||
.para
|
||||
Once the fuzzy dictionary has been created we can examine it in
|
||||
several ways to find candidate control sequences. The simplest
|
||||
question we can ask is which word in the dictionary is the most
|
||||
common. Sometimes this simple criterion of "most common" may
|
||||
be adequate to discover a new motif but in general we would not
|
||||
expect it to be sufficient. For example some words will be common
|
||||
simply because of a base composition bias in the sequences being
|
||||
analysed. In addition a word can be the most frequent and yet not
|
||||
be "well defined". This last point is best explained by an example.
|
||||
.para
|
||||
Suppose we were looking at two letter words and allowing one
|
||||
mismatch, and that there were 10 occurrences of TT and 5 of AC.
|
||||
We could align the 10 words that were one letter different from TT
|
||||
and the 5 that were related to AC. Then we could count the
|
||||
number of times each base occurred in each position for each of
|
||||
these two sets of words. Suppose we got the two base frequency
|
||||
tables shown below.
|
||||
.lit
|
||||
TT AC
|
||||
T 6 4 T 1 0
|
||||
C 1 3 C 0 4
|
||||
A 1 2 A 4 1
|
||||
G 2 1 G 0 0
|
||||
|
||||
.end lit
|
||||
These tables show that although TT occurs (with one letter
|
||||
mismatch) more often than AC, the ratio of base frequencies for
|
||||
AC at 4/5, 4/5 is higher than those for TT at 6/10, 4/10. Hence we
|
||||
would say that AC was better defined than TT.
|
||||
Expressing this another way we would say that the definition of AC
|
||||
contained more information than that for TT. The program
|
||||
calculates the information content in a way that takes into account
|
||||
both the sequence composition and the level of definition of the
|
||||
motif.
|
||||
.para
|
||||
Definitions
|
||||
|
||||
.para
|
||||
Here we deal only with the dictionary analysis.
|
||||
Suppose we are dealing with a set of
|
||||
sequences and are examining them for words that are six
|
||||
characters in length.
|
||||
|
||||
.para
|
||||
Dictionary Dw contains a count of the number of times each word
|
||||
occurs in the set of sequences. For example the entry for TTTTTT
|
||||
contains a value equal to the number of times the word TTTTTT
|
||||
occurs in the set of sequences.
|
||||
|
||||
.para
|
||||
Dictionary Ds contains a count of the number of different sequences in
|
||||
which each word occurs. For example if the entry for word TTTTTT
|
||||
contains the value 10, it denotes that the word TTTTTT occurs in ten
|
||||
different sequences. Unlike Dw it only counts words once for each
|
||||
sequence. For example if we had a set of 100 sequences, the maximum
|
||||
possible value that Ds could take is 100, and this would only happen if
|
||||
a word occurred in every sequence. However for the same set of
|
||||
sequences, Dw could contain values greater than 100, and this would
|
||||
show that a word had occurred more than once in at least one
|
||||
sequence.
|
||||
|
||||
.para
|
||||
From either of the two dictionaries Dw or Ds we can calculate a fuzzy
|
||||
dictionary Dm. For each word, the entry in the fuzzy dictionary Dm
|
||||
contains the sum of the dictionary values (taken from either Dw or Ds)
|
||||
for all words that differ from it by up to m letters. For example if m=2
|
||||
the entry for TTTTTT contains the number of times that TTTTTT
|
||||
occurs in the dictionary, plus the counts for all words that differ from
|
||||
TTTTTT by 1 or 2 letters.
|
||||
Obviously the interpretation of the values in Dm depends on which of
|
||||
the two dictionaries Dw or Ds they were derived from. When derived
|
||||
from Dw the entry for any word in Dm gives the total number of
|
||||
times it, and its relations, occur in the set of sequences. When derived
|
||||
from Ds the entry for any word in Dm gives the total number of
|
||||
different sequences that contain a word and each of its relations.
|
||||
|
||||
.para
|
||||
Finally, from fuzzy dictionary Dm we can derive fuzzy dictionary Dh.
|
||||
All entries in Dh are zero except for the word(s), within each set of
|
||||
relations, that are most frequent. For example if TTTTTT occurred 20
|
||||
times but had a relation that occurred more often, then the entry for
|
||||
TTTTTT would be zero. However if TTTTTT did not have a more
|
||||
frequently occurring relation, then the entry for TTTTTT would
|
||||
contain the value 20.
|
||||
|
||||
.LEFT MARGIN1
|
||||
@1. T 0 @Help
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This option gives online help. The user should select option numbers and
|
||||
the current documentation will be given. Note that option 0 gives an
|
||||
introduction to the program, and that ? will get help from anywhere in
|
||||
the
|
||||
program.
|
||||
The following analyses (preceded by their option numbers) are included:
|
||||
.lit
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
.end lit
|
||||
.left margin1
|
||||
@2. T 0 @Quit
|
||||
.left margin2
|
||||
.para
|
||||
This function stops the program.
|
||||
.left margin1
|
||||
@3. TX 1 @Read a new sequence
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
It can read
|
||||
sequences stored in either of two formats: 1) all sequences aligned in a
|
||||
single file; 2) all sequences in separate files and accessed through a file
|
||||
of file names. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
X 1 Read file of aligned sequences
|
||||
2 Use file of file names
|
||||
? 0,1,2 =
|
||||
|
||||
? File of aligned sequences=F1
|
||||
Number of files 88
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@4. TX 1 @Define active region
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
For its analytic functions
|
||||
the program always works on a region of the sequence called the active
|
||||
region. When new sequences are read into the program the active region is
|
||||
automatically set to start at the beginning of the sequences and go
|
||||
up to the end of the longest one.
|
||||
.left margin1
|
||||
@5. TX 1 @List a sequence
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
The sequence can be listed with line lengths of 50 bases with each sequence
|
||||
numbered in the order in which they were read.
|
||||
Output can be directed to a disk file by
|
||||
first selecting disk output. Typical dialogue follows.
|
||||
.lit
|
||||
|
||||
? Menu or option number=5
|
||||
|
||||
10 20 30 40 50
|
||||
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||
|
||||
60
|
||||
1 TACCCGTTTTT
|
||||
2 GCGTTTTTGT
|
||||
3 TCATACCATAAG
|
||||
4 TTTCATACC
|
||||
5 ATTGTGAGC
|
||||
6 TTCCGGCTCG
|
||||
7 GAAGAGAGT
|
||||
8 TCAGGTGT
|
||||
9 ATGAATG
|
||||
10 TAATTACG
|
||||
.end lit
|
||||
.left margin1
|
||||
@6. TX 1 @List a text file
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Allows the user to have a text file displayed on the screen. It will appear
|
||||
one page at a time.
|
||||
.left margin1
|
||||
@7. TX 1 @Direct output to disk
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Used to direct output that would normally appear on the screen to a file.
|
||||
.para
|
||||
Select redirection of either text or graphics, and
|
||||
supply the name of the file that the output should be written to.
|
||||
.para
|
||||
The results from the next options selected will not appear on the screen
|
||||
but will be written to the file. When option 7 is selected again
|
||||
the file will be
|
||||
closed and output will again appear on the screen.
|
||||
.left margin1
|
||||
@10. TX 2 @Clear graphics
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Clears the screen of both text and graphics.
|
||||
.left margin1
|
||||
@11. TX 2 @Clear text
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Clears only text from the screen.
|
||||
.left margin1
|
||||
@12. TX 2 @Draw a ruler
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This option
|
||||
allows the user to draw a ruler or scale along the x axis of the screen to
|
||||
help identify the coordinates of points of interest. The user can define
|
||||
the position of the first amino acid to be marked (for example if the
|
||||
active
|
||||
region is 1501 to 8000, the user might wish to mark every 1000th amino
|
||||
acid
|
||||
starting at either 1501 or 2000 - it depends if the user wishes to treat
|
||||
the active region as an independent unit with its own numbering starting
|
||||
at
|
||||
its left edge, or as part of the whole sequence). The user can also define
|
||||
the separation of the ticks on the scale and their height. If required the
|
||||
labelling routine can be used to add numbers to the ticks.
|
||||
.left margin1
|
||||
@13. TX 2 @Use crosshair
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This function puts
|
||||
a steerable cross on the screen that can be used to find the
|
||||
coordinates of points in the sequence. The user can move the cross
|
||||
around using the directional keys; when he hits the space bar the
|
||||
program will print out the coordinates of the cross in sequence units and
|
||||
the option will be exited.
|
||||
.para
|
||||
If instead,
|
||||
you hit a , the position will be displayed but the cross will remain on
|
||||
the screen.
|
||||
.para
|
||||
If a letter s is hit the sequence around the cross hair is displayed and
|
||||
the cross remains on the screen.
|
||||
.left margin1
|
||||
@14. TX 2 @Reposition plots
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
The position of each of the plots is defined relative to a user's drawing
|
||||
board which has size 1-10,000 in x and 1-10,000 in y.
|
||||
Plots for
|
||||
each option are drawn in a window defined by x0,y0 and xlength,ylength.
|
||||
Where x0,y0 is the position of the bottom left hand corner of the window,
|
||||
and xlength is the width of the window and ylength the
|
||||
height of the window.
|
||||
.lit
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
.end lit
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
As all the plots start
|
||||
at the same position in x and have the same width, x0 and xlength are the
|
||||
same for all options. Generally users will only want to change the start
|
||||
level of the window y0 and its height ylength.
|
||||
This option
|
||||
allows users to change window positions whilst running the program.
|
||||
The routine prompts first for the number of the option that the user
|
||||
wishes
|
||||
to reposition; then for the y start and height; then for the x start and
|
||||
length. Note that changes to the x values affect all options. If the user
|
||||
types only carriage return for any value it will remain unchanged.
|
||||
The cross-hair can be used to choose suitable heights.
|
||||
.LEFT MARGIN1
|
||||
@15. TX 2 @Label a diagram
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This routine allows users to label any diagrams they have produced. They
|
||||
are asked to type in a label. When the user types carriage return to finish
|
||||
typing the label the cross-hair appears on the screen. The user can
|
||||
position it anywhere on the screen. If the user types R (for right justify)
|
||||
the label will be
|
||||
written on the diagram with its right end at the cross-hair position.
|
||||
If the user types L (for left justify) the label will be written on the
|
||||
diagram with its left end at the cross hair position.
|
||||
The
|
||||
cross-hair will then immediately reappear. The user may put the same
|
||||
label
|
||||
on another part of the diagram as before or if he hits the space bar he
|
||||
will be asked if he wishes to type in another label.
|
||||
.left margin1
|
||||
@16. TX 2 @Display a map
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
It is often convenient to plot a map alongside graphed analysis in order
|
||||
to
|
||||
indicate features within the sequence. This function allows users to
|
||||
draw
|
||||
maps using files arranged in the form of EMBL feature tables. Of course
|
||||
the
|
||||
EMBL tables are usually only used for nucleic acid sequence annotation
|
||||
but,
|
||||
as long as the features are written in the correct format, they can be
|
||||
employed by this routine. The map is composed of a line representing the
|
||||
sequence and then further lines denoting the endpoints of each feature
|
||||
the
|
||||
user identifies. The user is asked to define the height at which the line
|
||||
representing the sequence should be drawn; then for the feature height;
|
||||
then for the features to plot.
|
||||
.left margin1
|
||||
@17. TX 1 @Search for strings
|
||||
.left margin2
|
||||
.para
|
||||
Search for strings
|
||||
performs searches of all the sequences for selected words and
|
||||
shows which sequences they are found in. The user types in a word and
|
||||
defines the allowed number of mismatches. The results are listed or
|
||||
plotted. If listed the display includes the sequence number, the position
|
||||
in the sequence and the matching string.
|
||||
The results are plotted in the
|
||||
following way. The x axis of the plot represents the length of the aligned
|
||||
sequences and the y direction is divided into sufficient strips to accommodate
|
||||
each sequence. So if a match is found in the 3rd sequence at a position
|
||||
equivalent to halfway along the longest of the sequences then a short
|
||||
vertical line will be drawn at the midpoint of the 3rd strip. If the sequences
|
||||
are aligned it can be useful if the motifs happen to appear in
|
||||
related positions. For example see the original publication. Typical
|
||||
dialogue follows.
|
||||
.lit
|
||||
|
||||
? Menu or option number=17
|
||||
X 1 Plot match positions
|
||||
2 Plot histogram of matches
|
||||
? 0,1,2 =
|
||||
? Word to search for=TTGACA
|
||||
? Minimum match (0-6) (6) =5
|
||||
? (y/n) (y) Plot results N
|
||||
2 35 TAGACA
|
||||
5 14 TTTACA
|
||||
6 37 TTTACA
|
||||
11 14 TAGACA
|
||||
14 14 TTGACA
|
||||
17 14 GTGACA
|
||||
17 22 TTAACA
|
||||
20 1 TTGACA
|
||||
.end lit
|
||||
.left margin1
|
||||
@18. TX 3 @Set strand
|
||||
.left margin2
|
||||
.para
|
||||
Set strand allows the user to define which strand(s) of the sequences to
|
||||
analyse: input strand, complement of input, or both.
|
||||
.left margin1
|
||||
@19. TX 3 @Set composition
|
||||
.left margin2
|
||||
.para
|
||||
Set composition gives the user three choices for setting the composition
|
||||
of the sequences for use in the calculation of the information content of
|
||||
words. The user can select the overall composition of the sequences as read,
|
||||
an even composition, or can type in any other 4 values.
|
||||
.left margin1
|
||||
@20. TX 3 @Set word length
|
||||
.left margin2
|
||||
.para
|
||||
Set word length sets the length of word for which dictionaries will be made.
|
||||
.left margin1
|
||||
@21. TX 3 @Set number of mismatches
|
||||
.left margin2
|
||||
.para
|
||||
Set number of mismatches sets the level of fuzziness for the creation of
|
||||
dictionary Dm.
|
||||
.left margin1
|
||||
@22. TX 3 @Show settings
|
||||
.left margin2
|
||||
.para
|
||||
Show settings shows the current settings for all parameters associated with
|
||||
dictionary analysis. A typical display follows:
|
||||
.lit
|
||||
? Menu or option number=22
|
||||
Current word length = 6
|
||||
Number of mismatches = 1
|
||||
Start position = 1
|
||||
End position = 63
|
||||
Input strand only
|
||||
Observed composition
|
||||
Dictionary Dw unmade
|
||||
Dictionary Ds unmade
|
||||
Dictionary Dm unmade
|
||||
Dictionary Dh unmade
|
||||
.end lit
|
||||
.left margin1
|
||||
@23. TX 3 @Make dictionary Dw
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dw creates a dictionary that contains a count of the
|
||||
frequency of occurrence of each word in the collected sequences.
|
||||
.left margin1
|
||||
@24. TX 3 @Make dictionary Ds
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Ds creates a dictionary that contains a count of the
|
||||
number of different sequences that contain each word.
|
||||
.left margin1
|
||||
@25. TX 3 @Make dictionary Dm from Dw
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dm from Dw creates a dictionary from dictionary Dw that
|
||||
contains the frequency of occurrence of each word (say X) in Dw plus the
|
||||
frequency of occurrence of each word in Dw that differs from X by up to m
|
||||
letters. Dm is called a fuzzy dictionary as it contains the frequencies of
|
||||
occurrence of all words plus the frequencies of all the words that are
|
||||
similar to them.
|
||||
.left margin1
|
||||
@26. TX 3 @Make dictionary Dm from Ds
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dm from Ds creates a dictionary from dictionary Ds that
|
||||
contains the frequency of occurrence of each word (say X) in Ds plus the
|
||||
frequency of occurrence of each word in Ds that differs from X by up to m
|
||||
letters. Dm is called a fuzzy dictionary as it contains the frequencies of
|
||||
occurrence of all words plus the frequencies of all the words that are
|
||||
similar to them.
|
||||
.left margin1
|
||||
@27. TX 3 @Make dictionary Dh from Dm
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dh creates a dictionary from dictionary Dm and whose
|
||||
entries are zero except for those words in any set of related words that
|
||||
are most frequent. It finds the dominant words in each set of relations
|
||||
and stores their counts.
|
||||
.left margin1
|
||||
@28. TX 3 @Examine fuzzy dictionary Dm
|
||||
.left margin2
|
||||
.para
|
||||
Examine dictionary Dm allows users to analyse the contents of dictionary
|
||||
Dm to find the most common words or those words that contain the most
|
||||
information. The user supplies a frequency or information cutoff and chooses
|
||||
to have the results sorted on either value. The program will find the top 100
|
||||
words that achieve the cutoff values and present them to the user sorted
|
||||
as selected. The information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dm, and using the current composition
|
||||
setting. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
? Menu or option number=28
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAC 64 0.66460
|
||||
AAAAAA 90 0.64880
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTG 73 0.64070
|
||||
TTTTGT 63 0.63820
|
||||
TTTTTC 65 0.63810
|
||||
AAAATA 63 0.62670
|
||||
TATAAT 65 0.62510
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =2
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
AAAAAA 90 0.64880
|
||||
TTTTTG 73 0.64070
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTC 65 0.63810
|
||||
TATAAT 65 0.62510
|
||||
AAAAAC 64 0.66460
|
||||
TTTTGT 63 0.63820
|
||||
AAAATA 63 0.62670
|
||||
TTGACA 60 0.73850
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@29. TX 3 @Examine fuzzy dictionary Dh
|
||||
.left margin2
|
||||
.para
|
||||
Examine dictionary Dh allows users to analyse the contents of dictionary Dh
|
||||
to find the most common words or those words that contain the most
|
||||
information. The user supplies a frequency or information cutoff and chooses
|
||||
to have the results sorted on either value. The program will find the top 100
|
||||
words that achieve the cutoff values and present them to the user sorted as
|
||||
selected. The information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dh and using the current composition
|
||||
setting. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
? Menu or option number=29
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.6
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 4 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
TTTTTT 115 0.60630
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =.5
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =
|
||||
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@30. TX 3 @Examine words in Dm
|
||||
.left margin2
|
||||
.para
|
||||
Examine words in Dm allows users to analyse the contents of dictionary Dm at the
|
||||
level of individual words to find their frequency, information content, and to
|
||||
see their base frequency table. The user types in a word to examine and the
|
||||
program displays the values and table. The information content will be
|
||||
calculated from either Dw or Ds depending which was used to create Dm,
|
||||
and using the current composition setting. Typical dialogue follows:
|
||||
.lit
|
||||
? Menu or option number=30
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@31. TX 3 @Examine words in Dh
|
||||
.left margin2
|
||||
.para
|
||||
Examine words in Dh allows users to analyse the contents of dictionary Dh at the
|
||||
level of individual words to find their frequency, information content, and to
|
||||
see their base frequency table. The user types in a word to examine and the
|
||||
program displays the values and table. The information content will be
|
||||
calculated from either Dw or Ds depending which was used to create Dm,
|
||||
and using the current composition setting. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
? Menu or option number=31
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=GGGGGG
|
||||
gggggg 0 0.6199890
|
||||
3 1 1 2 3 4
|
||||
1 3 1 2 2 1
|
||||
2 1 1 1 1 1
|
||||
11 12 14 12 11 11
|
||||
GGGGGG
|
||||
? Word to examine=
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@32. TX 3 @Save or restore a dictionary
|
||||
.left margin2
|
||||
.para
|
||||
Save or restore dictionary allows users to write or read any dictionary to
|
||||
and from disk files. The user is asked to define the dictionary and file. The
|
||||
function is useful if the machine being used is very slow at calculating
|
||||
because the files can be handled quickly. However note that the files
|
||||
cannot be processed by any other program.
|
||||
.left margin1
|
||||
@33. TX 1 @Find inverted repeats
|
||||
.left margin2
|
||||
.para
|
||||
Find inverted repeats performs searches for simple inverted repeat sequences
|
||||
in each sequence. They are defined by a range of loop sizes and a minimum
|
||||
number of potential basepairs. The results can be plotted or listed. The x
|
||||
axis of the plot represents the length of the aligned sequences and the y
|
||||
direction is divided into sufficient strips to accommodate each sequence.
|
||||
So if an inverted repeat is found in the 3rd sequence at a position equivalent
|
||||
to halfway along the longest of the sequences then a short vertical line will
|
||||
be drawn at the midpoint of the 3rd strip. Alternatively, if the results are
|
||||
listed, the potential hairpin loops are drawn out, with the sequence number
|
||||
and the position of the loop. Typical dialogue follows.
|
||||
.lit
|
||||
|
||||
? Menu or option number=33
|
||||
Define the range of loop sizes
|
||||
? Minimum loop size (0-10) (3) =0
|
||||
? Maximum loop size (1-20) (3) =
|
||||
? Minimum number of basepairs (1-20) (6) =
|
||||
? (y/n) (y) Plot results N
|
||||
Searching
|
||||
|
||||
Sequence 3 34
|
||||
C
|
||||
G.T
|
||||
T-A
|
||||
A-T
|
||||
T.G
|
||||
T.G
|
||||
G.T
|
||||
ATCTTT TATTTCA
|
||||
33
|
||||
|
||||
Sequence 5 35
|
||||
T
|
||||
G.T
|
||||
T.G
|
||||
A-T
|
||||
T.G
|
||||
G.T
|
||||
C-G
|
||||
T.G
|
||||
TCCGGC AATTGTG
|
||||
34
|
||||
.end lit
|
||||
.left margin1
|
||||
@ End of help
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,88 @@
|
|||
.NPA
|
||||
.SP 1
|
||||
.left margin1
|
||||
@-1. TX 0 @General
|
||||
.sp
|
||||
@-2. TX 0 @Screen control
|
||||
.sp
|
||||
@-3. TX 0 @Statistical analysis
|
||||
.sp
|
||||
@-1. TX 0 @General
|
||||
.sp
|
||||
@-2. TX 0 @Screen control
|
||||
.sp
|
||||
@-3. TX 0 @Statistical analysis
|
||||
.sp
|
||||
@0. TX -1 @NIPF
|
||||
.sp
|
||||
@1. TX 1 @ Help
|
||||
.sp
|
||||
@2. TX 1 @ Quit
|
||||
.sp
|
||||
@3. TX 1 @ Read new sequence
|
||||
.sp
|
||||
@4. TX 1 @ Redefine active region
|
||||
.sp
|
||||
@5. TX 1 @ List the sequence
|
||||
.sp
|
||||
@6. TX 1 @ List a text file
|
||||
.sp
|
||||
@7. TX 1 @ Direct output to disk
|
||||
.sp
|
||||
@8. TX 1 @ Write active sequence to disk
|
||||
.sp
|
||||
@9. TX 1 @ List a translation
|
||||
.sp
|
||||
@32. TX 1 @ List showing base differences
|
||||
.sp
|
||||
@37. TX 1 @ List showing translation
|
||||
.sp
|
||||
@33. TX 1 @ List showing amino acid differences
|
||||
.sp
|
||||
@10. TX 2 @ Clear graphics
|
||||
.sp
|
||||
@11. TX 2 @ Clear text
|
||||
.sp
|
||||
@12. TX 2 @ Draw a ruler
|
||||
.sp
|
||||
@13. TX 2 @ Use cross hair
|
||||
.sp
|
||||
@14. TX 2 @ Reset margins
|
||||
.sp
|
||||
@15. TX 2 @ Label diagram
|
||||
.sp
|
||||
@16. TX 2 @ Display a map
|
||||
.sp
|
||||
@17. TX 3 @ Set comparison mode
|
||||
.sp
|
||||
@18. TX 3 @ Set sort mode
|
||||
.sp
|
||||
@21. TX 3 @ Count base changes
|
||||
.sp
|
||||
@22. TX 3 @ Count codon changes
|
||||
.sp
|
||||
@23. TX 3 @ Count genetic events
|
||||
.sp
|
||||
@24. TX 3 @ Show table of base changes
|
||||
.sp
|
||||
@36. TX 3 @ Show table of expressed base changes
|
||||
.sp
|
||||
@39. TX 3 @ Show table of silent base changes
|
||||
.sp
|
||||
@38. TX 3 @ Estimate mutation rate
|
||||
.sp
|
||||
@25. TX 3 @ Plot base changes
|
||||
.sp
|
||||
@26. TX 3 @ Plot expressed changes per base
|
||||
.sp
|
||||
@27. TX 3 @ Plot silent changes per base
|
||||
.sp
|
||||
@28. TX 3 @ Count expressed changes per base
|
||||
.sp
|
||||
@29. TX 3 @ Count silent changes per base
|
||||
.sp
|
||||
@30. TX 3 @ Count changed amino acids
|
||||
.sp
|
||||
@31. TX 3 @ Plot amino acid variability
|
||||
.sp
|
||||
@ end of help
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,38 @@
|
|||
README file for help directory of staden package
|
||||
-----------------------------------------------
|
||||
|
||||
Should contain (at least) ProgramName_help where ProgramName is each of
|
||||
bap, dap, gip, mem, mep, nip, nipf, pip, sap, sip and also staden_help
|
||||
and stadenp_help.
|
||||
|
||||
There are 3 main formats of file in this directory:
|
||||
|
||||
PROGRAM.RNO:
|
||||
This is the unformatted (runoff/nroff style) help for PROGRAM.
|
||||
Any changes to the help should be performed on this file.
|
||||
|
||||
program_help:
|
||||
This is the online formatted help used by PROGRAM. It can also
|
||||
be printed to produce hardcopy documentation.
|
||||
|
||||
program_menu:
|
||||
This is a file that describes the menus used in PROGRAM,
|
||||
together with an index into the program_help file for the
|
||||
online help. The format for each line is:
|
||||
|
||||
<option number> <menu number> <program_help offset> <no. of
|
||||
lines of help> <program type T(ext) or (X)windows> <option name>
|
||||
|
||||
|
||||
Exceptions to these are for the staden_help, stadenp_help, and
|
||||
splitp_help which do not have the relevant .RNO or _menu files. The
|
||||
file staden_help gives an introduction to the xterm user interface
|
||||
(written for vax and vms and so is out of date with the Unix
|
||||
versions).
|
||||
|
||||
See the file splitp_help for information about the reformatting of the
|
||||
PROSITE motif library.
|
||||
|
||||
Rebuild help files with the Unix command "make all". Ensure that the utility
|
||||
program sethelp is compiled and in the executables search path. The sources
|
||||
for the program sethelp are found in $STADENROOT/staden.
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,125 @@
|
|||
.para
|
||||
Preparing the PROSITE protein motif library for use by the Staden programs
|
||||
.para
|
||||
Introduction
|
||||
.para
|
||||
A library of protein motifs (in our terminology, because they include
|
||||
variable gaps, some would be called patterns) has recently become available
|
||||
from Amos Bairoch, Departement de Biochimie Medicale, University of Geneva.
|
||||
Currently it contains 317 patterns/motifs and arrives on tape or cdrom
|
||||
in two files:
|
||||
a .dat file and a .doc file. There is also a user documentation file
|
||||
prosite.usr. Here I outline what is required to prepare the PROSITE library for
|
||||
use by our programs.
|
||||
.para
|
||||
Three programs need to be run: SPLITP1, SPLITP2, and SPLITP3.
|
||||
.PARA
|
||||
Outline of the PROSITE files
|
||||
.para
|
||||
A typical entry in the .dat file is shown below.
|
||||
.lit
|
||||
|
||||
ID 2FE2S_FERREDOXIN; PATTERN.
|
||||
AC PS00197;
|
||||
DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
|
||||
DE 2Fe-2S ferredoxins, iron-sulfur binding region signature.
|
||||
PA C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
|
||||
NR /RELEASE=14,15409;
|
||||
NR /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
|
||||
NR /FALSE_NEG=5(5);
|
||||
CC /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
|
||||
CC /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
|
||||
DR P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
|
||||
DR P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
|
||||
DR P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
|
||||
DO PDOC00175;
|
||||
//
|
||||
.end lit
|
||||
.para
|
||||
Each entry has an accession number (here PS00197), a pattern definition
|
||||
(here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a documentation file
|
||||
cross reference (here PDOC00175).
|
||||
This pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of STA,
|
||||
not P, C.
|
||||
.para
|
||||
We need to convert all of these patterns into our pattern definitions
|
||||
(as membership of a set, with the appropriate gap ranges) and write each
|
||||
into a separate pattern file with corresponding "membership of a set"
|
||||
weight matrices. Each
|
||||
pattern file is named accession_number.pat (here PS00197.PAT). The
|
||||
corresponding matrix files are accession_number.wtsa,
|
||||
accession_number.wtsb, etc for however many are needed (here PS00197.WTSA
|
||||
and PS00197.WTSB): two are needed because of the variable gap.
|
||||
.para
|
||||
In addition we can optionally
|
||||
split the .dat and .doc files into separate files, one for each
|
||||
entry, with names accession_number.dat and accession_number.doc. Also we
|
||||
create an index for the library prosite.lis, which
|
||||
gives a one line description of each pattern, and ends with the pattern
|
||||
file and documentation file numbers. The start of the file is shown below.
|
||||
.lit
|
||||
|
||||
N-glycosylation site. 00001,00001
|
||||
Glycosaminoglycan attachment site. 00002,00002
|
||||
Tyrosine sulfatation site. 00003,00003
|
||||
cAMP- and cGMP-dependent protein kinase phosphorylation site. 00004,00004
|
||||
|
||||
.end lit
|
||||
So the name of the pattern file for Glycosaminoglycan attachment site is
|
||||
PS00002.PAT, and for the documentation file PDOC00002.DOC.
|
||||
.para
|
||||
Finally we
|
||||
create a file of file names for all the patterns in the library.
|
||||
.para
|
||||
To use the complete PROSITE library from program pip, select "pattern searcher"
|
||||
and choose the
|
||||
option "use file of pattern file names", and give the file name
|
||||
prosite.nam. For any matches found, the accession number and pattern title
|
||||
will be
|
||||
displayed.
|
||||
|
||||
.para
|
||||
Running the conversion programs
|
||||
.para
|
||||
|
||||
Only SPLITP3 is necessary for using the library. The other programs
|
||||
only make the
|
||||
original files marginally easier to browse through and produce an index.
|
||||
.para
|
||||
SPLITP1 splits the prosite.dat file to create a separate file for each
|
||||
entry. Each file is automatically named PSentry_number.dat. In addition it
|
||||
creates an index for the library (see above).
|
||||
.para
|
||||
SPLITP2 performs the same operation for the Prosite.doc file, except that
|
||||
no index is created. Files are named PSentry_number.doc.
|
||||
.para
|
||||
SPLITP3 creates a separate pattern file and weight matrix files for each
|
||||
prosite entry from the file prosite.dat. Pattern files are named
|
||||
PSentry_number.pat, weight matrix files PSentry_number.wtsa,
|
||||
PSentry_number.wtsb, etc. The pattern title is the one line description
|
||||
of the motif. SPLITP3 also creates a file of file names. Notice that it
|
||||
will ask for a path name so that the path can be included in the file of
|
||||
file names. This is the path to the directory in which the pattern files
|
||||
are stored.
|
||||
.para
|
||||
Notes
|
||||
.para
|
||||
Obviously the use of files of file names is a general solution, and anybody
|
||||
could now create their own set of interesting patterns for screening, or a
|
||||
subset of prosite.nam, etc.
|
||||
.para
|
||||
Note that 5 of the Bairoch motifs contained the symbols > or < which
|
||||
means that the motifs must appear exactly at the N or C termini of the
|
||||
sequences. Currently our methods have no mechanism for such definitions and,
|
||||
for example KDEL motifs, will be permitted to occur anywhere throughout
|
||||
a sequence.
|
||||
|
||||
.para
|
||||
Also, of course, the library does not have to be used solely for performing
|
||||
mass screenings: each individual entry can be used as a single pattern by
|
||||
giving the name of its .pat file - eg pathname/ps00002.pat
|
||||
In addition more sophisticated users will wish to copy pattern files and
|
||||
weight matrices into their own directories and modify them. For example the
|
||||
cutoff scores are probably chosen to be quite high in order to reduce the
|
||||
number of false positives, and some users might wish to lower them.
|
||||
|
|
@ -0,0 +1,354 @@
|
|||
.npa
|
||||
.left margin2
|
||||
.para
|
||||
Introduction to the Staden sequence analysis package and its user interface
|
||||
.PARA
|
||||
The package contains the following programs:
|
||||
.lit
|
||||
|
||||
GIP Gel input program
|
||||
SAP Sequence assemble program
|
||||
NIP Nucleotide interpretation program
|
||||
PIP Protein interpretation program
|
||||
SIP Similarity investigation program
|
||||
MEP Motif exploration program
|
||||
NIPL Nucleotide interpretation program (library)
|
||||
PIPL Protein interpretation program (library)
|
||||
SIPL Similarity investigation program (library)
|
||||
|
||||
.end lit
|
||||
.left margin2
|
||||
GIP uses a digitiser for entry of DNA sequences from
|
||||
autoradiographs.
|
||||
.left margin2
|
||||
SAP handles everything relating to assembling gel
|
||||
readings in order to produce a consensus sequence. It can also deal with
|
||||
families of protein sequences.
|
||||
.left margin2
|
||||
NIP provides functions for analysing and interpreting
|
||||
individual nucleotide sequences.
|
||||
.left margin2
|
||||
PIP provides functions for analysing and interpreting
|
||||
individual protein sequences.
|
||||
.left margin2
|
||||
MEP analyses families of nucleotide sequences to help discover new motifs.
|
||||
.left margin2
|
||||
NIPL performs pattern searches on nucleotide sequence libraries.
|
||||
.left margin2
|
||||
PIPL performs pattern searches on protein sequence libraries.
|
||||
.left margin2
|
||||
SIP provides functions for comparing and aligning
|
||||
pairs of protein or nucleotide sequences.
|
||||
.left margin2
|
||||
SIPL searches nucleotide and protein sequence
|
||||
libraries for entries similar to probe sequences.
|
||||
.left margin2
|
||||
.sk1
|
||||
.para
|
||||
Documentation
|
||||
.para
|
||||
As is explained below, the
|
||||
programs SAP, NIP, PIP, SIP and MEP have online help,
|
||||
and the help files have the names: HELPSAP, HELPNIP, HELPPIP, HELPSIP,
|
||||
HELPMEP. These
|
||||
files can be displayed on the screen or printed using the appropriate
|
||||
commands. Currently the help for the other programs is also contained in
|
||||
these files. For example help for NIPL is in HELPNIP. This file is called
|
||||
HELPSTADEN.
|
||||
.para
|
||||
Sequence formats
|
||||
.para
|
||||
The shotgun sequencing program SAP deals only with simple
|
||||
text files for gel readings, and is a self-contained system.
|
||||
However as there is still no single agreed format
|
||||
for finished sequences or for libraries of sequences,
|
||||
the other programs in the package can read data that is stored in several ways.
|
||||
.para
|
||||
The analytical programs can read individual sequences stored in the following
|
||||
formats:
|
||||
Staden, EMBL, Genbank, PIR (also known as NBRF), and GCG, but for storing whole
|
||||
libraries we use only PIR format. In addition
|
||||
these programs can perform a number of
|
||||
simple operations using libraries stored in this format. They can extract
|
||||
entries by entry name, can search titles for keywords, can search the whole
|
||||
of the annotation files for keywords, and can extract annotations for any
|
||||
named entry.
|
||||
We reformat all sequence libraries into PIR format. Currently we
|
||||
have NBRF, EMBL, SWISSPROT and VECBASE libraries in PIR format.
|
||||
.para
|
||||
The library searching programs operate only
|
||||
on sequences stored in PIR format.
|
||||
.para
|
||||
The analytical programs
|
||||
will operate with uppercase or lowercase sequence
|
||||
characters. In addition T and U are equivalent. SAP uses uppercase letters
|
||||
for original gel readings and lowercase letters for characters that are
|
||||
corrected by the automatic editor.
|
||||
Programs NIP and PIP use IUB symbols for redundancy in back translations
|
||||
and for sequence searches.
|
||||
The symbols are shown below.
|
||||
.LIT
|
||||
|
||||
|
||||
NC-IUB SYMBOLS
|
||||
|
||||
A,C,G,T
|
||||
R (A,G) 'puRine'
|
||||
Y (T,C) 'pYrimidine'
|
||||
W (A,T) 'Weak'
|
||||
S (C,G) 'Strong'
|
||||
M (A,C) 'aMino'
|
||||
K (G,T) 'Keto'
|
||||
H (A,T,C) 'not G'
|
||||
B (G,C,T) 'not A'
|
||||
V (G,A,C) 'not T'
|
||||
D (G,A,T) 'not C'
|
||||
N (G,A,C,T) 'aNy'
|
||||
|
||||
.end lit
|
||||
.PARA
|
||||
The user interface
|
||||
.PARA
|
||||
The user interface is common to all programs.
|
||||
It consists of a set of menus and a uniform way
|
||||
of presenting choices and obtaining input
|
||||
from the user. This section describes: the
|
||||
menu system; how options are selected and other choices made; how values
|
||||
are supplied to the program; how help is obtained, and
|
||||
how to escape from any part of a program. In addition it gives information
|
||||
about saving results in files and the use of graphics for presenting
|
||||
results.
|
||||
.para
|
||||
Menus
|
||||
.para
|
||||
Each program has several menus and numerous options.
|
||||
Each menu or option has a unique number that is used to
|
||||
identify it. Menu numbers are distinguished from
|
||||
option numbers by being preceded by the letter
|
||||
m (or M, all programs make no distinction between
|
||||
upper and lower case letters). With the exception of
|
||||
some parts of program SAP, the menus are not hierarchical,
|
||||
rather the options they each contain are simply lists of
|
||||
related functions and their identifying numbers.
|
||||
Therefore options can be selected independently
|
||||
of the menu that is currently being shown on the
|
||||
screen, and the menus are simply memory aides.
|
||||
All options and menus are selected by typing their
|
||||
option number when the programs present the prompt
|
||||
.para
|
||||
"? Menu or option number =".
|
||||
.para
|
||||
To select a menu type its number preceded by
|
||||
the letter M. To select an option type its number.
|
||||
If you type only "return" you will get menu m0
|
||||
which is simply a list of menus. If you select an
|
||||
option you will return to the current menu after the function is completed.
|
||||
.para
|
||||
When you select an option, in many cases the
|
||||
program will immediately perform the operation
|
||||
selected without further dialogue. If you precede an option
|
||||
number by the letter d (e.g. D17), you
|
||||
will force the program to offer dialogue about the selected option
|
||||
before the function operates,
|
||||
hence allowing you to change the value of any of its parameters. If
|
||||
you precede an option number by the symbol ? (e.g. ?17),
|
||||
you will be given help on the option (here 17).
|
||||
.para
|
||||
Where possible, equivalent or identical options have been given the same
|
||||
numbers in all programs, and so users quickly learn the numbers for
|
||||
the functions they employ most often.
|
||||
.para
|
||||
Help
|
||||
.para
|
||||
As mentioned above, help about each option can be obtained by
|
||||
preceding the option number by the symbol ? when you are presented
|
||||
with the prompt "? Menu or option number", but there are two further
|
||||
ways of obtaining help. Whenever the program asks a question
|
||||
you can respond by typing the symbol ? and you will receive information
|
||||
about the current option. In addition, option number 1
|
||||
in all the programs will give help on all of a program's functions.
|
||||
.para
|
||||
Quitting
|
||||
.para
|
||||
To exit from any point in a program you type ! for quit.
|
||||
If a menu is on the screen this will stop the program, otherwise
|
||||
you will be returned to the last menu.
|
||||
.Para
|
||||
Other interactions
|
||||
.para
|
||||
Questions are presented in a few restricted ways.
|
||||
In all cases typing only "return" in response to a question means
|
||||
yes, and typing N or n means no.
|
||||
.para
|
||||
Obvious opposites such as "clear screen" and "keep picture"
|
||||
are presented with only the default shown. For example
|
||||
in this case the default is generally "keep picture" so the
|
||||
program will display:
|
||||
.para
|
||||
"(y/n) (y) Keep picture"
|
||||
.para
|
||||
and the picture will be retained if the user types anything other than N or
|
||||
n, (in which case the screen will be cleared).
|
||||
.para
|
||||
Where there are choices that are not obvious opposites, or
|
||||
there are more than two choices, two further conventions are used:
|
||||
"radio buttons" and "check boxes".
|
||||
.para
|
||||
|
||||
Radio buttons are used when only one of a number of choices can be
|
||||
made at any one time. The choices are presented arranged one above the
|
||||
other, each choice with a number for its selection, and the default
|
||||
choice marked with an X. For example in the restriction
|
||||
enzyme search routine the following choices are offered:
|
||||
.para
|
||||
.lit
|
||||
|
||||
Select output mode
|
||||
1 order results enzyme by enzyme
|
||||
2 order results by positon
|
||||
X 3 show only infrequent cutters
|
||||
4 show names above the sequence
|
||||
? Selection (1-4) (3) =
|
||||
|
||||
.end lit
|
||||
Any single option can be selected by typing the option number,
|
||||
and the default option, (here shown as 3), is also obtained by
|
||||
typing only "return". Again help can be obtained by typing ? and
|
||||
you can quit by typing !.
|
||||
.para
|
||||
Check boxes are used when any number of a set of choices can be
|
||||
made (i.e. the choices are not exclusive). Choices are
|
||||
made by typing choice numbers. Each choice can be considered
|
||||
as a switch whose setting is reversed when it is selected. Choices that are
|
||||
currently switched on are marked with an X.
|
||||
The user quits from making selections by typing only
|
||||
"return". For example in the routine that plots base composition
|
||||
you can plot the frequencies of any combination of bases, e.g. only
|
||||
A, or A+T, or A+T+G etc.
|
||||
The following check box is offered to the user:
|
||||
.lit
|
||||
|
||||
X 1 T
|
||||
2 C
|
||||
X 3 A
|
||||
4 G
|
||||
? Selection (1-4) () =
|
||||
|
||||
.END LIT
|
||||
As shown this will plot the A+T composition. To switch off T
|
||||
you select 1, to switch on C you select 2, etc, to quit,
|
||||
having set the bases required you type only "return".
|
||||
.para
|
||||
Input of numerical values
|
||||
.para
|
||||
All input of integer or decimal numbers is presented in a
|
||||
standard way with the allowed range shown in brackets and the default
|
||||
value also in brackets. For example:
|
||||
.para
|
||||
? span (5-31) (11) =
|
||||
.para
|
||||
In this example you could type any number between 5 and 31,
|
||||
or "return" only, or ! or ? (see above). Any other input will cause the
|
||||
program to ask the question again. Typing only "return" gives the default
|
||||
value (here 11).
|
||||
.para
|
||||
Use of the bell
|
||||
.para
|
||||
The programs use the bell to indicate that a task is completed.
|
||||
This allows users to read textual results before they are scrolled up off
|
||||
the screen, or to look at a plot before it is scrolled over by the menus.
|
||||
When the bell sounds, the programs will wait
|
||||
until return is typed. You can quit from these points by typing ! but
|
||||
no help is available.
|
||||
.para
|
||||
Printing and saving results in files
|
||||
.para
|
||||
A few of the functions in the programs automatically write their textual
|
||||
results
|
||||
to disk files, but for most functions you can choose whether results
|
||||
appear on the terminal screen or go to a file. This applies to both text
|
||||
and graphical results.
|
||||
For these functions
|
||||
the normal, or default, place for results to
|
||||
appear is on the screen, and users need to decide before the
|
||||
function is selected if they want to redirect the results to a file.
|
||||
In all programs, option number 7, "Direct output to disk" gives control
|
||||
over whether results appear on the screen or go to a file. When a program
|
||||
is started results will be sent to the screen. If option 7 is selected
|
||||
users will be given the choice of redirecting either text or graphics to a
|
||||
file. The program will then ask users to supply a file name. From that
|
||||
point on all results will be sent to the file until option 7 is selected again,
|
||||
in which case the "redirection file" will be closed, and results will start
|
||||
to appear on the screen.
|
||||
.para
|
||||
If these files contain textual results they can be looked at
|
||||
from within the programs
|
||||
by using option 6, "List a text file". Once you leave the program
|
||||
you can use an appropriate system command to print the files.
|
||||
There is no function within the programs to direct files to a printer.
|
||||
.para
|
||||
The converse of the above is also possible. That
|
||||
is, it is possible to redirect results that would normally go to file,
|
||||
so that they appear instead on the screen. This is often useful as a way
|
||||
of checking results before saving them in a file. On a VAX using
|
||||
VMS you do this by typing TT: for the name of the file that the
|
||||
program would create. TT: is what VMS calls the screen.
|
||||
.para
|
||||
Use of graphics
|
||||
.para
|
||||
The analytical programs including NIP, PIP and SIP present the results of
|
||||
many of their analyses graphically. The position at which the results for
|
||||
any function appear on the screen is defined relative to a notional user's
|
||||
"drawing board" of dimension 10,000 by 10,000. This drawing board fills the
|
||||
screen and results are drawn in windows defined using symbols x0,y0 and
|
||||
xlength,ylength,
|
||||
where x0,y0 is the position of the bottom left hand corner of the window,
|
||||
and xlength is the width of the window and ylength the
|
||||
height of the window.
|
||||
.lit
|
||||
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
.end lit
|
||||
.para
|
||||
The window positions for each option are read from a file
|
||||
when a program is started. If required individual users could have their
|
||||
own set of plot positions, and also the positions
|
||||
can be redefined from within the
|
||||
programs using option number 14.
|
||||
.para
|
||||
For those analyses that draw continuous lines to represent results
|
||||
(for example a plot of base composition) the user is asked to supply the
|
||||
"Plot interval". All the analyses produce a value for every point along the
|
||||
sequence but often it is unnecessary to actually plot the
|
||||
values for all the points.
|
||||
The plot interval is simply the distance between the points
|
||||
shown on the screen. If the user selects a plot interval of 1, every point
|
||||
will be plotted; a plot interval of 3 will show every third point. It is a
|
||||
way of speeding up the analyses.
|
||||
.para
|
||||
Saving graphics
|
||||
.para
|
||||
Many terminals are not capable of dumping their screen contents to a
|
||||
file for subsequent printing. One convenient way of obtaining hard copy
|
||||
of graphical results is to use a micro computer as a terminal. On
|
||||
the Macintosh we use the terminal emulator VersaTerm
|
||||
Pro. This allows graphics to be saved as
|
||||
Macintosh files that can be annotated and printed using
|
||||
MacDraw and other painting programs.
|
||||
.para
|
||||
Alternatively graphics can be redirected to a file and printed using a
|
||||
laser printer with Tektronix capability (see
|
||||
"Printing and saving results in files").
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,84 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 98 2 T Modification
|
||||
-3 0 98 2 X Modification
|
||||
0 -1 116 332 T BAP
|
||||
0 -1 116 332 X BAP
|
||||
17 1 17434 18 T Screen against enzymes
|
||||
17 1 17434 18 X Screen against enzymes
|
||||
18 1 18477 23 T Screen against vector
|
||||
18 1 18477 23 X Screen against vector
|
||||
20 3 19859 121 T Auto assemble
|
||||
20 3 19859 121 X Auto assemble
|
||||
28 1 26426 43 T Highlight disagreements
|
||||
28 1 26426 43 X Highlight disagreements
|
||||
32 3 28846 17 T Extract gel readings
|
||||
32 3 28846 17 X Extract gel readings
|
||||
1 0 29607 3 T Help
|
||||
1 0 29607 3 X Help
|
||||
2 0 29676 5 T Quit
|
||||
2 0 29676 5 X Quit
|
||||
3 1 29869 230 T Open a database
|
||||
3 1 29869 230 X Open a database
|
||||
4 3 41499 320 T Edit contig
|
||||
4 3 41499 320 X Edit contig
|
||||
5 1 56688 43 T Display a contig
|
||||
5 1 56688 43 X Display a contig
|
||||
6 1 58990 6 T List a text file
|
||||
6 1 58990 6 X List a text file
|
||||
8 1 59248 93 T Calculate a consensus
|
||||
8 1 59248 93 X Calculate a consensus
|
||||
25 1 63707 41 T Show relationships
|
||||
25 1 63707 41 X Show relationships
|
||||
23 3 65650 11 T Complement a contig
|
||||
23 3 65650 11 X Complement a contig
|
||||
22 3 66173 59 T Join contigs
|
||||
22 3 66173 59 X Join contigs
|
||||
24 1 69194 11 T Copy the database
|
||||
24 1 69194 11 X Copy the database
|
||||
19 1 69740 43 T Check database
|
||||
19 1 69740 43 X Check database
|
||||
29 1 71898 82 T Examine quality
|
||||
29 1 71898 82 X Examine quality
|
||||
26 3 75715 84 T Alter relationships
|
||||
26 3 75715 84 X Alter relationships
|
||||
27 1 79641 17 T Set display parameters
|
||||
27 1 79641 17 X Set display parameters
|
||||
30 3 80503 7 T Shuffle pads
|
||||
30 3 80503 7 X Shuffle pads
|
||||
10 2 80866 3 T Clear graphics
|
||||
10 2 80866 3 X Clear graphics
|
||||
11 2 80931 3 T Clear text
|
||||
11 2 80931 3 X Clear text
|
||||
12 2 80996 12 T Draw a ruler.
|
||||
12 2 80996 12 X Draw a ruler.
|
||||
14 2 81730 38 T Reposition plots
|
||||
14 2 81730 38 X Reposition plots
|
||||
15 2 84069 28 T Label a diagram
|
||||
15 2 84069 28 X Label a diagram
|
||||
16 2 85174 3 T Display a map
|
||||
16 2 85174 3 X Display a map
|
||||
7 1 85228 12 T Redirect output
|
||||
7 1 85228 12 X Redirect output
|
||||
13 2 85731 43 T Use crosshair
|
||||
13 2 85731 43 X Use crosshair
|
||||
33 2 87876 12 T Plot single contig
|
||||
33 2 87876 12 X Plot single contig
|
||||
34 2 88578 10 T Plot all contigs
|
||||
34 2 88578 10 X Plot all contigs
|
||||
31 3 89160 21 T Disassemble readings
|
||||
31 3 89160 21 X Disassemble readings
|
||||
35 3 90372 94 T Find internal joins
|
||||
35 1 90372 94 T Find internal joins
|
||||
35 3 90372 94 X Find internal joins
|
||||
35 1 90372 94 X Find internal joins
|
||||
36 3 96201 30 T Double strand
|
||||
36 3 96201 30 X Double strand
|
||||
37 3 97555 64 T Auto-select oligos
|
||||
37 3 97555 64 X Auto-select oligos
|
||||
38 1 100421 30 T Check assembly
|
||||
38 1 100421 30 X Check assembly
|
||||
39 1 102178 90 T Find read pairs
|
||||
39 1 102178 90 X Find read pairs
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,79 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 98 2 T Modification
|
||||
-3 0 98 2 X Modification
|
||||
0 -1 116 351 T SAP
|
||||
0 -1 116 351 X SAP
|
||||
17 1 18801 18 T Screen against enzymes
|
||||
17 1 18801 18 X Screen against enzymes
|
||||
18 1 19844 22 T Screen against vector
|
||||
18 1 19844 22 X Screen against vector
|
||||
20 3 21171 113 T Auto assemble
|
||||
20 3 21171 113 X Auto assemble
|
||||
28 1 27332 42 T Highlight disagreements
|
||||
28 1 27332 42 X Highlight disagreements
|
||||
32 3 29694 22 T Extract gel readings
|
||||
32 3 29694 22 X Extract gel readings
|
||||
1 0 30797 3 T Help
|
||||
1 0 30797 3 X Help
|
||||
2 0 30866 5 T Quit
|
||||
2 0 30866 5 X Quit
|
||||
3 1 31059 237 T Open a database
|
||||
3 1 31059 237 X Open a database
|
||||
4 3 43258 239 T Edit contig
|
||||
4 3 43258 239 X Edit contig
|
||||
9 3 54180 42 T Screen edit
|
||||
5 1 56376 45 T Display a contig
|
||||
5 1 56376 45 X Display a contig
|
||||
6 1 58862 6 T List a text file
|
||||
6 1 58862 6 X List a text file
|
||||
8 1 59120 93 T Calculate a consensus
|
||||
8 1 59120 93 X Calculate a consensus
|
||||
25 1 63651 41 T Show relationships
|
||||
25 1 63651 41 X Show relationships
|
||||
21 3 65587 101 T Enter new gel reading
|
||||
21 3 65587 101 X Enter new gel reading
|
||||
23 3 70677 11 T Complement a contig
|
||||
23 3 70677 11 X Complement a contig
|
||||
22 3 71200 63 T Join contigs
|
||||
22 3 71200 63 X Join contigs
|
||||
24 1 74467 11 T Copy the database
|
||||
24 1 74467 11 X Copy the database
|
||||
19 1 75013 41 T Check database
|
||||
19 1 75013 41 X Check database
|
||||
29 1 77032 82 T Examine quality
|
||||
29 1 77032 82 X Examine quality
|
||||
26 3 80849 101 T Alter relationships
|
||||
26 3 80849 101 X Alter relationships
|
||||
27 1 86065 17 T Set display parameters
|
||||
27 1 86065 17 X Set display parameters
|
||||
30 3 86933 48 T Auto edit a contig
|
||||
30 3 86933 48 X Auto edit a contig
|
||||
10 2 89409 3 T Clear graphics
|
||||
10 2 89409 3 X Clear graphics
|
||||
11 2 89474 3 T Clear text
|
||||
11 2 89474 3 X Clear text
|
||||
12 2 89539 12 T Draw a ruler.
|
||||
12 2 89539 12 X Draw a ruler.
|
||||
14 2 90273 38 T Reposition plots
|
||||
14 2 90273 38 X Reposition plots
|
||||
15 2 92612 28 T Label a diagram
|
||||
15 2 92612 28 X Label a diagram
|
||||
16 2 93717 27 T Display a map
|
||||
16 2 93717 27 X Display a map
|
||||
7 1 94692 12 T Redirect output
|
||||
7 1 94692 12 X Redirect output
|
||||
13 2 95163 43 T Use crosshair
|
||||
13 2 95163 43 X Use crosshair
|
||||
33 2 97308 12 T Plot single contig
|
||||
33 2 97308 12 X Plot single contig
|
||||
34 2 98010 10 T Plot all contigs
|
||||
34 2 98010 10 X Plot all contigs
|
||||
31 3 98592 12 T Type in gel readings
|
||||
31 3 98592 12 X Type in gel readings
|
||||
35 3 99223 92 T Find internal joins
|
||||
35 1 99223 92 T Find internal joins
|
||||
35 3 99223 92 X Find internal joins
|
||||
35 1 99223 92 X Find internal joins
|
|
@ -0,0 +1,198 @@
|
|||
GIP
|
||||
|
||||
A digitizer is a two dimensional surface which is such that
|
||||
if a special pen is pressed onto it, the pen's coordinates can be
|
||||
recorded by a computer. These coordinates can be interpreted by a
|
||||
program.
|
||||
|
||||
The digitizing device we use works by the pen emitting a high
|
||||
frequency sound which is picked up by two microphones positioned at
|
||||
the rear of the working area. The pen position is determined by
|
||||
triangulation and the digitizing device sends the coordinates to the
|
||||
computer. As no special surface is required the device can
|
||||
conveniently be positioned on a light box giving the sequencer an
|
||||
unobscured view of the autoradiographs.
|
||||
The digitizer is called a GRAPHBAR MODEL GP7 made by Science
|
||||
Accessories Corp, 970 Kings Highway West, Southport, Connecticut
|
||||
06490, USA.
|
||||
|
||||
The program uses a menu to allow the user to select commands
|
||||
or to enter the uncertainty codes for areas of the gel that
|
||||
are difficult to interpret. A menu is simply a series of boxes drawn
|
||||
on the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. A copy of the menu should have been sent to you. It
|
||||
should be stuck down on the surface of the light box in the
|
||||
digitizing area. For convenience it is best to position it to the
|
||||
right of the digitizing area, but in practice as long as its top edge
|
||||
is parallel to the digitizer box, it can be put anywhere in the
|
||||
active region.
|
||||
|
||||
Entering gel readings using a digitizer
|
||||
|
||||
The autoradiograph should be stuck down on the light box with
|
||||
the lanes running, as near as is possible, at right angles to the
|
||||
digitizer. To read an autoradiograph placed on the light box the user
|
||||
need only define the positions of the four sequencing lanes and the
|
||||
bases to which they correspond and then use the pen to point to
|
||||
each successive band progressing up the gel. The program examines
|
||||
the coordinates of each pen position to see in which of the four
|
||||
lanes it lies and assigns the corresponding base to be stored
|
||||
in the computer. Each time the pen tip is depressed to point to a
|
||||
position on the surface of the digitizer the program sounds the
|
||||
bell on the terminal (a different sound for each of the four bases on
|
||||
the microcomputer version of the program) to indicate to the user
|
||||
that a point has been recorded. As the sequence is read the
|
||||
program displays it on the screen.
|
||||
|
||||
The program uses a menu to allow the user to select commands
|
||||
or to enter the uncertainty codes for areas of the gel that
|
||||
are difficult to interpret. A menu is simply a series of boxes drawn
|
||||
on the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. As well as the uncertainty codes
|
||||
A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8 the following commands are
|
||||
included in the menu: DELETE removes the last character from the
|
||||
sequence; RESET allows the lane centres to be redefined; START means
|
||||
begin the next stage of the procedure; STOP means stop the
|
||||
current stage in the procedure; CONFIRM means confirm that the last
|
||||
command or set of coordinates are correct.
|
||||
|
||||
The digitizing device also has a menu of its own. This lies in
|
||||
a two inch wide strip immediately in front of the digitizing box. Pen
|
||||
positions within this two inch strip are interpreted as commands to
|
||||
the digitizer and are not sent to the GIP program. In general the
|
||||
only time users will need to use the device menu is when they tell
|
||||
GIP where the program menu lies in the digitizing area. This is done
|
||||
by first hitting ORIGIN in the device menu and then hitting the
|
||||
bottom left hand corner of the program menu. The program menu can
|
||||
hence be positioned anywhere in the active region but should be
|
||||
arranged parallel to the digitizer.
|
||||
|
||||
The user should try to hit the bands as near as possible to
|
||||
the centre of the lanes because the program tracks the lanes up the
|
||||
film using the pen positions. By using this tracking strategy the
|
||||
user only has to define the centres of the bottom of the lanes before
|
||||
starting to read the film. The program can correctly follow quite
|
||||
curved lanes and constantly checks that its lane centre coordinates
|
||||
look sensible. If the lane centres appear to be getting too close the
|
||||
program stops responding to the pen positions of bands and hence does
|
||||
not ring the bell. If this occurs users must hit the reset box in the
|
||||
menu and the program will request them to redefine the lane centres
|
||||
at the current reading position. Then they can continue reading. As a
|
||||
further safeguard the program will only respond to pen positions
|
||||
either in the menu or very close to the current reading position.
|
||||
|
||||
Running the gel reading program
|
||||
The autoradiograph should be firmly stuck down on the light box and
|
||||
the program started by typing GIP. It will ask the first question.
|
||||
" ? FILE OF FILE NAMES="
|
||||
Type the name for the file of file names and then follow the
|
||||
instructions.
|
||||
" HIT DIGITIZER MENU ORIGIN"
|
||||
" THEN PROGRAM MENU ORIGIN"
|
||||
" THEN HIT START IN PROGRAM MENU"
|
||||
If the bell does not sound after you hit start try hitting metric in
|
||||
the device menu (the program uses metric units, and some digitizers
|
||||
are set to default to use inches; hitting metric switches between
|
||||
the two).
|
||||
After the bell has sounded the program will give the default lane
|
||||
order.
|
||||
" LANE ORDER IS T C A G"
|
||||
" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
|
||||
If the lane order, reading from left to right is correct hit confirm
|
||||
in the program menu. If you are using a different order hit reset
|
||||
and you will be asked to define the lane order from left to right
|
||||
using the program menu (as follows).
|
||||
" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
|
||||
Hit the boxes in the menu that contain the symbols A,C,G,T in the
|
||||
left-right order of the lanes. The program will respond with the
|
||||
lane order as above and ask for confirmation. When this is received,
|
||||
the next task is to define the start positions of the next four
|
||||
lanes.
|
||||
" HIT START, THEN HIT (LEFT TO RIGHT)"
|
||||
" THE START POSITIONS FOR THE NEXT FOUR LANES"
|
||||
Hit the centres of the four lanes at a height level with the first
|
||||
band that is going to be read. The program will report the mean lane
|
||||
separations and ask for confirmation that they are correct.
|
||||
" MEAN LANE SEPARATION IS XX"
|
||||
" HIT CONFIRM TO CONTINUE"
|
||||
Users will become familiar with the values from their films and will
|
||||
spot any unusual numbers. Asking for confirmation allows users to
|
||||
try again if they had made a mistake, but generally the lane
|
||||
separation values can be ignored. Hit confirm, and the program will
|
||||
give the message
|
||||
" HIT START WHEN READY TO BEGIN READING"
|
||||
Hit start and the program will give the message
|
||||
" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
|
||||
Hit the bands, interpreting the sequence progressing up the film.
|
||||
If necessary use the uncertainty codes. If the pen stops responding
|
||||
hit reset and follow the instructions as above. When the sequence
|
||||
becomes unreadable hit stop and the program will ask for a file name
|
||||
for the gel reading just read.
|
||||
" ? FILE NAME FOR THIS GEL READING="
|
||||
Type the file name observing the rules about legal gel readings
|
||||
names. The program will ask if you wish to read another sequence.
|
||||
" TO ENTER ANOTHER GEL READING TYPE 1"
|
||||
To enter another type 1 and you will be back to the step of defining
|
||||
the lane order. Typing anything else will stop the program.
|
||||
|
||||
Running the microcomputer version of the gel reading program
|
||||
The microcomputer version of GIP is slightly different and is called
|
||||
GIPB. The BBC micro does not have the capacity to process the gel
|
||||
readings beyond the reading stage. This means that users of this
|
||||
program would need to transfer their gel readings from the micro to
|
||||
another machine using a terminal emulator. Transferring many files
|
||||
is tedious and so the microcomputer version of the gel reading
|
||||
program stores all the gel readings for each run of the program in a
|
||||
single file. This special file contains both sequences and file names
|
||||
and can be moved in a single transfer to another machine. Once on the
|
||||
other machine the single file must be split into separate gel reading
|
||||
files and a file of file names. This is done using the program
|
||||
BSPLIT. As far as using the microcomputer version of GIP, the only
|
||||
difference is that the first file name the program requests is not a
|
||||
file of file names, but a name for the single file to contain all the
|
||||
gel readings and their names.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
#
|
||||
# Make file for help files - this requires gmake on some systems.
|
||||
#
|
||||
PROGS = bap dap gip mep nip \
|
||||
nipf pip sap sip #mem
|
||||
|
||||
HELPS = bap_help dap_help gip_help mep_help nip_help \
|
||||
nipf_help pip_help sap_help sip_help #mem_help
|
||||
|
||||
MENUS = bap_menu dap_menu gip_menu mep_menu nip_menu \
|
||||
nipf_menu pip_menu sap_menu sip_menu #mem_menu
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
DOIT = rm -f $@_help $@_menu; ./runoff $?
|
||||
|
||||
bap: BAP.RNO
|
||||
$(DOIT)
|
||||
|
||||
dap: DAP.RNO
|
||||
$(DOIT)
|
||||
|
||||
gip: GIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
#mem: MEM.RNO
|
||||
# $(DOIT)
|
||||
|
||||
mep: MEP.RNO
|
||||
$(DOIT)
|
||||
|
||||
nip: NIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
nipf: NIPF.RNO
|
||||
$(DOIT)
|
||||
|
||||
pip: PIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
sap: SAP.RNO
|
||||
$(DOIT)
|
||||
|
||||
sip: SIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
clean:
|
||||
rm -f $(HELPS) $(MENUS)
|
|
@ -0,0 +1,698 @@
|
|||
|
||||
@0. B 1 @MEP
|
||||
This is a program for analysing families of nucleotide sequences in
|
||||
order to find common motifs and potential binding sites. The ideas
|
||||
in this program were described in Staden, R. "Methods for
|
||||
discovering novel motifs in nucleic acid sequences". Computer
|
||||
Applications in the Biosciences, 5, 293-298, (1989).
|
||||
|
||||
The program can read sequences stored in either of two
|
||||
formats: 1) all sequences aligned in a single file; 2) all sequences
|
||||
in separate files and accessed through a file of file names.
|
||||
|
||||
The program contains functions that can answer several
|
||||
questions about a set of sequences:
|
||||
|
||||
Which words are most common?
|
||||
Which words occur in the most sequences?
|
||||
Which words contain the most information?
|
||||
Which words occur in equivalent positions in the sequences?
|
||||
Which words are inverted repeats?
|
||||
Which words occur on both strands of the sequences?
|
||||
Where are the inverted repeats?
|
||||
Where are the fuzzy words?
|
||||
|
||||
Most of the program is concerned with analysing what it terms
|
||||
"fuzzy words" within the set of sequences. The analysis is explained
|
||||
below. Note that the standard version of the programs is limited to
|
||||
words of maximum length 8 letters, and a maximum fuzziness of 2.
|
||||
|
||||
The following analyses (preceded by their option numbers) are
|
||||
included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
|
||||
Some of these methods produce graphical results and so the
|
||||
program is generally used from a graphics terminal (a vdu on which
|
||||
lines and points can be drawn as well as characters).
|
||||
|
||||
The position of each of the plots is defined relative to a user's
|
||||
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||
for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength, where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
|
||||
The options for the program are accessed from 3 main menus:
|
||||
general, screen control and dictionary analysis. Both menus and
|
||||
options are selected by number.
|
||||
|
||||
The most important and novel part of the program is its use of
|
||||
"fuzzy dictionaries" and an information theory measure, to help show
|
||||
the most interesting motifs. Central to the method is the idea of a
|
||||
fuzzy dictionary of word frequencies. A dictionary of word
|
||||
frequencies is an ordered list of all the words in the sequences and
|
||||
a count of the number of times that they occur. A fuzzy dictionary
|
||||
is an equivalent list but which contains instead, for each word, a
|
||||
count of the number of times similar words occur in the sequences.
|
||||
We term words that are similar "relations". The fuzziness is defined
|
||||
by the number of letters in a word that are allowed to be different.
|
||||
So if we had a fuzziness of 1 we allow 1 letter to be different. For
|
||||
example, with a fuzziness of 1, the entry in the fuzzy dictionary
|
||||
for the word TTTTTT would contain a count of the numbers of times
|
||||
TTTTTT occurred plus the number of times all words differing by
|
||||
exactly one letter from TTTTTT occurred.
|
||||
|
||||
Once the fuzzy dictionary has been created we can examine it
|
||||
in several ways to find candidate control sequences. The simplest
|
||||
question we can ask is which word in the dictionary is the most
|
||||
common. Sometimes this simple criterion of "most common" may be
|
||||
adequate to discover a new motif but in general we would not expect
|
||||
it to be sufficient. For example some words will be common simply
|
||||
because of a base composition bias in the sequences being analysed.
|
||||
In addition a word can be the most frequent and yet not be "well
|
||||
defined". This last point is best explained by an example.
|
||||
|
||||
Suppose we were looking at two letter words and allowing one
|
||||
mismatch, and that there were 10 occurrences of TT and 5 of AC. We
|
||||
could align the 10 words that were one letter different from TT and
|
||||
the 5 that were related to AC. Then we could count the number of
|
||||
times each base occurred in each position for each of these two sets
|
||||
of words. Suppose we got the two base frequency tables shown below.
|
||||
TT AC
|
||||
T 6 4 T 1 0
|
||||
C 1 3 C 0 4
|
||||
A 1 2 A 4 1
|
||||
G 2 1 G 0 0
|
||||
|
||||
These tables show that although TT occurs (with one letter mismatch)
|
||||
more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
|
||||
is higher than those for TT at 6/10, 4/10. Hence we would say that
|
||||
AC was better defined than TT. Expressing this another way we would
|
||||
say that the definition of AC contained more information than that
|
||||
for TT. The program calculates the information content in a way that
|
||||
takes into account both the sequence composition and the level of
|
||||
definition of the motif.
|
||||
|
||||
Definitions
|
||||
|
||||
Here we deal only with the dictionary analysis. Suppose we
|
||||
are dealing with a set of sequences and are examining them for words
|
||||
that are six characters in length.
|
||||
|
||||
Dictionary Dw contains a count of the number of times each
|
||||
word occurs in the set of sequences. For example the entry for
|
||||
TTTTTT contains a value equal to the number of times the word TTTTTT
|
||||
occurs in the set of sequences.
|
||||
|
||||
Dictionary Ds contains a count of the number of different
|
||||
sequences in which each word occurs. For example if the entry for
|
||||
word TTTTTT contains the value 10, it denotes that the word TTTTTT
|
||||
occurs in ten different sequences. Unlike Dw it only counts words
|
||||
once for each sequence. For example if we had a set of 100
|
||||
sequences, the maximum possible value that Ds could take is 100, and
|
||||
this would only happen if a word occurred in every sequence. However
|
||||
for the same set of sequences, Dw could contain values greater than
|
||||
100, and this would show that a word had occurred more than once in
|
||||
at least one sequence.
|
||||
|
||||
From either of the two dictionaries Dw or Ds we can calculate
|
||||
a fuzzy dictionary Dm. For each word, the entry in the fuzzy
|
||||
dictionary Dm contains the sum of the dictionary values (taken from
|
||||
either Dw or Ds) for all words that differ from it by up to m
|
||||
letters. For example if m=2 the entry for TTTTTT contains the number
|
||||
of times that TTTTTT occurs in the dictionary, plus the counts for
|
||||
all words that differ from TTTTTT by 1 or 2 letters. Obviously the
|
||||
interpretation of the values in Dm depends on which of the two
|
||||
dictionaries Dw or Ds they were derived from. When derived from Dw
|
||||
the entry for any word in Dm gives the total number of times it, and
|
||||
its relations, occur in the set of sequences. When derived from Ds
|
||||
the entry for any word in Dm gives the total number of different
|
||||
sequences that contain a word and each of its relations.
|
||||
|
||||
Finally, from fuzzy dictionary Dm we can derive fuzzy
|
||||
dictionary Dh. All entries in Dh are zero except for the word(s),
|
||||
within each set of relations, that are most frequent. For example if
|
||||
TTTTTT occurred 20 times but had a relation that occurred more
|
||||
often, then the entry for TTTTTT would be zero. However if TTTTTT
|
||||
did not have a more frequently occurring relation, then the entry
|
||||
for TTTTTT would contain the value 20.
|
||||
@1. B 1 @Help
|
||||
This option gives online help. The user should select option numbers
|
||||
and the current documentation will be given. Note that option 0
|
||||
gives an introduction to the program, and that ? will get help from
|
||||
anywhere in the program. The following analyses (preceded by their
|
||||
option numbers) are included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
@2. B 1 @Quit
|
||||
This function stops the program.
|
||||
@3. B 1 @Read a new sequence.
|
||||
|
||||
It can read sequences stored in either of two formats: 1) all
|
||||
sequences aligned in a single file; 2) all sequences in separate
|
||||
files and accessed through a file of file names. Typical dialogue
|
||||
follows:
|
||||
|
||||
X 1 Read file of aligned sequences
|
||||
2 Use file of file names
|
||||
? 0,1,2 =
|
||||
|
||||
? File of aligned sequences=F1
|
||||
Number of files 88
|
||||
|
||||
@4. B 1 @Define active region
|
||||
For its analytic functions the program always works on a region of
|
||||
the sequence called the active region. When new sequences are read
|
||||
into the program the active region is automatically set to start at
|
||||
the beginning of the sequences and go up to the end of the longest
|
||||
one.
|
||||
@5. B 1 @List a sequence.
|
||||
The sequence can be listed with line lengths of 50 bases with each
|
||||
sequence numbered in the order in which they were read. Output can
|
||||
be directed to a disk file by first selecting disk output. Typical
|
||||
dialogue follows.
|
||||
|
||||
? Menu or option number=5
|
||||
|
||||
10 20 30 40 50
|
||||
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||
|
||||
60
|
||||
1 TACCCGTTTTT
|
||||
2 GCGTTTTTGT
|
||||
3 TCATACCATAAG
|
||||
4 TTTCATACC
|
||||
5 ATTGTGAGC
|
||||
6 TTCCGGCTCG
|
||||
7 GAAGAGAGT
|
||||
8 TCAGGTGT
|
||||
9 ATGAATG
|
||||
10 TAATTACG
|
||||
@6. B 1 @List a text file.
|
||||
Allows the user to have a text file displayed on the screen. It will
|
||||
appear one page at a time.
|
||||
@7. B 1 @Direct output to disk
|
||||
|
||||
Used to direct output that would normally appear on the screen
|
||||
to a file.
|
||||
|
||||
Select redirection of either text or graphics, and supply the
|
||||
name of the file that the output should be written to.
|
||||
|
||||
The results from the next options selected will not appear on
|
||||
the screen but will be written to the file. When option 7 is
|
||||
selected again the file will be closed and output will again appear
|
||||
on the screen.
|
||||
@10. B 1 @Clear graphics
|
||||
Clears the screen of both text and graphics.
|
||||
@11. B 1 @Clear text
|
||||
Clears only text from the screen.
|
||||
@12. B 1 @Draw a ruler.
|
||||
This option allows the user to draw a ruler or scale along the x
|
||||
axis of the screen to help identify the coordinates of points of
|
||||
interest. The user can define the position of the first amino acid
|
||||
to be marked (for example if the active region is 1501 to 8000, the
|
||||
user might wish to mark every 1000th amino acid starting at either
|
||||
1501 or 2000 - it depends if the user wishes to treat the active
|
||||
region as an independent unit with its own numbering starting at its
|
||||
left edge, or as part of the whole sequence). The user can also
|
||||
define the separation of the ticks on the scale and their height. If
|
||||
required the labelling routine can be used to add numbers to the
|
||||
ticks.
|
||||
@13. B 1 @Use crosshair.
|
||||
This function puts a steerable cross on the screen that can be used
|
||||
to find the coordinates of points in the sequence. The user can move
|
||||
the cross around using the directional keys; when he hits the space
|
||||
bar the program will print out the coordinates of the cross in
|
||||
sequence units and the option will be exited.
|
||||
|
||||
If instead, you hit a , the position will be displayed but the
|
||||
cross will remain on the screen.
|
||||
|
||||
If a letter s is hit the sequence around the cross hair is
|
||||
displayed and the cross remains on the screen.
|
||||
@14. B 1 @Reposition plots
|
||||
The positions of each of the plots is defined relative to a users
|
||||
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||
for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required. As
|
||||
all the plots start at the same position in x and have the same
|
||||
width, x0 and xlength are the same for all options. Generally users
|
||||
will only want to change the start level of the window y0 and its
|
||||
height ylength. This option allows users to change window positions
|
||||
whilst running the program. The routine prompts first for the
|
||||
number of the option that the users wishes to reposition; then for
|
||||
the y start and height; then for the x start and length. Note that
|
||||
changes to the x values affect all options. If the user types only
|
||||
carriage return for any value it will remain unchanged. The cross-
|
||||
hair can be used to choose suitable heights.
|
||||
@15. B 1 @Label a diagram
|
||||
This routine allows users to label any diagrams they have produced.
|
||||
They are asked to type in a label. When the user types carriage
|
||||
return to finish typing the label the cross-hair appears on the
|
||||
screen. The user can position it anywhere on the screen. If the user
|
||||
types R (for right justify) the label will be written on the diagram
|
||||
with its right end at the cross-hair position. If the user types L
|
||||
(for left justify) the label will be written on the diagram with its
|
||||
left end at the cross hair position. The cross-hair will then
|
||||
immediately reappear. The user may put the same label on another
|
||||
part of the diagram as before or if he hits the space bar he will be
|
||||
asked if he wishes to type in another label.
|
||||
@16. B 1 @Display a map.
|
||||
It is often convenient to plot a map alongside graphed analysis in
|
||||
order to indicate features within the sequence. This function allows
|
||||
users to draw maps using files arranged in the form of EMBL feature
|
||||
tables. Of course the EMBL table are usually only used for nucleic
|
||||
acid sequence annotation but, as long as the features are written in
|
||||
the correct format, they can be employed by this routine. The map is
|
||||
composed of a line representing the sequence and then further lines
|
||||
denoting the endpoints of each feature the user identifies. The user
|
||||
is asked to define height at which the line representing the
|
||||
sequence should be drawn; then for the feature height; then for the
|
||||
features to plot.
|
||||
@17. B 1 @Search for strings
|
||||
Search for strings perfoms searches of all the sequences for
|
||||
selected words and shows which sequences they are found in. The user
|
||||
types in a word and defines the allowed number of mismatches. The
|
||||
results are listed or plotted. If listed the display includes the
|
||||
sequence number, the position in the sequence and the matching
|
||||
string. The results are plotted in the following way. The x axis of
|
||||
the plot represents the length of the aligned sequences and the y
|
||||
direction is divided into sufficient strips to accommodate each
|
||||
sequence. So if a match is found in the 3rd sequence at a position
|
||||
equivalent to halfway along the longest of the sequences then a
|
||||
short vertical line will be drawn at the midpoint of the 3rd strip.
|
||||
If the sequences are aligned it can be useful if the motifs happen
|
||||
to appear in related positions. For example see the original
|
||||
publication. Typical dialogue follows.
|
||||
|
||||
? Menu or option number=17
|
||||
X 1 Plot match positions
|
||||
2 Plot histogram of matches
|
||||
? 0,1,2 =
|
||||
? Word to search for=TTGACA
|
||||
? Minimum match (0-6) (6) =5
|
||||
? (y/n) (y) Plot results N
|
||||
2 35 TAGACA
|
||||
5 14 TTTACA
|
||||
6 37 TTTACA
|
||||
11 14 TAGACA
|
||||
14 14 TTGACA
|
||||
17 14 GTGACA
|
||||
17 22 TTAACA
|
||||
20 1 TTGACA
|
||||
@18. B 1 @Set strand
|
||||
Set strand allows the user to define which strand(s) of the
|
||||
sequences to analyse: input stand, complement of input, or both.
|
||||
@19. B 1 @Set composition
|
||||
Set composition gives the user three choices for setting the
|
||||
composition of the sequences for use in the calculation of the
|
||||
information content of words. The user can select the overall
|
||||
composition of the sequences as read, an even composition, or can
|
||||
type in any other 4 values.
|
||||
@20. B 1 @Set word length
|
||||
Set word length sets the length of word for which dictionaries will
|
||||
be made.
|
||||
@21. B 1 @Set number of mismatches
|
||||
Set number of mismatches sets the level of fuzziness for the
|
||||
creation of dictionary Dm.
|
||||
@22. B 1 @Show settings
|
||||
Show settings show the current settings for all parameters
|
||||
associated with dictionary analysis. A typical display follows:
|
||||
? Menu or option number=22
|
||||
Current word length = 6
|
||||
Number of mismatches = 1
|
||||
Start position = 1
|
||||
End position = 63
|
||||
Input strand only
|
||||
Observed composition
|
||||
Dictionary Dw unmade
|
||||
Dictionary Ds unmade
|
||||
Dictionary Dm unmade
|
||||
Dictionary Dh unmade
|
||||
@23. B 1 @Make dictionary Dw
|
||||
Make dictionary Dw creates a dictionary that contains a count of
|
||||
the frequency of occurrence of each word in the collected sequences.
|
||||
@24. B 1 @Make dictionary Ds
|
||||
Make dictionary Ds creates a dictionary that contains a count of the
|
||||
number of different sequences that contain each word.
|
||||
@25. B 1 @Make dictionary Dm from Dw
|
||||
Make dictionary Dm from Dw creates a dictionary from dictionary Dw
|
||||
that contains the frequency of occurrence of each word (say X) in Dw
|
||||
plus the frequency of occurrence of each word in Dw that differs
|
||||
from X by up to m letters. Dm is called a fuzzy dictionary as it
|
||||
contains the frequencies of occurrence of all words plus the
|
||||
frequencies of all the words that are similar to them.
|
||||
@26. B 1 @Make dictionary Dm from Ds
|
||||
Make dictionary Dm from Ds creates a dictionary from dictionary Ds
|
||||
that contains the frequency of occurrence of each word (say X) in Ds
|
||||
plus the frequency of occurrence of each word in Ds that differs
|
||||
from X by up to m letters. Dm is called a fuzzy dictionary as it
|
||||
contains the frequencies of occurrence of all words plus the
|
||||
frequencies of all the words that are similar to them.
|
||||
@27. B 1 @Make dictionary Dh from Dm
|
||||
Make dictionary Dh creates a dictionary from dictionary Dm and
|
||||
whose entries are zero except for those words in any set of related
|
||||
words that are most frequent. It finds the dominant words in each
|
||||
set of relations and stores their counts.
|
||||
@28. B 1 @Examine dictionary Dm
|
||||
Examine dictionary Dm allows users to analyse the contents of
|
||||
dictionary Dm to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dm, and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=28
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAC 64 0.66460
|
||||
AAAAAA 90 0.64880
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTG 73 0.64070
|
||||
TTTTGT 63 0.63820
|
||||
TTTTTC 65 0.63810
|
||||
AAAATA 63 0.62670
|
||||
TATAAT 65 0.62510
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =2
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
AAAAAA 90 0.64880
|
||||
TTTTTG 73 0.64070
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTC 65 0.63810
|
||||
TATAAT 65 0.62510
|
||||
AAAAAC 64 0.66460
|
||||
TTTTGT 63 0.63820
|
||||
AAAATA 63 0.62670
|
||||
TTGACA 60 0.73850
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@29. B 1 @Examine dictionary Dh
|
||||
Examine dictionary Dh allows users to analyse the contents of
|
||||
dictionary Dh to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dh and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=29
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.6
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 4 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
TTTTTT 115 0.60630
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =.5
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =
|
||||
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@30. B 1 @Examine words in Dm
|
||||
Examine words in Dm allows users to analyse the contents of
|
||||
dictonary Dm at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
? Menu or option number=30
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=
|
||||
|
||||
@31. B 1 @Examine words in Dh
|
||||
Examine words in Dh allows users to analyse the contents of
|
||||
dictonary Dh at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=31
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=GGGGGG
|
||||
gggggg 0 0.6199890
|
||||
3 1 1 2 3 4
|
||||
1 3 1 2 2 1
|
||||
2 1 1 1 1 1
|
||||
11 12 14 12 11 11
|
||||
GGGGGG
|
||||
? Word to examine=
|
||||
|
||||
@32. B 1 @Save or restore a dictionary
|
||||
Save or restore dictionary allows users to write or read any
|
||||
dictionary to and from disk files. The user is asked to define the
|
||||
dictionary and file. The function is useful if the machine being
|
||||
used is very slow at calculating because the files can be handled
|
||||
quickly. However note that the files cannot be processed by any
|
||||
other program.
|
||||
@33. B 1 @Find inverted repeats
|
||||
Find inverted repeats performs searches for simple inverted repeat
|
||||
sequences in each sequence. They are defined by a range of loop
|
||||
sizes and a minimum number of potential basepairs. The results can
|
||||
be plotted or listed. The x axis of the plot represents the length
|
||||
of the aligned sequences and the y direction is divided into
|
||||
sufficient strips to accommodate each sequence. So if an inverted
|
||||
repeat is found in the 3rd sequence at a position equivalent to
|
||||
halfway along the longest of the sequences then a short vertical
|
||||
line will be drawn at the midpoint of the 3rd strip. Alternatively,
|
||||
if the results are listed, the potential hairpin loops are drawn
|
||||
out, with the sequence number and the position of the loop. Typical
|
||||
dialogue follows.
|
||||
|
||||
? Menu or option number=33
|
||||
Define the range of loop sizes
|
||||
? Minimum loop size (0-10) (3) =0
|
||||
? Maximum loop size (1-20) (3) =
|
||||
? Minimum number of basepairs (1-20) (6) =
|
||||
? (y/n) (y) Plot results N
|
||||
Searching
|
||||
|
||||
Sequence 3 34
|
||||
C
|
||||
G.T
|
||||
T-A
|
||||
A-T
|
||||
T.G
|
||||
T.G
|
||||
G.T
|
||||
ATCTTT TATTTCA
|
||||
33
|
||||
|
||||
Sequence 5 35
|
||||
T
|
||||
G.T
|
||||
T.G
|
||||
A-T
|
||||
T.G
|
||||
G.T
|
||||
C-G
|
||||
T.G
|
||||
TCCGGC AATTGTG
|
||||
34
|
||||
|
||||
|
||||
@ End of help
|
|
@ -0,0 +1,32 @@
|
|||
0 1 15 184 B MEP
|
||||
1 1 9304 37 B Help
|
||||
2 1 10465 2 B Quit
|
||||
3 1 10531 14 B Read a new sequence.
|
||||
4 1 10932 6 B Define active region
|
||||
5 1 11250 31 B List a sequence.
|
||||
6 1 12393 3 B List a text file.
|
||||
7 1 12525 12 B Direct output to disk
|
||||
10 1 12996 2 B Clear graphics
|
||||
11 1 13065 2 B Clear text
|
||||
12 1 13126 12 B Draw a ruler.
|
||||
13 1 13871 12 B Use crosshair.
|
||||
14 1 14459 34 B Reposition plots
|
||||
15 1 16611 12 B Label a diagram
|
||||
16 1 17394 12 B Display a map.
|
||||
17 1 18154 31 B Search for strings
|
||||
18 1 19507 3 B Set strand
|
||||
19 1 19672 6 B Set composition
|
||||
20 1 20013 3 B Set word length
|
||||
21 1 20131 3 B Set number of mismatches
|
||||
22 1 20256 14 B Show settings
|
||||
23 1 20718 3 B Make dictionary Dw
|
||||
24 1 20890 3 B Make dictionary Ds
|
||||
25 1 21055 7 B Make dictionary Dm from Dw
|
||||
26 1 21505 7 B Make dictionary Dm from Ds
|
||||
27 1 21955 5 B Make dictionary Dh from Dm
|
||||
28 1 22245 55 B Examine dictionary Dm
|
||||
29 1 24148 70 B Examine dictionary Dh
|
||||
30 1 26410 25 B Examine words in Dm
|
||||
31 1 27437 33 B Examine words in Dh
|
||||
32 1 28701 7 B Save or restore a dictionary
|
||||
33 1 29106 46 B Find inverted repeats
|
|
@ -0,0 +1,792 @@
|
|||
|
||||
@-1. TX 0 @General
|
||||
|
||||
@-2. T 0 @Screen control
|
||||
|
||||
@-2. X 0 @Screen
|
||||
|
||||
@-3. TX 0 @Dictionary analysis
|
||||
|
||||
@0. TX -1 @MEP
|
||||
|
||||
This is a program for analysing families of nucleotide
|
||||
sequences in order to find common motifs and potential binding
|
||||
sites. The ideas in this program were described in Staden, R.
|
||||
"Methods for discovering novel motifs in nucleic acid sequences".
|
||||
Computer Applications in the Biosciences, 5, 293-298, (1989).
|
||||
|
||||
The program can read sequences stored in either of two
|
||||
formats: 1) all sequences aligned in a single file; 2) all sequences
|
||||
in separate files and accessed through a file of file names.
|
||||
|
||||
The program contains functions that can answer several
|
||||
questions about a set of sequences:
|
||||
|
||||
Which words are most common?
|
||||
Which words occur in the most sequences?
|
||||
Which words contain the most information?
|
||||
Which words occur in equivalent positions in the sequences?
|
||||
Which words are inverted repeats?
|
||||
Which words occur on both strands of the sequences?
|
||||
Where are the inverted repeats?
|
||||
Where are the fuzzy words?
|
||||
|
||||
Most of the program is concerned with analysing what it terms
|
||||
"fuzzy words" within the set of sequences. The analysis is explained
|
||||
below. Note that the standard version of the programs is limited to
|
||||
words of maximum length 8 letters, and a maximum fuzziness of 2.
|
||||
|
||||
The following analyses (preceded by their option numbers) are
|
||||
included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
|
||||
Some of these methods produce graphical results and so the
|
||||
program is generally used from a graphics terminal (a vdu on which
|
||||
lines and points can be drawn as well as characters).
|
||||
|
||||
The positions of each of the plots is defined relative to a users
|
||||
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||
for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
|
||||
The options for the program are accessed from 3 main menus:
|
||||
general, screen control and dictionary analysis. Both menus and
|
||||
options are selected by number.
|
||||
|
||||
The most important and novel part of the program is its use of
|
||||
"fuzzy dictionaries" and an information theory measure, to help show
|
||||
the most interesting motifs. Central to the method is the idea of a
|
||||
fuzzy dictionary of word frequencies. A dictionary of word
|
||||
frequencies is an ordered list of all the words in the sequences and
|
||||
a count of the number of times that they occur. A fuzzy dictionary
|
||||
is an equivalent list but which contains instead, for each word, a
|
||||
count of the number of times similar words occur in the sequences.
|
||||
We term words that are similar "relations". The fuzziness is defined
|
||||
by the number of letters in a word that are allowed to be different.
|
||||
So if we had a fuzziness of 1 we allow 1 letter to be different. For
|
||||
example, with a fuzziness of 1, the entry in the fuzzy dictionary
|
||||
for the word TTTTTT would contain a count of the numbers of times
|
||||
TTTTTT occurred plus the number of times all words differing by
|
||||
exactly one letter from TTTTTT occurred.
|
||||
|
||||
Once the fuzzy dictionary has been created we can examine it
|
||||
in several ways to find candidate control sequences. The simplest
|
||||
question we can ask is which word in the dictionary is the most
|
||||
common. Sometimes this simple criterion of "most common" may be
|
||||
adequate to discover a new motif but in general we would not expect
|
||||
it to be sufficient. For example some words will be common simply
|
||||
because of a base composition bias in the sequences being analysed.
|
||||
In addition a word can be the most frequent and yet not be "well
|
||||
defined". This last point is best explained by an example.
|
||||
|
||||
Suppose we were looking at two letter words and allowing one
|
||||
mismatch, and that there were 10 occurrences of TT and 5 of AC. We
|
||||
could align the 10 words that were one letter different from TT and
|
||||
the 5 that were related to AC. Then we could count the number of
|
||||
times each base occurred in each position for each of these two sets
|
||||
of words. Suppose we got the two base frequency tables shown below.
|
||||
TT AC
|
||||
T 6 4 T 1 0
|
||||
C 1 3 C 0 4
|
||||
A 1 2 A 4 1
|
||||
G 2 1 G 0 0
|
||||
|
||||
These tables show that although TT occurs (with one letter mismatch)
|
||||
more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
|
||||
is higher than those for TT at 6/10, 4/10. Hence we would say that
|
||||
AC was better defined than TT. Expressing this another way we would
|
||||
say that the definition of AC contained more information than that
|
||||
for TT. The program calculates the information content in a way that
|
||||
takes into account both the sequence composition and the level of
|
||||
definition of the motif.
|
||||
|
||||
Definitions
|
||||
|
||||
Here we deal only with the dictionary analysis. Suppose we
|
||||
are dealing with a set of sequences and are examining them for words
|
||||
that are six characters in length.
|
||||
|
||||
Dictionary Dw contains a count of the number of times each
|
||||
word occurs in the set of sequences. For example the entry for
|
||||
TTTTTT contains a value equal to the number of times the word TTTTTT
|
||||
occurs in the set of sequences.
|
||||
|
||||
Dictionary Ds contains a count of the number of different
|
||||
sequences in which each word occurs. For example if the entry for
|
||||
word TTTTTT contains the value 10, it denotes that the word TTTTTT
|
||||
occurs in ten different sequences. Unlike Dw it only counts words
|
||||
once for each sequence. For example if we had a set of 100
|
||||
sequences, the maximum possible value that Ds could take is 100, and
|
||||
this would only happen if a word occurred in every sequence. However
|
||||
for the same set of sequences, Dw could contain values greater than
|
||||
100, and this would show that a word had occurred more than once in
|
||||
at least one sequence.
|
||||
|
||||
From either of the two dictionaries Dw or Ds we can calculate
|
||||
a fuzzy dictionary Dm. For each word, the entry in the fuzzy
|
||||
dictionary Dm contains the sum of the dictionary values (taken from
|
||||
either Dw or Ds) for all words that differ from it by up to m
|
||||
letters. For example if m=2 the entry for TTTTTT contains the number
|
||||
of times that TTTTTT occurs in the dictionary, plus the counts for
|
||||
all words that differ from TTTTTT by 1 or 2 letters. Obviously the
|
||||
interpretation of the values in Dm depends on which of the two
|
||||
dictionaries Dw or Ds they were derived from. When derived from Dw
|
||||
the entry for any word in Dm gives the total number of times it, and
|
||||
its relations, occur in the set of sequences. When derived from Ds
|
||||
the entry for any word in Dm gives the total number of different
|
||||
sequences that contain a word and each of its relations.
|
||||
|
||||
Finally, from fuzzy dictionary Dm we can derive fuzzy
|
||||
dictionary Dh. All entries in Dh are zero except for the word(s),
|
||||
within each set of relations, that are most frequent. For example if
|
||||
TTTTTT occurred 20 times but had a relation that occurred more
|
||||
often, then the entry for TTTTTT would be zero. However if TTTTTT
|
||||
did not have a more frequently occurring relation, then the entry
|
||||
for TTTTTT would contain the value 20.
|
||||
@1. T 0 @Help
|
||||
|
||||
This option gives online help. The user should select option
|
||||
numbers and the current documentation will be given. Note that
|
||||
option 0 gives an introduction to the program, and that ? will get
|
||||
help from anywhere in the program. The following analyses (preceded
|
||||
by their option numbers) are included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
@2. T 0 @Quit
|
||||
|
||||
This function stops the program.
|
||||
@3. TX 1 @Read a new sequence
|
||||
|
||||
It can read sequences stored in either of two formats: 1) all
|
||||
sequences aligned in a single file; 2) all sequences in separate
|
||||
files and accessed through a file of file names. Typical dialogue
|
||||
follows:
|
||||
|
||||
X 1 Read file of aligned sequences
|
||||
2 Use file of file names
|
||||
? 0,1,2 =
|
||||
|
||||
? File of aligned sequences=F1
|
||||
Number of files 88
|
||||
|
||||
@4. TX 1 @Define active region
|
||||
|
||||
For its analytic functions the program always works on a
|
||||
region of the sequence called the active region. When new sequences
|
||||
are read into the program the active region is automatically set to
|
||||
start at the beginning of the sequences and go up to the end of the
|
||||
longest one.
|
||||
@5. TX 1 @List a sequence
|
||||
|
||||
The sequence can be listed with line lengths of 50 bases with
|
||||
each sequence numbered in the order in which they were read. Output
|
||||
can be directed to a disk file by first selecting disk output.
|
||||
Typical dialogue follows.
|
||||
|
||||
? Menu or option number=5
|
||||
|
||||
10 20 30 40 50
|
||||
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||
|
||||
60
|
||||
1 TACCCGTTTTT
|
||||
2 GCGTTTTTGT
|
||||
3 TCATACCATAAG
|
||||
4 TTTCATACC
|
||||
5 ATTGTGAGC
|
||||
6 TTCCGGCTCG
|
||||
7 GAAGAGAGT
|
||||
8 TCAGGTGT
|
||||
9 ATGAATG
|
||||
10 TAATTACG
|
||||
@6. TX 1 @List a text file
|
||||
|
||||
Allows the user to have a text file displayed on the screen.
|
||||
It will appear one page at a time.
|
||||
@7. TX 1 @Direct output to disk
|
||||
|
||||
Used to direct output that would normally appear on the screen
|
||||
to a file.
|
||||
|
||||
Select redirection of either text or graphics, and supply the
|
||||
name of the file that the output should be written to.
|
||||
|
||||
The results from the next options selected will not appear on
|
||||
the screen but will be written to the file. When option 7 is
|
||||
selected again the file will be closed and output will again appear
|
||||
on the screen.
|
||||
@10. TX 2 @Clear graphics
|
||||
|
||||
Clears the screen of both text and graphics.
|
||||
@11. TX 2 @Clear text
|
||||
|
||||
Clears only text from the screen.
|
||||
@12. TX 2 @Draw a ruler
|
||||
|
||||
This option allows the user to draw a ruler or scale along the
|
||||
x axis of the screen to help identify the coordinates of points of
|
||||
interest. The user can define the position of the first amino acid
|
||||
to be marked (for example if the active region is 1501 to 8000, the
|
||||
user might wish to mark every 1000th amino acid starting at either
|
||||
1501 or 2000 - it depends if the user wishes to treat the active
|
||||
region as an independent unit with its own numbering starting at its
|
||||
left edge, or as part of the whole sequence). The user can also
|
||||
define the separation of the ticks on the scale and their height. If
|
||||
required the labelling routine can be used to add numbers to the
|
||||
ticks.
|
||||
@13. TX 2 @Use crosshair
|
||||
|
||||
This function puts a steerable cross on the screen that can be
|
||||
used to find the coordinates of points in the sequence. The user can
|
||||
move the cross around using the directional keys; when he hits the
|
||||
space bar the program will print out the coordinates of the cross in
|
||||
sequence units and the option will be exited.
|
||||
|
||||
If instead, you hit a , the position will be displayed but the
|
||||
cross will remain on the screen.
|
||||
|
||||
If a letter s is hit the sequence around the cross hair is
|
||||
displayed and the cross remains on the screen.
|
||||
@14. TX 2 @Reposition plots
|
||||
|
||||
The positions of each of the plots is defined relative to a
|
||||
user's drawing board which has size 1-10,000 in x and 1-10,000 in y.
|
||||
Plots for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required. As
|
||||
all the plots start at the same position in x and have the same
|
||||
width, x0 and xlength are the same for all options. Generally users
|
||||
will only want to change the start level of the window y0 and its
|
||||
height ylength. This option allows users to change window positions
|
||||
whilst running the program. The routine prompts first for the
|
||||
number of the option that the user wishes to reposition; then for
|
||||
the y start and height; then for the x start and length. Note that
|
||||
changes to the x values affect all options. If the user types only
|
||||
carriage return for any value it will remain unchanged. The cross-
|
||||
hair can be used to choose suitable heights.
|
||||
@15. TX 2 @Label a diagram
|
||||
|
||||
This routine allows users to label any diagrams they have
|
||||
produced. They are asked to type in a label. When the user types
|
||||
carriage return to finish typing the label the cross-hair appears on
|
||||
the screen. The user can position it anywhere on the screen. If the
|
||||
user types R (for right justify) the label will be written on the
|
||||
diagram with its right end at the cross-hair position. If the user
|
||||
types L (for left justify) the label will be written on the diagram
|
||||
with its left end at the cross hair position. The cross-hair will
|
||||
then immediately reappear. The user may put the same label on
|
||||
another part of the diagram as before or if he hits the space bar he
|
||||
will be asked if he wishes to type in another label.
|
||||
@16. TX 2 @Display a map
|
||||
|
||||
It is often convenient to plot a map alongside graphed
|
||||
analysis in order to indicate features within the sequence. This
|
||||
function allows users to draw maps using files arranged in the form
|
||||
of EMBL feature tables. Of course the EMBL tables are usually only
|
||||
used for nucleic acid sequence annotation but, as long as the
|
||||
features are written in the correct format, they can be employed by
|
||||
this routine. The map is composed of a line representing the
|
||||
sequence and then further lines denoting the endpoints of each
|
||||
feature the user identifies. The user is asked to define height at
|
||||
which the line representing the sequence should be drawn; then for
|
||||
the feature height; then for the features to plot.
|
||||
@17. TX 1 @Search for strings
|
||||
|
||||
Search for strings performs searches of all the sequences for
|
||||
selected words and shows which sequences they are found in. The user
|
||||
types in a word and defines the allowed number of mismatches. The
|
||||
results are listed or plotted. If listed the display includes the
|
||||
sequence number, the position in the sequence and the matching
|
||||
string. The results are plotted in the following way. The x axis of
|
||||
the plot represents the length of the aligned sequences and the y
|
||||
direction is divided into sufficient strips to accommodate each
|
||||
sequence. So if a match is found in the 3rd sequence at a position
|
||||
equivalent to halfway along the longest of the sequences then a
|
||||
short vertical line will be drawn at the midpoint of the 3rd strip.
|
||||
If the sequences are aligned it can be useful if the motifs happen
|
||||
to appear in related positions. For example see the original
|
||||
publication. Typical dialogue follows.
|
||||
|
||||
? Menu or option number=17
|
||||
X 1 Plot match positions
|
||||
2 Plot histogram of matches
|
||||
? 0,1,2 =
|
||||
? Word to search for=TTGACA
|
||||
? Minimum match (0-6) (6) =5
|
||||
? (y/n) (y) Plot results N
|
||||
2 35 TAGACA
|
||||
5 14 TTTACA
|
||||
6 37 TTTACA
|
||||
11 14 TAGACA
|
||||
14 14 TTGACA
|
||||
17 14 GTGACA
|
||||
17 22 TTAACA
|
||||
20 1 TTGACA
|
||||
@18. TX 3 @Set strand
|
||||
|
||||
Set strand allows the user to define which strand(s) of the
|
||||
sequences to analyse: input strand, complement of input, or both.
|
||||
@19. TX 3 @Set composition
|
||||
|
||||
Set composition gives the user three choices for setting the
|
||||
composition of the sequences for use in the calculation of the
|
||||
information content of words. The user can select the overall
|
||||
composition of the sequences as read, an even composition, or can
|
||||
type in any other 4 values.
|
||||
@20. TX 3 @Set word length
|
||||
|
||||
Set word length sets the length of word for which dictionaries
|
||||
will be made.
|
||||
@21. TX 3 @Set number of mismatches
|
||||
|
||||
Set number of mismatches sets the level of fuzziness for the
|
||||
creation of dictionary Dm.
|
||||
@22. TX 3 @Show settings
|
||||
|
||||
Show settings shows the current settings for all parameters
|
||||
associated with dictionary analysis. A typical display follows:
|
||||
? Menu or option number=22
|
||||
Current word length = 6
|
||||
Number of mismatches = 1
|
||||
Start position = 1
|
||||
End position = 63
|
||||
Input strand only
|
||||
Observed composition
|
||||
Dictionary Dw unmade
|
||||
Dictionary Ds unmade
|
||||
Dictionary Dm unmade
|
||||
Dictionary Dh unmade
|
||||
@23. TX 3 @Make dictionary Dw
|
||||
|
||||
Make dictionary Dw creates a dictionary that contains a count
|
||||
of the frequency of occurrence of each word in the collected
|
||||
sequences.
|
||||
@24. TX 3 @Make dictionary Ds
|
||||
|
||||
Make dictionary Ds creates a dictionary that contains a count
|
||||
of the number of different sequences that contain each word.
|
||||
@25. TX 3 @Make dictionary Dm from Dw
|
||||
|
||||
Make dictionary Dm from Dw creates a dictionary from
|
||||
dictionary Dw that contains the frequency of occurrence of each word
|
||||
(say X) in Dw plus the frequency of occurrence of each word in Dw
|
||||
that differs from X by up to m letters. Dm is called a fuzzy
|
||||
dictionary as it contains the frequencies of occurrence of all words
|
||||
plus the frequencies of all the words that are similar to them.
|
||||
@26. TX 3 @Make dictionary Dm from Ds
|
||||
|
||||
Make dictionary Dm from Ds creates a dictionary from
|
||||
dictionary Ds that contains the frequency of occurrence of each word
|
||||
(say X) in Ds plus the frequency of occurrence of each word in Ds
|
||||
that differs from X by up to m letters. Dm is called a fuzzy
|
||||
dictionary as it contains the frequencies of occurrence of all words
|
||||
plus the frequencies of all the words that are similar to them.
|
||||
@27. TX 3 @Make dictionary Dh from Dm
|
||||
|
||||
Make dictionary Dh creates a dictionary from dictionary Dm
|
||||
and whose entries are zero except for those words in any set of
|
||||
related words that are most frequent. It finds the dominant words in
|
||||
each set of relations and stores their counts.
|
||||
@28. TX 3 @Examine fuzzy dictionary Dm
|
||||
|
||||
Examine dictionary Dm allows users to analyse the contents of
|
||||
dictionary Dm to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dm, and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=28
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAC 64 0.66460
|
||||
AAAAAA 90 0.64880
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTG 73 0.64070
|
||||
TTTTGT 63 0.63820
|
||||
TTTTTC 65 0.63810
|
||||
AAAATA 63 0.62670
|
||||
TATAAT 65 0.62510
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =2
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
AAAAAA 90 0.64880
|
||||
TTTTTG 73 0.64070
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTC 65 0.63810
|
||||
TATAAT 65 0.62510
|
||||
AAAAAC 64 0.66460
|
||||
TTTTGT 63 0.63820
|
||||
AAAATA 63 0.62670
|
||||
TTGACA 60 0.73850
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@29. TX 3 @Examine fuzzy dictionary Dh
|
||||
|
||||
Examine dictionary Dh allows users to analyse the contents of
|
||||
dictionary Dh to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dh and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=29
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.6
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 4 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
TTTTTT 115 0.60630
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =.5
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =
|
||||
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@30. TX 3 @Examine words in Dm
|
||||
|
||||
Examine words in Dm allows users to analyse the contents of
|
||||
dictionary Dm at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
? Menu or option number=30
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=
|
||||
|
||||
@31. TX 3 @Examine words in Dh
|
||||
|
||||
Examine words in Dh allows users to analyse the contents of
|
||||
dictionary Dh at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=31
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=GGGGGG
|
||||
gggggg 0 0.6199890
|
||||
3 1 1 2 3 4
|
||||
1 3 1 2 2 1
|
||||
2 1 1 1 1 1
|
||||
11 12 14 12 11 11
|
||||
GGGGGG
|
||||
? Word to examine=
|
||||
|
||||
@32. TX 3 @Save or restore a dictionary
|
||||
|
||||
Save or restore dictionary allows users to write or read any
|
||||
dictionary to and from disk files. The user is asked to define the
|
||||
dictionary and file. The function is useful if the machine being
|
||||
used is very slow at calculating because the files can be handled
|
||||
quickly. However note that the files cannot be processed by any
|
||||
other program.
|
||||
@33. TX 1 @Find inverted repeats
|
||||
|
||||
Find inverted repeats performs searches for simple inverted
|
||||
repeat sequences in each sequence. They are defined by a range of
|
||||
loop sizes and a minimum number of potential basepairs. The results
|
||||
can be plotted or listed. The x axis of the plot represents the
|
||||
length of the aligned sequences and the y direction is divided into
|
||||
sufficient strips to accommodate each sequence. So if an inverted
|
||||
repeat is found in the 3rd sequence at a position equivalent to
|
||||
halfway along the longest of the sequences then a short vertical
|
||||
line will be drawn at the midpoint of the 3rd strip. Alternatively,
|
||||
if the results are listed, the potential hairpin loops are drawn
|
||||
out, with the sequence number and the position of the loop. Typical
|
||||
dialogue follows.
|
||||
|
||||
? Menu or option number=33
|
||||
Define the range of loop sizes
|
||||
? Minimum loop size (0-10) (3) =0
|
||||
? Maximum loop size (1-20) (3) =
|
||||
? Minimum number of basepairs (1-20) (6) =
|
||||
? (y/n) (y) Plot results N
|
||||
Searching
|
||||
|
||||
Sequence 3 34
|
||||
C
|
||||
G.T
|
||||
T-A
|
||||
A-T
|
||||
T.G
|
||||
T.G
|
||||
G.T
|
||||
ATCTTT TATTTCA
|
||||
33
|
||||
|
||||
Sequence 5 35
|
||||
T
|
||||
G.T
|
||||
T.G
|
||||
A-T
|
||||
T.G
|
||||
G.T
|
||||
C-G
|
||||
T.G
|
||||
TCCGGC AATTGTG
|
||||
34
|
||||
@ End of help
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
-1 0 22 2 T General
|
||||
-1 0 22 2 X General
|
||||
-2 0 51 2 T Screen control
|
||||
-2 0 72 2 X Screen
|
||||
-3 0 106 2 T Dictionary analysis
|
||||
-3 0 106 2 X Dictionary analysis
|
||||
0 -1 124 185 T MEP
|
||||
0 -1 124 185 X MEP
|
||||
1 0 9423 38 T Help
|
||||
2 0 10594 3 T Quit
|
||||
3 1 10667 14 T Read a new sequence
|
||||
3 1 10667 14 X Read a new sequence
|
||||
4 1 11069 7 T Define active region
|
||||
4 1 11069 7 X Define active region
|
||||
5 1 11396 32 T List a sequence
|
||||
5 1 11396 32 X List a sequence
|
||||
6 1 12548 4 T List a text file
|
||||
6 1 12548 4 X List a text file
|
||||
7 1 12690 12 T Direct output to disk
|
||||
7 1 12690 12 X Direct output to disk
|
||||
10 2 13162 3 T Clear graphics
|
||||
10 2 13162 3 X Clear graphics
|
||||
11 2 13239 3 T Clear text
|
||||
11 2 13239 3 X Clear text
|
||||
12 2 13307 13 T Draw a ruler
|
||||
12 2 13307 13 X Draw a ruler
|
||||
13 2 14053 13 T Use crosshair
|
||||
13 2 14053 13 X Use crosshair
|
||||
14 2 14643 35 T Reposition plots
|
||||
14 2 14643 35 X Reposition plots
|
||||
15 2 16797 13 T Label a diagram
|
||||
15 2 16797 13 X Label a diagram
|
||||
16 2 17589 13 T Display a map
|
||||
16 2 17589 13 X Display a map
|
||||
17 1 18384 32 T Search for strings
|
||||
17 1 18384 32 X Search for strings
|
||||
18 3 19739 4 T Set strand
|
||||
18 3 19739 4 X Set strand
|
||||
19 3 19906 7 T Set composition
|
||||
19 3 19906 7 X Set composition
|
||||
20 3 20249 4 T Set word length
|
||||
20 3 20249 4 X Set word length
|
||||
21 3 20374 4 T Set number of mismatches
|
||||
21 3 20374 4 X Set number of mismatches
|
||||
22 3 20501 15 T Show settings
|
||||
22 3 20501 15 X Show settings
|
||||
23 3 20965 5 T Make dictionary Dw
|
||||
23 3 20965 5 X Make dictionary Dw
|
||||
24 3 21152 4 T Make dictionary Ds
|
||||
24 3 21152 4 X Make dictionary Ds
|
||||
25 3 21326 8 T Make dictionary Dm from Dw
|
||||
25 3 21326 8 X Make dictionary Dm from Dw
|
||||
26 3 21787 8 T Make dictionary Dm from Ds
|
||||
26 3 21787 8 X Make dictionary Dm from Ds
|
||||
27 3 22248 6 T Make dictionary Dh from Dm
|
||||
27 3 22248 6 X Make dictionary Dh from Dm
|
||||
28 3 22551 56 T Examine fuzzy dictionary Dm
|
||||
28 3 22551 56 X Examine fuzzy dictionary Dm
|
||||
29 3 24462 71 T Examine fuzzy dictionary Dh
|
||||
29 3 24462 71 X Examine fuzzy dictionary Dh
|
||||
30 3 26726 26 T Examine words in Dm
|
||||
30 3 26726 26 X Examine words in Dm
|
||||
31 3 27755 34 T Examine words in Dh
|
||||
31 3 27755 34 X Examine words in Dh
|
||||
32 3 29021 8 T Save or restore a dictionary
|
||||
32 3 29021 8 X Save or restore a dictionary
|
||||
33 1 29428 45 T Find inverted repeats
|
||||
33 1 29428 45 X Find inverted repeats
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,156 @@
|
|||
-1 0 22 2 T General
|
||||
-1 0 22 2 X General
|
||||
-2 0 51 2 T Screen control
|
||||
-2 0 72 2 X Screen
|
||||
-3 0 118 2 T Statistical analysis of content
|
||||
-3 0 143 2 X Statistics
|
||||
-4 0 180 2 T Structures and repeats
|
||||
-4 0 205 2 X Structures
|
||||
-5 0 242 2 T Translation and codons
|
||||
-5 0 242 2 X Translation and codons
|
||||
-6 0 279 2 T Gene search by content
|
||||
-6 0 279 2 X Gene search by content
|
||||
-7 0 309 2 T General signals
|
||||
-7 0 309 2 X General signals
|
||||
-8 0 340 2 T Specific signals
|
||||
-8 0 340 2 X Specific signals
|
||||
0 -1 359 16 T NIP
|
||||
0 -1 359 16 X NIP
|
||||
1 0 1155 7 T Help
|
||||
1 0 1155 7 X Help
|
||||
2 0 1469 3 T Quit
|
||||
2 0 1469 3 X Quit
|
||||
3 1 1543 220 T Read a new sequence
|
||||
3 1 1543 220 X Read a new sequence
|
||||
4 1 11372 15 T Define active region
|
||||
4 1 11372 15 X Define active region
|
||||
5 1 12100 24 T List a sequence
|
||||
5 1 12100 24 X List a sequence
|
||||
6 1 13103 6 T List a text file.
|
||||
6 1 13103 6 X List a text file.
|
||||
7 1 13300 12 T Direct output to disk
|
||||
7 1 13300 12 X Direct output to disk
|
||||
8 1 13785 10 T Write active region to disk
|
||||
8 1 13785 10 X Write active region to disk
|
||||
9 1 14128 31 T Edit the sequence
|
||||
9 1 14128 31 X Edit the sequence
|
||||
10 2 15970 3 T Clear graphics
|
||||
10 2 15970 3 X Clear graphics
|
||||
11 2 16036 3 T Clear text
|
||||
11 2 16036 3 X Clear text
|
||||
12 2 16101 12 T Draw a ruler
|
||||
12 2 16101 12 X Draw a ruler
|
||||
13 2 16833 13 T Use crosshair
|
||||
13 2 16833 13 X Use crosshair
|
||||
14 2 17443 35 T Reposition plots
|
||||
14 2 17443 35 X Reposition plots
|
||||
15 2 19598 28 T Label a diagram
|
||||
15 2 19598 28 X Label a diagram
|
||||
16 2 20703 34 T Display a map
|
||||
16 2 20703 34 X Display a map
|
||||
17 1 22073 599 T Search for restriction enzymes
|
||||
17 1 22073 599 X Search for restriction enzymes
|
||||
18 7 46675 105 T Compare a short sequence
|
||||
18 1 46675 105 T Compare a short sequence
|
||||
18 7 46675 105 X Compare a short sequence
|
||||
18 1 46675 105 X Compare a short sequence
|
||||
19 7 49650 106 T Compare a short sequence using a score matrix
|
||||
19 7 49650 106 X Compare a short sequence using a score matrix
|
||||
20 7 53349 230 T Search for a motif using a weight matrix
|
||||
20 7 53349 230 X Search for a motif using a weight matrix
|
||||
21 3 63267 4 T Count base composition
|
||||
21 3 63267 4 X Count base composition
|
||||
22 3 63440 14 T Count dinucleotide frequencies
|
||||
22 3 63440 14 X Count dinucleotide frequencies
|
||||
23 5 64100 179 T Count codons and amino acids
|
||||
23 3 64100 179 T Count codons and amino acids
|
||||
23 5 64100 179 X Count codons and amino acids
|
||||
23 3 64100 179 X Count codons and amino acids
|
||||
24 3 72137 57 T Plot base composition
|
||||
24 3 72137 57 X Plot base composition
|
||||
25 3 73213 23 T Plot local deviations in base composition
|
||||
25 3 73213 23 X Plot local deviations in base composition
|
||||
26 3 74495 23 T Plot local deviations from dinucleotide composition
|
||||
26 3 74495 23 X Plot local deviations from dinucleotide composition
|
||||
27 3 75793 23 T Plot local deviations from trinucleotide composition
|
||||
27 3 75793 23 X Plot local deviations from trinucleotide composition
|
||||
28 5 77065 18 T Calculate codon constraint
|
||||
28 5 77065 18 X Calculate codon constraint
|
||||
59 3 77869 12 T Plot negentropy
|
||||
59 3 77869 12 X Plot negentropy
|
||||
30 4 78454 74 T Search for hairpin loops
|
||||
30 4 78454 74 X Search for hairpin loops
|
||||
31 4 80321 23 T Search for long range inverted repeats
|
||||
31 4 80321 23 X Search for long range inverted repeats
|
||||
32 4 81157 37 T Search for repeats
|
||||
32 4 81157 37 X Search for repeats
|
||||
33 4 82467 12 T Search for z dna (total ry, yr)
|
||||
33 4 82467 12 X Search for z dna (total ry, yr)
|
||||
34 4 82984 12 T Search for z dna (runs of ry, yr)
|
||||
34 4 82984 12 X Search for z dna (runs of ry, yr)
|
||||
35 4 83623 15 T Search for z dna (best phased value)
|
||||
35 4 83623 15 X Search for z dna (best phased value)
|
||||
36 4 84350 92 T Local similarity or complementarity search
|
||||
36 4 84350 92 X Local similarity or complementarity search
|
||||
37 5 87778 39 T Set genetic code
|
||||
37 5 87778 39 X Set genetic code
|
||||
38 4 89050 74 T Examine repeats
|
||||
38 3 89050 74 T Examine repeats
|
||||
39 5 91670 286 T Translate and list in upto six phases
|
||||
39 5 91670 286 X Translate and list in upto six phases
|
||||
40 5 103780 134 T Translate and write the protein sequence to disk
|
||||
40 5 103780 134 X Translate and write the protein sequence to disk
|
||||
41 5 108198 71 T Calculate and write codon table to disk
|
||||
41 5 108198 71 X Calculate and write codon table to disk
|
||||
42 6 111525 132 T Codon usage method
|
||||
42 6 111525 132 X Codon usage method
|
||||
43 6 118508 182 T Positional base preference method.
|
||||
43 6 118508 182 X Positional base preference method.
|
||||
44 6 127924 39 T Uneven positional base frequencies.
|
||||
44 6 127924 39 X Uneven positional base frequencies.
|
||||
45 6 130287 33 T Codon improbability on base composition
|
||||
45 6 130287 33 X Codon improbability on base composition
|
||||
46 6 132146 28 T Codon improbability on amino acid composition
|
||||
46 6 132146 28 X Codon improbability on amino acid composition
|
||||
47 6 133744 14 T Shepherd RNY preference method
|
||||
47 6 133744 14 X Shepherd RNY preference method
|
||||
48 6 134410 30 T Ficketts method
|
||||
48 6 134410 30 X Ficketts method
|
||||
49 6 136094 139 T tRNA gene search.
|
||||
49 6 136094 139 X tRNA gene search.
|
||||
50 7 141894 4 T Plot start codons
|
||||
50 7 141894 4 X Plot start codons
|
||||
51 7 142027 4 T Plot stop codons
|
||||
51 7 142027 4 X Plot stop codons
|
||||
52 7 142188 4 T Plot stop codons on the complementary strand
|
||||
52 7 142188 4 X Plot stop codons on the complementary strand
|
||||
53 7 142365 4 T Plot stop codons on both strands
|
||||
53 7 142365 4 X Plot stop codons on both strands
|
||||
54 5 142536 45 T Search for longest open reading frames
|
||||
54 5 142536 45 X Search for longest open reading frames
|
||||
55 8 144437 67 T Search for E. coli promoter (general)
|
||||
55 8 144437 67 X Search for E. coli promoter (general)
|
||||
56 8 148004 4 T Search for E. coli promoter (general) strand
|
||||
56 8 148004 4 X Search for E. coli promoter (general) strand
|
||||
57 8 148210 4 T Search for E. coli promoter sequences. (-35 and -10)
|
||||
57 8 148210 4 X Search for E. coli promoter sequences. (-35 and -10)
|
||||
58 8 148405 44 T Search for procaryotic ribosome binding sites
|
||||
58 8 148405 44 X Search for procaryotic ribosome binding sites
|
||||
29 1 150862 4 T Reverse and complement the sequence
|
||||
29 1 150862 4 X Reverse and complement the sequence
|
||||
60 7 151001 142 T Search using a dinucleotide weight matrix
|
||||
60 7 151001 142 X Search using a dinucleotide weight matrix
|
||||
61 8 157292 31 T Search for eukaryotic ribosome binding sites
|
||||
61 8 157292 31 X Search for eukaryotic ribosome binding sites
|
||||
62 8 158730 56 T Search for splice junctions
|
||||
62 8 158730 56 X Search for splice junctions
|
||||
63 7 162089 7 T Search using a weight matrix (complementary)
|
||||
63 7 162089 7 X Search using a weight matrix (complementary)
|
||||
64 3 162471 36 T Plot observed-expected word frequencies
|
||||
64 3 162471 36 X Plot observed-expected word frequencies
|
||||
65 9 164175 5 T Search for polya sites
|
||||
65 9 164175 5 X Search for polya sites
|
||||
66 1 164369 4 T Interconvert t and u
|
||||
66 1 164369 4 X Interconvert t and u
|
||||
67 7 164520 797 T Search for patterns of motifs
|
||||
67 7 164520 797 X Search for patterns of motifs
|
|
@ -0,0 +1,132 @@
|
|||
|
||||
@-1. TX 0 @General
|
||||
|
||||
@-2. TX 0 @Screen control
|
||||
|
||||
@-3. TX 0 @Statistical analysis
|
||||
|
||||
@-1. TX 0 @General
|
||||
|
||||
@-2. TX 0 @Screen control
|
||||
|
||||
@-3. TX 0 @Statistical analysis
|
||||
|
||||
@0. TX -1 @NIPF
|
||||
|
||||
@1. TX 1 @ Help
|
||||
|
||||
@2. TX 1 @ Quit
|
||||
|
||||
@3. TX 1 @ Read new sequence
|
||||
|
||||
@4. TX 1 @ Redefine active region
|
||||
|
||||
@5. TX 1 @ List the sequence
|
||||
|
||||
@6. TX 1 @ List a text file
|
||||
|
||||
@7. TX 1 @ Direct output to disk
|
||||
|
||||
@8. TX 1 @ Write active sequence to disk
|
||||
|
||||
@9. TX 1 @ List a translation
|
||||
|
||||
@32. TX 1 @ List showing base differences
|
||||
|
||||
@37. TX 1 @ List showing translation
|
||||
|
||||
@33. TX 1 @ List showing amino acid differences
|
||||
|
||||
@10. TX 2 @ Clear graphics
|
||||
|
||||
@11. TX 2 @ Clear text
|
||||
|
||||
@12. TX 2 @ Draw a ruler
|
||||
|
||||
@13. TX 2 @ Use cross hair
|
||||
|
||||
@14. TX 2 @ Reset margins
|
||||
|
||||
@15. TX 2 @ Label diagram
|
||||
|
||||
@16. TX 2 @ Display a map
|
||||
|
||||
@17. TX 3 @ Set comparison mode
|
||||
|
||||
@18. TX 3 @ Set sort mode
|
||||
|
||||
@21. TX 3 @ Count base changes
|
||||
|
||||
@22. TX 3 @ Count codon changes
|
||||
|
||||
@23. TX 3 @ Count genetic events
|
||||
|
||||
@24. TX 3 @ Show table of base changes
|
||||
|
||||
@36. TX 3 @ Show table of expressed base changes
|
||||
|
||||
@39. TX 3 @ Show table of silent base changes
|
||||
|
||||
@38. TX 3 @ Estimate mutation rate
|
||||
|
||||
@25. TX 3 @ Plot base changes
|
||||
|
||||
@26. TX 3 @ Plot expressed changes per base
|
||||
|
||||
@27. TX 3 @ Plot silent changes per base
|
||||
|
||||
@28. TX 3 @ Count expressed changes per base
|
||||
|
||||
@29. TX 3 @ Count silent changes per base
|
||||
|
||||
@30. TX 3 @ Count changed amino acids
|
||||
|
||||
@31. TX 3 @ Plot amino acid variability
|
||||
|
||||
@ end of help
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
-1 0 23 2 T General
|
||||
-1 0 23 2 X General
|
||||
-2 0 53 2 T Screen control
|
||||
-2 0 53 2 X Screen control
|
||||
-3 0 89 2 T Statistical analysis
|
||||
-3 0 89 2 X Statistical analysis
|
||||
-1 0 112 2 T General
|
||||
-1 0 112 2 X General
|
||||
-2 0 142 2 T Screen control
|
||||
-2 0 142 2 X Screen control
|
||||
-3 0 178 2 T Statistical analysis
|
||||
-3 0 178 2 X Statistical analysis
|
||||
0 -1 198 2 T NIPF
|
||||
0 -1 198 2 X NIPF
|
||||
1 1 217 2 T Help
|
||||
1 1 217 2 X Help
|
||||
2 1 236 2 T Quit
|
||||
2 1 236 2 X Quit
|
||||
3 1 268 2 T Read new sequence
|
||||
3 1 268 2 X Read new sequence
|
||||
4 1 305 2 T Redefine active region
|
||||
4 1 305 2 X Redefine active region
|
||||
5 1 337 2 T List the sequence
|
||||
5 1 337 2 X List the sequence
|
||||
6 1 368 2 T List a text file
|
||||
6 1 368 2 X List a text file
|
||||
7 1 404 2 T Direct output to disk
|
||||
7 1 404 2 X Direct output to disk
|
||||
8 1 448 2 T Write active sequence to disk
|
||||
8 1 448 2 X Write active sequence to disk
|
||||
9 1 481 2 T List a translation
|
||||
9 1 481 2 X List a translation
|
||||
32 1 525 2 T List showing base differences
|
||||
32 1 525 2 X List showing base differences
|
||||
37 1 564 2 T List showing translation
|
||||
37 1 564 2 X List showing translation
|
||||
33 1 614 2 T List showing amino acid differences
|
||||
33 1 614 2 X List showing amino acid differences
|
||||
10 2 643 2 T Clear graphics
|
||||
10 2 643 2 X Clear graphics
|
||||
11 2 668 2 T Clear text
|
||||
11 2 668 2 X Clear text
|
||||
12 2 695 2 T Draw a ruler
|
||||
12 2 695 2 X Draw a ruler
|
||||
13 2 724 2 T Use cross hair
|
||||
13 2 724 2 X Use cross hair
|
||||
14 2 752 2 T Reset margins
|
||||
14 2 752 2 X Reset margins
|
||||
15 2 780 2 T Label diagram
|
||||
15 2 780 2 X Label diagram
|
||||
16 2 808 2 T Display a map
|
||||
16 2 808 2 X Display a map
|
||||
17 3 842 2 T Set comparison mode
|
||||
17 3 842 2 X Set comparison mode
|
||||
18 3 870 2 T Set sort mode
|
||||
18 3 870 2 X Set sort mode
|
||||
21 3 903 2 T Count base changes
|
||||
21 3 903 2 X Count base changes
|
||||
22 3 937 2 T Count codon changes
|
||||
22 3 937 2 X Count codon changes
|
||||
23 3 972 2 T Count genetic events
|
||||
23 3 972 2 X Count genetic events
|
||||
24 3 1013 2 T Show table of base changes
|
||||
24 3 1013 2 X Show table of base changes
|
||||
36 3 1064 2 T Show table of expressed base changes
|
||||
36 3 1064 2 X Show table of expressed base changes
|
||||
39 3 1112 2 T Show table of silent base changes
|
||||
39 3 1112 2 X Show table of silent base changes
|
||||
38 3 1149 2 T Estimate mutation rate
|
||||
38 3 1149 2 X Estimate mutation rate
|
||||
25 3 1181 2 T Plot base changes
|
||||
25 3 1181 2 X Plot base changes
|
||||
26 3 1227 2 T Plot expressed changes per base
|
||||
26 3 1227 2 X Plot expressed changes per base
|
||||
27 3 1270 2 T Plot silent changes per base
|
||||
27 3 1270 2 X Plot silent changes per base
|
||||
28 3 1317 2 T Count expressed changes per base
|
||||
28 3 1317 2 X Count expressed changes per base
|
||||
29 3 1361 2 T Count silent changes per base
|
||||
29 3 1361 2 X Count silent changes per base
|
||||
30 3 1401 2 T Count changed amino acids
|
||||
30 3 1401 2 X Count changed amino acids
|
||||
31 3 1443 2 T Plot amino acid variability
|
||||
31 3 1443 2 X Plot amino acid variability
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,80 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 117 2 T Statistical analysis of content
|
||||
-3 0 142 2 X Statistics
|
||||
-4 0 179 2 T Structures and repeats
|
||||
-4 0 204 2 X Structures
|
||||
-5 0 225 2 T Search
|
||||
-5 0 225 2 X Search
|
||||
0 -1 243 76 T PIP
|
||||
0 -1 243 76 X PIP
|
||||
1 0 3546 8 T Help
|
||||
1 0 3546 8 X Help
|
||||
2 0 3889 3 T Quit
|
||||
2 0 3889 3 X Quit
|
||||
3 1 3962 220 T Read a new sequence
|
||||
3 1 3962 220 X Read a new sequence
|
||||
4 1 13792 12 T Redefine active region
|
||||
4 1 13792 12 X Redefine active region
|
||||
5 1 14480 33 T List a sequence
|
||||
5 1 14480 33 X List a sequence
|
||||
6 1 15941 4 T List a text file
|
||||
6 1 15941 4 X List a text file
|
||||
7 1 16083 12 T Direct output to disk
|
||||
7 1 16083 12 X Direct output to disk
|
||||
8 1 16567 7 T Write active region to disk
|
||||
8 1 16567 7 X Write active region to disk
|
||||
9 1 16922 26 T Edit the sequence
|
||||
9 1 16922 26 X Edit the sequence
|
||||
10 2 18386 3 T Clear graphics
|
||||
10 2 18386 3 X Clear graphics
|
||||
11 2 18463 3 T Clear text
|
||||
11 2 18463 3 X Clear text
|
||||
12 2 18531 13 T Draw a ruler
|
||||
12 2 18531 13 X Draw a ruler
|
||||
13 2 19278 13 T Use cross hair
|
||||
13 2 19278 13 X Use cross hair
|
||||
14 2 19865 35 T Reset margins
|
||||
14 2 19865 35 X Reset margins
|
||||
15 2 22019 13 T Label a diagram
|
||||
15 2 22019 13 X Label a diagram
|
||||
16 2 22811 13 T Display a map
|
||||
16 2 22811 13 X Display a map
|
||||
17 5 23611 254 T Short sequence search
|
||||
17 1 23611 254 T Short sequence search
|
||||
17 5 23611 254 X Short sequence search
|
||||
17 1 23611 254 X Short sequence search
|
||||
18 5 34012 57 T Compare a sequence
|
||||
18 1 34012 57 T Compare a sequence
|
||||
18 5 34012 57 X Compare a sequence
|
||||
18 1 34012 57 X Compare a sequence
|
||||
19 5 35654 69 T Compare a sequence using a score matrix
|
||||
19 1 35654 69 T Compare a sequence using a score matrix
|
||||
19 5 35654 69 X Compare a sequence using a score matrix
|
||||
19 1 35654 69 X Compare a sequence using a score matrix
|
||||
20 5 37587 214 T Search for a motif using a weight matrix
|
||||
20 5 37587 214 X Search for a motif using a weight matrix
|
||||
21 3 46771 20 T Calculate amino acid composition
|
||||
21 3 46771 20 X Calculate amino acid composition
|
||||
22 4 47655 20 T Plot hydrophobicity
|
||||
22 3 47655 20 T Plot hydrophobicity
|
||||
22 4 47655 20 X Plot hydrophobicity
|
||||
22 3 47655 20 X Plot hydrophobicity
|
||||
23 4 48439 19 T Plot charge
|
||||
23 3 48439 19 T Plot charge
|
||||
23 4 48439 19 X Plot charge
|
||||
23 3 48439 19 X Plot charge
|
||||
24 4 48953 72 T Plot robson prediction
|
||||
24 4 48953 72 X Plot robson prediction
|
||||
26 4 51912 32 T Draw a helix wheel
|
||||
26 4 51912 32 X Draw a helix wheel
|
||||
25 4 53561 36 T Plot hydrophobic moment
|
||||
25 3 53561 36 T Plot hydrophobic moment
|
||||
25 4 53561 36 X Plot hydrophobic moment
|
||||
25 3 53561 36 X Plot hydrophobic moment
|
||||
27 1 55101 87 T Back translate to dna
|
||||
27 1 55101 87 X Back translate to dna
|
||||
28 5 59337 809 T Search for patterns of motifs
|
||||
28 5 59337 809 X Search for patterns of motifs
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,76 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 98 2 T Modification
|
||||
-3 0 98 2 X Modification
|
||||
0 -1 116 379 T SAP
|
||||
0 -1 116 379 X SAP
|
||||
17 1 19213 18 T Screen against restriction enzymes
|
||||
17 1 19213 18 X Screen against restriction enzymes
|
||||
18 1 20256 22 T Screen against vector
|
||||
18 1 20256 22 X Screen against vector
|
||||
20 2 21583 113 T Auto assemble
|
||||
20 2 21583 113 X Auto assemble
|
||||
28 1 27744 42 T Highlight disagreements
|
||||
28 1 27744 42 X Highlight disagreements
|
||||
32 3 30106 22 T Extract gel readings
|
||||
32 3 30106 22 X Extract gel readings
|
||||
1 0 31209 3 T Help
|
||||
1 0 31209 3 X Help
|
||||
2 0 31277 5 T Help
|
||||
2 0 31277 5 X Help
|
||||
3 1 31470 175 T Open a database
|
||||
3 1 31470 175 X Open a database
|
||||
4 3 40550 64 T Edit
|
||||
4 3 40550 64 X Edit
|
||||
9 3 43796 40 T Screen edit
|
||||
9 3 43796 40 X Screen edit
|
||||
5 1 45923 45 T Display a contig
|
||||
5 1 45923 45 X Display a contig
|
||||
6 1 48409 6 T List a text file
|
||||
6 1 48409 6 X List a text file
|
||||
8 1 48667 94 T Calculate a consensus
|
||||
8 1 48667 94 X Calculate a consensus
|
||||
25 1 53186 41 T Show relationships
|
||||
25 1 53186 41 X Show relationships
|
||||
21 3 55121 99 T Enter new gel reading
|
||||
21 3 55121 99 X Enter new gel reading
|
||||
23 3 60131 11 T Complement a contig
|
||||
23 3 60131 11 X Complement a contig
|
||||
22 3 60644 70 T Join contigs
|
||||
22 3 60644 70 X Join contigs
|
||||
24 1 64235 11 T Copy the database
|
||||
24 1 64235 11 X Copy the database
|
||||
19 1 64781 41 T Check database
|
||||
19 1 64781 41 X Check database
|
||||
29 1 66799 82 T Examine quality
|
||||
29 1 66799 82 X Examine quality
|
||||
26 3 70617 92 T Alter relationships
|
||||
26 3 70617 92 X Alter relationships
|
||||
27 1 75377 17 T Set display parameters
|
||||
27 1 75377 17 X Set display parameters
|
||||
30 3 76245 48 T Auto edit a contig
|
||||
30 3 76245 48 X Auto edit a contig
|
||||
10 2 78721 3 T Clear graphics
|
||||
10 2 78721 3 X Clear graphics
|
||||
11 2 78786 3 T Clear text
|
||||
11 2 78786 3 X Clear text
|
||||
12 2 78851 12 T Draw a ruler.
|
||||
12 2 78851 12 X Draw a ruler.
|
||||
14 2 79585 38 T Reposition plots
|
||||
14 2 79585 38 X Reposition plots
|
||||
15 2 81933 28 T Label a diagram
|
||||
15 2 81933 28 X Label a diagram
|
||||
16 2 83039 27 T Display a map.
|
||||
16 2 83039 27 X Display a map.
|
||||
7 1 84014 12 T Redirect output
|
||||
7 1 84014 12 X Redirect output
|
||||
13 2 84485 41 T Use crosshair
|
||||
13 2 84485 41 X Use crosshair
|
||||
33 2 86611 11 T Plot single contig
|
||||
33 2 86611 11 X Plot single contig
|
||||
34 2 87312 9 T Plot all contigs
|
||||
34 2 87312 9 X Plot all contigs
|
||||
31 3 87884 9 T Type in gel readings
|
||||
31 3 87884 9 X Type in gel readings
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,78 @@
|
|||
-1 0 22 2 T General
|
||||
-1 0 22 2 X General
|
||||
-2 0 51 2 T Screen control
|
||||
-2 0 72 2 X Screen
|
||||
-3 0 101 2 T Set parameters
|
||||
-3 0 101 2 X Set parameters
|
||||
-4 0 126 2 T Comparison
|
||||
-4 0 126 2 X Comparison
|
||||
0 -1 144 208 T SIP
|
||||
0 -1 144 208 X SIP
|
||||
1 0 12690 39 T Help
|
||||
1 0 12690 39 X Help
|
||||
2 0 13755 3 T Quit
|
||||
2 0 13755 3 X Quit
|
||||
3 1 13828 220 T Read a new sequence
|
||||
3 1 13828 220 X Read a new sequence
|
||||
4 1 23656 10 T Define active region
|
||||
4 1 23656 10 X Define active region
|
||||
5 1 24191 16 T List a sequence
|
||||
5 1 24191 16 X List a sequence
|
||||
6 1 25001 4 T List a text file
|
||||
6 1 25001 4 X List a text file
|
||||
7 1 25143 12 T Direct output to disk
|
||||
7 1 25143 12 X Direct output to disk
|
||||
8 1 25627 4 T Write active region to disk
|
||||
8 1 25627 4 X Write active region to disk
|
||||
9 1 25764 5 T Edit the sequences
|
||||
9 1 25764 5 X Edit the sequences
|
||||
10 2 25944 3 T Clear graphics
|
||||
10 2 25944 3 X Clear graphics
|
||||
11 2 26021 3 T Clear text
|
||||
11 2 26021 3 X Clear text
|
||||
12 2 26089 15 T Draw a ruler
|
||||
12 2 26089 15 X Draw a ruler
|
||||
13 2 26869 54 T Use cross hair
|
||||
13 2 26869 54 X Use cross hair
|
||||
14 2 28754 29 T Reposition plots
|
||||
14 2 28754 29 X Reposition plots
|
||||
15 2 30429 13 T Label a diagram
|
||||
15 2 30429 13 X Label a diagram
|
||||
16 2 31213 7 T Display a map
|
||||
16 2 31213 7 X Display a map
|
||||
17 4 31596 19 T Apply identities algorithm
|
||||
17 4 31596 19 X Apply identities algorithm
|
||||
18 4 32260 81 T Apply proportional algorithm
|
||||
18 4 32260 81 X Apply proportional algorithm
|
||||
19 4 36686 42 T List matching spans
|
||||
19 4 36686 42 X List matching spans
|
||||
20 3 37569 16 T Set span length
|
||||
20 3 37569 16 X Set span length
|
||||
21 3 38560 13 T Set proportional score
|
||||
21 3 38560 13 X Set proportional score
|
||||
22 3 39251 6 T Set identities score
|
||||
22 3 39251 6 X Set identities score
|
||||
23 3 39544 79 T Calculate expected scores
|
||||
23 3 39544 79 X Calculate expected scores
|
||||
24 3 43148 90 T Calculate observed scores
|
||||
24 3 43148 90 X Calculate observed scores
|
||||
25 3 46152 26 T Show current parameter settings
|
||||
25 3 46152 26 X Show current parameter settings
|
||||
27 2 46802 5 T Draw a /
|
||||
27 2 46802 5 X Draw a /
|
||||
26 4 46991 57 T Quick scan
|
||||
26 4 46991 57 X Quick scan
|
||||
28 4 49883 90 T Align sequences
|
||||
28 4 49883 90 X Align sequences
|
||||
29 1 55133 4 T Complement the sequences
|
||||
29 1 55133 4 X Complement the sequences
|
||||
30 3 55256 9 T Switch main diagonal
|
||||
30 3 55256 9 X Switch main diagonal
|
||||
31 3 55755 8 T Switch identities
|
||||
31 3 55755 8 X Switch identities
|
||||
32 3 56202 17 T change score matrix
|
||||
32 3 56202 17 X change score matrix
|
||||
33 3 56884 16 T Set number of sd's for Quickscan
|
||||
33 3 56884 16 X Set number of sd's for Quickscan
|
||||
34 3 57767 13 T Set gap penalities
|
||||
34 3 57767 13 X Set gap penalities
|
|
@ -0,0 +1,132 @@
|
|||
|
||||
Preparing the PROSITE protein motif library for use by
|
||||
the Staden programs
|
||||
|
||||
Introduction
|
||||
|
||||
A library of protein motifs (in our terminology, because
|
||||
they include variable gaps, some would be called patterns) has
|
||||
recently become available from Amos Bairoch, Departement de
|
||||
Biochimie Medicale, University of Geneva. Currently it contains 317
|
||||
patterns/motifs and arrives on tape or cdrom in two files: a .dat
|
||||
file and a .doc file. There is also a user documentation file
|
||||
prosite.usr. Here I outline what is required to prepare the
|
||||
PROSITE library for use by our programs.
|
||||
|
||||
Three programs need to be run SPLITP1, SPLITP2, and
|
||||
SPLITP3.
|
||||
|
||||
Outline of the PROSITE files
|
||||
|
||||
A typical entry in the .dat file is shown below.
|
||||
|
||||
ID 2FE2S_FERREDOXIN; PATTERN.
|
||||
AC PS00197;
|
||||
DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
|
||||
DE 2Fe-2S ferredoxins, iron-sulfur binding region signature.
|
||||
PA C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
|
||||
NR /RELEASE=14,15409;
|
||||
NR /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
|
||||
NR /FALSE_NEG=5(5);
|
||||
CC /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
|
||||
CC /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
|
||||
DR P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
|
||||
DR P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
|
||||
DR P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
|
||||
DO PDOC00175;
|
||||
//
|
||||
|
||||
Each entry has an accession number (here PS00197), a
|
||||
pattern definition (here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a
|
||||
documentation file cross reference (here PDOC00175). This
|
||||
pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of
|
||||
STA, not P, C.
|
||||
|
||||
We need to convert all of these patterns into our pattern
|
||||
definitions (as membership of a set, with the appropriate gap
|
||||
ranges) and write each into a separate pattern file with
|
||||
corresponding "membership of a set" weight matrices. Each pattern
|
||||
file is named accession_number.pat (here PS00197.PAT). The
|
||||
corresponding matrix files are accession_number.wtsa,
|
||||
accession_number.wtsb, etc for however many are needed (here
|
||||
PS00197.WTSA and PS00197.WTSB): two are needed because of the
|
||||
variable gap.
|
||||
|
||||
In addition we can optionally split the .dat and .doc
|
||||
files into separate files, one for each entry, with names
|
||||
accession_number.dat and accession_number.doc. Also we create an
|
||||
index for the library prosite.lis, which gives a one line
|
||||
description of each pattern, and ends with the pattern file and
|
||||
documentation file numbers. The start of the file is shown below.
|
||||
|
||||
N-glycosylation site. 00001,00001
|
||||
Glycosaminoglycan attachment site. 00002,00002
|
||||
Tyrosine sulfatation site. 00003,00003
|
||||
cAMP- and cGMP-dependent protein kinase phosphorylation site. 00004,00004
|
||||
|
||||
So the name of the pattern file for Glycosaminoglycan attachment
|
||||
site is PS00002.PAT, and for the documentation file PDOC00002.DOC
|
||||
|
||||
Finally we create a file of file names for all the
|
||||
patterns in the library.
|
||||
|
||||
To use the complete PROSITE library from program pip,
|
||||
select "pattern searcher" and choose the option "use file of
|
||||
pattern file names", and give the file name prosite.nam. For any
|
||||
matches found, the accession number and pattern title will be
|
||||
displayed.
|
||||
|
||||
Running the conversion programs
|
||||
|
||||
Only SPLITP3 is necessary for using the library. The
|
||||
other programs only make the original files marginally easier to
|
||||
browse through and produce an index.
|
||||
|
||||
SPLITP1 splits the prosite.dat file to create a separate
|
||||
file for each entry. Each file is automatically named
|
||||
PSentry_number.dat. In addition it creates an index for the
|
||||
library (see above).
|
||||
|
||||
SPLITP2 performs the same operation for the Prosite.doc
|
||||
file, except that no index is created. Files are named
|
||||
PSentry_number.doc.
|
||||
|
||||
SPLITP3 creates a separate pattern file and weight matrix
|
||||
files for each prosite entry from the file prosite.dat. Pattern
|
||||
files are named PSentry_number.pat, weight matrix files
|
||||
PSentry_number.wtsa, PSentry_number.wtsb, etc. The pattern title
|
||||
is the one line description of the motif. SPLITP3 also creates a
|
||||
file of file names. Notice that it will ask for a path name so
|
||||
that the path can be included in the file of file names. This is
|
||||
the path to the directory in which the pattern files are stored.
|
||||
|
||||
Notes
|
||||
|
||||
Obviously the use of files of file names is a general
|
||||
solution, and anybody could now create their own set of
|
||||
interesting patterns for screening, or a subset of prosite.nam,
|
||||
etc.
|
||||
|
||||
Note that 5 of the Bairoch motifs contained the symbols >
|
||||
or < which means that the motifs must appear exactly at the N or
|
||||
C termini of the sequences. Currently our methods have no
|
||||
mechanism for such definitions and, for example KDEL motifs, will
|
||||
be permitted to occur anywhere throughout a sequence.
|
||||
|
||||
Also, of course, the library does not have to be used
|
||||
solely for performing mass screenings: each individual entry can
|
||||
be used as a single pattern by giving the name of its .pat file -
|
||||
e.g. pathname/ps00002.pat. In addition more sophisticated users will
|
||||
wish to copy pattern files and weight matrices into their own
|
||||
directories and modify them. For example the cutoff scores are
|
||||
probably chosen to be quite high in order to reduce the number of
|
||||
false positives, and some users might wish to lower them.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
|
||||
References with further information about the methods
|
||||
|
||||
Staden, R. Nucl. Acid Res. 8, 817-825 (1980)
|
||||
A computer program to search for tRNA genes. (NIP)
|
||||
Staden, R. Nucl. Acid Res. 8, 3673-3694 (1980)
|
||||
A new computer method for the storage and manipulation
|
||||
of DNA gel reading data. (SAP).
|
||||
Staden, R. Nucl. Acid Res. 10, 2951-2961 (1982)
|
||||
An interactive graphics program for comparing and
|
||||
aligning nucleic acid and amino acid sequences.
|
||||
(SIP).
|
||||
Staden, R. Nucl. Acid Res. 10, 4731-4751 (1982)
|
||||
Automation of the computer handling of gel reading data
|
||||
produced by the shotgun method of DNA sequencing.(SAP)
|
||||
Staden, R. and McLachlan, A.,D. Nucl. Acid Res. 10
|
||||
141-156 (1982)
|
||||
Codon preference and its use in identifying protein
|
||||
coding regions in long DNA sequences. (NIP)
|
||||
Staden, R. Nucl. Acid Res. 12, 499-503 (1984)
|
||||
A computer program to enter DNA gel reading data into a
|
||||
computer. (GIP)
|
||||
Staden, R. Nucl. Acid Res. 12, 551-567 (1984)
|
||||
Measurements of the effects that coding for a protein
|
||||
has on a DNA sequence and their use for finding
|
||||
genes. (NIP: positional base preferences, uneven
|
||||
positional base frequencies)
|
||||
Staden, R. Nucl. Acid Res. 12, 505-519 (1984)
|
||||
Computer methods to locate signals in nucleic acid
|
||||
sequences. NIP: promoters, ribosome binding
|
||||
sites, intron/exon junctions.
|
||||
McLachlan A D, Staden R and Boswell D R, Nucl. Acid Res.
|
||||
12, 9567-9575 (1984)
|
||||
Measure of strength of codon preference. (NIP)
|
||||
Staden R, Computer methods to locate genes and signals in
|
||||
nucleic acid sequences, Genetic Engineering: Principles
|
||||
and Methods Vol. 7, Edited by J. K. Setlow and A.
|
||||
Hollaender, Plenum Publishing Corp. 1985. (NIP)
|
||||
Staden R Nucl. Acid. Res. 14, 217-231 (1986)
|
||||
The current status and portability of our sequence
|
||||
handling software. Summary for May 1985.
|
||||
Staden R "Computer Handling of DNA sequencing projects" in
|
||||
Nucleic acid and protein sequence analysis, A practical
|
||||
approach, 173-217. Edited by M.J.Bishop and C.J.Rawlings,
|
||||
IRL press (1987). (SAP)
|
||||
Staden R, Methods to define and locate patterns of motifs in
|
||||
sequences. CABIOS 4 53-60 (1988). (NIP, PIP,
|
||||
NIPL, PIPL)
|
||||
Staden R, Methods for calculating the probabilities of finding
|
||||
patterns in sequences. CABIOS 5 89-96 (1989). (NIP, PIP,
|
||||
NIPL, PIPL)
|
||||
Staden R, "Methods for discovering novel motifs in nucleic acid
|
||||
sequences". CABIOS 5, 293-298, (1989). (MEP)
|
||||
Staden R, Methods to search for patterns in protein and nucleic
|
||||
acid sequences. In Doolittle, R,R (ed), Methods in
|
||||
Enzymology, 183, Academic Press, San Diego, CA, 193-211.
|
||||
(1990) (NIP, NIPL, PIP, PIPL)
|
||||
Staden R, Finding protein coding regions in genomic sequences.
|
||||
In Doolittle, R,R (ed), Methods in Enzymology, 183,
|
||||
Academic Press, San Diego, CA, 163-180. (1990) (NIP)
|
||||
Gleeson T J and Staden R, An X windows and UNIX implementation
|
||||
of our sequence analysis package. CABIOS 7 398 (1991)
|
||||
Staden R, Screening protein and nucleic acid sequences against
|
||||
libraries of patterns. DNA Sequence, in press (NIP, PIP,
|
||||
SPLITP1, SPLITP2, SPLITP3, PROSITE)
|
||||
Dear S and Staden R, A sequence assembly and editing program for
|
||||
efficient management of large projects. Nucleic Acids
|
||||
Research 19 3907-3911 (1991) (XDAP)
|
||||
Staden R and Dear S, Indexing the sequence libraries: Software
|
||||
providing a common indexing system for all the standard
|
||||
sequence libraries. DNA Sequence 3, 99-105 (1992).
|
||||
Dear S and Staden R, A standard file format for data from DNA
|
||||
sequencing instruments. DNA Sequence 3, 107-110 (1992)
|
||||
Gleeson T and Hillier L, A trace display and editing program
|
||||
for data from fluorescence based sequencing machines.
|
||||
Nucleic Acids Research 19 6481-6483 (1991) (TED)
|
||||
Staden R, Staden package update. Genome News 13 12-13 (1993)
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
|
||||
Introduction to the Staden sequence analysis package and its
|
||||
user interface
|
||||
|
||||
The package contains the following programs:
|
||||
|
||||
GIP Gel input program
|
||||
SAP  Sequence assembly program
|
||||
NIP Nucleotide interpretation program
|
||||
PIP Protein interpretation program
|
||||
SIP Similarity investigation program
|
||||
MEP Motif exploration program
|
||||
NIPL Nucleotide interpretation program (library)
|
||||
PIPL Protein interpretation program (library)
|
||||
SIPL Similarity investigation program (library)
|
||||
|
||||
GIP uses a digitiser for entry of DNA sequences from
|
||||
autoradiographs.
|
||||
SAP handles everything relating to assembling gel readings in order
|
||||
to produce a consensus sequence. It can also deal with families of
|
||||
protein sequences.
|
||||
NIP provides functions for analysing and interpreting individual
|
||||
nucleotide sequences.
|
||||
PIP provides functions for analysing and interpreting individual
|
||||
protein sequences.
|
||||
MEP analyses families of nucleotide sequences to help discover new
|
||||
motifs.
|
||||
NIPL performs pattern searches on nucleotide sequence libraries.
|
||||
PIPL performs pattern searches on protein sequence libraries.
|
||||
SIP provides functions for comparing and aligning pairs of protein
|
||||
or nucleotide sequences.
|
||||
SIPL searches nucleotide and protein sequence libraries for entries
|
||||
similar to probe sequences.
|
||||
|
||||
|
||||
Documentation
|
||||
|
||||
As is explained below, the programs SAP, NIP, PIP, SIP and MEP
|
||||
have online help, and the help files have the names: HELPSAP,
|
||||
HELPNIP, HELPPIP, HELPSIP, HELPMEP. These files can be displayed on
|
||||
the screen or printed using the appropriate commands. Currently the
|
||||
help for the other programs is also contained in these files. For
|
||||
example help for NIPL is in HELPNIP. This file is called HELPSTADEN.
|
||||
|
||||
Sequence formats
|
||||
|
||||
The shotgun sequencing program SAP deals only with simple text
|
||||
files for gel readings, and is a self-contained system. However as
|
||||
there is still no single agreed format for finished sequences or for
|
||||
libraries of sequences, the other programs in the package can read
|
||||
data that is stored in several ways.
|
||||
|
||||
The analytical programs can read individual sequences stored
|
||||
in the following formats: Staden, EMBL, Genbank, PIR (also known as
|
||||
NBRF), and GCG, but for storing whole libraries we use only PIR
|
||||
format. In addition these programs can perform a number of simple
|
||||
operations using libraries stored in this format. They can extract
|
||||
entries by entry name, can search titles for keywords, can search
|
||||
the whole of the annotation files for keywords, and can extract
|
||||
annotations for any named entry. We reformat all sequence libraries
|
||||
into PIR format. Currently we have NBRF, EMBL, SWISSPROT and VECBASE
|
||||
libraries in PIR format.
|
||||
|
||||
The library searching programs operate only on sequences
|
||||
stored in PIR format.
|
||||
|
||||
The analytical programs will operate with uppercase or
|
||||
lowercase sequence characters. In addition T and U are equivalent.
|
||||
SAP uses uppercase letters for original gel readings and lowercase
|
||||
letters for characters that are corrected by the automatic editor.
|
||||
Programs NIP and PIP use IUB symbols for redundancy in back
|
||||
translations and for sequence searches. The symbols are shown
|
||||
below.
|
||||
|
||||
|
||||
NC-IUB SYMBOLS
|
||||
|
||||
A,C,G,T
|
||||
R (A,G) 'puRine'
|
||||
Y (T,C) 'pYrimidine'
|
||||
W (A,T) 'Weak'
|
||||
S (C,G) 'Strong'
|
||||
M (A,C) 'aMino'
|
||||
K (G,T) 'Keto'
|
||||
H (A,T,C) 'not G'
|
||||
B (G,C,T) 'not A'
|
||||
V (G,A,C) 'not T'
|
||||
D (G,A,T) 'not C'
|
||||
N (G,A,C,T) 'aNy'
|
||||
|
||||
|
||||
The user interface
|
||||
|
||||
The user interface is common to all programs. It consists of a
|
||||
set of menus and a uniform way of presenting choices and obtaining
|
||||
input from the user. This section describes: the menu system; how
|
||||
options are selected and other choices made; how values are
|
||||
supplied to the program; how help is obtained, and how to escape
|
||||
from any part of a program. In addition it gives information about
|
||||
saving results in files and the use of graphics for presenting
|
||||
results.
|
||||
|
||||
Menus
|
||||
|
||||
Each program has several menus and numerous options. Each menu
|
||||
or option has a unique number that is used to identify it. Menu
|
||||
numbers are distinguished from option numbers by being preceded by
|
||||
the letter m (or M, all programs make no distinction between upper
|
||||
and lower case letters). With the exception of some parts of program
|
||||
SAP, the menus are not hierarchical, rather the options they each
|
||||
contain are simply lists of related functions and their identifying
|
||||
numbers. Therefore options can be selected independently of the menu
|
||||
that is currently being shown on the screen, and the menus are
|
||||
simply memory aids. All options and menus are selected by typing
|
||||
their option number when the programs present the prompt
|
||||
|
||||
"? Menu or option number =".
|
||||
|
||||
To select a menu type its number preceded by the letter M. To
|
||||
select an option type its number. If you type only "return" you
|
||||
will get menu m0 which is simply a list of menus. If you select an
|
||||
option you will return to the current menu after the function is
|
||||
completed.
|
||||
|
||||
When you select an option, in many cases the program will
|
||||
immediately perform the operation selected without further dialogue.
|
||||
If you precede an option number by the letter d (e.g. D17), you will
|
||||
force the program to offer dialogue about the selected option before
|
||||
the function operates, hence allowing you to change the value of any
|
||||
of its parameters. If you precede an option number by the symbol ?
|
||||
(e.g. ?17), you will be given help on the option (here 17).
|
||||
|
||||
Where possible, equivalent or identical options have been
|
||||
given the same numbers in all programs, and so users quickly learn
|
||||
the numbers for the functions they employ most often.
|
||||
|
||||
Help
|
||||
|
||||
As mentioned above, help about each option can be obtained by
|
||||
preceding the option number by the symbol ? when you are presented
|
||||
with the prompt "? Menu or option number", but there are two further
|
||||
ways of obtaining help. Whenever the program asks a question you can
|
||||
respond by typing the symbol ? and you will receive information
|
||||
about the current option. In addition, option number 1 in all the
|
||||
programs will give help on all of a program's functions.
|
||||
|
||||
Quitting
|
||||
|
||||
To exit from any point in a program you type ! for quit. If a
|
||||
menu is on the screen this will stop the program, otherwise you will
|
||||
be returned to the last menu.
|
||||
|
||||
Other interactions
|
||||
|
||||
Questions are presented in a few restricted ways. In all
|
||||
cases typing only "return" in response to a question means yes, and
|
||||
typing N or n means no.
|
||||
|
||||
Obvious opposites such as "clear screen" and "keep picture"
|
||||
are presented with only the default shown. For example in this case
|
||||
the default is generally "keep picture" so the program will display:
|
||||
|
||||
"(y/n) (y) Keep picture"
|
||||
|
||||
and the picture will be retained if the user types anything
|
||||
other than N or n, (in which case the screen will be cleared).
|
||||
|
||||
Where there are choices that are not obvious opposites, or
|
||||
there are more than two choices, two further conventions are used:
|
||||
"radio buttons" and "check boxes".
|
||||
|
||||
Radio buttons are used when only one of a number of choices
|
||||
can be made at any one time. The choices are presented arranged one
|
||||
above the other, each choice with a number for its selection, and
|
||||
the default choice marked with an X. For example in the restriction
|
||||
enzyme search routine the following choices are offered:
|
||||
|
||||
|
||||
Select output mode
|
||||
1 order results enzyme by enzyme
|
||||
2 order results by positon
|
||||
X 3 show only infrequent cutters
|
||||
4 show names above the sequence
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
Standard Staden Programs
|
||||
|
||||
gip Gel input program
|
||||
sap Sequence assembly program
|
||||
(x)dap Sequence assembly program
|
||||
(x)nip Nucleotide interpretation program
|
||||
(x)pip Protein interpretation program
|
||||
(x)sip Similarity investigation program
|
||||
(x)mep Motif exploration program
|
||||
nipl Nucleotide interpretation program (library)
|
||||
pipl Protein interpretation program (library)
|
||||
sipl Similarity investigation program (library)
|
||||
Those with (x) have both tektronix (say nip) and x (say xnip) versions.
|
||||
Environment variables for help files
|
||||
HELPSAP sap
|
||||
HELPDAP dap
|
||||
HELPGIP gip
|
||||
HELPNIP nip
|
||||
HELPPIP pip
|
||||
HELPSIP sip
|
||||
HELPMEP mep
|
||||
HELPSTADEN Introduction and user interface
|
||||
e.g. to read HELPSTADEN type 'more $HELPSTADEN'
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
Trace Editor Help
|
||||
-----------------
|
||||
|
||||
The ted trace editor is a prototype to allow the display and editing
|
||||
of traces from sequencing machines, and the simple editing of plain
|
||||
sequences. It runs under the X window system. It provides simultaneous
|
||||
display of traces and bases. The editing allows individual bases to be
|
||||
removed and new ones added, and also a range of bases at either end to
|
||||
be cut off. Currently, only ABI result files and plain sequences are
|
||||
accepted.
|
||||
|
||||
Only one trace can be edited at a time.
|
||||
|
||||
|
||||
Invocation
|
||||
----------
|
||||
|
||||
ted can be run from the command line by simply typing:
|
||||
|
||||
ted
|
||||
|
||||
It will come up with no sequence initially displayed. If provided with
|
||||
any arguments it does not understand, or invalid combinations of
|
||||
arguments, ted will exit with a message indicating its intended usage.
|
||||
ted accepts the standard X arguments allowing, for example, background
|
||||
colour or geometry to be specified. ted can accept an argument
|
||||
specifying an initial file to display. The key for this is the format
|
||||
of the file, for example:
|
||||
|
||||
ted -ABI {ABI format filename}
|
||||
ted -plain {plain format filename}
|
||||
|
||||
The file is then displayed at 50% magnification, with the caret
|
||||
initially positioned at the first base.
|
||||
|
||||
When an initial file is given, a base number of interest and/or a
|
||||
magnification can also be given, for example:
|
||||
|
||||
ted -ABI {ABI format file} -baseNum 280 -mag 30
|
||||
|
||||
or the bottom strand may be specified:
|
||||
|
||||
ted -ABI {ABI format file} -baseNum 280 -mag 30 -bottom 1
|
||||
or
|
||||
ted -ABI {ABI format file} -bottom 1
|
||||
|
||||
or a string of nucleotides on which to center the window:
|
||||
|
||||
ted -ABI {ABI format file} -astring 1
|
||||
or
|
||||
ted -ABI {ABI format file} -astring 1 -mag 30 -bottom 1
|
||||
|
||||
Options can be specified in any order.
|
||||
|
||||
An output filename can be specified in a similar manner:
|
||||
|
||||
ted -ABI inputfilename -output outputfilename
|
||||
|
||||
The default output filename is inputfilename.seq
|
||||
|
||||
If you are running the program on a remote machine, you must
|
||||
specify a display parameter:
|
||||
|
||||
ted -display machine_name:0.2
|
||||
|
||||
You can also specify the size of the opening window or
|
||||
other screen parameters by the following:
|
||||
|
||||
ted -geometry [{width}][x{height}][{+-}{xoff}[{+-}{yoff}]]
|
||||
[-fg {color}] [-bg {color}] [-bd {color}] [-bw {pixels}]
|
||||
|
||||
Displays
|
||||
--------
|
||||
|
||||
When running, ted displays the name of the file it is currently
|
||||
operating on (if any) and the original number of bases.
|
||||
|
||||
A so-called viewport presents four different synchronised views of
|
||||
part of the trace. The top one indicates the sequence indices - the
|
||||
first digit of the number is positioned over the base to which that
|
||||
number corresponds. Below this is a list of the bases as originally
|
||||
found in the file (this is the interpretation of the trace as made by
|
||||
the sequencing machine). Below this is the list of bases as edited by
|
||||
the user --- initially, if this file has not been edited in the past,
|
||||
this is identical to the list of original bases. However, if in a
|
||||
previous session the user has edited this sequence, the edited
|
||||
version of the sequence will appear in the edit window.
|
||||
The final display is of the traces produced by the sequencing
|
||||
machine for the four respective bases.
|
||||
|
||||
Two controls allow the view presented to be adjusted: both are
|
||||
horizontal sliders or scrollbars. The first affects the magnification
|
||||
at which the trace is viewed. The minimum magnification is such that
|
||||
the whole of the trace is visible within the viewport; when a trace is
|
||||
first input, this is the magnification used. The maximum magnification
|
||||
is such that bases are spaced out with several characters of space
|
||||
between them --- this should allow more than enough room for base
|
||||
insertions to be clearly visible. The second scrollbar is immediately
|
||||
above the viewport and allows the user to select which part of the
|
||||
trace is viewed. Both the sliders work in a similar way: the middle
|
||||
mouse button can be used to drag the thumb to any desired position,
|
||||
the left and right mouse buttons can be clicked within the scrollbar
|
||||
to indicate that paging up or down is desired. In the case of the
|
||||
viewport scrollbar, the amount of paging is determined by how far up
|
||||
the scrollbar the pointer is.
|
||||
|
||||
The whole ted window can be expanded and contracted (to an extent) by
|
||||
dragging the "grow-region" provided by whatever window manager is
|
||||
running. The viewport takes up all of this change in size.
|
||||
|
||||
Controls
|
||||
--------
|
||||
|
||||
ted has four buttons. "Quit" exits the program after first checking
|
||||
whether there is a sequence which has been edited and not saved.
|
||||
|
||||
"Help" pops up this window which has a scrollbar on the left allowing
|
||||
all the text to be viewed.
|
||||
|
||||
"Input" presents a dialogue which asks for the format and name of a
|
||||
file to be processed. The bases and (if this is not a plain format
|
||||
file) traces are read in and displayed for editing. The only
|
||||
conversion performed on bases is from 'N' to '-'.
|
||||
|
||||
"Output" presents a dialogue which asks for a filename into which the
|
||||
edited and clipped bases can be saved. The default value can be set
|
||||
on the command line using the "-output" keyword. No conversion of bases
|
||||
is performed on output.
|
||||
|
||||
ted operates in one of three editing modes, one of which is selected
|
||||
from three "radio buttons". The currently selected mode is
|
||||
highlighted.
|
||||
|
||||
Editing
|
||||
-------
|
||||
|
||||
In "Edit sequence" mode, the (lower) list of editable bases can be
|
||||
edited in much the same way as a text editor operates. A "caret" which
|
||||
is visible in the display of edited bases can be moved left and right
|
||||
with the cursor keys (these are sometimes called arrow keys and often
|
||||
appear on numeric keypads). It can also be positioned by clicking any
|
||||
button while the pointer is pointing into either of the list of bases
|
||||
or the traces. The DELETE key deletes the base immediately to the left
|
||||
of the caret. Any printing character can be inserted to the right of
|
||||
the caret by simply typing it. Inserted characters are placed halfway
|
||||
between their neighbours, or if a space is left by the deletion of a
|
||||
base originally there, its position is used. A base can thus be
|
||||
changed by deleting it and entering the new base.
|
||||
|
||||
Note that in the current version of ted the caret is not constrained
|
||||
to remain within the viewed part of the display and that editing can
|
||||
still continue while it is thus invisible. Such editing would probably
|
||||
only occur by accident.
|
||||
|
||||
ted provides a facility to define a cutoff at either end of the trace.
|
||||
A number of the leftmost bases (corresponding to the vector) and the
|
||||
rightmost bases (corresponding to the point where the data become
|
||||
unreliable) can be defined by setting the editor into "Adjust left
|
||||
cutoff" or "Adjust right cutoff" mode. In either of these modes, the
|
||||
pointer and mouse buttons can be used to indicate the cutoff point,
|
||||
and the cursor keys can be used to adjust this leftwards or
|
||||
rightwards. Initially, the cutoff regions are both empty. The cutoff
|
||||
regions are clearly indicated on the list of edited bases display and
|
||||
on the traces display by being drawn with a dimmed background.
|
||||
|
||||
When the sequence is written out, the list of edited bases, with both
|
||||
cutoff regions removed, is written. The output contains newlines
|
||||
for convenient formatting and always ends with one.
|
|
@ -0,0 +1,102 @@
|
|||
.TH staden 1L "November 1991" "MRC LMB" "LOCAL"
|
||||
.SH NAME
|
||||
staden, xstaden \- sequence analysis suite
|
||||
.SH DESCRIPTION
|
||||
.I staden
|
||||
is a suite of programs for sequence analysis. Currently available are
|
||||
.I mep,
|
||||
.I nip,
|
||||
.I pip,
|
||||
.I sap,
|
||||
.I sip,
|
||||
.I nipl,
|
||||
.I pipl,
|
||||
.I and sipl.
|
||||
These all run under the SUN X11
|
||||
.I xterm
|
||||
Tektronix terminal emulator, but also work with the VT640 terminal
|
||||
and the VersaTermPro and MS-Kermit emulators if they login to a SUN.
|
||||
.PP
|
||||
.I xstaden
|
||||
is the same set of programs, named
|
||||
.I xmep,
|
||||
.I xnip,
|
||||
.I xpip,
|
||||
.I xsap,
|
||||
.I xdap,
|
||||
and
|
||||
.I xsip,
|
||||
which run directly under X providing a convenient user interface,
|
||||
including resizable output and pull-down menus. All these programs
|
||||
accept the standard X arguments. The library searching programs
|
||||
nipl, pipl and sipl are only available in xterm form.
|
||||
.PP
|
||||
Sequence library access is provided for the format as distributed
|
||||
on CDROM by EMBL. The CDROM contains the EMBL nucleotide library and
|
||||
the SWISSPROT protein library. The libraries can be left on the
|
||||
CDROM or transferred to hard disk.
|
||||
.PP
|
||||
The programs also provide an interface to the PROSITE protein motif
|
||||
library.
|
||||
.PP
|
||||
Some initialisation is required in order to use the package. csh users
|
||||
should insert the following in their .login files:
|
||||
.IP
|
||||
setenv STADENROOT /home/BioSW/staden
|
||||
.IP
|
||||
source $STADENROOT/staden.login
|
||||
.LP
|
||||
Users of the Bourne shell, sh, should insert the following in
|
||||
their .profile:
|
||||
.IP
|
||||
STADENROOT=/home/BioSW/staden
|
||||
.IP
|
||||
export STADENROOT
|
||||
.IP
|
||||
. $STADENROOT/staden.profile
|
||||
.LP
|
||||
These initialisations will alter your shell's search path so
|
||||
that it can find the program binaries, and other files that are
|
||||
required.
|
||||
.SH ENVIRONMENT
|
||||
The following environment variables may be set in the
|
||||
user's \fI .login\fP or \fI .profile\fP file:
|
||||
.TP 20
|
||||
.BI STADENROOT= /home/BioSW/staden
|
||||
This must be set in the user's initialisation.
|
||||
.TP 20
|
||||
.BI SEQEDT= editor
|
||||
Set the editor to be used by the package. The default is
|
||||
\fIemacs\fP.
|
||||
.SH FILES
|
||||
.PD 0
|
||||
.TP 30
|
||||
$STADENROOT/staden.login
|
||||
csh initialisation
|
||||
.TP 30
|
||||
$STADENROOT/staden.profile
|
||||
sh initialisation
|
||||
.TP 30
|
||||
$STADENROOT/tables
|
||||
Tables used by the programs
|
||||
.TP 30
|
||||
$STADENROOT/help
|
||||
Helpfiles used by the programs, documentation of the user interface
|
||||
and of each of the programs.
|
||||
.TP 30
|
||||
$STADENROOT/tables/SEQUENCELIBRARIES
|
||||
Defines the sequence libraries available, their file descriptors
|
||||
and the prompts to appear on the user's screen.
|
||||
.SH AUTHOR
|
||||
Rodger Staden, MRC Laboratory of Molecular Biology, Hills Rd., Cambridge,
|
||||
CB2 2QH, UK.
|
||||
.SH BUGS
|
||||
.PP
|
||||
When using the xterm programs and in graphics input mode,
|
||||
a carriage return should not be
|
||||
entered on its own but should be preceded by some other character,
|
||||
such as SPACE, COMMA or K. If a carriage return is entered on its
|
||||
own, some garbage will (relatively) harmlessly appear on the plot.
|
||||
.PP
|
||||
General comments on the package can be sent to
|
||||
\fI<rs@uk.ac.cam.mrc-lmb>\fP
|
|
@ -0,0 +1,107 @@
|
|||
.TH ted 1L "July 1991" "MRC LMB" "LOCAL"
|
||||
.SH NAME
|
||||
ted \- trace editor
|
||||
.SH SYNOPSIS
|
||||
.B ted
|
||||
[(
|
||||
.B -ABI\||\|-ALF\||\|-plain
|
||||
)
|
||||
.I tracefilename
|
||||
[
|
||||
.B -baseNum
|
||||
.I number
|
||||
]
|
||||
.B [
|
||||
.B -mag
|
||||
.I number
|
||||
( 1 to 100 )
|
||||
]
|
||||
.B [
|
||||
.B -bottom
|
||||
.I number
|
||||
(1(true) or 0(false))
|
||||
.B ]
|
||||
.B [
|
||||
.B -astring
|
||||
.I nucleotide-string
|
||||
]]
|
||||
.B [
|
||||
.B -enzyme
|
||||
.I 5' cutting sequence
|
||||
]
|
||||
.B [
|
||||
.B -raw
|
||||
.I filename
|
||||
(to be placed at head of xdap compatible .seq file)
|
||||
.B ]
|
||||
[
|
||||
.B -output
|
||||
.I outputfilename
|
||||
]
|
||||
|
||||
.SH DESCRIPTION
|
||||
.B ted
|
||||
is a simple prototype editor for traces produced from automatic
|
||||
sequencing machines. It allows the traces (from the ABI
|
||||
or ALF sequencing machines) produced to be
|
||||
displayed along with the machine's interpretation of these into
|
||||
bases and an initially identical sequence which can be edited
|
||||
by the user. A cutoff region can be defined at both ends. The
|
||||
edited and clipped list of bases can then be written out.
|
||||
.LP
|
||||
When initially run,
|
||||
.B ted
|
||||
displays the trace file
|
||||
.I tracefilename
|
||||
(if given) of the specified format centered on the base number
|
||||
.I baseNum
|
||||
(if given). If no file is provided,
|
||||
.B ted
|
||||
initially displays nothing.
|
||||
.LP
|
||||
The display consists of
|
||||
the control panel and the synchronized view of the base position
|
||||
information, original and edited sequence data,
|
||||
and graphical representation of the trace (with each nucleotide's trace
|
||||
being represented by a different color). The control
|
||||
panel allows the user to read in new trace files (in either
|
||||
bottom or top strand orientation)
|
||||
as well as to search for a string of nucleotides or a certain base position.
|
||||
The information button brings up signal strength and average spacing for
|
||||
ABI files.
|
||||
Scroll bars allow the user to adjust the magnification of or scroll through
|
||||
the sequence and trace data. The user may also choose to change the vertical
|
||||
magnification of the trace data. Further, sequence on the head (vector)
|
||||
or tail (uncertain data) of the sequence may be ``cutoff''
|
||||
using the adjust left and right cutoff buttons. Bases can be inserted,
|
||||
deleted, or replaced as with
|
||||
any ordinary word-processor in the sequence data window. Finally, the
|
||||
sequence may be written to an ascii file using the output button on
|
||||
the control panel. The output filename is specified in a dialogue,
|
||||
but a default value of inputfilename.seq is provided or the default value
|
||||
can be given with the
|
||||
.I outputfilename
|
||||
argument.
|
||||
.LP
|
||||
A simple help system is provided.
|
||||
.SH FILES
|
||||
.PD 0
|
||||
.TP 20
|
||||
.B ted.help
|
||||
Text provided in the help window.
|
||||
.TP
|
||||
.B /usr/lib/X11/app-defaults/Xted
|
||||
Default application resources.
|
||||
.SH ENVIRONMENT
|
||||
.TP 20
|
||||
.SB XFILESEARCHPATH
|
||||
Specifies the locations where
|
||||
.B ted.help
|
||||
is sought.
|
||||
If this is not defined,
|
||||
.B ted.help
|
||||
must be in the
|
||||
.B /usr/lib/X11/app-defaults
|
||||
directory.
|
||||
.SH AUTHORS
|
||||
Tim Gleeson, LaDeana Hillier, Simon Dear.
|
|
@ -0,0 +1,7 @@
|
|||
Miscellaneous Routines Simon Dear, 14 April 1992
|
||||
---------------------------------------------------------------
|
||||
|
||||
The source modules in this directory are for commonly used
|
||||
routines. The archive misc.a should be made before any
|
||||
other programs supplied on this tape.
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
#include "misc.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h> /* varargs needed for v*printf() prototypes */
|
||||
|
||||
/*
** Report a fatal error and terminate the program.
**
** format and the arguments that follow it are interpreted as for
** printf() and written to stderr; the process then exits with
** status 1, so this function never returns to its caller.
*/
void crash (char* format,...)
{
    va_list ap;

    va_start(ap, format);
    (void) vfprintf(stderr, format, ap);
    va_end(ap);

    exit(1);
}
|
|
@ -0,0 +1,14 @@
|
|||
#include "misc.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/******************************************************************************/
|
||||
/*
|
||||
** Time and date calculations
|
||||
*/
|
||||
#include <time.h>
|
||||
/*
** Return the current date and time as text.
**
** The result is whatever ctime() produces for the present time, so
** the caller must not free it and should copy it if it needs to be
** kept (ctime() typically reuses one internal buffer).
*/
char *date_str()
{
    time_t now = time(NULL);

    return ctime(&now);
}
|
|
@ -0,0 +1,39 @@
|
|||
#include "misc.h"
|
||||
#include <string.h>
|
||||
|
||||
/*
** Return file part (:t) of directory path.
**
** fn is a NUL-terminated path name.  The result points into fn,
** just past its last '/'; if fn contains no '/' the result is fn
** itself, and if fn ends in '/' the result is the empty string at
** the end of fn.  Nothing is copied or modified.
*/
char *fn_tail(char *fn)
{
    char *slash;

    /*
    ** strrchr() finds the last separator without ever forming a
    ** pointer before the start of fn, which a backwards scan does
    ** for "" or for a name with no '/' in it.
    */
    slash = strrchr(fn, '/');

    return (slash != NULL) ? slash + 1 : fn;
}
|
||||
|
||||
|
||||
/*
** Convert the file part of a path name to upper case, in place.
** The directory part (everything up to and including the last '/')
** is left untouched.
*/
void fn_toupper (char *s)
{
    char *tail = fn_tail(s);

    str_toupper(tail);
}
|
||||
|
||||
|
||||
|
||||
/*
** Convert the file part of a path name to lower case, in place.
** The directory part (everything up to and including the last '/')
** is left untouched.
*/
void fn_tolower (char *s)
{
    char *tail = fn_tail(s);

    str_tolower(tail);
}
|
|
@ -0,0 +1,41 @@
|
|||
#include "misc.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
/* Alliant's Concentrix <sys/stat.h> is hugely deficient */
|
||||
/* Define things we require in this program */
|
||||
/* Methinks S_IFMT and S_IFDIR aren't defined in POSIX */
|
||||
/* Fallback: true when mode m has the directory type bits (S_IFDIR under the S_IFMT mask). */
/* Only defined here when <sys/stat.h> has not already supplied S_ISDIR. */
#ifndef S_ISDIR
|
||||
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
|
||||
#endif /*!S_ISDIR*/
|
||||
/* Fallback: true when mode m has the regular-file type bits (S_IFREG under the S_IFMT mask). */
/* Only defined here when <sys/stat.h> has not already supplied S_ISREG. */
#ifndef S_ISREG
|
||||
#define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
|
||||
#endif /*!S_ISREG*/
|
||||
|
||||
/*
** Test whether fn names a directory.
** Returns non-zero if stat() succeeds and reports a directory,
** and 0 otherwise (including when fn cannot be stat'ed at all).
*/
int is_directory(char * fn)
{
    struct stat info;

    if (stat(fn, &info) != 0)
        return 0;

    return S_ISDIR(info.st_mode);
}
|
||||
|
||||
/*
** Test whether fn names a regular file.
** Returns non-zero if stat() succeeds and reports a regular file,
** and 0 otherwise (including when fn cannot be stat'ed at all).
*/
int is_file(char * fn)
{
    struct stat info;

    if (stat(fn, &info) != 0)
        return 0;

    return S_ISREG(info.st_mode);
}
|
||||
|
||||
/*
** Test whether fn names anything that stat() can see.
** Returns 1 if stat() succeeds on fn, whatever its type, else 0.
*/
int file_exists(char * fn)
{
    struct stat info;
    int status;

    status = stat(fn, &info);

    return status == 0;
}
|
||||
|
||||
/*
** Return the size of fn as reported by stat() in st_size,
** or 0 if fn cannot be stat'ed.
** NOTE(review): the result is narrowed to int, so sizes that do not
** fit in an int are not represented faithfully.
*/
int file_size(char * fn)
{
    struct stat info;

    return (stat(fn, &info) == 0) ? info.st_size : 0;
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
#include "misc.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
** Locate file, either as given or under one of the directories in a
** colon-separated search path.
**
**   file        name to look for
**   searchpath  directories separated by ':' (may be NULL); empty
**               components are ignored
**   found       predicate called with a candidate path name; a
**               non-zero result accepts that candidate
**
** file itself is tried first, then "<dir>/<file>" for each directory
** in searchpath, in order.  The accepted name is returned in a static
** buffer which is overwritten by the next call; NULL is returned when
** no candidate is accepted.  Candidates that would not fit in the
** buffer are skipped rather than being allowed to overrun it.
*/
char *myfind(char *file, char* searchpath, int (*found) (char *) )
{
    static char wholePath[1024];
    size_t fileLen;
    size_t dirLen;
    char *dir;

    fileLen = strlen(file);

    if (found(file)) {
        if (fileLen >= sizeof(wholePath))
            return NULL;        /* cannot be returned in the buffer */
        memcpy(wholePath, file, fileLen + 1);
        return wholePath;
    }

    if (searchpath == NULL || fileLen >= sizeof(wholePath))
        return NULL;

    /*
    ** Walk the path in place: no private copy to allocate, and no
    ** strtok() state for the found() callback to disturb.
    */
    dir = searchpath;
    while (*dir != '\0') {
        dirLen = strcspn(dir, ":");

        /* need room for dir, '/', file and the terminating NUL */
        if (dirLen > 0 &&
            dirLen < sizeof(wholePath) &&
            dirLen + 1 + fileLen < sizeof(wholePath)) {
            memcpy(wholePath, dir, dirLen);
            wholePath[dirLen] = '/';
            memcpy(wholePath + dirLen + 1, file, fileLen + 1);
            if (found(wholePath))
                return wholePath;
        }

        dir += dirLen;
        if (*dir == ':')
            dir++;
    }

    return NULL;
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue