init
This commit is contained in:
commit
f8b7bfff1b
949 changed files with 253751 additions and 0 deletions
296
README.txt
Normal file
296
README.txt
Normal file
|
@ -0,0 +1,296 @@
|
|||
General Information
|
||||
(Not for the faint hearted)
|
||||
|
||||
30 September 1992
|
||||
|
||||
|
||||
0. Introduction
|
||||
---------------
|
||||
|
||||
This document contains information on the following subjects:
|
||||
|
||||
1. Installing the Staden Package on SPARCstations and DECstations
|
||||
2. Installing the Staden Package on Other Machines
|
||||
3. A Quick Guide to What's on the Release Tape
|
||||
4. Overview of Data Flow During Sequence Assembly
|
||||
5. Acknowledgements
|
||||
|
||||
|
||||
|
||||
1. Installing the Staden Package on SPARCstations and DECstations
|
||||
-----------------------------------------------------------------
|
||||
|
||||
We are endeavouring to make the installation of the Staden Package as
|
||||
quick and as easy as possible. In this current release we provide
|
||||
statically linked sparc and mips executables as well as all sources.
|
||||
|
||||
To install the package:
|
||||
|
||||
1) Create a new directory for the software. You may have to log on as
|
||||
superuser to do this.
|
||||
|
||||
% mkdir -p /home/BioSW/staden
|
||||
|
||||
2) Place the distribution tape in the drive and download the package:
|
||||
|
||||
-sun-
|
||||
% tar xvf /dev/rst0
|
||||
...system messages...
|
||||
|
||||
-dec-
|
||||
% tar xvf /dev/rmt0h
|
||||
...system messages...
|
||||
|
||||
3) Users of the C Shell should add the following to their .login
|
||||
file:
|
||||
|
||||
setenv STADENROOT /home/BioSW/staden
|
||||
source $STADENROOT/staden.login
|
||||
|
||||
Users of the Bourne shell should add the following to their .profile
|
||||
file:
|
||||
|
||||
STADENROOT=/home/BioSW/staden
|
||||
export STADENROOT
|
||||
. $STADENROOT/staden.profile
|
||||
|
||||
|
||||
4) When the user next logs onto the work station the required
|
||||
initialisation will automatically be performed, and the programs in
|
||||
the Staden package can be run. Refer to the help/*.MEM files for
|
||||
information on the various programs. (eg help on xdap is in
|
||||
help/DAP.MEM)
|
||||
|
||||
|
||||
2. Installing the Staden Package on Other Machines
|
||||
--------------------------------------------------
|
||||
|
||||
This is a little more difficult as you will need to remake all the
|
||||
executables. Your system configuration may also mean that some changes
|
||||
will need to be made, though hopefully only to makefiles. We provide
|
||||
a script to aid installation (we hope!), but you may prefer to make
|
||||
all the components manually.
|
||||
|
||||
To remake the Staden package you will require the following:
|
||||
1) A Fortran77 compiler
|
||||
2) An ANSI C compiler
|
||||
3) X11 Release 4, including the Athena Widget libraries.
|
||||
|
||||
Start by following steps 1 through 3 above, to unload the sources and
|
||||
perform initialisations. Read the rest of this document and the other
|
||||
help files. Look at the make files. Follow your nose!
|
||||
|
||||
If you have any problems or successes porting our software to other
|
||||
platforms we would love to hear from you. We would also appreciate
|
||||
receiving your general comments on the package.
|
||||
|
||||
Rodger Staden (principal author)
|
||||
phone: +44 223 402389 email: rs@mrc-lmba.cam.ac.uk
|
||||
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||
Simon Dear:
|
||||
phone: +44 223 402266 email: sd@mrc-lmba.cam.ac.uk
|
||||
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||
James Bonfield:
|
||||
phone: +44 223 402499 email: jkb@mrc-lmba.cam.ac.uk
|
||||
post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
|
||||
|
||||
|
||||
|
||||
3. A Quick Guide to What's on the Release Tape
|
||||
----------------------------------------------
|
||||
|
||||
The directory structure on this tape is very important. Once set up, the Staden
|
||||
package expects things to be in a predefined place. The root directory
|
||||
of the structure is referred to by the environment variable
|
||||
STADENROOT. Below this there should be at least the following:
|
||||
|
||||
1) bin/
|
||||
All executable files and scripts should be in this directory.
|
||||
$STADENROOT/bin is added to the search path by the script staden.login
|
||||
(or staden.profile if you are using the Bourne Shell). Though you are
|
||||
not forced to keep programs here, we find it is the simplest place to
|
||||
keep them.
|
||||
|
||||
2) help/
|
||||
All on-line help files are in this directory. Files of the form *.MEM
|
||||
or *.mem are formatted ascii files and can be printed for personal
|
||||
reference. The script staden.login sets up many environment variables
|
||||
that refer to files in this directory, as well as modifying
|
||||
XFILESEARCHPATH, which is used by X programs.
|
||||
|
||||
3) manl/
|
||||
Local manual pages for ted and the staden package are in this directory. The
|
||||
environment variable MANPATH is modified in staden.login to search
|
||||
here too.
|
||||
|
||||
4) staden.login and staden.profile
|
||||
These two files are scripts to set up environment variables required
|
||||
by the Staden package. C Shell users should source staden.login from
|
||||
their .login file, and Bourne Shell users should "source" staden.profile
|
||||
from their .profile file. See "Installing the Staden Package on
|
||||
SPARCstations and DECstations", Part 3.
|
||||
|
||||
5) tables/
|
||||
Configuration files for the Staden package are in this directory.
|
||||
Various environment variables are set in staden.login to refer to
|
||||
files in this directory.
|
||||
|
||||
Also of use are the following:
|
||||
|
||||
doc/ - Miscellaneous documentation.
|
||||
userdata/ - Sample databases
|
||||
src/ - program sources
|
||||
ReleaseNotes - Notes on this and future releases
|
||||
Staden_install - Installation script
|
||||
SequenceLibraries - Notes on the use and installation of sequence libraries
|
||||
|
||||
|
||||
Program Sources
|
||||
---------------
|
||||
|
||||
All the program sources are found in the directories in $STADENROOT/src:
|
||||
|
||||
0) Misc/
|
||||
Sources for a library of useful routines used by the staden package.
|
||||
** Should be made before the programs in staden/ **
|
||||
|
||||
1) staden/
|
||||
Sources for the Staden suite: mep, xmep, nip, xnip, nipl, pip, xpip,
|
||||
pipl, sap (now superseded by dap), xsap (now superseded by xdap), sip,
|
||||
xsip, sipl, dap, xdap, splitp1, splitp2, splitp3, gip and convert_project.
|
||||
|
||||
2) ted/
|
||||
Sources for the trace display and sequence editing program ted.
|
||||
|
||||
3) abi/
|
||||
Sample scripts and programs for handling ABI 373A data files.
|
||||
|
||||
4) alf/
|
||||
Sample scripts and programs for handling Pharmacia A.L.F. data files.
|
||||
|
||||
Each directory has appropriate makefiles and README files.
|
||||
|
||||
|
||||
|
||||
4. Overview of Data Flow During Sequence Assembly
|
||||
-------------------------------------------------
|
||||
|
||||
During a sequence assembly project the data can enter the sequence
|
||||
assembly program from various routes (See Figure below).
|
||||
|
||||
|
||||
|
||||
Fluorescent Based
|
||||
Sequencing Machine
|
||||
Chromatogram Autoradiogram
|
||||
|
||||
ABI 373A Pharmacia A.L.F. |
|
||||
| | |
|
||||
| | |
|
||||
| alfsplit |
|
||||
| | |
|
||||
+--------+--------+ |
|
||||
| |
|
||||
| |
|
||||
ted (gip)
|
||||
| |
|
||||
+----------------+----------------+
|
||||
|
|
||||
|
|
||||
xdap
|
||||
|
||||
|
||||
Figure 1: Data Flow Through The Staden Suite
|
||||
|
||||
|
||||
The Pharmacia A.L.F. data files in their original format consist of
|
||||
one file for the (up to 10) samples that were on the gel. The program
|
||||
alfsplit divides the file up so that each sample is in a file of
|
||||
its own. From then on each gel reading can be handled individually.
|
||||
Whether these files can be transferred back to the Compaq for
|
||||
reprocessing is unknown.
|
||||
|
||||
All data from fluorescent based sequencing machines must pass through
|
||||
the trace editing program ted. Ted allows vector sequence at the
|
||||
5' end and unreliable data at the 3' end to be clipped. The sequence
|
||||
can be edited if desired, though we should stress that this is NOT
|
||||
RECOMMENDED when used in conjunction with xdap. Ted translates all
|
||||
Pharmacia A.L.F. uncertainty codes to a hyphen ("-") and outputs the
|
||||
clipped sequence, along with additional information on the position
|
||||
and content of cutoffs, to a file.
|
||||
|
||||
People wanting to use xdap with ABI and Pharmacia files, but who have
|
||||
written their own trace clipping software should be aware that xdap
|
||||
requires information to be passed in the sequence file so that
|
||||
traces can be displayed. You may want to modify your software to be
|
||||
compatible with our file format. The file consists of five parts:
|
||||
|
||||
1) Cut off information (Optional).
|
||||
Format is ";%6d%6d%6d%-4s%-16s", where
|
||||
field 1 = total number of bases called
|
||||
2 = number of bases in the clipped sequence at the 5' end
|
||||
3 = number of bases in the sequence in this file
|
||||
4 = type of trace file.
|
||||
"ALF " - Pharmacia A.L.F.
|
||||
"ABI " - ABI 373A
|
||||
"SCF " - SCF
|
||||
"PLN " - Text only
|
||||
5 = name of trace file.
|
||||
|
||||
2) Content of the clipped sequence at the 5' end (Optional).
|
||||
The sequence can extend over several lines. Each line must
|
||||
begin with ";<" and should be less than 80 characters in
|
||||
length.
|
||||
|
||||
3) Content of the clipped sequence at the 3' end (Optional).
|
||||
The sequence can extend over several lines. Each line must
|
||||
begin with ";>" and should be less than 80 characters in
|
||||
length.
|
||||
|
||||
4) Initial tags for the sequence (Optional)
|
||||
Format is: ";;%4s %6d %6d %s\n", where
|
||||
field 1 = type of tag to be created (see $STADTABL/TAGDB)
|
||||
2 = position of tag
|
||||
3 = length of tag
|
||||
4 = annotation for tag (optional)
|
||||
This feature is only available in the program xbap, which
|
||||
at the time of writing is not yet being distributed with
|
||||
the package.
|
||||
|
||||
5) The sequence, which can extend over several lines. Each
|
||||
line should be less than 80 characters in length.
|
||||
|
||||
Here is a sample file:
|
||||
|
||||
; 660 55 450ABI a21d12.s1RES
|
||||
;<AGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCGGTTCCTTCTGG
|
||||
;<ATATC
|
||||
;>-GATAAGCTGATTTG-TTT-CCATTATGGC-GGTTTGAGCCTC-G-GGTC
|
||||
;>GACCACTCGGTGTGCCAGGAAGGGGTCTGAAATTGAATGGGTTATCACTA
|
||||
;>GGCGACGTTT--TTTTCAAATTCCGGGCTAAATTTTACGGC-GGA-CGGT
|
||||
;>TCCG-
|
||||
;;COMM 1 10 M13mp18 subclone
|
||||
CAAGACATTTTGAAATACTTGGAATACTGAATCCAAGATGTGGAACATTA
|
||||
GACATATCCGTGTGCTCAACAATCGACATTTGATCCACTGATGAAAATGT
|
||||
TCTTCGTTTAGAATTTCTCATAGCATCAGCCACTTTTGCATAATACTCGA
|
||||
TTGAAGGTTCATGGAAAAAGCTGCGTAGAAGGCATGTCATTGTGCTTACG
|
||||
AGCCATTTCGGATATCTTGTGAATTTAGCAGGAAGTTCTGTAACTGGTTG
|
||||
GAATTCAAATATATCAGTTCTTCTTCCTGGATCTCGTCCTTTTTGCACTA
|
||||
AAACCATTGCGATTGCATCCGGATTCTGAGTAAGAGCCACTACAGCTTTA
|
||||
TGATACAGGCTCTTGTTATTCCTTTCGTGCTCGAATGGGAACTTTCCAGT
|
||||
GGCACAAAAATATAGTGTACATCCCAGAGCCCATAGATCACATGTTCCGA
|
||||
|
||||
|
||||
|
||||
5. Acknowledgements
|
||||
|
||||
We would like to thank Applied Biosystems, Inc. and Pharmacia LKB
|
||||
Biotechnology for their cooperation in agreeing to our routines
|
||||
accessing the data files of their fluorescent sequencing machines.
|
||||
|
||||
373A sequence data file formats are the exclusive property of Applied
|
||||
Biosystems, Inc.
|
||||
|
||||
ALF sequence data file formats are the exclusive property of Pharmacia
|
||||
LKB Biotechnology, Inc.
|
||||
|
190
ReleaseNotes
Normal file
190
ReleaseNotes
Normal file
|
@ -0,0 +1,190 @@
|
|||
Release Notes for Staden Package 1992.3
|
||||
---------------------------------------
|
||||
|
||||
|
||||
Installation guide
|
||||
------------------
|
||||
|
||||
The file doc/install.PS contains installation instructions.
|
||||
|
||||
|
||||
Manual for the Staden Package
|
||||
-----------------------------
|
||||
|
||||
There is now a 135 page manual on the Staden Package. It is currently
|
||||
being distributed as a Word4 document on a Macintosh floppy disk.
|
||||
|
||||
|
||||
Feedback and bug reports
|
||||
------------------------
|
||||
|
||||
We welcome comments and suggestions on all aspects of the package and are
|
||||
best contacted by email: rs@uk.ac.cam.mrc-lmb and sd@uk.ac.cam.mrc-lmb.
|
||||
All abnormal terminations are bugs and we would like to be told of them
|
||||
so they can be fixed. We recommend that you request an update at least once
|
||||
a year as the package is evolving very rapidly.
|
||||
|
||||
Note due to popular demand we have decided to release new routines earlier
|
||||
than in the past so please report bugs. The documentation for additions may
|
||||
be sparser than before, or non-existent, but if there is something with which
|
||||
you need help, email us.
|
||||
|
||||
|
||||
Changes this release
|
||||
--------------------
|
||||
|
||||
|
||||
The assembly programs bap and xbap have several new functions:
|
||||
1. Find single stranded regions and try to fill them with "hidden"
|
||||
data from the adjacent readings.
|
||||
2. Find single stranded regions (includes ends of contigs) and
|
||||
select primers and templates for double stranding them (joining
|
||||
them).
|
||||
3. Pre assembly screening for readings to find those that align
|
||||
best. Optionally the hidden data can also be included in the
|
||||
comparison (part of assembly function).
|
||||
4. Find pairs of readings taken from opposite ends of the same
|
||||
template (ie forward and reverse read pairs). List or plot their
|
||||
positions.
|
||||
5. A new function to check that readings have been assembled into
|
||||
the correct positions. It aligns the hidden (previously termed "unused")
|
||||
parts of readings with the consensus they overlap to see how well
|
||||
they align. Poor alignments are reported.
|
||||
6. During assembly each reading is now allowed to match up to 100
|
||||
different places.
|
||||
|
||||
It might be guessed from the above that we are trying to improve our
|
||||
ability to deal with the assembly of human data. Hence, also the next
|
||||
addition.
|
||||
|
||||
A new experimental program (rep) for screening readings for Alu
|
||||
sequences prior to assembly. The Alu containing segments are tagged
|
||||
so they can be seen in the contig editor. A library of Alu sequences
|
||||
is included in /tables/alus. The program is quite slow as it compares
|
||||
each reading in both orientations with all of the Alu sequences (126
|
||||
of them) in order to find the best match. Only time and more data will
|
||||
tell how sensitive it is, and whether the current default score of 0.6
|
||||
is "correct". BEWARE rep modifies the original reading files to include
|
||||
the tag information. The only information is in /help/alu.help
|
||||
|
||||
A new program for extracting sets of sequences and their annotations
|
||||
from the sequence libraries (lip). The only information is in
|
||||
/help/lip.help
|
||||
|
||||
Changes to the xterm user interface. These routines have been completely
|
||||
rewritten. One addition is that now ?? in response to a question will
|
||||
allow the user to get help on any function in a program. help is also
|
||||
improved in the x version.
|
||||
|
||||
|
||||
Changes last release
|
||||
--------------------
|
||||
|
||||
|
||||
DAP, XDAP have been replaced by BAP and XBAP (see below)
|
||||
|
||||
A new function for examining repeats has been added to NIP
|
||||
|
||||
A new repeat search has been added to SIP
|
||||
|
||||
Some outputs have been changed to produce FASTA format files
|
||||
instead of PIR.
|
||||
|
||||
MEP now allows searches for motifs in which any 8 out of a string
|
||||
of 20 can be switched on.
|
||||
|
||||
The manual has been updated.
|
||||
|
||||
Keyword and author searches on sequence libraries
|
||||
|
||||
All programs that use the libraries can now perform author
|
||||
and keyword searches on all libraries (only nip did so before).
|
||||
|
||||
Postscript output
|
||||
|
||||
All graphics can now be saved to disk in postscript form by
|
||||
use of a sub-option in "Redirect output".
|
||||
|
||||
|
||||
|
||||
Sequence assembly
|
||||
|
||||
BAP, XBAP replace DAP and XDAP. A program to convert DAP databases to BAP
|
||||
databases (convert) is included. BAP databases can contain up to 8000 readings
|
||||
and a consensus of 500,000 bases. A minor edit and recompilation will allow
|
||||
up to 99,999 readings. The space is used more efficiently now as the databases
|
||||
grow as the number of readings increases. Reading names can be 16 characters
|
||||
in length. In addition:
|
||||
|
||||
1) Assembly is 4 times as fast as in the DAP.
|
||||
|
||||
2) Find internal joins is 5 times as fast and now brings up the join editor
|
||||
with the two contigs in the correct orientation and aligned.
|
||||
|
||||
3) The assembly routines align pads better, plus a new automatic function can
|
||||
also be used to align them prior to editing.
|
||||
|
||||
4) The contig editor has been greatly speeded up and its functionality
|
||||
has been enhanced.
|
||||
|
||||
5) A routine for selecting oligos for primer walking is included.
|
||||
|
||||
6) A new routine allows batches of readings to be removed from a database.
|
||||
|
||||
7) We have also included routines for making SCF files, for getting the
|
||||
sequence from SCF files, and one for marking the poor quality data in
|
||||
readings. See the manual.
|
||||
|
||||
Sequence library formats
|
||||
|
||||
The standard sequence library indexing method is now that used on the
|
||||
EMBL CD-ROM. The libraries (EMBL nucleotide and SWISSPROT protein) can be
|
||||
left on the CD-ROM or copied to disk. We include in the package programs
|
||||
for creating this type of index for EMBL updates, PIR in codata format,
|
||||
NRL3D and GenBank. If the indexes are created all programs can read all
|
||||
these libraries. Programs and scripts for this task are contained in the
|
||||
directory indexseqlibs.
|
||||
The keyword and author searches are particularly fast and the
|
||||
keyword index is based on ALL text in the files - not just the keywords.
|
||||
|
||||
Feature table formats
|
||||
|
||||
The programs now use the new feature table format common to EMBL
|
||||
and GenBank, but retain the old format for SWISSPROT which has not yet
|
||||
changed.
|
||||
|
||||
For details of the above see file SequenceLibraries.
|
||||
|
||||
Pattern searches
|
||||
|
||||
Pipl and Nipl now have the facility to find only the best scoring
|
||||
match for each sequence. The prompt is "? report all matches", so typing
|
||||
only return means all matches will be shown and typing n means only the
|
||||
highest scoring will be reported. It is particularly useful when employed
|
||||
to create alignments. The corresponding help file has not been updated.
|
||||
Also to incorporate long unix file names the pattern files no longer include
|
||||
the annotation "filename".
|
||||
|
||||
|
||||
Nip
|
||||
|
||||
Option 38 in nip "translate and list" has been removed as the
|
||||
more flexible routines of option 39 incorporate all its functionality. Many
|
||||
options that relate to feature tables have been modified but their help files
|
||||
are not yet up to date.
|
||||
|
||||
|
||||
Vep
|
||||
|
||||
A program (vep) for automatic excising of vector (either
|
||||
sequencing vector or cosmid vector) sequences from readings is now
|
||||
included in the package.
|
||||
|
||||
|
||||
|
||||
|
||||
Rodger Staden, Simon Dear, James Bonfield
|
||||
|
||||
|
||||
|
||||
|
420
SequenceLibraries
Normal file
420
SequenceLibraries
Normal file
|
@ -0,0 +1,420 @@
|
|||
Notes on library handling
|
||||
-------------------------
|
||||
|
||||
Contents of this document:
|
||||
|
||||
I) Introduction
|
||||
II) Details of file organisation and use
|
||||
III) Options currently available
|
||||
IV) Installation guide
|
||||
V) New feature table handling routines
|
||||
VI) Indexing the sequence libraries
|
||||
|
||||
|
||||
Section I Introduction
|
||||
----------------------
|
||||
|
||||
Available sequence libraries
|
||||
|
||||
There are a number of different sequence libraries for nucleotide and protein:
|
||||
PIR, GenBank, EMBL, Swissprot, and the Japanese Databank. Even after all the
|
||||
years of their existence they still use different formats for their data. This
|
||||
provides tedious and unrewarding work for software developers. Recently EMBL
|
||||
and GenBank agreed a new and common way of writing their feature tables, which
|
||||
is a great help, although the rest of their format is different. Swissprot still
|
||||
uses the old embl style feature table format and PIR yet another.
|
||||
|
||||
All the libraries distribute their data on magnetic tapes and EMBL and GenBank
|
||||
have started to distribute on cdrom. The EMBL cdrom also contains Swissprot.
|
||||
The GenBank and EMBL cdroms use different formats and have different contents.
|
||||
The EMBL cdrom has useful indexes sorted alphabetically: those for entry name
|
||||
and accession number, brief descriptions, keywords and freetext indexes are
|
||||
already available and others are expected. These indexes point to the data for
|
||||
each entry, and can be used to extract the data for any entry quickly.
|
||||
|
||||
Moving to unix
|
||||
|
||||
The VAX version of our package used PIR format which meant reformatting all
|
||||
libraries other than PIR into that format. This required, at least
|
||||
temporarily, having space for two copies of the libraries, and quite a lot of
|
||||
cpu time. The software for doing this was provided by PIR, and is very VAX
|
||||
specific and hence will not run under unix. For the unix version of our package
|
||||
I have decided to use the EMBL cdrom format and its indexes as the primary
|
||||
format. The current programs also support the use of PIR format libraries
|
||||
without indexes - ie just the sequence and annotation files.
|
||||
|
||||
Indexing GenBank, EMBL updates, PIR and NRL3D
|
||||
|
||||
We include programs to create indexes for the above libraries. See below and
|
||||
the README file in indexseqlibs. The programs can read all the above libraries
|
||||
once the indexes are created. The indexing programs index the data in its
|
||||
distributed form: WE DO NOT REFORMAT OR COPY THE LIBRARIES but simply create
|
||||
indexes to the original files. Obviously this saves a lot of disk space, and
|
||||
for those content to use only embl and swissprot from the cdrom, almost no disk
|
||||
space is required. We haven't tried it yet, but for genbank on cdrom, the only
|
||||
extra disk space required would be for the indexes.
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
Section II Details of file organisation and use
|
||||
-----------------------------------------------
|
||||
|
||||
The following strategy has been used to try to deal with alternate
|
||||
and changing sequence library formats.
|
||||
|
||||
1) libraries are described at several levels:
|
||||
|
||||
a) the top level file is a list of available libraries which contains:
|
||||
the library type, the name of the file containing the name of
|
||||
each library's individual files, and the prompt to appear on
|
||||
the user's screen: LTYPE LOGNAM PROMPT
|
||||
|
||||
b) the file containing the names of the library's individual files
|
||||
contains flags to define the file types: FTYPE LOGNAM
|
||||
|
||||
c) the individual library files
|
||||
|
||||
|
||||
|
||||
2) library types handled:
|
||||
|
||||
a) EMBL/SWISSPROT in distributed format with cdrom index format
|
||||
LTYPE = 'A'
|
||||
b) GenBank in distributed format with cdrom index format LTYPE = 'C'
|
||||
c) PIR/NRL3D in CODATA format with cdrom index format LTYPE = 'B'
|
||||
d) PIR/NBRF .seq files can be read sequentially as "personal files
|
||||
in PIR format" and do not appear in the list of available libraries.
|
||||
e) FASTA format files can be read sequentially as "personal files
|
||||
in FASTA format" and do not appear in the list of available
|
||||
libraries.
|
||||
|
||||
3) EMBL, SWISSPROT and other libraries for which EMBL-style indexes have been
|
||||
created
|
||||
|
||||
current file types:
|
||||
|
||||
A division.lookup
|
||||
B entryname.index
|
||||
C accession.target
|
||||
D accession.hits
|
||||
E brief description
|
||||
F freetext.target
|
||||
G freetext.hits
|
||||
H author.target
|
||||
I author.hits
|
||||
|
||||
|
||||
Library list
|
||||
level 1
|
||||
|
|
||||
|
|
||||
-----------------------------------------------------------
|
||||
| | |
|
||||
lib 1 file list lib 2 file list lib 3 file list
|
||||
level 2
|
||||
| |
|
||||
-------- ---------
|
||||
level 3
|
||||
file 1 file 1
|
||||
file 2 file 2
|
||||
. .
|
||||
file n file n
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
Level 1
|
||||
|
||||
File name: sequence.libs
|
||||
Environment variable: SEQUENCELIBRARIES
|
||||
Contents:
|
||||
|
||||
A EMBLFILES EMBL nucleotide library ! in cdrom format
|
||||
C GENBFILES GenBank nucleotide library!
|
||||
A SWISSFILES SWISSPROT protein library! in cdrom format
|
||||
B PIRFILES PIR protein library!
|
||||
B NRL3DFILES NRL3D protein library!
|
||||
|
||||
Notes:
|
||||
|
||||
The libraries have types A,B,C. The logical names are EMBLFILES and
|
||||
SWISSFILES, etc and the prompts are 'EMBL nucleotide library' and
|
||||
'SWISSPROT protein library', etc. Anything to the right of a ! is a comment.
|
||||
|
||||
Level 2: the list of library files (using embl as an example)
|
||||
|
||||
File name: embl.files
|
||||
Environment variable: EMBLFILES
|
||||
Contents:
|
||||
|
||||
A EMBLDIVPATH/embl_div.lkp
|
||||
B EMBLINDPATH/entrynam.idx
|
||||
C EMBLINDPATH/acnum.trg
|
||||
D EMBLINDPATH/acnum.hit
|
||||
E EMBLINDPATH/brief.idx
|
||||
F EMBLINDPATH/freetext.trg
|
||||
G EMBLINDPATH/freetext.hit
|
||||
H EMBLINDPATH/author.trg
|
||||
I EMBLINDPATH/author.hit
|
||||
|
||||
|
||||
Level 3: the sequence and annotation files (eg 15 for embl, 1 for swissprot).
|
||||
|
||||
Paths and file names:
|
||||
|
||||
EMBLPATH/bb.dat
|
||||
EMBLPATH/fun.dat
|
||||
EMBLPATH/inv.dat
|
||||
EMBLPATH/mam.dat
|
||||
EMBLPATH/org.dat
|
||||
EMBLPATH/patent.dat
|
||||
EMBLPATH/phg.dat
|
||||
EMBLPATH/pln.dat
|
||||
EMBLPATH/pri.dat
|
||||
EMBLPATH/pro.dat
|
||||
EMBLPATH/rod.dat
|
||||
EMBLPATH/syn.dat
|
||||
EMBLPATH/una.dat
|
||||
EMBLPATH/vrl.dat
|
||||
EMBLPATH/vrt.dat
|
||||
|
||||
All files from the division lookup file down are exactly as they appear on the
|
||||
cdrom. The division lookup file relates numbers stored in the indexes to
|
||||
actual division (or data) files stored on the disk. We rewrite it so the
|
||||
directory structure and file names can be chosen locally. Its format is
|
||||
I6,1x,A. An example is given below.
|
||||
|
||||
Division lookup file
|
||||
|
||||
File name: STADTABL/embl_div.lkp
|
||||
Environment variable path EMBLDIVPATH
|
||||
Contents:
|
||||
|
||||
1 EMBLPATH/bb.dat
|
||||
2 EMBLPATH/fun.dat
|
||||
3 EMBLPATH/inv.dat
|
||||
4 EMBLPATH/mam.dat
|
||||
5 EMBLPATH/org.dat
|
||||
6 EMBLPATH/patent.dat
|
||||
7 EMBLPATH/phg.dat
|
||||
8 EMBLPATH/pln.dat
|
||||
9 EMBLPATH/pri.dat
|
||||
10 EMBLPATH/pro.dat
|
||||
11 EMBLPATH/rod.dat
|
||||
12 EMBLPATH/syn.dat
|
||||
13 EMBLPATH/una.dat
|
||||
14 EMBLPATH/vrl.dat
|
||||
15 EMBLPATH/vrt.dat
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
|
||||
Section III Options currently available
|
||||
---------------------------------------
|
||||
|
||||
Facilities currently offered in nip,pip,sip,nipl,pipl,sipl:
|
||||
|
||||
Get a sequence by knowing its entry name
|
||||
Get a sequence's annotation by knowing its entry name
|
||||
Get an entry name by knowing its accession number
|
||||
Search the freetext index
|
||||
Search the author index
|
||||
|
||||
Facilities currently offered in nipl,pipl,sipl:
|
||||
|
||||
Search whole library
|
||||
Search only a list of entry names
|
||||
Search all but a list of entry names
|
||||
|
||||
Outline of each type of operation
|
||||
|
||||
Looking for an entry by name: the programs will open the library description
|
||||
file and read the names of its files and their file types. Then they will open
|
||||
the entrynam.idx file, and find the sequence offset, annotation offset and
|
||||
division number. Then open the division lookup file, find the file name for the
|
||||
division required, open that file, seek to the required byte and get the data.
|
||||
|
||||
Looking for an entry by accession number: the programs will open the library
|
||||
description file and read the names of its files and their file types. Then
|
||||
they open the acnum.trg and acnum.hit files. The acnum.trg file is read to find
|
||||
the accession number and a pointer to the acnum.hit file and the number of
|
||||
hits. That file is read and the corresponding entry names displayed. At
|
||||
present no further action is performed, although I expect to list out the
|
||||
titles for the entries found.
|
||||
|
||||
Searching the whole of a library: the programs will open the library
|
||||
description file and read the names of its files and their file types. Then
|
||||
they open the division lookup file, read the names and numbers of the sequence
|
||||
files, open all of them, then open the entryname file. Then the library is
|
||||
processed sequentially by reading the entry names, their sequence offsets and
|
||||
division numbers from the entry names file, and then the sequence from the
|
||||
appropriate data file.
|
||||
|
||||
Searching the whole of a library using a list of entry names to include: the
|
||||
programs will open the library description file and read the names of its files
|
||||
and their file types. Then they open the division lookup file, read the names
|
||||
and numbers of the sequence files, open all of them, then open the entryname
|
||||
file. Then the library is processed by reading the list of entry names and
|
||||
finding the names in the entry names file to get their sequence offsets and
|
||||
division numbers, and then the sequence from the appropriate data file. It will
|
||||
stop when it reaches the end of the list of entry names. The list of entry
|
||||
names can be in any order.
|
||||
|
||||
Searching the whole of a library using a list of entry names to exclude: the
|
||||
programs will open the library description file and read the names of its files
|
||||
and their file types. Then they open the division lookup file, read the names
|
||||
and numbers of the sequence files, open all of them, then open the entryname
|
||||
file. Then the library is processed sequentially by reading the list of entry
|
||||
names, reading the next entry in the entry names file to make sure it does not
|
||||
match, then getting the sequence offsets and division numbers, and then the
|
||||
sequence from the appropriate data file. If the next name matches the name on
|
||||
the list of entry names, it will be skipped, and the next name to exclude read.
|
||||
If the list of excluded names is finished the rest of the library is searched
|
||||
sequentially. The list of entry names must be in the same order as those in the
|
||||
library (ie sorted alphabetically).
|
||||
|
||||
Searching a whole library using a PIR format file is performed by reading it
|
||||
sequentially. If a list of entry names is used it must be in the same order as
|
||||
the entries in the library file.
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
Section IV Installation guide
|
||||
-----------------------------
|
||||
|
||||
EMBL CDROM
|
||||
|
||||
The data can be left on the cdrom or copied to hard disk. The files
|
||||
staden.login and staden.profile source the files $STADTABL/libraries.config.csh
|
||||
and $STADTABL/libraries.config.sh respectively. Refer to these files to see what
|
||||
is required to install, add or move a sequence library that you want to be used
|
||||
by the programs.
|
||||
|
||||
Other libraries (PIR, Genbank, EMBL updates)
|
||||
|
||||
Create the indexes then edit the files that tell the programs where the data is
|
||||
stored. The files staden.login and staden.profile source the file
|
||||
$STADTABL/libraries.config. Refer to this file to see what is required to
|
||||
install, add or move a sequence library that you want to be used by the
|
||||
programs.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
Section V New feature table handling facilities
|
||||
-----------------------------------------------
|
||||
|
||||
As mentioned above EMBL and GenBank have recently introduced new feature tables
|
||||
for annotating the sequences. They are a great improvement on the previous ones
|
||||
and, among other things, now permit correct translation of spliced genes.
|
||||
Various options within nip have been added or modified to take advantage of
|
||||
these changes. The routine to translate DNA to protein and write the protein
|
||||
to disk now gives correct results for spliced genes. The routine to translate
|
||||
DNA to protein and display the two together now gives correct translations
|
||||
except for the amino acids spanning intron/exon junctions. The routine to plot
|
||||
maps from feature tables can use the new style. The open reading frame finding
|
||||
routine writes out its results in the new style. The routine that finds open
|
||||
reading frames and writes their translations to disk also writes a title in the
|
||||
form of a new style feature table entry. The feature table format output from
|
||||
the pattern searches in nip also uses the new style.
|
||||
|
||||
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
Section VI Indexing the sequence libraries
|
||||
--------------------------------------------
|
||||
|
||||
We handle EMBL, SwissProt, and GenBank in their distributed format, plus
|
||||
PIR and NRL3D in codata format. All programs and scripts are in directory
|
||||
indexseqlibs.
|
||||
|
||||
Currently we produce entryname index, accession number index, freetext index,
|
||||
and brief index (brief index contains the entry name, the primary accession
|
||||
number, the sequence length and an 80 character description).
|
||||
|
||||
To produce any of the indexes requires the creation of several intermediate
|
||||
files and the indexing programs are written so that the intermediate files
|
||||
are the same for all libraries. This means that only the programs that read
|
||||
the distributed form of each library need to be unique to that library, and
|
||||
all the other processing programs can be used for all libraries.
|
||||
|
||||
|
||||
However, even though the indexes have the same format, programs (like nip)
|
||||
that read the libraries need to treat each library separately because their
|
||||
actual contents are written differently.
|
||||
|
||||
Making the entry name index
|
||||
---------------------------
|
||||
|
||||
Common program entryname2
|
||||
|
||||
EMBL emblentryname1
|
||||
SwissProt emblentryname1
|
||||
|
||||
GenBank genbentryname1
|
||||
|
||||
PIR pirentryname1
|
||||
NRL3D pirentryname1
|
||||
|
||||
|
||||
Making the accession number index
|
||||
---------------------------------
|
||||
|
||||
Common programs access2 access3 access4
|
||||
|
||||
EMBL emblaccess1
|
||||
SwissProt emblaccess1
|
||||
|
||||
GenBank genbaccess1
|
||||
|
||||
PIR piraccess1 piraccess2
|
||||
NRL3D No accession numbers
|
||||
|
||||
Making the brief index
|
||||
----------------------
|
||||
|
||||
Common program title2
|
||||
|
||||
EMBL embltitle1
|
||||
SwissProt embltitle1
|
||||
|
||||
GenBank genbtitle1
|
||||
|
||||
PIR pirtitle1 pirtitle2 (pir3 has no accession numbers)
|
||||
NRL3D pirtitle2
|
||||
|
||||
Scripts
|
||||
-------
|
||||
|
||||
emblentryname.script
|
||||
emblaccession.script
|
||||
embltitle.script
|
||||
|
||||
swissentryname.script
|
||||
swissaccession.script
|
||||
swisstitle.script
|
||||
|
||||
genbentryname.script
|
||||
genbaccession.script
|
||||
genbtitle.script
|
||||
|
||||
pirentryname.script
|
||||
piraccession.script
|
||||
pirtitle.script
|
||||
|
||||
nrl3dentryname.script
|
||||
nrl3dtitle.script
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
453
Staden_install-alpha
Normal file
453
Staden_install-alpha
Normal file
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = alpha
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
# Ask whether the non-X programs should be compiled.
# One line is read from standard input into ANS_REQ_NONX and normalised:
#   "?"         print the help text and ask again
#   y... / Y... $YES
#   n... / N... $NO
#   empty line  the default, $DEF_REQ_NONX
#   other       ask again
ask_require_nonx_progs:
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
set ANS_REQ_NONX = $<
if ("$ANS_REQ_NONX" == "?") then
    echo "* If you do not have X windows on your system you will require"
    echo " these. However, you will require Tektronix terminal emulation."
    echo " If you do not require all of the non-X programs, you should abort"
    echo " and manually make the ones you require."
    echo ""
    goto ask_require_nonx_progs
else if ("$ANS_REQ_NONX" != "") then
    if ("$ANS_REQ_NONX" =~ [yY]*) then
        set ANS_REQ_NONX=$YES
    else if ("$ANS_REQ_NONX" =~ [nN]*) then
        set ANS_REQ_NONX=$NO
    else
        # Unrecognised reply: repeat the question.
        goto ask_require_nonx_progs
    endif
else
    # Empty reply selects the default shown in square brackets.
    set ANS_REQ_NONX=$DEF_REQ_NONX
endif
|
||||
|
||||
# Ask whether the X programs should be compiled.
# One line is read from standard input into ANS_REQ_X and normalised:
#   "?"         print the help text and ask again
#   y... / Y... $YES
#   n... / N... $NO
#   empty line  the default, $DEF_REQ_X
#   other       ask this question again
ask_require_x_progs:
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
set ANS_REQ_X = $<
if ("$ANS_REQ_X" == "?") then
    echo "* These are the programs that require X windows."
    echo " If you do not require all of the X programs, you should abort"
    echo " and manually make the ones you require."

    echo ""
    goto ask_require_x_progs
else if ("$ANS_REQ_X" != "") then
    if ("$ANS_REQ_X" =~ [yY]*) then
        set ANS_REQ_X=$YES
    else if ("$ANS_REQ_X" =~ [nN]*) then
        set ANS_REQ_X=$NO
    else
        # Unrecognised reply: repeat THIS question.  Jumping back to the
        # non-X question would throw away the answer already given there.
        goto ask_require_x_progs
    endif
else
    # Empty reply selects the default shown in square brackets.
    set ANS_REQ_X=$DEF_REQ_X
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
# Ask whether the miscellaneous programs should be compiled.
# One line is read from standard input into ANS_REQ_MISC and normalised:
#   "?"         print the help text and ask again
#   y... / Y... $YES
#   n... / N... $NO
#   empty line  the default, $DEF_REQ_MISC
#   other       ask again
# The help text names every program that the miscellaneous install step
# of this script copies into the binaries directory.
ask_require_misc:
echo -n "Compile other programs [$DEF_REQ_MISC]? "
set ANS_REQ_MISC = $<
if ("$ANS_REQ_MISC" == "?") then
    echo "* Other programs include:"
    echo " alfsplit"
    echo " getABISampleName"
    echo " convert"
    echo " cop"
    echo " cop-bap"
    echo " frog"
    echo " trace2seq"
    echo " makeSCF"
    echo ""
    goto ask_require_misc
else if ("$ANS_REQ_MISC" != "") then
    if ("$ANS_REQ_MISC" =~ [yY]*) then
        set ANS_REQ_MISC=$YES
    else if ("$ANS_REQ_MISC" =~ [nN]*) then
        set ANS_REQ_MISC=$NO
    else
        # Unrecognised reply: repeat the question.
        goto ask_require_misc
    endif
else
    # Empty reply selects the default shown in square brackets.
    set ANS_REQ_MISC=$DEF_REQ_MISC
endif
|
||||
|
||||
|
||||
|
||||
# Tell the user that all questions have been answered and warn that
# compiling and installing everything can take a long time.
time_taken_warning:
echo ""
echo "The installation procedure is now ready to start."
echo ""
echo "**** Warning:"
echo " The installation will take considerable time to complete. If you"
echo " are installing the whole Staden Package from scratch it could"
echo " take as long as an hour for all executables to be compiled and"
echo " installed."
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
453
Staden_install-dec
Normal file
453
Staden_install-dec
Normal file
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = dec
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
# Ask whether the non-X programs should be compiled.
# One line is read from standard input into ANS_REQ_NONX and normalised:
#   "?"         print the help text and ask again
#   y... / Y... $YES
#   n... / N... $NO
#   empty line  the default, $DEF_REQ_NONX
#   other       ask again
ask_require_nonx_progs:
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
set ANS_REQ_NONX = $<
if ("$ANS_REQ_NONX" == "?") then
    echo "* If you do not have X windows on your system you will require"
    echo " these. However, you will require Tektronix terminal emulation."
    echo " If you do not require all of the non-X programs, you should abort"
    echo " and manually make the ones you require."
    echo ""
    goto ask_require_nonx_progs
else if ("$ANS_REQ_NONX" != "") then
    if ("$ANS_REQ_NONX" =~ [yY]*) then
        set ANS_REQ_NONX=$YES
    else if ("$ANS_REQ_NONX" =~ [nN]*) then
        set ANS_REQ_NONX=$NO
    else
        # Unrecognised reply: repeat the question.
        goto ask_require_nonx_progs
    endif
else
    # Empty reply selects the default shown in square brackets.
    set ANS_REQ_NONX=$DEF_REQ_NONX
endif
|
||||
|
||||
# Ask whether the X programs should be compiled.
# One line is read from standard input into ANS_REQ_X and normalised:
#   "?"         print the help text and ask again
#   y... / Y... $YES
#   n... / N... $NO
#   empty line  the default, $DEF_REQ_X
#   other       ask this question again
ask_require_x_progs:
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
set ANS_REQ_X = $<
if ("$ANS_REQ_X" == "?") then
    echo "* These are the programs that require X windows."
    echo " If you do not require all of the X programs, you should abort"
    echo " and manually make the ones you require."

    echo ""
    goto ask_require_x_progs
else if ("$ANS_REQ_X" != "") then
    if ("$ANS_REQ_X" =~ [yY]*) then
        set ANS_REQ_X=$YES
    else if ("$ANS_REQ_X" =~ [nN]*) then
        set ANS_REQ_X=$NO
    else
        # Unrecognised reply: repeat THIS question.  Jumping back to the
        # non-X question would throw away the answer already given there.
        goto ask_require_x_progs
    endif
else
    # Empty reply selects the default shown in square brackets.
    set ANS_REQ_X=$DEF_REQ_X
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
# Ask whether the miscellaneous programs should be compiled.
# One line is read from standard input into ANS_REQ_MISC and normalised:
#   "?"         print the help text and ask again
#   y... / Y... $YES
#   n... / N... $NO
#   empty line  the default, $DEF_REQ_MISC
#   other       ask again
# The help text names every program that the miscellaneous install step
# of this script copies into the binaries directory.
ask_require_misc:
echo -n "Compile other programs [$DEF_REQ_MISC]? "
set ANS_REQ_MISC = $<
if ("$ANS_REQ_MISC" == "?") then
    echo "* Other programs include:"
    echo " alfsplit"
    echo " getABISampleName"
    echo " convert"
    echo " cop"
    echo " cop-bap"
    echo " frog"
    echo " trace2seq"
    echo " makeSCF"
    echo ""
    goto ask_require_misc
else if ("$ANS_REQ_MISC" != "") then
    if ("$ANS_REQ_MISC" =~ [yY]*) then
        set ANS_REQ_MISC=$YES
    else if ("$ANS_REQ_MISC" =~ [nN]*) then
        set ANS_REQ_MISC=$NO
    else
        # Unrecognised reply: repeat the question.
        goto ask_require_misc
    endif
else
    # Empty reply selects the default shown in square brackets.
    set ANS_REQ_MISC=$DEF_REQ_MISC
endif
|
||||
|
||||
|
||||
|
||||
# Tell the user that all questions have been answered and warn that
# compiling and installing everything can take a long time.
time_taken_warning:
echo ""
echo "The installation procedure is now ready to start."
echo ""
echo "**** Warning:"
echo " The installation will take considerable time to complete. If you"
echo " are installing the whole Staden Package from scratch it could"
echo " take as long as an hour for all executables to be compiled and"
echo " installed."
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
453
Staden_install-sgi
Normal file
453
Staden_install-sgi
Normal file
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = sgi
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronics terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
ask_require_x_progs:
# Ask whether the X window versions of the programs should be built.
# Reply handling:
#   a single question mark  - print help text and ask again
#   empty reply             - take the default held in DEF_REQ_X
#   reply starting y or Y   - ANS_REQ_X becomes YES
#   reply starting n or N   - ANS_REQ_X becomes NO
#   anything else           - ask this same question again
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
set ANS_REQ_X = $<
if ("$ANS_REQ_X" == "?") then
    echo "* These are the programs that require X windows."
    echo " If you do not require all of the X programs, you should abort"
    echo " and manually make the ones you require."

    echo ""
    goto ask_require_x_progs
else if ("$ANS_REQ_X" != "") then
    if ("$ANS_REQ_X" =~ [yY]*) then
        set ANS_REQ_X=$YES
    else if ("$ANS_REQ_X" =~ [nN]*) then
        set ANS_REQ_X=$NO
    else
        # Unrecognised reply: repeat the X question. This used to jump back
        # to the non-X question, which threw away the answer already given.
        goto ask_require_x_progs
    endif
else
    set ANS_REQ_X=$DEF_REQ_X
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
# Ask whether the miscellaneous utility programs should be built.
# Reply handling:
#   a single question mark  - print help text and ask again
#   empty reply             - take the default held in DEF_REQ_MISC
#   reply starting y or Y   - ANS_REQ_MISC becomes YES
#   reply starting n or N   - ANS_REQ_MISC becomes NO
#   anything else           - ask this same question again
echo -n "Compile other programs [$DEF_REQ_MISC]? "
set ANS_REQ_MISC = $<
if ("$ANS_REQ_MISC" == "?") then
    # The help list names every program that the miscellaneous install
    # step copies into the bin directory, not just the first two.
    echo "* Other programs include:"
    echo " alfsplit"
    echo " getABISampleName"
    echo " convert"
    echo " cop"
    echo " cop-bap"
    echo " frog"
    echo " trace2seq"
    echo " makeSCF"
    echo ""
    goto ask_require_misc
else if ("$ANS_REQ_MISC" != "") then
    if ("$ANS_REQ_MISC" =~ [yY]*) then
        set ANS_REQ_MISC=$YES
    else if ("$ANS_REQ_MISC" =~ [nN]*) then
        set ANS_REQ_MISC=$NO
    else
        goto ask_require_misc
    endif
else
    set ANS_REQ_MISC=$DEF_REQ_MISC
endif
|
||||
|
||||
|
||||
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all exectuables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
453
Staden_install-solaris
Normal file
453
Staden_install-solaris
Normal file
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = solaris
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronics terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
ask_require_x_progs:
# Ask whether the X window versions of the programs should be built.
# Reply handling:
#   a single question mark  - print help text and ask again
#   empty reply             - take the default held in DEF_REQ_X
#   reply starting y or Y   - ANS_REQ_X becomes YES
#   reply starting n or N   - ANS_REQ_X becomes NO
#   anything else           - ask this same question again
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
set ANS_REQ_X = $<
if ("$ANS_REQ_X" == "?") then
    echo "* These are the programs that require X windows."
    echo " If you do not require all of the X programs, you should abort"
    echo " and manually make the ones you require."

    echo ""
    goto ask_require_x_progs
else if ("$ANS_REQ_X" != "") then
    if ("$ANS_REQ_X" =~ [yY]*) then
        set ANS_REQ_X=$YES
    else if ("$ANS_REQ_X" =~ [nN]*) then
        set ANS_REQ_X=$NO
    else
        # Unrecognised reply: repeat the X question. This used to jump back
        # to the non-X question, which threw away the answer already given.
        goto ask_require_x_progs
    endif
else
    set ANS_REQ_X=$DEF_REQ_X
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
# Ask whether the miscellaneous utility programs should be built.
# Reply handling:
#   a single question mark  - print help text and ask again
#   empty reply             - take the default held in DEF_REQ_MISC
#   reply starting y or Y   - ANS_REQ_MISC becomes YES
#   reply starting n or N   - ANS_REQ_MISC becomes NO
#   anything else           - ask this same question again
echo -n "Compile other programs [$DEF_REQ_MISC]? "
set ANS_REQ_MISC = $<
if ("$ANS_REQ_MISC" == "?") then
    # The help list names every program that the miscellaneous install
    # step copies into the bin directory, not just the first two.
    echo "* Other programs include:"
    echo " alfsplit"
    echo " getABISampleName"
    echo " convert"
    echo " cop"
    echo " cop-bap"
    echo " frog"
    echo " trace2seq"
    echo " makeSCF"
    echo ""
    goto ask_require_misc
else if ("$ANS_REQ_MISC" != "") then
    if ("$ANS_REQ_MISC" =~ [yY]*) then
        set ANS_REQ_MISC=$YES
    else if ("$ANS_REQ_MISC" =~ [nN]*) then
        set ANS_REQ_MISC=$NO
    else
        goto ask_require_misc
    endif
else
    set ANS_REQ_MISC=$DEF_REQ_MISC
endif
|
||||
|
||||
|
||||
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all exectuables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
453
Staden_install-sun
Normal file
453
Staden_install-sun
Normal file
|
@ -0,0 +1,453 @@
|
|||
#! /bin/csh -f
|
||||
#
|
||||
# staden_install - version 2.4
|
||||
#
|
||||
# This is a prototype installation program.
|
||||
#
|
||||
# 9 March 1992
|
||||
# Modified for installation on Sun, Alliant, etc
|
||||
# No longer install 2rs
|
||||
#
|
||||
# 20 November 1992
|
||||
# Now includes convert, cop, frog, getMCH and scf
|
||||
#
|
||||
# 25 November 1992
|
||||
# SGI supported
|
||||
#
|
||||
# 19 May 1993
|
||||
# DEC Alpha, Solaris supported
|
||||
#
|
||||
# Written by sd@uk.ac.cam.mrc-lmb
|
||||
#
|
||||
|
||||
# prelim
|
||||
set prog = $0 ; set prog = $prog:t
|
||||
|
||||
# Machines supported: al sun dec sgi alpha solaris
|
||||
#set MACHINE = `echo $prog | sed 's/.*-//'`
|
||||
set MACHINE = sun
|
||||
|
||||
# For local (MRC-LMB) setup only
|
||||
#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
|
||||
set LOCAL = NO
|
||||
|
||||
|
||||
echo ""
|
||||
echo -n "Staden Package installation procedure - "
|
||||
switch (${MACHINE})
|
||||
case "al":
|
||||
echo "Alliant FX/2800 Concentrix version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "sun":
|
||||
echo "SunOS version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
case "dec":
|
||||
echo "DEC Ultrix (mips) version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "sgi":
|
||||
echo "Silicon Graphics Iris version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "alpha":
|
||||
echo "DEC Alpha OSF/1 version"
|
||||
set MAKE = "gmake -sk"
|
||||
breaksw
|
||||
case "solaris":
|
||||
echo "Solaris version"
|
||||
set MAKE = "make -sk"
|
||||
breaksw
|
||||
default:
|
||||
echo "Panic. Unknown version"
|
||||
exit 1
|
||||
endsw
|
||||
echo ""
|
||||
echo "* starting initialization...please wait."
|
||||
echo ""
|
||||
|
||||
# Binary fork of source directory
|
||||
if ($LOCAL == "YES") then
|
||||
set DIR_BINARIES = ${MACHINE}-binaries
|
||||
set DIR_PROGS = ${MACHINE}-bin
|
||||
else
|
||||
set DIR_BINARIES = .
|
||||
set DIR_PROGS = bin
|
||||
set MAKE = "$MAKE -f makefile-${MACHINE}"
|
||||
endif
|
||||
|
||||
init:
|
||||
# Set useful shell variables
|
||||
set YES="YES";
|
||||
set NO="NO"
|
||||
|
||||
# set/unset some .cshrc envs.
|
||||
unset noclobber
|
||||
set noglob
|
||||
|
||||
# set interrupt trap
|
||||
onintr end_failure
|
||||
|
||||
# Make dir command
|
||||
set MKDIR = "mkdir"
|
||||
|
||||
# Copy command
|
||||
set CP = "cp -p"
|
||||
|
||||
# Install command
|
||||
#set INSTALL = "install"
|
||||
#set INSTALL = "mv"
|
||||
set INSTALL = "cp"
|
||||
|
||||
# Set up default responses
|
||||
set DEF_STADEN_ROOT = `pwd`
|
||||
|
||||
set DEF_REQ_NONX = "$YES"
|
||||
set DEF_REQ_X = "$YES"
|
||||
set DEF_REQ_TED = "$YES"
|
||||
set DEF_REQ_MISC = "$YES"
|
||||
|
||||
# directories
|
||||
set DIR_SRC = $DEF_STADEN_ROOT/src
|
||||
set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
|
||||
set DIR_MISC = $DIR_SRC/Misc
|
||||
set DIR_STADEN = $DIR_SRC/staden
|
||||
set DIR_TED = $DIR_SRC/ted
|
||||
set DIR_ABI = $DIR_SRC/abi
|
||||
set DIR_ALF = $DIR_SRC/alf
|
||||
set DIR_BAP = $DIR_SRC/bap
|
||||
set DIR_OSP = $DIR_SRC/bap/osp-bits
|
||||
set DIR_CONVERT = $DIR_SRC/convert
|
||||
set DIR_COP = $DIR_SRC/cop
|
||||
set DIR_FROG = $DIR_SRC/frog
|
||||
set DIR_GETMCH = $DIR_SRC/getMCH
|
||||
set DIR_SCF = $DIR_SRC/scf
|
||||
|
||||
|
||||
main:
|
||||
|
||||
|
||||
preamble:
|
||||
echo ""
|
||||
echo ""
|
||||
echo "* Please answer the following questions."
|
||||
echo " Default answers to questions are given in square brackets."
|
||||
echo " If you require help at any stage respond with a ? to the question."
|
||||
echo ""
|
||||
|
||||
ask_staden_root:
|
||||
set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
|
||||
|
||||
ask_require_nonx_progs:
|
||||
echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
|
||||
set ANS_REQ_NONX = $<
|
||||
if ("$ANS_REQ_NONX" == "?") then
|
||||
echo "* If you do not have X windows on your system you will require"
|
||||
echo " these. However, you will require Tektronics terminal emulation."
|
||||
echo " If you do not require all of the non-X programs, you should abort"
|
||||
echo " and manually make the ones you require."
|
||||
echo ""
|
||||
goto ask_require_nonx_progs
|
||||
else if ("$ANS_REQ_NONX" != "") then
|
||||
if ("$ANS_REQ_NONX" =~ [yY]*) then
|
||||
set ANS_REQ_NONX=$YES
|
||||
else if ("$ANS_REQ_NONX" =~ [nN]*) then
|
||||
set ANS_REQ_NONX=$NO
|
||||
else
|
||||
goto ask_require_nonx_progs
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_NONX=$DEF_REQ_NONX
|
||||
endif
|
||||
|
||||
ask_require_x_progs:
# Ask whether the X window versions of the programs should be built.
# Reply handling:
#   a single question mark  - print help text and ask again
#   empty reply             - take the default held in DEF_REQ_X
#   reply starting y or Y   - ANS_REQ_X becomes YES
#   reply starting n or N   - ANS_REQ_X becomes NO
#   anything else           - ask this same question again
echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
set ANS_REQ_X = $<
if ("$ANS_REQ_X" == "?") then
    echo "* These are the programs that require X windows."
    echo " If you do not require all of the X programs, you should abort"
    echo " and manually make the ones you require."

    echo ""
    goto ask_require_x_progs
else if ("$ANS_REQ_X" != "") then
    if ("$ANS_REQ_X" =~ [yY]*) then
        set ANS_REQ_X=$YES
    else if ("$ANS_REQ_X" =~ [nN]*) then
        set ANS_REQ_X=$NO
    else
        # Unrecognised reply: repeat the X question. This used to jump back
        # to the non-X question, which threw away the answer already given.
        goto ask_require_x_progs
    endif
else
    set ANS_REQ_X=$DEF_REQ_X
endif
|
||||
|
||||
|
||||
ask_require_ted:
|
||||
echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
|
||||
set ANS_REQ_TED = $<
|
||||
if ("$ANS_REQ_TED" == "?") then
|
||||
echo "* This is the trace editor program. It allows you to look at"
|
||||
echo " traces obtained from automated fluorescent sequencing machines."
|
||||
echo ""
|
||||
goto ask_require_ted
|
||||
else if ("$ANS_REQ_TED" != "") then
|
||||
if ("$ANS_REQ_TED" =~ [yY]*) then
|
||||
set ANS_REQ_TED=$YES
|
||||
else if ("$ANS_REQ_TED" =~ [nN]*) then
|
||||
set ANS_REQ_TED=$NO
|
||||
else
|
||||
goto ask_require_ted
|
||||
endif
|
||||
else
|
||||
set ANS_REQ_TED=$DEF_REQ_TED
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ask_require_misc:
# Ask whether the miscellaneous utility programs should be built.
# Reply handling:
#   a single question mark  - print help text and ask again
#   empty reply             - take the default held in DEF_REQ_MISC
#   reply starting y or Y   - ANS_REQ_MISC becomes YES
#   reply starting n or N   - ANS_REQ_MISC becomes NO
#   anything else           - ask this same question again
echo -n "Compile other programs [$DEF_REQ_MISC]? "
set ANS_REQ_MISC = $<
if ("$ANS_REQ_MISC" == "?") then
    # The help list names every program that the miscellaneous install
    # step copies into the bin directory, not just the first two.
    echo "* Other programs include:"
    echo " alfsplit"
    echo " getABISampleName"
    echo " convert"
    echo " cop"
    echo " cop-bap"
    echo " frog"
    echo " trace2seq"
    echo " makeSCF"
    echo ""
    goto ask_require_misc
else if ("$ANS_REQ_MISC" != "") then
    if ("$ANS_REQ_MISC" =~ [yY]*) then
        set ANS_REQ_MISC=$YES
    else if ("$ANS_REQ_MISC" =~ [nN]*) then
        set ANS_REQ_MISC=$NO
    else
        goto ask_require_misc
    endif
else
    set ANS_REQ_MISC=$DEF_REQ_MISC
endif
|
||||
|
||||
|
||||
|
||||
time_taken_warning:
|
||||
echo ""
|
||||
echo "The installation procedure is now ready to start."
|
||||
echo ""
|
||||
echo "**** Warning:"
|
||||
echo " The installation will take considerable time to complete. If you"
|
||||
echo " are installing the whole Staden Package from scratch it could"
|
||||
echo " take as long as an hour for all exectuables to be compiled and"
|
||||
echo " installed."
|
||||
echo ""
|
||||
|
||||
ask_goahead:
|
||||
echo -n "Proceed with the installation [YES]? "
|
||||
set ANSWER=$<
|
||||
if ("$ANSWER" == "?") then
|
||||
echo "* Final confirmation to proceed with the installation. Answer"
|
||||
echo " YES to proceed; otherwise, answer NO to abort the installation."
|
||||
echo ""
|
||||
goto ask_goahead
|
||||
else if ("$ANSWER" != "") then
|
||||
if ("$ANSWER" =~ [nN]*) then
|
||||
goto chickens_exit
|
||||
else if ("$ANSWER" !~ [yY]*) then
|
||||
goto ask_goahead
|
||||
endif
|
||||
endif
|
||||
|
||||
installation_proper:
|
||||
|
||||
# make binaries directory if it doesn't exist
|
||||
|
||||
if (! -d $DIR_BIN) then
|
||||
$MKDIR $DIR_BIN
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
|
||||
echo ""
|
||||
echo "+ Compiling miscellaneous library"
|
||||
|
||||
pushd $DIR_MISC > /dev/null
|
||||
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_NONX" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing non X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE nprogs lprogs
|
||||
$INSTALL mep $DIR_BIN
|
||||
$INSTALL nip $DIR_BIN
|
||||
$INSTALL pip $DIR_BIN
|
||||
$INSTALL sap $DIR_BIN
|
||||
$INSTALL sapf $DIR_BIN
|
||||
$INSTALL sip $DIR_BIN
|
||||
$INSTALL splitp1 $DIR_BIN
|
||||
$INSTALL splitp2 $DIR_BIN
|
||||
$INSTALL splitp3 $DIR_BIN
|
||||
$INSTALL sethelp $DIR_BIN
|
||||
$INSTALL gip $DIR_BIN
|
||||
$INSTALL nipl $DIR_BIN
|
||||
$INSTALL pipl $DIR_BIN
|
||||
$INSTALL sipl $DIR_BIN
|
||||
$INSTALL dap $DIR_BIN
|
||||
$INSTALL nipf $DIR_BIN
|
||||
$INSTALL vep $DIR_BIN
|
||||
$INSTALL rep $DIR_BIN
|
||||
$INSTALL lip $DIR_BIN
|
||||
#$INSTALL convert_project $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE bap
|
||||
$INSTALL bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_TED" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing Trace editor"
|
||||
|
||||
pushd $DIR_TED > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE ted
|
||||
$INSTALL ted $DIR_BIN
|
||||
popd > /dev/null
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_X" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing X programs"
|
||||
|
||||
pushd $DIR_STADEN > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xprogs
|
||||
$INSTALL xmep $DIR_BIN
|
||||
$INSTALL xnip $DIR_BIN
|
||||
$INSTALL xpip $DIR_BIN
|
||||
$INSTALL xsap $DIR_BIN
|
||||
$INSTALL xsip $DIR_BIN
|
||||
$INSTALL xdap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_OSP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_BAP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE xbap
|
||||
$INSTALL xbap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
endif
|
||||
|
||||
if ("$ANS_REQ_MISC" == "$YES") then
|
||||
echo ""
|
||||
echo "+ Installing miscellaneous programs"
|
||||
|
||||
pushd $DIR_ABI > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL getABISampleName $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_ALF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE alfsplit
|
||||
$INSTALL alfsplit $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_CONVERT > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE convert
|
||||
$INSTALL convert $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_COP > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE all
|
||||
$INSTALL cop $DIR_BIN
|
||||
$INSTALL cop-bap $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_FROG > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE frog
|
||||
$INSTALL frog $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_GETMCH > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE trace2seq
|
||||
$INSTALL trace2seq $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
pushd $DIR_SCF > /dev/null
|
||||
cd $DIR_BINARIES
|
||||
$MAKE makeSCF
|
||||
$INSTALL makeSCF $DIR_BIN
|
||||
popd > /dev/null
|
||||
|
||||
|
||||
|
||||
endif
|
||||
|
||||
|
||||
installation_done:
|
||||
echo ""
|
||||
echo "+ Installation completed"
|
||||
echo ""
|
||||
|
||||
echo " Some further initialisation is required in order to use the"
|
||||
echo " package. csh users should insert the following in their .login"
|
||||
echo " files:"
|
||||
echo " "
|
||||
echo " setenv STADENROOT $ANS_STADEN_ROOT"
|
||||
echo ' source $STADENROOT/staden.login'
|
||||
echo " "
|
||||
echo " Users of the Bourne shell, sh, should insert the following in"
|
||||
echo " their .profile:"
|
||||
echo " "
|
||||
echo " STADENROOT=$ANS_STADEN_ROOT"
|
||||
echo " export STADENROOT"
|
||||
echo ' . $STADENROOT/staden.profile'
|
||||
echo " "
|
||||
echo " These initialisations will alter the shell's search path so that"
|
||||
echo " it can find the programs in the STADEN Package"
|
||||
echo " "
|
||||
|
||||
normal_exit:
|
||||
exit 0
|
||||
|
||||
chickens_exit:
|
||||
echo ""
|
||||
echo "+ Installation cancelled"
|
||||
echo ""
|
||||
|
||||
exit 0
|
||||
|
||||
end_failure:
|
||||
unset noglob
|
||||
echo ""
|
||||
echo "Aborted STADEN Package installation on `date`"
|
||||
echo ""
|
||||
exit 1
|
||||
|
91
Version-1993.0.7
Normal file
91
Version-1993.0.7
Normal file
|
@ -0,0 +1,91 @@
|
|||
Wed Jul 7
|
||||
*Version-1993.0.7*
|
||||
New xbap and ted.
|
||||
Can use Ctrl as well as Meta to shift cutoffs in contig editor.
|
||||
Code to read in ABI traces now robust to ABI problem files, where
|
||||
called base order is not base position order.
|
||||
|
||||
Thu Jul 1
|
||||
*Version-1993.0.6*
|
||||
New xbap and bap, to fix bugs.
|
||||
Break Contig was sometimes not recalculating consensus length correctly.
|
||||
Contig Edit was truncating reading name lengths at 10 characters.
|
||||
|
||||
Thu Jun 16
|
||||
*Version-1993.0.5*
|
||||
New xbap and bap executables. RS changed assembly in bap so that
|
||||
when entry is not permitted the program asks for the percentage
|
||||
mismatch - this allows display of alignments for all levels of
|
||||
mismatch.
|
||||
|
||||
Mon Jun 14 14:54:43 BST 1993
|
||||
*Version-1993.0.4*
|
||||
Bug in xdap. It was compiled with xbap's edUtils.h by mistake.
|
||||
|
||||
Fri Jun 11 17:50:13 BST 1993
|
||||
*Version-1993.0.3*
|
||||
Bugs in bap/xbap fixed. New executables included.
|
||||
|
||||
Thu Jun 3 13:53:38 BST 1993
|
||||
*Version-1993.0.2*
|
||||
Bugs in bap/xbap fixed. New executables included.
|
||||
|
||||
Thu May 20 14:45:38 BST 1993
|
||||
*Version-1993.0.1*
|
||||
Changes to makefiles and Staden_install
|
||||
|
||||
Fri Mar 5 11:27:22 GMT 1993
|
||||
*Version-1993.0*
|
||||
Now for DEC Alpha and Solaris
|
||||
bap/xbap now includes double stranding and auto-creation of oligos
|
||||
|
||||
Tue Jan 26 11:54:36 GMT 1993
|
||||
*Version-1992.3.1*
|
||||
Bug fixes
|
||||
1. indexseqlibs/genbentryname1.c
|
||||
2. convert bugs + new programs
|
||||
|
||||
Mon Nov 23 13:50:39 WET 1992
|
||||
*Version-1992.3*
|
||||
Includes bap/xbap and utility programs
|
||||
|
||||
|
||||
Wed Sep 30 11:18:09 BST 1992
|
||||
*Version-1992.2.1*
|
||||
Source changes since last release
|
||||
bug fixes to postscript output, sequence library programs
|
||||
New sun and dec executables
|
||||
|
||||
|
||||
Thu Aug 27 15:27:05 BST 1992
|
||||
|
||||
*Version-1992.2*
|
||||
|
||||
|
||||
Mon Jul 27 13:01:37 WET 1992
|
||||
|
||||
*Version-1992.1.3*
|
||||
Miscellaneous bug fixes and enhancements
|
||||
New sun and dec executables
|
||||
|
||||
|
||||
Tue Jun 16 16:07:41 BST 1992
|
||||
|
||||
*Version-1992.1.2*
|
||||
Sun sparc executables now linked with cc and not gcc.
|
||||
New makefile-sun files
|
||||
New sources for hitNtrg.c and freetext4.c (indexseqlibs), and
|
||||
tagU2.c (staden)
|
||||
|
||||
|
||||
Wed May 27 17:12:36 BST 1992
|
||||
|
||||
*Version-1992.1.1*
|
||||
Inclusion of vep (vector excision program), plus minor changes and bug fixes
|
||||
|
||||
|
||||
Tue May 26 11:10:28 WET 1992
|
||||
|
||||
*Version-1992.1*
|
||||
This version includes the port to DEC Ultrix (mips)
|
||||
|
BIN
bin/alfsplit
Normal file
BIN
bin/alfsplit
Normal file
Binary file not shown.
BIN
bin/bap
Normal file
BIN
bin/bap
Normal file
Binary file not shown.
BIN
bin/convert
Normal file
BIN
bin/convert
Normal file
Binary file not shown.
BIN
bin/cop
Normal file
BIN
bin/cop
Normal file
Binary file not shown.
BIN
bin/cop-bap
Normal file
BIN
bin/cop-bap
Normal file
Binary file not shown.
BIN
bin/dap
Normal file
BIN
bin/dap
Normal file
Binary file not shown.
BIN
bin/frog
Normal file
BIN
bin/frog
Normal file
Binary file not shown.
BIN
bin/getABISampleName
Normal file
BIN
bin/getABISampleName
Normal file
Binary file not shown.
BIN
bin/gip
Normal file
BIN
bin/gip
Normal file
Binary file not shown.
BIN
bin/lip
Normal file
BIN
bin/lip
Normal file
Binary file not shown.
BIN
bin/makeSCF
Normal file
BIN
bin/makeSCF
Normal file
Binary file not shown.
BIN
bin/mep
Normal file
BIN
bin/mep
Normal file
Binary file not shown.
BIN
bin/nip
Normal file
BIN
bin/nip
Normal file
Binary file not shown.
BIN
bin/nipf
Normal file
BIN
bin/nipf
Normal file
Binary file not shown.
BIN
bin/nipl
Normal file
BIN
bin/nipl
Normal file
Binary file not shown.
BIN
bin/pip
Normal file
BIN
bin/pip
Normal file
Binary file not shown.
BIN
bin/pipl
Normal file
BIN
bin/pipl
Normal file
Binary file not shown.
BIN
bin/rep
Normal file
BIN
bin/rep
Normal file
Binary file not shown.
BIN
bin/sap
Normal file
BIN
bin/sap
Normal file
Binary file not shown.
BIN
bin/sapf
Normal file
BIN
bin/sapf
Normal file
Binary file not shown.
BIN
bin/sethelp
Normal file
BIN
bin/sethelp
Normal file
Binary file not shown.
BIN
bin/sip
Normal file
BIN
bin/sip
Normal file
Binary file not shown.
BIN
bin/sipl
Normal file
BIN
bin/sipl
Normal file
Binary file not shown.
BIN
bin/splitp1
Normal file
BIN
bin/splitp1
Normal file
Binary file not shown.
BIN
bin/splitp2
Normal file
BIN
bin/splitp2
Normal file
Binary file not shown.
BIN
bin/splitp3
Normal file
BIN
bin/splitp3
Normal file
Binary file not shown.
BIN
bin/ted
Normal file
BIN
bin/ted
Normal file
Binary file not shown.
BIN
bin/trace2seq
Normal file
BIN
bin/trace2seq
Normal file
Binary file not shown.
BIN
bin/vep
Normal file
BIN
bin/vep
Normal file
Binary file not shown.
BIN
bin/xbap
Normal file
BIN
bin/xbap
Normal file
Binary file not shown.
BIN
bin/xbap.1
Normal file
BIN
bin/xbap.1
Normal file
Binary file not shown.
BIN
bin/xdap
Normal file
BIN
bin/xdap
Normal file
Binary file not shown.
BIN
bin/xmep
Normal file
BIN
bin/xmep
Normal file
Binary file not shown.
BIN
bin/xnip
Normal file
BIN
bin/xnip
Normal file
Binary file not shown.
BIN
bin/xpip
Normal file
BIN
bin/xpip
Normal file
Binary file not shown.
BIN
bin/xsap
Normal file
BIN
bin/xsap
Normal file
Binary file not shown.
BIN
bin/xsip
Normal file
BIN
bin/xsip
Normal file
Binary file not shown.
32
doc/Converting_Sap_Databases
Normal file
32
doc/Converting_Sap_Databases
Normal file
|
@ -0,0 +1,32 @@
|
|||
Converting Sap Databases To Be Used With XDAP SD 10 July 1991
|
||||
=======================================================================
|
||||
|
||||
The sequence assembly programs dap and xdap are based on the programs
|
||||
sap and xsap, with major modifications. For a concise summary of the
|
||||
new features I refer you to Rodger's and my paper, "A sequence assembly
|
||||
and editing program for efficient management of large projects"
|
||||
(Nucleic Acids Research, in press)
|
||||
|
||||
The need for storing extra information in project databases has
|
||||
resulted in the creation of two files. For users who wish to use old
|
||||
(sap) databases with xdap, additional files must be created to use all
|
||||
the new features. The program 'convert_project' does this. It is
|
||||
interactive, and asks you for names of relevant files, version numbers
|
||||
etc. Here is a sample program dialogue:
|
||||
|
||||
|
||||
% convert_project
|
||||
Database conversion program
|
||||
Converts *.RD? file to *.TG? and *.CC? files
|
||||
|
||||
Project name ? test
|
||||
Version ? 0
|
||||
Conversion completed.
|
||||
|
||||
|
||||
Further, please ensure that the file TAGDB is in your project
|
||||
directory. Copies can be found in $STADTABL. Alternatively ensure that
|
||||
the environment variable TAGDB is set to $STADTABL/TAGDB
|
||||
|
||||
setenv TAGDB $STADTABL/TAGDB
|
||||
|
30
doc/README
Normal file
30
doc/README
Normal file
|
@ -0,0 +1,30 @@
|
|||
Processing and printing LaTeX sources
|
||||
-------------------------------------
|
||||
|
||||
Given a source file src.tex, run LaTeX to generate the bibliographic
|
||||
references:
|
||||
|
||||
latex src
|
||||
|
||||
Now run BibTeX to search the bibliography for them:
|
||||
|
||||
bibtex src
|
||||
|
||||
Now run LaTeX twice, first to pick up the references, second to bind
|
||||
forward references:
|
||||
|
||||
latex src
|
||||
latex src
|
||||
|
||||
This will have generated a src.dvi output file. Now we convert this
|
||||
to PostScript:
|
||||
|
||||
dvi2ps src.dvi >src.ps
|
||||
|
||||
Now we can print this out:
|
||||
|
||||
lpr src.ps
|
||||
|
||||
Most of the above is only necessary if you are building something from
|
||||
scratch, but it's best to go through it anyway until you fully
|
||||
understand how LaTeX works.
|
131
doc/gip-menu.PS
Normal file
131
doc/gip-menu.PS
Normal file
|
@ -0,0 +1,131 @@
|
|||
%!
|
||||
/cm {28.2 mul} def
|
||||
/BOXSIZE 2 cm def
|
||||
|
||||
/boxcen
|
||||
{
|
||||
% move to centre of box
|
||||
BOXSIZE mul 2 div BOXSIZE 2 div rmoveto
|
||||
exch
|
||||
% move back by correct amount to ensure letter is in centre of box
|
||||
dup stringwidth
|
||||
pop 2 div neg % halve & neg x offset
|
||||
% y offset appears to be zero! - so use constant 'square' char (eg X)
|
||||
(X) stringwidth pop 2 div neg
|
||||
} def
|
||||
|
||||
/letter
|
||||
{
|
||||
dup BOXSIZE mul 0 rlineto
|
||||
0 BOXSIZE rlineto
|
||||
dup BOXSIZE mul neg 0 rlineto
|
||||
0 BOXSIZE neg rlineto
|
||||
closepath
|
||||
gsave
|
||||
dup boxcen rmoveto
|
||||
show
|
||||
stroke
|
||||
grestore
|
||||
BOXSIZE mul 0 rmoveto
|
||||
} def
|
||||
|
||||
/nextline {0 BOXSIZE neg rmoveto} def
|
||||
|
||||
/line
|
||||
{
|
||||
gsave
|
||||
1 letter
|
||||
1 letter
|
||||
1 letter
|
||||
1 letter
|
||||
grestore
|
||||
nextline
|
||||
} def
|
||||
|
||||
/Times-Roman findfont 50 scalefont setfont
|
||||
newpath
|
||||
5 setlinewidth
|
||||
200 650 translate
|
||||
0 0 moveto
|
||||
%2 setlinecap
|
||||
|
||||
gsave
|
||||
(A) (G) (C) (T) line
|
||||
(3) (4) (1) (2) line
|
||||
(B) (H) (D) (V) line
|
||||
(M) (N) (K) (L) line
|
||||
(-) (X) (Y) (R) line
|
||||
(8) (7) (6) (5) line
|
||||
/Times-Roman findfont 25 scalefont setfont
|
||||
gsave
|
||||
(DELETE) 2 letter
|
||||
(RESET) 2 letter
|
||||
grestore
|
||||
nextline
|
||||
/Times-Roman findfont 35 scalefont setfont
|
||||
gsave
|
||||
(STOP) 4 letter
|
||||
grestore
|
||||
nextline
|
||||
gsave
|
||||
(START) 4 letter
|
||||
grestore
|
||||
nextline
|
||||
gsave
|
||||
(CONFIRM) 4 letter
|
||||
grestore
|
||||
nextline
|
||||
% yukky from here on
|
||||
gsave
|
||||
0 BOXSIZE rmoveto
|
||||
1 cm 0 rlineto stroke
|
||||
grestore
|
||||
(ORIGIN) dup 4 boxcen rmoveto show pop
|
||||
(ORIGIN) stringwidth neg exch neg exch rmoveto
|
||||
(X) stringwidth exch 2 div rmoveto
|
||||
-5 0 rmoveto
|
||||
2 setlinewidth
|
||||
-45 21 rlineto
|
||||
6 0 rlineto
|
||||
-6 0 rmoveto
|
||||
0 -6 rlineto
|
||||
stroke
|
||||
grestore
|
||||
2 setlinewidth
|
||||
0 BOXSIZE 1.4 mul rmoveto
|
||||
6 6 rlineto
|
||||
-6 -6 rmoveto
|
||||
6 -6 rlineto
|
||||
-6 6 rmoveto
|
||||
80 0 rlineto
|
||||
5 -6 rmoveto
|
||||
/Times-Roman findfont 30 scalefont setfont
|
||||
(8 cm) show
|
||||
5 6 rmoveto
|
||||
76 0 rlineto
|
||||
-6 6 rlineto
|
||||
6 -6 rmoveto
|
||||
-6 -6 rlineto
|
||||
stroke
|
||||
0 0 moveto
|
||||
BOXSIZE .4 mul neg BOXSIZE rmoveto
|
||||
currentpoint translate
|
||||
newpath
|
||||
0 0 moveto
|
||||
90 rotate
|
||||
-6 6 rlineto
|
||||
6 -6 rmoveto
|
||||
-6 -6 rlineto
|
||||
6 6 rmoveto
|
||||
-244 0 rlineto
|
||||
-84 0 rmoveto
|
||||
0 -6 rmoveto
|
||||
(20 cm) show
|
||||
0 6 rmoveto
|
||||
-84 0 rmoveto
|
||||
-227 0 rlineto
|
||||
6 6 rlineto
|
||||
-6 -6 rmoveto
|
||||
6 -6 rlineto
|
||||
stroke
|
||||
showpage
|
2426
doc/install.PS
Normal file
2426
doc/install.PS
Normal file
File diff suppressed because it is too large
Load diff
172
doc/install.tex
Normal file
172
doc/install.tex
Normal file
|
@ -0,0 +1,172 @@
|
|||
\documentstyle[a4,11pt]{article}
|
||||
|
||||
\title{Installing the Staden Package}
|
||||
\author{Simon Dear}
|
||||
\date{21 May 1993}
|
||||
|
||||
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
|
||||
|
||||
\section{Introduction}
|
||||
|
||||
On the accompanying tape you will find executables for
|
||||
one of SunOS 4.x, Sun
|
||||
Solaris 2.x, DEC Ultrix, DEC OSF/1 and Silicon Graphics SGI operating systems.
|
||||
Also there are sources for all the programs in the Staden package.
|
||||
Programs in the package are:
|
||||
\begin{description}
|
||||
|
||||
\item[mep and xmep] Motif exploration program.
|
||||
\item[nip and xnip] Nucleotide interpretation program.
|
||||
\item[nipl] Nucleotide interpretation program (library).
|
||||
Searches nucleotide libraries for patterns of motifs.
|
||||
\item[pip and xpip] Protein interpretation program.
|
||||
\item[pipl] Protein interpretation program (library).
|
||||
Searches protein libraries for patterns of motifs.
|
||||
\item[sip and xsip] Similarity investigation program.
|
||||
\item[sipl] Similarity investigation program (library).
|
||||
Compares a probe protein or nucleic acid sequence against
|
||||
a library of sequences.
|
||||
\item[sap and xsap] The original sequence assembly program.
|
||||
\item[bap and xbap] Our latest, most advanced sequence assembly program.
|
||||
\item[dap and xdap] An obsolete assembly program, superseded by {\em bap}.
|
||||
\item[lip] Library interface program.
|
||||
\item[rep] Repeat examination program.
|
||||
\item[ted] X windows utility for displaying and editing
|
||||
fluorescent sequencing machine traces.
|
||||
\item[splitp1, splitp2 and splitp3] Refer to help/SPLITP.MEM.
|
||||
\item[sethelp] Builds online help files.
|
||||
\item[gip] Gel input program.
|
||||
\item[convert] Converts between {\em xdap\/} and {\em xbap\/} databases.
|
||||
\item[cop and cop-bap] Checks completed {\em xdap\/} and {\em xbap\/}
|
||||
databases for editing errors.
|
||||
\item[trace2seq] Extracts sequence from trace files.
|
||||
\item[getABISampleName] Extracts sample names from ABI trace files.
|
||||
\item[makeSCF] Converts existing trace files to the compact
|
||||
SCF format.
|
||||
\item[alfsplit] Splits the Pharmacia A.L.F. gel
|
||||
file into multiple files, one for each sample.
|
||||
\item[frog] Relabels lanes in ABI trace files.
|
||||
\item[+ numerous scripts (including {\em squirrel (v1.4)\/})]
|
||||
|
||||
\end{description}
|
||||
|
||||
|
||||
\section{Requirements}
|
||||
|
||||
You will need a tape drive to read the software off the distribution
|
||||
tape (QIC-150, TK50, or Exabyte). You will also need a large amount of
|
||||
disk storage to accommodate the whole package. For release
|
||||
version-1993.0, requirements were
|
||||
31Mb (SunOS 4.x),
|
||||
36Mb (Sun Solaris 2.x),
|
||||
30Mb (DEC Ultrix),
|
||||
37Mb (DEC OSF/1)
|
||||
and
|
||||
27Mb (Silicon Graphics SGI).
|
||||
|
||||
|
||||
To compile the Staden package you will require:
|
||||
\begin{itemize}
|
||||
\item An ANSI C compiler.
|
||||
\item A FORTRAN-77 compiler.
|
||||
\item X11 (Release 4 or 5).
|
||||
\item GNU make (except with SunOS and Solaris 2.x.)
|
||||
\end{itemize}
|
||||
|
||||
\section{Installation}
|
||||
|
||||
To install the package,
|
||||
\begin{enumerate}
|
||||
\item Create the directory where you would like the software to be
|
||||
placed. You may have to be superuser to do this.
|
||||
\begin{verbatim} mkdir /home/Staden\end{verbatim}
|
||||
\item Change to this directory.
|
||||
\begin{verbatim} cd /home/Staden\end{verbatim}
|
||||
\item Place the tape into the tape unit.
|
||||
\item Extract the software off the distribution tape (NOTE: the device name may be
|
||||
different on your machine):
|
||||
\begin{verbatim} tar xvf /dev/rst0\end{verbatim}
|
||||
\item C shell users should set the environment variable {\bf STADENROOT}
|
||||
to be the directory where the package is installed and source the file
|
||||
{\em staden.login} found there. This is best done by adding lines to their
|
||||
{\em .login} file:
|
||||
\begin{verbatim}
|
||||
setenv STADENROOT /home/Staden
|
||||
source $STADENROOT/staden.login
|
||||
\end{verbatim}
|
||||
Users of the Bourne shell, sh, should similarly add lines to their {\em .profile} file:
|
||||
\begin{verbatim}
|
||||
STADENROOT=/home/Staden
|
||||
export STADENROOT
|
||||
. $STADENROOT/staden.profile
|
||||
\end{verbatim}
|
||||
|
||||
The startup routines set environment variables and modify the shell's
|
||||
search path so that it can find the programs in the Staden Package.
|
||||
When users next log on to the system, they will be able to use the
|
||||
programs.
|
||||
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
\section {Installation on Unsupported Platforms}
|
||||
|
||||
Install the software as you would for a supported machine. You will
|
||||
need to remake all executables. The script {\em Staden\_install} can
|
||||
be used to help recompile the package. A large number of
|
||||
assumptions have been made, and you may need to change the makefiles
|
||||
to suit your system.
|
||||
|
||||
The sources have been organised into subdirectories of the directory
|
||||
{\bf src}. In {\bf Misc} are routines common to many programs. They
|
||||
should be made first. In {\bf staden} are all the programs of the
|
||||
Staden suite ({\em mep}, {\em nip}, {\em pip}, {\em sap}, {\em sip},
|
||||
{\em dap}, {\em gip}, {\em vep}, {\em lip} and {\em rep}) with the
|
||||
exception of {\em bap}. Code for our latest sequence assembly program
|
||||
{\em bap} is in directories {\bf bap} and {\bf bap/osp-bits}. Make
|
||||
the objects in {\bf staden} first, then the ones in {\bf
|
||||
bap/osp-bits}, and finally the ones in {\bf bap}. In {\bf ted} is the
|
||||
trace editing program.
|
||||
|
||||
|
||||
\section {Other Software Provided}
|
||||
|
||||
Other software and scripts can be found in the {\bf alf\/}, {\bf
|
||||
abi\/}, {\bf cop\/}, {\bf getMCH\/}, {\bf scf\/}, {\bf frog\/} and {\bf
|
||||
scripts}
|
||||
directories.
|
||||
Each directory contains documentation describing the programs
|
||||
contained.
|
||||
|
||||
Since release version-1993.0 we have distributed the {\em squirrel (v1.4)}
|
||||
package. Please read the disclaimer that accompanies this software.
|
||||
Additional sources and scripts can be found in {\bf expGetSeq}, {\bf vepe},
|
||||
{\bf newted} and {\bf squirrel-1.4} directories.
|
||||
|
||||
Many scripts (including {\em squirrel}) and filters were developed at the MRC-LMB for
|
||||
{\bf INTERNAL USE ONLY}.
|
||||
We are aware that people elsewhere will want to develop
|
||||
similar software.
|
||||
We include them in the Staden Package merely as {\bf EXAMPLES} of
|
||||
what has been achieved elsewhere.
|
||||
{\bf THESE SCRIPTS WILL NOT WORK ON YOUR SYSTEM WITHOUT MODIFICATION.}
|
||||
|
||||
\section {When All Else Fails...}
|
||||
If you have any problems please contact the authors,
|
||||
\mbox{Rodger Staden}
|
||||
\mbox{(\em rs@mrc-lmba.cam.ac.uk\/)},
|
||||
\mbox{Simon Dear}
|
||||
\mbox{(\em sd@mrc-lmba.cam.ac.uk\/)}
|
||||
and
|
||||
\mbox{James Bonfield}
|
||||
\mbox{(\em jkb@mrc-lmba.cam.ac.uk\/)},
|
||||
by email or by writing to us at:
|
||||
MRC Laboratory of Molecular Biology, Hills Road, Cambridge, \mbox{CB2 2QH}, U.K.
|
||||
We also welcome general comments on the package.
|
||||
|
||||
\end{document}
|
5154
doc/manual.rtf
Normal file
5154
doc/manual.rtf
Normal file
File diff suppressed because it is too large
Load diff
3033
doc/ted.PS
Normal file
3033
doc/ted.PS
Normal file
File diff suppressed because it is too large
Load diff
213
doc/ted.tex
Normal file
213
doc/ted.tex
Normal file
|
@ -0,0 +1,213 @@
|
|||
\documentstyle[12pt]{article}
|
||||
|
||||
\title{A trace display and editing program for data from fluorescence based
|
||||
sequencing machines}
|
||||
\author{Timothy Gleeson \and LaDeana Hillier}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
\section*{}
|
||||
\subsection*{}
|
||||
\subsubsection*{ABSTRACT}
|
||||
|
||||
``Ted'' ({\em T}race {\em ed}itor)
|
||||
is a graphical editor for sequence and trace data from automated
|
||||
fluorescence sequencing machines. It provides facilities
|
||||
for viewing sequence and trace data (in top or bottom strand
|
||||
orientation), for editing the base sequence, for
|
||||
automated or manual trimming of the head (vector) and tail
|
||||
(uncertain data) from the sequence, for vertical and horizontal trace
|
||||
scaling, for keeping a history of sequence editing, and for output of
|
||||
the edited sequence. Ted has been used extensively in the C.
|
||||
elegans genome sequencing project,
|
||||
both as a stand-alone program and integrated into
|
||||
the Staden sequence assembly package, and has
|
||||
greatly aided in the efficiency
|
||||
and accuracy of sequence editing. It runs in the X
|
||||
windows environment on Sun workstations and is available from the
|
||||
authors. Ted currently supports sequence and trace data from the ABI
|
||||
373A and Pharmacia A.L.F. sequencers.
|
||||
|
||||
\subsubsection*{INTRODUCTION}
|
||||
Time involved in sequence editing is extensive, and anything easing
|
||||
that burden will improve the efficiency of any major sequencing
|
||||
project. Having sequence and trace data available online in
|
||||
easily-manipulable form is invaluable. Ted (a Trace-EDitor) was developed to
|
||||
fill this role in the C. elegans genome
|
||||
sequencing project [1].
|
||||
|
||||
\subsubsection*{METHODS}
|
||||
|
||||
{\em Computing Design and Implementation.}
|
||||
When designing ted, we had a number of specific computing goals
|
||||
in mind including portability and adaptability. For portability, we
|
||||
chose to write ted in ANSI C using the X windowing system and the
|
||||
Xaw toolkit. X provides basic capabilities for the creation and use
|
||||
of windows, and the toolkit contains a number of pre-packaged
|
||||
components, such as the ``sliders'' used for scrolling. X also allows
|
||||
site, user and per-run defaults to be set. Adaptability is also an
|
||||
important goal since we are providing a new function to
|
||||
research groups who are constantly adding new requirements.
|
||||
|
||||
Stylistically, we have followed an ``Abstract Data Type''
|
||||
discipline. In this discipline, a program is split into a number of
|
||||
modules which provide separate, well-defined functions. We
|
||||
separate the interface of a module from its implementation. For
|
||||
example, a unified internal sequence format is used. This can store
|
||||
a varying amount of information. However, there is a clear and
|
||||
simple interface by which the rest of the program accesses this
|
||||
module. Such a style is not well supported by C, but its adoption has
|
||||
been very successful. The addition of new sequencing machines, and
|
||||
thus new external data formats, may cause some changes in the
|
||||
internal representation of the sequence but should not affect
|
||||
the rest of the program.
|
||||
|
||||
Ted accepts a large number of optional command line arguments,
|
||||
many of which can also be specified as system defaults. This
|
||||
supports a mode of working whereby ted is invoked not directly by the
|
||||
user but instead by a script or another application which supplies
|
||||
arguments appropriate to the editing task.
|
||||
|
||||
|
||||
{\em Graphical Interface.}
|
||||
Ted currently accepts data from two fluorescence based sequencing
|
||||
machines, the Pharmacia A.L.F. and the ABI 373A.
|
||||
The sequencing machine data consists of
|
||||
four traces of fluorescence levels together with the machine's
|
||||
interpretation, which is a sequence of bases.
|
||||
Ted displays
|
||||
the traces and the machine-generated base list.
|
||||
A second, initially identical, list of bases is provided for correction
|
||||
by the user.
|
||||
|
||||
Ted has an X windows based
|
||||
graphical interface. The trace file
|
||||
can either be input from the command line or by
|
||||
clicking on the INPUT button after the program has been invoked.
|
||||
Other parameters which the user may specify on the
|
||||
command line include: the output
|
||||
file name; a base position or sequence string on which the trace is
|
||||
to be centered; a default trace magnification; a 5' vector sequence
|
||||
for automated elimination of the sequence head (vector); top or
|
||||
bottom strand orientation; or any of the usual X-window parameters (e.g.
|
||||
display, geometry...).
|
||||
|
||||
The graphics display (Figure 1) consists of the control
|
||||
panel, the base position information, the original and edited sequence
|
||||
data, and the graphical representation of the trace. The user may
|
||||
begin by using the control panel INPUT button to input a new trace
|
||||
file at which time the user selects whether to view the sequence
|
||||
and trace in top or bottom strand orientation.
|
||||
The trace file is displayed and, if a 5' vector sequence has been
|
||||
specified on the command line, the program attempts to select a
|
||||
cutoff point corresponding to the vector sequence at the ``head'' of the
|
||||
trace file. The bases beyond the ``cutoff'' point are
|
||||
displayed on a shaded background. The user may modify the cutoff
|
||||
position by clicking on the ``Adj left cut'' button and clicking on the
|
||||
position of the desired cutoff. Similarly, the user may adjust the
|
||||
right cutoff of the sequence (chosen by starting at the 5' end of the
|
||||
sequence and looking for the first occurrence when 2 out of 5 bases
|
||||
are 'N') by scrolling along the sequence to that point, clicking on the
|
||||
``Adj right cut'' button, and clicking on the appropriate base.
|
||||
Automation of the ``cutoff'' process is optional; the user may compile
|
||||
the program with that feature turned ``off.''
|
||||
|
||||
Clicking on the ``Edit seq'' button allows the user to enter the edit
|
||||
mode. The ``Search'' button can be used to skip from ``problem'' to
|
||||
``problem'' (i.e., ambiguity to ambiguity) or to look for runs of
|
||||
identical bases (e.g., TTTT) which are often mis-called by
|
||||
the machine software.
|
||||
|
||||
Bases can be inserted, deleted, or replaced as with
|
||||
any ordinary word-processor. In difficult-to-read areas,
|
||||
the trace may be vertically or horizontally scaled by dragging or
|
||||
clicking on the magnification scroll bar or by clicking on the
|
||||
vertical scaling buttons (``Scale down'', ``Scale up''), respectively.
|
||||
Finally, the edited sequence is saved to an ascii file using the
|
||||
``Output'' button. A history of the editing session can also be saved
|
||||
along with the sequence.
|
||||
The ``Quit'' button is used
|
||||
to exit the program. When reinvoking ted on an edited trace file the
|
||||
edited base sequence, rather than the original sequence, is shown in
|
||||
the edited base window. The user may invoke ted by calling in any one
|
||||
of the previous editing sessions.
|
||||
|
||||
|
||||
\subsubsection*{APPLICATIONS AND CONCLUSIONS}
|
||||
|
||||
In the C. elegans genome sequencing project, data from the ABI or
|
||||
A.L.F. sequencing machines' computers are transferred to Sun
|
||||
workstations.
|
||||
The user invokes a Unix shell script that calls ted systematically
|
||||
on each of the new set of trace files creating a set of sequence files.
|
||||
The sequence files that are deemed to be of acceptable quality
|
||||
are then entered into the sequence
|
||||
assembly program xdap [2] where the sequences are assembled into
|
||||
contigs. Portions of the ted trace-editor have been incorporated
|
||||
into the xdap ``trace manager,'' which is used in
|
||||
conjunction with the contig editor to view sets of aligned traces
|
||||
at sites of discrepancies in the aligned sequences.
|
||||
|
||||
Ted is also used at the stage of choosing oligo primers for the
|
||||
``walking'' stage of the sequencing project. It can be invoked directly
|
||||
from the oligo selection program, osp [3], to allow examination
|
||||
of the trace data in the region of the primers so that
|
||||
integrity of the sequence data can be verified.
|
||||
|
||||
Currently, no other programs are known to be available
|
||||
which support editing of the ABI trace data.
|
||||
Further, the modular design of the program should allow
|
||||
support for new types of sequencing machines, with new data
|
||||
formats, to be implemented in a straightforward fashion.
|
||||
|
||||
|
||||
\subsubsection*{AVAILABILITY}
|
||||
Ted is freely available from the authors or from Rodger Staden and
|
||||
Simon Dear (MRC Laboratory of Molecular Biology, Hills Road, Cambridge,
|
||||
UK, CB2 2QH) for use on Sun workstations running X-windows (or OpenLook).
|
||||
|
||||
|
||||
\subsubsection*{ACKNOWLEDGMENTS}
|
||||
The authors would like to thank all members of the C. elegans
|
||||
sequencing project with special thanks to the following people:
|
||||
John Sulston, Bob Waterston,
|
||||
Phil Green, Rick Wilson, Richard Durbin, Simon Dear, and Rodger Staden
|
||||
for their helpful suggestions for improvements in the ted interface
|
||||
and for their parts in the development of ted. This work was
|
||||
supported by the Medical Research Council and NIH grant R01-HG00136.
|
||||
|
||||
\subsubsection*{REFERENCES}
|
||||
|
||||
1. Waterston, R., Sulston, J., et al. (1991), in preparation.
|
||||
|
||||
2. Dear, S. and Staden, R. (1991) Nuc. Acids Res., in press.
|
||||
|
||||
3. Hillier, L. and Green, P. (1991) submitted.
|
||||
|
||||
|
||||
{\bf Figure 1 legend.}
|
||||
|
||||
Figure 1 shows a ``screen dump'' of the ted graphical interface.
|
||||
The display consists of
|
||||
the control panel and the synchronized view of the base position
|
||||
information, original and edited sequence data,
|
||||
and graphical representation of the trace (with each nucleotide's trace
|
||||
being represented
|
||||
by a different color). The control
|
||||
panel allows the user to read in new trace files (in either
|
||||
bottom or top strand orientation)
|
||||
as well as to search for a string of nucleotides or a certain base position.
|
||||
Scroll bars allow the user to adjust the magnification of or scroll through
|
||||
the sequence and trace data. The user may also choose to change the vertical
|
||||
magnification of the trace data. Further, sequence on the head (vector)
|
||||
or tail (uncertain data) of the sequence may be ``cutoff''
|
||||
using the adjust left and right cutoff buttons. Bases can be inserted,
|
||||
deleted, or replaced as with
|
||||
any ordinary word-processor in the sequence data window. Finally, the
|
||||
sequence may be written to an ascii file using the output button on
|
||||
the control panel.
|
||||
|
||||
\end{document}
|
||||
|
||||
|
||||
|
2722
help/BAP.RNO
Normal file
2722
help/BAP.RNO
Normal file
File diff suppressed because it is too large
Load diff
2724
help/DAP.RNO
Normal file
2724
help/DAP.RNO
Normal file
File diff suppressed because it is too large
Load diff
205
help/GIP.RNO
Normal file
205
help/GIP.RNO
Normal file
|
@ -0,0 +1,205 @@
|
|||
.NPA
|
||||
.left margin1
|
||||
.CENTER
|
||||
GIP
|
||||
.LEFT MARGIN1
|
||||
.PARA
|
||||
A digitizer is
|
||||
a two dimensional surface
|
||||
which is such that if a special pen is pressed onto it, the pen's
|
||||
coordinates can be recorded by a computer.
|
||||
These coordinates
|
||||
can be interpreted by a program.
|
||||
.para
|
||||
The digitizing device we use works by the pen emitting a high frequency
|
||||
sound which is picked up by two microphones positioned at the rear of the
|
||||
working area. The pen position is determined by triangulation and the
|
||||
digitizing device sends the coordinates to the computer. As no special
|
||||
surface is required the device can conveniently be positioned on a light
|
||||
box giving the sequencer an unobscured view of the autoradiographs.
|
||||
.LEFT MARGIN1
|
||||
The digitizer
|
||||
is called a GRAPHBAR MODEL GP7 made by
|
||||
Science Accessories Corp,
|
||||
970 Kings Highway West,
|
||||
Southport,
|
||||
Connecticut 06490,
|
||||
USA.
|
||||
|
||||
.para
|
||||
The program uses a menu to allow the user to select commands or
|
||||
to enter the uncertainty codes for areas of the gel that are
|
||||
difficult to interpret. A menu is simply a series of boxes drawn on
|
||||
the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. A copy of the menu should have been sent to you.
|
||||
It should be stuck down on the surface of the
|
||||
light box in the digitizing area. For convenience it is best to position it
|
||||
to the right of the digitizing area, but in practice as long as
|
||||
its top
|
||||
edge is parallel to the digitizer box, it can be put anywhere in the active
|
||||
region.
|
||||
.sk1
|
||||
.left margin1
|
||||
Entering gel readings using a digitizer
|
||||
.left margin1
|
||||
.para
|
||||
The autoradiograph should be stuck down on the light box with the lanes
|
||||
running, as near as is
|
||||
possible, at right angles to the digitizer. To read
|
||||
an autoradiograph placed on the light box
|
||||
the user need only define the positions of
|
||||
the four sequencing lanes and the bases
|
||||
to which they correspond and then use the pen to point to each
|
||||
successive band progressing up the gel. The program examines the
|
||||
coordinates of each pen position to see in which of the four
|
||||
lanes
|
||||
it lies and assigns the corresponding base to be stored in the
|
||||
computer. Each time the pen tip is depressed to point to a position
|
||||
on the surface of the digitizer the program sounds the bell on the
|
||||
terminal (a different sound for each of the four bases on the
|
||||
microcomputer version of the program)
|
||||
to indicate to the user that a point has been recorded. As
|
||||
the sequence is read the program displays it on the screen.
|
||||
|
||||
|
||||
.para
|
||||
The program uses a menu
|
||||
to allow the user to select commands or
|
||||
to enter the uncertainty codes for areas of the gel that are
|
||||
difficult to interpret. A menu is simply a series of boxes drawn on
|
||||
the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. As well as the uncertainty codes
|
||||
A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8 the following commands are
|
||||
included in the menu: DELETE removes the last character from
|
||||
the sequence;
|
||||
RESET allows the lane centres to be redefined;
|
||||
START means begin the next
|
||||
stage of the procedure; STOP means stop the current stage in the
|
||||
procedure; CONFIRM means confirm that the last command or set of
|
||||
coordinates are correct.
|
||||
.para
|
||||
The digitizing device also has a menu of its own. This lies in a two inch wide
|
||||
strip immediately in front of the digitizing box. Pen positions within this
|
||||
two inch strip are interpreted as commands to the digitizer and are not
|
||||
sent to the GIP program. In general the only time users will need to use
|
||||
the device menu is when they tell GIP where the program menu lies in the
|
||||
digitizing area. This is done by first hitting ORIGIN in the device menu
|
||||
and then hitting the bottom left hand corner of the program menu. The
|
||||
program menu can hence be positioned anywhere in the active region but
|
||||
should be arranged parallel to the digitizer.
|
||||
.para
|
||||
The user should try to hit the bands as near as possible to the centre of
|
||||
the lanes because the program tracks the lanes up the film using the pen
|
||||
positions. By using this tracking strategy the user only has to define the
|
||||
centres of the bottom of the lanes before starting to read the film. The
|
||||
program can correctly follow quite curved lanes and constantly checks that
|
||||
its lane centre coordinates look sensible. If the lane centres appear to be
|
||||
getting too close the program stops responding to the pen positions of
|
||||
bands and hence does not ring the bell. If this occurs users must hit the
|
||||
reset box in the menu and the program will request them to redefine the
|
||||
lane centres at the current reading position. Then they can continue
|
||||
reading. As a further safeguard the program will only respond to pen
|
||||
positions either in the menu or very close to the current reading position.
|
||||
.sk1
|
||||
.left margin1
|
||||
Running the gel reading program
|
||||
.left margin1
|
||||
The autoradiograph should be firmly stuck down on the light box and the
|
||||
program started by typing GIP. It will ask the first question.
|
||||
.left margin2
|
||||
" ? FILE OF FILE NAMES="
|
||||
.left margin2
|
||||
Type the name for the file of file names and then follow the instructions.
|
||||
.left margin2
|
||||
" HIT DIGITIZER MENU ORIGIN"
|
||||
.left margin2
|
||||
" THEN PROGRAM MENU ORIGIN"
|
||||
.left margin2
|
||||
" THEN HIT START IN PROGRAM MENU"
|
||||
.left margin2
|
||||
If the bell does not sound after you hit start try hitting metric in the
|
||||
device menu (the program uses metric units, and some digitizers are set to
|
||||
default to use inches; hitting metric switches between the two).
|
||||
.left margin2
|
||||
After the bell has sounded the program will give the default lane order.
|
||||
.left margin2
|
||||
" LANE ORDER IS T C A G"
|
||||
.left margin2
|
||||
" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
|
||||
.left margin2
|
||||
If the lane order, reading from left to right is correct hit confirm in the
|
||||
program menu. If you are using a different order hit reset and you will be
|
||||
asked to define the lane order from left to right using the program menu
|
||||
(as follows).
|
||||
.left margin2
|
||||
" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
|
||||
.left margin2
|
||||
Hit the boxes in the menu that contain the symbols A,C,G,T in the
|
||||
left-right order of the lanes. The program will respond with the lane order
|
||||
as above and ask for confirmation. When this is received, the next task is
|
||||
to define the start positions of the next four lanes.
|
||||
.left margin2
|
||||
" HIT START, THEN HIT (LEFT TO RIGHT)"
|
||||
.left margin2
|
||||
" THE START POSITIONS FOR THE NEXT FOUR LANES"
|
||||
.left margin2
|
||||
Hit the centres of the four lanes at a height level with the first band
|
||||
that is going to be read. The program will report the mean lane separations
|
||||
and ask for confirmation that they are correct.
|
||||
.left margin2
|
||||
" MEAN LANE SEPARATION IS XX"
|
||||
.left margin2
|
||||
" HIT CONFIRM TO CONTINUE"
|
||||
.left margin2
|
||||
Users will become familiar with the values from their films and will spot
|
||||
any unusual numbers.
|
||||
Asking for confirmation allows users to try again if they had made a
|
||||
mistake, but generally the lane separation values can be ignored.
|
||||
Hit confirm, and the program will give the message
|
||||
.left margin2
|
||||
" HIT START WHEN READY TO BEGIN READING"
|
||||
.left margin2
|
||||
Hit start and the program will give the message
|
||||
.left margin2
|
||||
" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
|
||||
.left margin2
|
||||
Hit the bands, interpreting the sequence progressing
|
||||
up the film. If necessary use the uncertainty codes. If the pen stops
|
||||
responding hit reset and follow the instructions as above. When the
|
||||
sequence becomes unreadable hit stop and the program will ask for a file
|
||||
name for the gel reading just read.
|
||||
.left margin2
|
||||
" ? FILE NAME FOR THIS GEL READING="
|
||||
.left margin2
|
||||
Type the file name observing the rules about legal gel reading names.
|
||||
The program will ask if you wish
|
||||
to read another sequence.
|
||||
.left margin2
|
||||
" TO ENTER ANOTHER GEL READING TYPE 1"
|
||||
.left margin2
|
||||
To enter another type 1 and you will be back to the step of defining the
|
||||
lane order. Typing anything else will stop the program.
|
||||
.left margin1
|
||||
.sk1
|
||||
Running the microcomputer version of the gel reading program
|
||||
.left margin1
|
||||
The microcomputer version of GIP is slightly different and is called
|
||||
GIPB. The BBC micro
|
||||
does not have the capacity to process the gel readings beyond the reading
|
||||
stage.
|
||||
This means that users of this program
|
||||
would need to transfer their gel readings from the micro to another machine
|
||||
using a terminal emulator. Transferring many files is tedious and so the
|
||||
microcomputer version of the gel reading program stores all the gel
|
||||
readings for each run of the program in a single file. This special
|
||||
file contains both sequences and file names and can be moved in a single
|
||||
transfer to another machine. Once on the other machine the single file must
|
||||
be split into separate gel reading files and a file of file names. This is
|
||||
done using the program BSPLIT. As far as using the microcomputer version
|
||||
of GIP, the only difference is that the first file name the program
|
||||
requests is not a file of file names, but a name for the single file to
|
||||
contain all the gel readings and their names.
|
859
help/MEP.RNO
Normal file
859
help/MEP.RNO
Normal file
|
@ -0,0 +1,859 @@
|
|||
.NPA
|
||||
.SP 1
|
||||
.left margin1
|
||||
@-1. TX 0 @General
|
||||
.sp
|
||||
@-2. T 0 @Screen control
|
||||
.sp
|
||||
@-2. X 0 @Screen
|
||||
.sp
|
||||
@-3. TX 0 @Dictionary analysis
|
||||
.sp
|
||||
@0. TX -1 @MEP
|
||||
.left margin2
|
||||
.para
|
||||
This is a program for analysing families of nucleotide sequences in order
|
||||
to find common motifs and potential binding sites.
|
||||
The ideas in this program were described in Staden, R. "Methods
|
||||
for discovering novel motifs in nucleic acid sequences".
|
||||
Computer Applications in the Biosciences, 5, 293-298, (1989).
|
||||
.PARA
|
||||
The program can read
|
||||
sequences stored in either of two formats: 1) all sequences aligned in a
|
||||
single file; 2) all sequences in separate files and accessed through a file
|
||||
of file names.
|
||||
.PARA
|
||||
The program contains functions that can answer several questions
|
||||
about a set of sequences:
|
||||
.SK1
|
||||
.left margin2
|
||||
Which words are most common?
|
||||
.left margin2
|
||||
Which words occur in the most sequences?
|
||||
.left margin2
|
||||
Which words contain the most information?
|
||||
.left margin2
|
||||
Which words occur in equivalent positions in the sequences?
|
||||
.left margin2
|
||||
Which words are inverted repeats?
|
||||
.left margin2
|
||||
Which words occur on both strands of the sequences?
|
||||
.left margin2
|
||||
Where are the inverted repeats?
|
||||
.left margin2
|
||||
Where are the fuzzy words?
|
||||
.para
|
||||
Most of the program is
|
||||
concerned with analysing
|
||||
what it terms "fuzzy
|
||||
words" within the set of sequences. The analysis is explained
|
||||
below. Note that the standard version of the program is limited
|
||||
to words of maximum length 8 letters, and a maximum fuzziness
|
||||
of 2.
|
||||
.para
|
||||
The following analyses (preceded by their option numbers) are included:
|
||||
.lit
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
.end lit
|
||||
.para
|
||||
Some of these methods produce graphical
|
||||
results
|
||||
and so the
|
||||
program is generally used from a graphics terminal (a vdu on which lines
|
||||
and points can be drawn as well as characters).
|
||||
.para
|
||||
.LEFT MARGIN2
|
||||
The position of each of the plots is defined relative to a user's drawing
|
||||
board which has size 1-10,000 in x and 1-10,000 in y.
|
||||
Plots for
|
||||
each option are drawn in a window defined by x0,y0 and xlength,ylength.
|
||||
Where x0,y0 is the position of the bottom left hand corner of the window,
|
||||
and xlength is the width of the window and ylength the
|
||||
height of the window.
|
||||
.lit
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
.end lit
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
.para
|
||||
The options for the program are accessed from 3 main menus: general, screen
|
||||
control and dictionary analysis.
|
||||
Both menus and options are selected by number.
|
||||
.para
|
||||
The most important and novel part of the program is its use of "fuzzy
|
||||
dictionaries" and an information theory measure, to help show the most
|
||||
interesting motifs.
|
||||
|
||||
Central to the method is the idea of a fuzzy dictionary of word
|
||||
frequencies. A dictionary of word frequencies is an ordered list of
|
||||
all the words in the sequences and a count of the number of times
|
||||
that they occur. A fuzzy dictionary is an equivalent list but which
|
||||
contains instead, for each word, a count of the number of times
|
||||
similar words occur in the sequences. We term words that are
|
||||
similar "relations". The fuzziness is defined by the number of
|
||||
letters in a word that are allowed to be different. So if we had a
|
||||
fuzziness of 1 we allow 1 letter to be different. For example, with
|
||||
a fuzziness of 1, the entry in the fuzzy dictionary for the word
|
||||
TTTTTT would contain a count of the numbers of times TTTTTT
|
||||
occurred plus the number of times all words differing by exactly
|
||||
one letter from TTTTTT occurred.
|
||||
.para
|
||||
Once the fuzzy dictionary has been created we can examine it in
|
||||
several ways to find candidate control sequences. The simplest
|
||||
question we can ask is which word in the dictionary is the most
|
||||
common. Sometimes this simple criterion of "most common" may
|
||||
be adequate to discover a new motif but in general we would not
|
||||
expect it to be sufficient. For example some words will be common
|
||||
simply because of a base composition bias in the sequences being
|
||||
analysed. In addition a word can be the most frequent and yet not
|
||||
be "well defined". This last point is best explained by an example.
|
||||
.para
|
||||
Suppose we were looking at two letter words and allowing one
|
||||
mismatch, and that there were 10 occurrences of TT and 5 of AC.
|
||||
We could align the 10 words that were one letter different from TT
|
||||
and the 5 that were related to AC. Then we could count the
|
||||
number of times each base occurred in each position for each of
|
||||
these two sets of words. Suppose we got the two base frequency
|
||||
tables shown below.
|
||||
.lit
|
||||
TT AC
|
||||
T 6 4 T 1 0
|
||||
C 1 3 C 0 4
|
||||
A 1 2 A 4 1
|
||||
G 2 1 G 0 0
|
||||
|
||||
.end lit
|
||||
These tables show that although TT occurs (with one letter
|
||||
mismatch) more often than AC, the ratio of base frequencies for
|
||||
AC at 4/5, 4/5 is higher than those for TT at 6/10, 4/10. Hence we
|
||||
would say that AC was better defined than TT.
|
||||
Expressing this another way we would say that the definition of AC
|
||||
contained more information than that for TT. The program
|
||||
calculates the information content in a way that takes into account
|
||||
both the sequence composition and the level of definition of the
|
||||
motif.
|
||||
.para
|
||||
Definitions
|
||||
|
||||
.para
|
||||
Here we deal only with the dictionary analysis.
|
||||
Suppose we are dealing with a set of
|
||||
sequences and are examining them for words that are six
|
||||
characters in length.
|
||||
|
||||
.para
|
||||
Dictionary Dw contains a count of the number of times each word
|
||||
occurs in the set of sequences. For example the entry for TTTTTT
|
||||
contains a value equal to the number of times the word TTTTTT
|
||||
occurs in the set of sequences.
|
||||
|
||||
.para
|
||||
Dictionary Ds contains a count of the number of different sequences in
|
||||
which each word occurs. For example if the entry for word TTTTTT
|
||||
contains the value 10, it denotes that the word TTTTTT occurs in ten
|
||||
different sequences. Unlike Dw it only counts words once for each
|
||||
sequence. For example if we had a set of 100 sequences, the maximum
|
||||
possible value that Ds could take is 100, and this would only happen if
|
||||
a word occurred in every sequence. However for the same set of
|
||||
sequences, Dw could contain values greater than 100, and this would
|
||||
show that a word had occurred more than once in at least one
|
||||
sequence.
|
||||
|
||||
.para
|
||||
From either of the two dictionaries Dw or Ds we can calculate a fuzzy
|
||||
dictionary Dm. For each word, the entry in the fuzzy dictionary Dm
|
||||
contains the sum of the dictionary values (taken from either Dw or Ds)
|
||||
for all words that differ from it by up to m letters. For example if m=2
|
||||
the entry for TTTTTT contains the number of times that TTTTTT
|
||||
occurs in the dictionary, plus the counts for all words that differ from
|
||||
TTTTTT by 1 or 2 letters.
|
||||
Obviously the interpretation of the values in Dm depends on which of
|
||||
the two dictionaries Dw or Ds they were derived from. When derived
|
||||
from Dw the entry for any word in Dm gives the total number of
|
||||
times it, and its relations, occur in the set of sequences. When derived
|
||||
from Ds the entry for any word in Dm gives the total number of
|
||||
different sequences that contain a word and each of its relations.
|
||||
|
||||
.para
|
||||
Finally, from fuzzy dictionary Dm we can derive fuzzy dictionary Dh.
|
||||
All entries in Dh are zero except for the word(s), within each set of
|
||||
relations, that are most frequent. For example if TTTTTT occurred 20
|
||||
times but had a relation that occurred more often, then the entry for
|
||||
TTTTTT would be zero. However if TTTTTT did not have a more
|
||||
frequently occurring relation, then the entry for TTTTTT would
|
||||
contain the value 20.
|
||||
|
||||
.LEFT MARGIN1
|
||||
@1. T 0 @Help
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This option gives online help. The user should select option numbers and
|
||||
the current documentation will be given. Note that option 0 gives an
|
||||
introduction to the program, and that ? will get help from anywhere in
|
||||
the
|
||||
program.
|
||||
The following analyses (preceded by their option numbers) are included:
|
||||
.lit
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
.end lit
|
||||
.left margin1
|
||||
@2. T 0 @Quit
|
||||
.left margin2
|
||||
.para
|
||||
This function stops the program.
|
||||
.left margin1
|
||||
@3. TX 1 @Read a new sequence
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
It can read
|
||||
sequences stored in either of two formats: 1) all sequences aligned in a
|
||||
single file; 2) all sequences in separate files and accessed through a file
|
||||
of file names. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
X 1 Read file of aligned sequences
|
||||
2 Use file of file names
|
||||
? 0,1,2 =
|
||||
|
||||
? File of aligned sequences=F1
|
||||
Number of files 88
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@4. TX 1 @Define active region
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
For its analytic functions
|
||||
the program always works on a region of the sequence called the active
|
||||
region. When new sequences are read into the program the active region is
|
||||
automatically set to start at the beginning of the sequences and go
|
||||
up to the end of the longest one.
|
||||
.left margin1
|
||||
@5. TX 1 @List a sequence
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
The sequence can be listed with line lengths of 50 bases with each sequence
|
||||
numbered in the order in which they were read.
|
||||
Output can be directed to a disk file by
|
||||
first selecting disk output. Typical dialogue follows.
|
||||
.lit
|
||||
|
||||
? Menu or option number=5
|
||||
|
||||
10 20 30 40 50
|
||||
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||
|
||||
60
|
||||
1 TACCCGTTTTT
|
||||
2 GCGTTTTTGT
|
||||
3 TCATACCATAAG
|
||||
4 TTTCATACC
|
||||
5 ATTGTGAGC
|
||||
6 TTCCGGCTCG
|
||||
7 GAAGAGAGT
|
||||
8 TCAGGTGT
|
||||
9 ATGAATG
|
||||
10 TAATTACG
|
||||
.end lit
|
||||
.left margin1
|
||||
@6. TX 1 @List a text file
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Allows the user to have a text file displayed on the screen. It will appear
|
||||
one page at a time.
|
||||
.left margin1
|
||||
@7. TX 1 @Direct output to disk
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Used to direct output that would normally appear on the screen to a file.
|
||||
.para
|
||||
Select redirection of either text or graphics, and
|
||||
supply the name of the file that the output should be written to.
|
||||
.para
|
||||
The results from the next options selected will not appear on the screen
|
||||
but will be written to the file. When option 7 is selected again
|
||||
the file will be
|
||||
closed and output will again appear on the screen.
|
||||
.left margin1
|
||||
@10. TX 2 @Clear graphics
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Clears the screen of both text and graphics.
|
||||
.left margin1
|
||||
@11. TX 2 @Clear text
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
Clears only text from the screen.
|
||||
.left margin1
|
||||
@12. TX 2 @Draw a ruler
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This option
|
||||
allows the user to draw a ruler or scale along the x axis of the screen to
|
||||
help identify the coordinates of points of interest. The user can define
|
||||
the position of the first amino acid to be marked (for example if the
|
||||
active
|
||||
region is 1501 to 8000, the user might wish to mark every 1000th amino
|
||||
acid
|
||||
starting at either 1501 or 2000 - it depends if the user wishes to treat
|
||||
the active region as an independent unit with its own numbering starting
|
||||
at
|
||||
its left edge, or as part of the whole sequence). The user can also define
|
||||
the separation of the ticks on the scale and their height. If required the
|
||||
labelling routine can be used to add numbers to the ticks.
|
||||
.left margin1
|
||||
@13. TX 2 @Use crosshair
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This function puts
|
||||
a steerable cross on the screen that can be used to find the
|
||||
coordinates of points in the sequence. The user can move the cross
|
||||
around using the directional keys; when he hits the space bar the
|
||||
program will print out the coordinates of the cross in sequence units and
|
||||
the option will be exited.
|
||||
.para
|
||||
If instead,
|
||||
you hit a , the position will be displayed but the cross will remain on
|
||||
the screen.
|
||||
.para
|
||||
If a letter s is hit the sequence around the cross hair is displayed and
|
||||
the cross remains on the screen.
|
||||
.left margin1
|
||||
@14. TX 2 @Reposition plots
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
The position of each of the plots is defined relative to a user's drawing
|
||||
board which has size 1-10,000 in x and 1-10,000 in y.
|
||||
Plots for
|
||||
each option are drawn in a window defined by x0,y0 and xlength,ylength.
|
||||
Where x0,y0 is the position of the bottom left hand corner of the window,
|
||||
and xlength is the width of the window and ylength the
|
||||
height of the window.
|
||||
.lit
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
.end lit
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
As all the plots start
|
||||
at the same position in x and have the same width, x0 and xlength are the
|
||||
same for all options. Generally users will only want to change the start
|
||||
level of the window y0 and its height ylength.
|
||||
This option
|
||||
allows users to change window positions whilst running the program.
|
||||
The routine prompts first for the number of the option that the user
|
||||
wishes
|
||||
to reposition; then for the y start and height; then for the x start and
|
||||
length. Note that changes to the x values affect all options. If the user
|
||||
types only carriage return for any value it will remain unchanged.
|
||||
The cross-hair can be used to choose suitable heights.
|
||||
.LEFT MARGIN1
|
||||
@15. TX 2 @Label a diagram
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
This routine allows users to label any diagrams they have produced. They
|
||||
are asked to type in a label. When the user types carriage return to finish
|
||||
typing the label the cross-hair appears on the screen. The user can
|
||||
position it anywhere on the screen. If the user types R (for right justify)
|
||||
the label will be
|
||||
written on the diagram with its right end at the cross-hair position.
|
||||
If the user types L (for left justify) the label will be written on the
|
||||
diagram with its left end at the cross hair position.
|
||||
The
|
||||
cross-hair will then immediately reappear. The user may put the same
|
||||
label
|
||||
on another part of the diagram as before or if he hits the space bar he
|
||||
will be asked if he wishes to type in another label.
|
||||
.left margin1
|
||||
@16. TX 2 @Display a map
|
||||
.LEFT MARGIN2
|
||||
.para
|
||||
It is often convenient to plot a map alongside graphed analysis in order
|
||||
to
|
||||
indicate features within the sequence. This function allows users to
|
||||
draw
|
||||
maps using files arranged in the form of EMBL feature tables. Of course
|
||||
the
|
||||
EMBL tables are usually only used for nucleic acid sequence annotation
|
||||
but,
|
||||
as long as the features are written in the correct format, they can be
|
||||
employed by this routine. The map is composed of a line representing the
|
||||
sequence and then further lines denoting the endpoints of each feature
|
||||
the
|
||||
user identifies. The user is asked to define the height at which the line
|
||||
representing the sequence should be drawn; then for the feature height;
|
||||
then for the features to plot.
|
||||
.left margin1
|
||||
@17. TX 1 @Search for strings
|
||||
.left margin2
|
||||
.para
|
||||
Search for strings
|
||||
performs searches of all the sequences for selected words and
|
||||
shows which sequences they are found in. The user types in a word and
|
||||
defines the allowed number of mismatches. The results are listed or
|
||||
plotted. If listed the display includes the sequence number, the position
|
||||
in the sequence and the matching string.
|
||||
The results are plotted in the
|
||||
following way. The x axis of the plot represents the length of the aligned
|
||||
sequences and the y direction is divided into sufficient strips to accommodate
|
||||
each sequence. So if a match is found in the 3rd sequence at a position
|
||||
equivalent to halfway along the longest of the sequences then a short
|
||||
vertical line will be drawn at the midpoint of the 3rd strip. If the sequences
|
||||
are aligned it can be useful if the motifs happen to appear in
|
||||
related positions. For example see the original publication. Typical
|
||||
dialogue follows.
|
||||
.lit
|
||||
|
||||
? Menu or option number=17
|
||||
X 1 Plot match positions
|
||||
2 Plot histogram of matches
|
||||
? 0,1,2 =
|
||||
? Word to search for=TTGACA
|
||||
? Minimum match (0-6) (6) =5
|
||||
? (y/n) (y) Plot results N
|
||||
2 35 TAGACA
|
||||
5 14 TTTACA
|
||||
6 37 TTTACA
|
||||
11 14 TAGACA
|
||||
14 14 TTGACA
|
||||
17 14 GTGACA
|
||||
17 22 TTAACA
|
||||
20 1 TTGACA
|
||||
.end lit
|
||||
.left margin1
|
||||
@18. TX 3 @Set strand
|
||||
.left margin2
|
||||
.para
|
||||
Set strand allows the user to define which strand(s) of the sequences to
|
||||
analyse: input strand, complement of input, or both.
|
||||
.left margin1
|
||||
@19. TX 3 @Set composition
|
||||
.left margin2
|
||||
.para
|
||||
Set composition gives the user three choices for setting the composition
|
||||
of the sequences for use in the calculation of the information content of
|
||||
words. The user can select the overall composition of the sequences as read,
|
||||
an even composition, or can type in any other 4 values.
|
||||
.left margin1
|
||||
@20. TX 3 @Set word length
|
||||
.left margin2
|
||||
.para
|
||||
Set word length sets the length of word for which dictionaries will be made.
|
||||
.left margin1
|
||||
@21. TX 3 @Set number of mismatches
|
||||
.left margin2
|
||||
.para
|
||||
Set number of mismatches sets the level of fuzziness for the creation of
|
||||
dictionary Dm.
|
||||
.left margin1
|
||||
@22. TX 3 @Show settings
|
||||
.left margin2
|
||||
.para
|
||||
Show settings shows the current settings for all parameters associated with
|
||||
dictionary analysis. A typical display follows:
|
||||
.lit
|
||||
? Menu or option number=22
|
||||
Current word length = 6
|
||||
Number of mismatches = 1
|
||||
Start position = 1
|
||||
End position = 63
|
||||
Input strand only
|
||||
Observed composition
|
||||
Dictionary Dw unmade
|
||||
Dictionary Ds unmade
|
||||
Dictionary Dm unmade
|
||||
Dictionary Dh unmade
|
||||
.end lit
|
||||
.left margin1
|
||||
@23. TX 3 @Make dictionary Dw
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dw creates a dictionary that contains a count of the
|
||||
frequency of occurrence of each word in the collected sequences.
|
||||
.left margin1
|
||||
@24. TX 3 @Make dictionary Ds
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Ds creates a dictionary that contains a count of the
|
||||
number of different sequences that contain each word.
|
||||
.left margin1
|
||||
@25. TX 3 @Make dictionary Dm from Dw
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dm from Dw creates a dictionary from dictionary Dw that
|
||||
contains the frequency of occurrence of each word (say X) in Dw plus the
|
||||
frequency of occurrence of each word in Dw that differs from X by up to m
|
||||
letters. Dm is called a fuzzy dictionary as it contains the frequencies of
|
||||
occurrence of all words plus the frequencies of all the words that are
|
||||
similar to them.
|
||||
.left margin1
|
||||
@26. TX 3 @Make dictionary Dm from Ds
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dm from Ds creates a dictionary from dictionary Ds that
|
||||
contains the frequency of occurrence of each word (say X) in Ds plus the
|
||||
frequency of occurrence of each word in Ds that differs from X by up to m
|
||||
letters. Dm is called a fuzzy dictionary as it contains the frequencies of
|
||||
occurrence of all words plus the frequencies of all the words that are
|
||||
similar to them.
|
||||
.left margin1
|
||||
@27. TX 3 @Make dictionary Dh from Dm
|
||||
.left margin2
|
||||
.para
|
||||
Make dictionary Dh creates a dictionary from dictionary Dm and whose
|
||||
entries are zero except for those words in any set of related words that
|
||||
are most frequent. It finds the dominant words in each set of relations
|
||||
and stores their counts.
|
||||
.left margin1
|
||||
@28. TX 3 @Examine fuzzy dictionary Dm
|
||||
.left margin2
|
||||
.para
|
||||
Examine dictionary Dm allows users to analyse the contents of dictionary
|
||||
Dm to find the most common words or those words that contain the most
|
||||
information. The user supplies a frequency or information cutoff and chooses
|
||||
to have the results sorted on either value. The program will find the top 100
|
||||
words that achieve the cutoff values and present them to the user sorted
|
||||
as selected. The information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dm, and using the current composition
|
||||
setting. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
? Menu or option number=28
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAC 64 0.66460
|
||||
AAAAAA 90 0.64880
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTG 73 0.64070
|
||||
TTTTGT 63 0.63820
|
||||
TTTTTC 65 0.63810
|
||||
AAAATA 63 0.62670
|
||||
TATAAT 65 0.62510
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =2
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
AAAAAA 90 0.64880
|
||||
TTTTTG 73 0.64070
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTC 65 0.63810
|
||||
TATAAT 65 0.62510
|
||||
AAAAAC 64 0.66460
|
||||
TTTTGT 63 0.63820
|
||||
AAAATA 63 0.62670
|
||||
TTGACA 60 0.73850
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@29. TX 3 @Examine fuzzy dictionary Dh
|
||||
.left margin2
|
||||
.para
|
||||
Examine dictionary Dh allows users to analyse the contents of dictionary Dh
|
||||
to find the most common words or those words that contain the most
|
||||
information. The user supplies a frequency or information cutoff and chooses
|
||||
to have the results sorted on either value. The program will find the top 100
|
||||
words that achieve the cutoff values and present them to the user sorted as
|
||||
selected. The information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dh and using the current composition
|
||||
setting. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
? Menu or option number=29
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.6
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 4 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
TTTTTT 115 0.60630
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =.5
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =
|
||||
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@30. TX 3 @Examine words in Dm
|
||||
.left margin2
|
||||
.para
|
||||
Examine words in Dm allows users to analyse the contents of dictionary Dm at the
|
||||
level of individual words to find their frequency, information content, and to
|
||||
see their base frequency table. The user types in a word to examine and the
|
||||
program displays the values and table. The information content will be
|
||||
calculated from either Dw or Ds depending which was used to create Dm,
|
||||
and using the current composition setting. Typical dialogue follows:
|
||||
.lit
|
||||
? Menu or option number=30
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@31. TX 3 @Examine words in Dh
|
||||
.left margin2
|
||||
.para
|
||||
Examine words in Dh allows users to analyse the contents of dictionary Dh at the
|
||||
level of individual words to find their frequency, information content, and to
|
||||
see their base frequency table. The user types in a word to examine and the
|
||||
program displays the values and table. The information content will be
|
||||
calculated from either Dw or Ds depending which was used to create Dm,
|
||||
and using the current composition setting. Typical dialogue follows:
|
||||
.lit
|
||||
|
||||
? Menu or option number=31
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=GGGGGG
|
||||
gggggg 0 0.6199890
|
||||
3 1 1 2 3 4
|
||||
1 3 1 2 2 1
|
||||
2 1 1 1 1 1
|
||||
11 12 14 12 11 11
|
||||
GGGGGG
|
||||
? Word to examine=
|
||||
|
||||
.end lit
|
||||
.left margin1
|
||||
@32. TX 3 @Save or restore a dictionary
|
||||
.left margin2
|
||||
.para
|
||||
Save or restore dictionary allows users to write or read any dictionary to
|
||||
and from disk files. The user is asked to define the dictionary and file. The
|
||||
function is useful if the machine being used is very slow at calculating
|
||||
because the files can be handled quickly. However note that the files
|
||||
cannot be processed by any other program.
|
||||
.left margin1
|
||||
@33. TX 1 @Find inverted repeats
|
||||
.left margin2
|
||||
.para
|
||||
Find inverted repeats performs searches for simple inverted repeat sequences
|
||||
in each sequence. They are defined by a range of loop sizes and a minimum
|
||||
number of potential basepairs. The results can be plotted or listed. The x
|
||||
axis of the plot represents the length of the aligned sequences and the y
|
||||
direction is divided into sufficient strips to accommodate each sequence.
|
||||
So if an inverted repeat is found in the 3rd sequence at a position equivalent
|
||||
to halfway along the longest of the sequences then a short vertical line will
|
||||
be drawn at the midpoint of the 3rd strip. Alternatively, if the results are
|
||||
listed, the potential hairpin loops are drawn out, with the sequence number
|
||||
and the position of the loop. Typical dialogue follows.
|
||||
.lit
|
||||
|
||||
? Menu or option number=33
|
||||
Define the range of loop sizes
|
||||
? Minimum loop size (0-10) (3) =0
|
||||
? Maximum loop size (1-20) (3) =
|
||||
? Minimum number of basepairs (1-20) (6) =
|
||||
? (y/n) (y) Plot results N
|
||||
Searching
|
||||
|
||||
Sequence 3 34
|
||||
C
|
||||
G.T
|
||||
T-A
|
||||
A-T
|
||||
T.G
|
||||
T.G
|
||||
G.T
|
||||
ATCTTT TATTTCA
|
||||
33
|
||||
|
||||
Sequence 5 35
|
||||
T
|
||||
G.T
|
||||
T.G
|
||||
A-T
|
||||
T.G
|
||||
G.T
|
||||
C-G
|
||||
T.G
|
||||
TCCGGC AATTGTG
|
||||
34
|
||||
.end lit
|
||||
.left margin1
|
||||
@ End of help
|
5116
help/NIP.RNO
Normal file
5116
help/NIP.RNO
Normal file
File diff suppressed because it is too large
Load diff
88
help/NIPF.RNO
Normal file
88
help/NIPF.RNO
Normal file
|
@ -0,0 +1,88 @@
|
|||
.NPA
|
||||
.SP 1
|
||||
.left margin1
|
||||
@-1. TX 0 @General
|
||||
.sp
|
||||
@-2. TX 0 @Screen control
|
||||
.sp
|
||||
@-3. TX 0 @Statistical analysis
|
||||
.sp
|
||||
@-1. TX 0 @General
|
||||
.sp
|
||||
@-2. TX 0 @Screen control
|
||||
.sp
|
||||
@-3. TX 0 @Statistical analysis
|
||||
.sp
|
||||
@0. TX -1 @NIPF
|
||||
.sp
|
||||
@1. TX 1 @ Help
|
||||
.sp
|
||||
@2. TX 1 @ Quit
|
||||
.sp
|
||||
@3. TX 1 @ Read new sequence
|
||||
.sp
|
||||
@4. TX 1 @ Redefine active region
|
||||
.sp
|
||||
@5. TX 1 @ List the sequence
|
||||
.sp
|
||||
@6. TX 1 @ List a text file
|
||||
.sp
|
||||
@7. TX 1 @ Direct output to disk
|
||||
.sp
|
||||
@8. TX 1 @ Write active sequence to disk
|
||||
.sp
|
||||
@9. TX 1 @ List a translation
|
||||
.sp
|
||||
@32. TX 1 @ List showing base differences
|
||||
.sp
|
||||
@37. TX 1 @ List showing translation
|
||||
.sp
|
||||
@33. TX 1 @ List showing amino acid differences
|
||||
.sp
|
||||
@10. TX 2 @ Clear graphics
|
||||
.sp
|
||||
@11. TX 2 @ Clear text
|
||||
.sp
|
||||
@12. TX 2 @ Draw a ruler
|
||||
.sp
|
||||
@13. TX 2 @ Use cross hair
|
||||
.sp
|
||||
@14. TX 2 @ Reset margins
|
||||
.sp
|
||||
@15. TX 2 @ Label diagram
|
||||
.sp
|
||||
@16. TX 2 @ Display a map
|
||||
.sp
|
||||
@17. TX 3 @ Set comparison mode
|
||||
.sp
|
||||
@18. TX 3 @ Set sort mode
|
||||
.sp
|
||||
@21. TX 3 @ Count base changes
|
||||
.sp
|
||||
@22. TX 3 @ Count codon changes
|
||||
.sp
|
||||
@23. TX 3 @ Count genetic events
|
||||
.sp
|
||||
@24. TX 3 @ Show table of base changes
|
||||
.sp
|
||||
@36. TX 3 @ Show table of expressed base changes
|
||||
.sp
|
||||
@39. TX 3 @ Show table of silent base changes
|
||||
.sp
|
||||
@38. TX 3 @ Estimate mutation rate
|
||||
.sp
|
||||
@25. TX 3 @ Plot base changes
|
||||
.sp
|
||||
@26. TX 3 @ Plot expressed changes per base
|
||||
.sp
|
||||
@27. TX 3 @ Plot silent changes per base
|
||||
.sp
|
||||
@28. TX 3 @ Count expressed changes per base
|
||||
.sp
|
||||
@29. TX 3 @ Count silent changes per base
|
||||
.sp
|
||||
@30. TX 3 @ Count changed amino acids
|
||||
.sp
|
||||
@31. TX 3 @ Plot amino acid variability
|
||||
.sp
|
||||
@ end of help
|
2469
help/PIP.RNO
Normal file
2469
help/PIP.RNO
Normal file
File diff suppressed because it is too large
Load diff
38
help/README
Normal file
38
help/README
Normal file
|
@ -0,0 +1,38 @@
|
|||
README file for help directory of staden package
|
||||
-----------------------------------------------
|
||||
|
||||
Should contain (at least) ProgramName_help where ProgramName is each of
|
||||
bap, dap, gip, mem, mep, nip, nipf, pip, sap, sip and also staden_help
|
||||
and stadenp_help.
|
||||
|
||||
There are 3 main formats of file in this directory:
|
||||
|
||||
PROGRAM.RNO:
|
||||
This is the unformatted (runoff/nroff style) help for PROGRAM.
|
||||
Any changes to the help should be performed on this file.
|
||||
|
||||
program_help:
|
||||
This is the online formatted help used by PROGRAM. It can also
|
||||
be printed to produce hardcopy documentation.
|
||||
|
||||
program_menu:
|
||||
This is a file that describes the menus used in PROGRAM,
|
||||
together with an index into the program_help file for the
|
||||
online help. The format for each line is:
|
||||
|
||||
<option number> <menu number> <program_help offset> <no. of
|
||||
lines of help> <program type T(ext) or (X)windows> <option name>
|
||||
|
||||
|
||||
Exceptions to these are for the staden_help, stadenp_help, and
|
||||
splitp_help which do not have the relevant .RNO or _menu files. The
|
||||
file staden_help gives an introduction to the xterm user interface
|
||||
(written for vax and vms and so is out of date with the Unix
|
||||
versions).
|
||||
|
||||
See the file splitp_help for information about the reformatting of the
|
||||
PROSITE motif library.
|
||||
|
||||
Rebuild help files with the Unix command "make all". Ensure that the utility
|
||||
program sethelp is compiled and in the executables search path. The sources
|
||||
for the program sethelp are found in $STADENROOT/staden.
|
2523
help/SAP.RNO
Normal file
2523
help/SAP.RNO
Normal file
File diff suppressed because it is too large
Load diff
1431
help/SIP.RNO
Normal file
1431
help/SIP.RNO
Normal file
File diff suppressed because it is too large
Load diff
125
help/SPLITP.RNO
Normal file
125
help/SPLITP.RNO
Normal file
|
@ -0,0 +1,125 @@
|
|||
.para
|
||||
Preparing the PROSITE protein motif library for use by the Staden programs
|
||||
.para
|
||||
Introduction
|
||||
.para
|
||||
A library of protein motifs (in our terminology, because they include
|
||||
variable gaps, some would be called patterns) has recently become available
|
||||
from Amos Bairoch, Departement de Biochimie Medicale, University of Geneva.
|
||||
Currently it contains 317 patterns/motifs and arrives on tape or cdrom
|
||||
in two files:
|
||||
a .dat file and a .doc file. There is also a user documentation file
|
||||
prosite.usr. Here I outline what is required to prepare the PROSITE library for
|
||||
use by our programs.
|
||||
.para
|
||||
Three programs need to be run SPLITP1, SPLITP2, and SPLITP3.
|
||||
.PARA
|
||||
Outline of the PROSITE files
|
||||
.para
|
||||
A typical entry in the .dat file is shown below.
|
||||
.lit
|
||||
|
||||
ID 2FE2S_FERREDOXIN; PATTERN.
|
||||
AC PS00197;
|
||||
DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
|
||||
DE 2Fe-2S ferredoxins, iron-sulfur binding region signature.
|
||||
PA C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
|
||||
NR /RELEASE=14,15409;
|
||||
NR /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
|
||||
NR /FALSE_NEG=5(5);
|
||||
CC /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
|
||||
CC /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
|
||||
DR P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
|
||||
DR P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
|
||||
DR P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
|
||||
DO PDOC00175;
|
||||
//
|
||||
.end lit
|
||||
.para
|
||||
Each entry has an accession number (here PS00197), a pattern definition
|
||||
(here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a documentation file
|
||||
cross reference (here PDOC00175).
|
||||
This pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of STA,
|
||||
not P, C.
|
||||
.para
|
||||
We need to convert all of these patterns into our pattern definitions
|
||||
(as membership of a set, with the appropriate gap ranges) and write each
|
||||
into a separate pattern file with corresponding "membership of a set"
|
||||
weight matrices. Each
|
||||
pattern file is named accession_number.pat (here PS00197.PAT). The
|
||||
corresponding matrix files are accession_number.wtsa,
|
||||
accession_number.wtsb, etc for however many are needed (here PS00197.WTSA
|
||||
and PS00197.WTSB): two are needed because of the variable gap.
|
||||
.para
|
||||
In addition we can optionally
|
||||
split the .dat and .doc files into separate files, one for each
|
||||
entry, with names accession_number.dat and accession_number.doc. Also we
|
||||
create an index for the library prosite.lis, which
|
||||
gives a one line description of each pattern, and ends with the pattern
|
||||
file and documentation file numbers. The start of the file is shown below.
|
||||
.lit
|
||||
|
||||
N-glycosylation site. 00001,00001
|
||||
Glycosaminoglycan attachment site. 00002,00002
|
||||
Tyrosine sulfatation site. 00003,00003
|
||||
cAMP- and cGMP-dependent protein kinase phosphorylation site. 00004,00004
|
||||
|
||||
.end lit
|
||||
So the name of the pattern file for Glycosaminoglycan attachment site is
|
||||
PS00002.PAT, and for the documentation file PDOC00002.DOC
|
||||
.para
|
||||
Finally we
|
||||
create a file of file names for all the patterns in the library.
|
||||
.para
|
||||
To use the complete PROSITE library from program pip, select "pattern searcher"
|
||||
and choose the
|
||||
option "use file of pattern file names", and give the file name
|
||||
prosite.nam. For any matches found, the accession number and pattern title
|
||||
will be
|
||||
displayed.
|
||||
|
||||
.para
|
||||
Running the conversion programs
|
||||
.para
|
||||
|
||||
Only SPLITP3 is necessary for using the library. The other programs
|
||||
only make the
|
||||
original files marginally easier to browse through and produce an index.
|
||||
.para
|
||||
SPLITP1 splits the prosite.dat file to create a separate file for each
|
||||
entry. Each file is automatically named PSentry_number.dat. In addition it
|
||||
creates an index for the library (see above).
|
||||
.para
|
||||
SPLITP2 performs the same operation for the Prosite.doc file, except that
|
||||
no index is created. Files are named PSentry_number.doc.
|
||||
.para
|
||||
SPLITP3 creates a separate pattern file and weight matrix files for each
|
||||
prosite entry from the file prosite.dat. Pattern files are named
|
||||
PSentry_number.pat, weight matrix files PSentry_number.wtsa,
|
||||
PSentry_number.wtsb, etc. The pattern title is the one line description
|
||||
of the motif. SPLITP3 also creates a file of file names. Notice that it
|
||||
will ask for a path name so that the path can be included in the file of
|
||||
file names. This is the path to the directory in which the pattern files
|
||||
are stored.
|
||||
.para
|
||||
Notes
|
||||
.para
|
||||
Obviously the use of files of file names is a general solution, and anybody
|
||||
could now create their own set of interesting patterns for screening, or a
|
||||
subset of prosite.nam, etc.
|
||||
.para
|
||||
Note that 5 of the Bairoch motifs contained the symbols > or < which
|
||||
means that the motifs must appear exactly at the N or C termini of the
|
||||
sequences. Currently our methods have no mechanism for such definitions and,
|
||||
for example KDEL motifs, will be permitted to occur anywhere throughout
|
||||
a sequence.
|
||||
|
||||
.para
|
||||
Also, of course, the library does not have to be used solely for performing
|
||||
mass screenings: each individual entry can be used as a single pattern by
|
||||
giving the name of its .pat file - eg pathname/ps00002.pat
|
||||
In addition more sophisticated users will wish to copy pattern files and
|
||||
weight matrices into their own directories and modify them. For example the
|
||||
cutoff scores are probably chosen to be quite high in order to reduce the
|
||||
number of false positives, and some users might wish to lower them.
|
||||
|
354
help/STADEN.RNO
Normal file
354
help/STADEN.RNO
Normal file
|
@ -0,0 +1,354 @@
|
|||
.npa
|
||||
.left margin2
|
||||
.para
|
||||
Introduction to the Staden sequence analysis package and its user interface
|
||||
.PARA
|
||||
The package contains the following programs:
|
||||
.lit
|
||||
|
||||
GIP Gel input program
|
||||
SAP Sequence assemble program
|
||||
NIP Nucleotide interpretation program
|
||||
PIP Protein interpretation program
|
||||
SIP Similarity investigation program
|
||||
MEP Motif exploration program
|
||||
NIPL Nucleotide interpretation program (library)
|
||||
PIPL Protein interpretation program (library)
|
||||
SIPL Similarity investigation program (library)
|
||||
|
||||
.end lit
|
||||
.left margin2
|
||||
GIP uses a digitiser for entry of DNA sequences from
|
||||
autoradiographs.
|
||||
.left margin2
|
||||
SAP handles everything relating to assembling gel
|
||||
readings in order to produce a consensus sequence. It can also deal with
|
||||
families of protein sequences.
|
||||
.left margin2
|
||||
NIP provides functions for analysing and interpreting
|
||||
individual nucleotide sequences.
|
||||
.left margin2
|
||||
PIP provides functions for analysing and interpreting
|
||||
individual protein sequences.
|
||||
.left margin2
|
||||
MEP analyses families of nucleotide sequences to help discover new motifs.
|
||||
.left margin2
|
||||
NIPL performs pattern searches on nucleotide sequence libraries.
|
||||
.left margin2
|
||||
PIPL performs pattern searches on protein sequence libraries.
|
||||
.left margin2
|
||||
SIP provides functions for comparing and aligning
|
||||
pairs of protein or nucleotide sequences.
|
||||
.left margin2
|
||||
SIPL searches nucleotide and protein sequence
|
||||
libraries for entries similar to probe sequences.
|
||||
.left margin2
|
||||
.sk1
|
||||
.para
|
||||
Documentation
|
||||
.para
|
||||
As is explained below, the
|
||||
programs SAP, NIP, PIP, SIP and MEP have online help,
|
||||
and the help files have the names: HELPSAP, HELPNIP, HELPPIP, HELPSIP,
|
||||
HELPMEP. These
|
||||
files can be displayed on the screen or printed using the appropriate
|
||||
commands. Currently the help for the other programs is also contained in
|
||||
these files. For example help for NIPL is in HELPNIP. This file is called
|
||||
HELPSTADEN.
|
||||
.para
|
||||
Sequence formats
|
||||
.para
|
||||
The shotgun sequencing program SAP deals only with simple
|
||||
text files for gel readings, and is a self-contained system.
|
||||
However as there is still no single agreed format
|
||||
for finished sequences or for libraries of sequences,
|
||||
the other programs in the package can read data that is stored in several ways.
|
||||
.para
|
||||
The analytical programs can read individual sequences stored in the following
|
||||
formats:
|
||||
Staden, EMBL, Genbank, PIR (also known as NBRF), and GCG, but for storing whole
|
||||
libraries we use only PIR format. In addition
|
||||
these programs can perform a number of
|
||||
simple operations using libraries stored in this format. They can extract
|
||||
entries by entry name, can search titles for keywords, can search the whole
|
||||
of the annotation files for keywords, and can extract annotations for any
|
||||
named entry.
|
||||
We reformat all sequence libraries into PIR format. Currently we
|
||||
have NBRF, EMBL, SWISSPROT and VECBASE libraries in PIR format.
|
||||
.para
|
||||
The library searching programs operate only
|
||||
on sequences stored in PIR format.
|
||||
.para
|
||||
The analytical programs
|
||||
will operate with uppercase or lowercase sequence
|
||||
characters. In addition T and U are equivalent. SAP uses uppercase letters
|
||||
for original gel readings and lowercase letters for characters that are
|
||||
corrected by the automatic editor.
|
||||
Programs NIP and PIP use IUB symbols for redundancy in back translations
|
||||
and for sequence searches.
|
||||
The symbols are shown below.
|
||||
.LIT
|
||||
|
||||
|
||||
NC-IUB SYMBOLS
|
||||
|
||||
A,C,G,T
|
||||
R (A,G) 'puRine'
|
||||
Y (T,C) 'pYrimidine'
|
||||
W (A,T) 'Weak'
|
||||
S (C,G) 'Strong'
|
||||
M (A,C) 'aMino'
|
||||
K (G,T) 'Keto'
|
||||
H (A,T,C) 'not G'
|
||||
B (G,C,T) 'not A'
|
||||
V (G,A,C) 'not T'
|
||||
D (G,A,T) 'not C'
|
||||
N (G,A,C,T) 'aNy'
|
||||
|
||||
.end lit
|
||||
.PARA
|
||||
The user interface
|
||||
.PARA
|
||||
The user interface is common to all programs.
|
||||
It consists of a set of menus and a uniform way
|
||||
of presenting choices and obtaining input
|
||||
from the user. This section describes: the
|
||||
menu system; how options are selected and other choices made; how values
|
||||
are supplied to the program; how help is obtained, and
|
||||
how to escape from any part of a program. In addition it gives information
|
||||
about saving results in files and the use of graphics for presenting
|
||||
results.
|
||||
.para
|
||||
Menus
|
||||
.para
|
||||
Each program has several menus and numerous options.
|
||||
Each menu or option has a unique number that is used to
|
||||
identify it. Menu numbers are distinguished from
|
||||
option numbers by being preceded by the letter
|
||||
m (or M, all programs make no distinction between
|
||||
upper and lower case letters). With the exception of
|
||||
some parts of program SAP, the menus are not hierarchical,
|
||||
rather the options they each contain are simply lists of
|
||||
related functions and their identifying numbers.
|
||||
Therefore options can be selected independently
|
||||
of the menu that is currently being shown on the
|
||||
screen, and the menus are simply memory aides.
|
||||
All options and menus are selected by typing their
|
||||
option number when the programs present the prompt
|
||||
.para
|
||||
"? Menu or option number =".
|
||||
.para
|
||||
To select a menu type its number preceded by
|
||||
the letter M. To select an option type its number.
|
||||
If you type only "return" you will get menu m0
|
||||
which is simply a list of menus. If you select an
|
||||
option you will return to the current menu after the function is completed.
|
||||
.para
|
||||
When you select an option, in many cases the
|
||||
program will immediately perform the operation
|
||||
selected without further dialogue. If you precede an option
|
||||
number by the letter d (e.g. D17), you
|
||||
will force the program to offer dialogue about the selected option
|
||||
before the function operates,
|
||||
hence allowing you to change the value of any of its parameters. If
|
||||
you precede an option number by the symbol ? (e.g. ?17),
|
||||
you will be given help on the option (here 17).
|
||||
.para
|
||||
Where possible, equivalent or identical options have been given the same
|
||||
numbers in all programs, and so users quickly learn the numbers for
|
||||
the functions they employ most often.
|
||||
.para
|
||||
Help
|
||||
.para
|
||||
As mentioned above, help about each option can be obtained by
|
||||
preceding the option number by the symbol ? when you are presented
|
||||
with the prompt "? Menu or option number", but there are two further
|
||||
ways of obtaining help. Whenever the program asks a question
|
||||
you can respond by typing the symbol ? and you will receive information
|
||||
about the current option. In addition, option number 1
|
||||
in all the programs will give help on all of a program's functions.
|
||||
.para
|
||||
Quitting
|
||||
.para
|
||||
To exit from any point in a program you type ! for quit.
|
||||
If a menu is on the screen this will stop the program, otherwise
|
||||
you will be returned to the last menu.
|
||||
.Para
|
||||
Other interactions
|
||||
.para
|
||||
Questions are presented in a few restricted ways.
|
||||
In all cases typing only "return" in response to a question means
|
||||
yes, and typing N or n means no.
|
||||
.para
|
||||
Obvious opposites such as "clear screen" and "keep picture"
|
||||
are presented with only the default shown. For example
|
||||
in this case the default is generally "keep picture" so the
|
||||
program will display:
|
||||
.para
|
||||
"(y/n) (y) Keep picture"
|
||||
.para
|
||||
and the picture will be retained if the user types anything other than N or
|
||||
n, (in which case the screen will be cleared).
|
||||
.para
|
||||
Where there are choices that are not obvious opposites, or
|
||||
there are more than two choices, two further conventions are used:
|
||||
"radio buttons" and "check boxes".
|
||||
.para
|
||||
|
||||
Radio buttons are used when only one of a number of choices can be
|
||||
made at any one time. The choices are presented arranged one above the
|
||||
other, each choice with a number for its selection, and the default
|
||||
choice marked with an X. For example in the restriction
|
||||
enzyme search routine the following choices are offered:
|
||||
.para
|
||||
.lit
|
||||
|
||||
Select output mode
|
||||
1 order results enzyme by enzyme
|
||||
2 order results by positon
|
||||
X 3 show only infrequent cutters
|
||||
4 show names above the sequence
|
||||
? Selection (1-4) (3) =
|
||||
|
||||
.end lit
|
||||
Any single option can be selected by typing the option number,
|
||||
and the default option, (here shown as 3), is also obtained by
|
||||
typing only "return". Again help can be obtained by typing ? and
|
||||
you can quit by typing !.
|
||||
.para
|
||||
Check boxes are used when any number of a set of choices can be
|
||||
made (i.e. the choices are not exclusive). Choices are
|
||||
made by typing choice numbers. Each choice can be considered
|
||||
as a switch whose setting is reversed when it is selected. Choices that are
|
||||
currently switched on are marked with an X.
|
||||
The user quits from making selections by typing only
|
||||
"return". For example in the routine that plots base composition
|
||||
you can plot the frequencies of any combination of bases, e.g. only
|
||||
A, or A+T, or A+T+G etc.
|
||||
The following check box is offered to the user:
|
||||
.lit
|
||||
|
||||
X 1 T
|
||||
2 C
|
||||
X 3 A
|
||||
4 G
|
||||
? Selection (1-4) () =
|
||||
|
||||
.END LIT
|
||||
As shown this will plot the A+T composition. To switch off T
|
||||
you select 1, to switch on C you select 2, etc, to quit,
|
||||
having set the bases required you type only "return".
|
||||
.para
|
||||
Input of numerical values
|
||||
.para
|
||||
All input of integer or decimal numbers is presented in a
|
||||
standard way with the allowed range shown in brackets and the default
|
||||
value also in brackets. For example:
|
||||
.para
|
||||
? span (5-31) (11) =
|
||||
.para
|
||||
In this example you could type any number between 5 and 31,
|
||||
or "return" only, or ! or ? (see above). Any other input will cause the
|
||||
program to ask the question again. Typing only "return" gives the default
|
||||
value (here 11).
|
||||
.para
|
||||
Use of the bell
|
||||
.para
|
||||
The programs use the bell to indicate that a task is completed.
|
||||
This allows users to read textual results before they are scrolled up off
|
||||
the screen, or to look at a plot before it is scrolled over by the menus.
|
||||
When the bell sounds, the programs will wait
|
||||
until return is typed. You can quit from these points by typing ! but
|
||||
no help is available.
|
||||
.para
|
||||
Printing and saving results in files
|
||||
.para
|
||||
A few of the functions in the programs automatically write their textual
|
||||
results
|
||||
to disk files, but for most functions you can choose whether results
|
||||
appear on the terminal screen or go to a file. This applies to both text
|
||||
and graphical results.
|
||||
For these functions
|
||||
the normal, or default, place for results to
|
||||
appear is on the screen, and users need to decide before the
|
||||
function is selected if they want to redirect the results to a file.
|
||||
In all programs, option number 7, "Direct output to disk" gives control
|
||||
over whether results appear on the screen or go to a file. When a program
|
||||
is started results will be sent to the screen. If option 7 is selected
|
||||
users will be given the choice of redirecting either text or graphics to a
|
||||
file. The program will then ask users to supply a file name. From that
|
||||
point on all results will be sent to the file until option 7 is selected again,
|
||||
in which case the "redirection file" will be closed, and results will start
|
||||
to appear on the screen.
|
||||
.para
|
||||
If these files contain textual results they can be looked at
|
||||
from within the programs
|
||||
by using option 6, "List a text file". Once you leave the program
|
||||
you can use an appropriate system command to print the files.
|
||||
There is no function within the programs to direct files to a printer.
|
||||
.para
|
||||
The converse of the above is also possible. That
|
||||
is, it is possible to redirect results that would normally go to file,
|
||||
so that they appear instead on the screen. This is often useful as a way
|
||||
of checking results before saving them in a file. On a VAX using
|
||||
VMS you do this by typing TT: for the name of the file that the
|
||||
program would create. TT: is what VMS calls the screen.
|
||||
.para
|
||||
Use of graphics
|
||||
.para
|
||||
The analytical programs including NIP, PIP and SIP present the results of
|
||||
many of their analyses graphically. The position at which the results for
|
||||
any function appear on the screen is defined relative to a notional user's
|
||||
"drawing board" of dimension 10,000 by 10,000. This drawing board fills the
|
||||
screen and results are drawn in windows defined using symbols x0,y0 and
|
||||
xlength,ylength,
|
||||
where x0,y0 is the position of the bottom left hand corner of the window,
|
||||
and xlength is the width of the window and ylength the
|
||||
height of the window.
|
||||
.lit
|
||||
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
.end lit
|
||||
.para
|
||||
The window positions for each option are read from a file
|
||||
when a program is started. If required individual users could have their
|
||||
own set of plot positions, and also the positions
|
||||
can be redefined from within the
|
||||
programs using option number 14.
|
||||
.para
|
||||
For those analyses that draw continuous lines to represent results
|
||||
(for example a plot of base composition) the user is asked to supply the
|
||||
"Plot interval". All the analyses produce a value for every point along the
|
||||
sequence but often it is unnecessary to actually plot the
|
||||
values for all the points.
|
||||
The plot interval is simply the distance between the points
|
||||
shown on the screen. If the user selects a plot interval of 1, every point
|
||||
will be plotted; a plot interval of 3 will show every third point. It is a
|
||||
way of speeding up the analyses.
|
||||
.para
|
||||
Saving graphics
|
||||
.para
|
||||
Many terminals are not capable of dumping their screen contents to a
|
||||
file for subsequent printing. One convenient way of obtaining hard copy
|
||||
of graphical results is to use a micro computer as a terminal. On
|
||||
the Macintosh we use the terminal emulator VersaTerm
|
||||
Pro. This allows graphics to be saved as
|
||||
Macintosh files that can be annotated and printed using
|
||||
MacDraw and other painting programs.
|
||||
.para
|
||||
Alternatively graphics can be redirected to a file and printed using a
|
||||
laser printer with Tektronix capability (see
|
||||
"Printing and saving results in files").
|
2112
help/bap_help
Normal file
2112
help/bap_help
Normal file
File diff suppressed because it is too large
Load diff
84
help/bap_menu
Normal file
84
help/bap_menu
Normal file
|
@ -0,0 +1,84 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 98 2 T Modification
|
||||
-3 0 98 2 X Modification
|
||||
0 -1 116 332 T BAP
|
||||
0 -1 116 332 X BAP
|
||||
17 1 17434 18 T Screen against enzymes
|
||||
17 1 17434 18 X Screen against enzymes
|
||||
18 1 18477 23 T Screen against vector
|
||||
18 1 18477 23 X Screen against vector
|
||||
20 3 19859 121 T Auto assemble
|
||||
20 3 19859 121 X Auto assemble
|
||||
28 1 26426 43 T Highlight disagreements
|
||||
28 1 26426 43 X Highlight disagreements
|
||||
32 3 28846 17 T Extract gel readings
|
||||
32 3 28846 17 X Extract gel readings
|
||||
1 0 29607 3 T Help
|
||||
1 0 29607 3 X Help
|
||||
2 0 29676 5 T Quit
|
||||
2 0 29676 5 X Quit
|
||||
3 1 29869 230 T Open a database
|
||||
3 1 29869 230 X Open a database
|
||||
4 3 41499 320 T Edit contig
|
||||
4 3 41499 320 X Edit contig
|
||||
5 1 56688 43 T Display a contig
|
||||
5 1 56688 43 X Display a contig
|
||||
6 1 58990 6 T List a text file
|
||||
6 1 58990 6 X List a text file
|
||||
8 1 59248 93 T Calculate a consensus
|
||||
8 1 59248 93 X Calculate a consensus
|
||||
25 1 63707 41 T Show relationships
|
||||
25 1 63707 41 X Show relationships
|
||||
23 3 65650 11 T Complement a contig
|
||||
23 3 65650 11 X Complement a contig
|
||||
22 3 66173 59 T Join contigs
|
||||
22 3 66173 59 X Join contigs
|
||||
24 1 69194 11 T Copy the database
|
||||
24 1 69194 11 X Copy the database
|
||||
19 1 69740 43 T Check database
|
||||
19 1 69740 43 X Check database
|
||||
29 1 71898 82 T Examine quality
|
||||
29 1 71898 82 X Examine quality
|
||||
26 3 75715 84 T Alter relationships
|
||||
26 3 75715 84 X Alter relationships
|
||||
27 1 79641 17 T Set display parameters
|
||||
27 1 79641 17 X Set display parameters
|
||||
30 3 80503 7 T Shuffle pads
|
||||
30 3 80503 7 X Shuffle pads
|
||||
10 2 80866 3 T Clear graphics
|
||||
10 2 80866 3 X Clear graphics
|
||||
11 2 80931 3 T Clear text
|
||||
11 2 80931 3 X Clear text
|
||||
12 2 80996 12 T Draw a ruler.
|
||||
12 2 80996 12 X Draw a ruler.
|
||||
14 2 81730 38 T Reposition plots
|
||||
14 2 81730 38 X Reposition plots
|
||||
15 2 84069 28 T Label a diagram
|
||||
15 2 84069 28 X Label a diagram
|
||||
16 2 85174 3 T Display a map
|
||||
16 2 85174 3 X Display a map
|
||||
7 1 85228 12 T Redirect output
|
||||
7 1 85228 12 X Redirect output
|
||||
13 2 85731 43 T Use crosshair
|
||||
13 2 85731 43 X Use crosshair
|
||||
33 2 87876 12 T Plot single contig
|
||||
33 2 87876 12 X Plot single contig
|
||||
34 2 88578 10 T Plot all contigs
|
||||
34 2 88578 10 X Plot all contigs
|
||||
31 3 89160 21 T Disassemble readings
|
||||
31 3 89160 21 X Disassemble readings
|
||||
35 3 90372 94 T Find internal joins
|
||||
35 1 90372 94 T Find internal joins
|
||||
35 3 90372 94 X Find internal joins
|
||||
35 1 90372 94 X Find internal joins
|
||||
36 3 96201 30 T Double strand
|
||||
36 3 96201 30 X Double strand
|
||||
37 3 97555 64 T Auto-select oligos
|
||||
37 3 97555 64 X Auto-select oligos
|
||||
38 1 100421 30 T Check assembly
|
||||
38 1 100421 30 X Check assembly
|
||||
39 1 102178 90 T Find read pairs
|
||||
39 1 102178 90 X Find read pairs
|
2112
help/dap_help
Normal file
2112
help/dap_help
Normal file
File diff suppressed because it is too large
Load diff
79
help/dap_menu
Normal file
79
help/dap_menu
Normal file
|
@ -0,0 +1,79 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 98 2 T Modification
|
||||
-3 0 98 2 X Modification
|
||||
0 -1 116 351 T SAP
|
||||
0 -1 116 351 X SAP
|
||||
17 1 18801 18 T Screen against enzymes
|
||||
17 1 18801 18 X Screen against enzymes
|
||||
18 1 19844 22 T Screen against vector
|
||||
18 1 19844 22 X Screen against vector
|
||||
20 3 21171 113 T Auto assemble
|
||||
20 3 21171 113 X Auto assemble
|
||||
28 1 27332 42 T Highlight disagreements
|
||||
28 1 27332 42 X Highlight disagreements
|
||||
32 3 29694 22 T Extract gel readings
|
||||
32 3 29694 22 X Extract gel readings
|
||||
1 0 30797 3 T Help
|
||||
1 0 30797 3 X Help
|
||||
2 0 30866 5 T Quit
|
||||
2 0 30866 5 X Quit
|
||||
3 1 31059 237 T Open a database
|
||||
3 1 31059 237 X Open a database
|
||||
4 3 43258 239 T Edit contig
|
||||
4 3 43258 239 X Edit contig
|
||||
9 3 54180 42 T Screen edit
|
||||
5 1 56376 45 T Display a contig
|
||||
5 1 56376 45 X Display a contig
|
||||
6 1 58862 6 T List a text file
|
||||
6 1 58862 6 X List a text file
|
||||
8 1 59120 93 T Calculate a consensus
|
||||
8 1 59120 93 X Calculate a consensus
|
||||
25 1 63651 41 T Show relationships
|
||||
25 1 63651 41 X Show relationships
|
||||
21 3 65587 101 T Enter new gel reading
|
||||
21 3 65587 101 X Enter new gel reading
|
||||
23 3 70677 11 T Complement a contig
|
||||
23 3 70677 11 X Complement a contig
|
||||
22 3 71200 63 T Join contigs
|
||||
22 3 71200 63 X Join contigs
|
||||
24 1 74467 11 T Copy the database
|
||||
24 1 74467 11 X Copy the database
|
||||
19 1 75013 41 T Check database
|
||||
19 1 75013 41 X Check database
|
||||
29 1 77032 82 T Examine quality
|
||||
29 1 77032 82 X Examine quality
|
||||
26 3 80849 101 T Alter relationships
|
||||
26 3 80849 101 X Alter relationships
|
||||
27 1 86065 17 T Set display parameters
|
||||
27 1 86065 17 X Set display parameters
|
||||
30 3 86933 48 T Auto edit a contig
|
||||
30 3 86933 48 X Auto edit a contig
|
||||
10 2 89409 3 T Clear graphics
|
||||
10 2 89409 3 X Clear graphics
|
||||
11 2 89474 3 T Clear text
|
||||
11 2 89474 3 X Clear text
|
||||
12 2 89539 12 T Draw a ruler.
|
||||
12 2 89539 12 X Draw a ruler.
|
||||
14 2 90273 38 T Reposition plots
|
||||
14 2 90273 38 X Reposition plots
|
||||
15 2 92612 28 T Label a diagram
|
||||
15 2 92612 28 X Label a diagram
|
||||
16 2 93717 27 T Display a map
|
||||
16 2 93717 27 X Display a map
|
||||
7 1 94692 12 T Redirect output
|
||||
7 1 94692 12 X Redirect output
|
||||
13 2 95163 43 T Use crosshair
|
||||
13 2 95163 43 X Use crosshair
|
||||
33 2 97308 12 T Plot single contig
|
||||
33 2 97308 12 X Plot single contig
|
||||
34 2 98010 10 T Plot all contigs
|
||||
34 2 98010 10 X Plot all contigs
|
||||
31 3 98592 12 T Type in gel readings
|
||||
31 3 98592 12 X Type in gel readings
|
||||
35 3 99223 92 T Find internal joins
|
||||
35 1 99223 92 T Find internal joins
|
||||
35 3 99223 92 X Find internal joins
|
||||
35 1 99223 92 X Find internal joins
|
198
help/gip_help
Normal file
198
help/gip_help
Normal file
|
@ -0,0 +1,198 @@
|
|||
GIP
|
||||
|
||||
A digitizer is a two dimensional surface which is such that
|
||||
if a special pen is pressed onto it, the pen's coordinates can be
|
||||
recorded by a computer. These coordinates can be interpreted by a
|
||||
program.
|
||||
|
||||
The digitizing device we use works by the pen emitting a high
|
||||
frequency sound which is picked up by two microphones positioned at
|
||||
the rear of the working area. The pen position is determined by
|
||||
triangulation and the digitizing device sends the coordinates to the
|
||||
computer. As no special surface is required the device can
|
||||
conveniently be positioned on a light box giving the sequencer an
|
||||
unobscured view of the autoradiographs.
|
||||
The digitizer is called a GRAPHBAR MODEL GP7 made by Science
|
||||
Accessories Corp, 970 Kings Highway West, Southport, Connecticut
|
||||
06490, USA.
|
||||
|
||||
The program uses a menu to allow the user to select commands
|
||||
or to enter the uncertainty codes for areas of the gel that
|
||||
are difficult to interpret. A menu is simply a series of boxes drawn
|
||||
on the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. A copy of the menu should have been sent to you. It
|
||||
should be stuck down on the surface of the light box in the
|
||||
digitizing area. For convenience it is best to position it to the
|
||||
right of the digitizing area, but in practice as long as its top edge
|
||||
is parallel to the digitizer box, it can be put anywhere in the
|
||||
active region.
|
||||
|
||||
Entering gel readings using a digitizer
|
||||
|
||||
The autoradiograph should be stuck down on the light box with
|
||||
the lanes running, as nearly as possible, at right angles to the
|
||||
digitizer. To read an autoradiograph placed on the light box the user
|
||||
need only define the positions of the four sequencing lanes and the
|
||||
bases to which they correspond and then use the pen to point to
|
||||
each successive band progressing up the gel. The program examines
|
||||
the coordinates of each pen position to see in which of the four
|
||||
lanes it lies and assigns the corresponding base to be stored
|
||||
in the computer. Each time the pen tip is depressed to point to a
|
||||
position on the surface of the digitizer the program sounds the
|
||||
bell on the terminal (a different sound for each of the four bases on
|
||||
the microcomputer version of the program) to indicate to the user
|
||||
that a point has been recorded. As the sequence is read the
|
||||
program displays it on the screen.
|
||||
|
||||
The program uses a menu to allow the user to select commands
|
||||
or to enter the uncertainty codes for areas of the gel that
|
||||
are difficult to interpret. A menu is simply a series of boxes drawn
|
||||
on the digitizing surface that each contain a command or
|
||||
uncertainty code. When the user puts the pen down in these special
|
||||
regions the program interprets the coordinates as commands and acts
|
||||
appropriately. As well as the uncertainty codes
|
||||
A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8 the following commands are
|
||||
included in the menu: DELETE removes the last character from the
|
||||
sequence; RESET allows the lane centres to be redefined; START means
|
||||
begin the next stage of the procedure; STOP means stop the
|
||||
current stage in the procedure; CONFIRM means confirm that the last
|
||||
command or set of coordinates are correct.
|
||||
|
||||
The digitizing device also has a menu of its own. This lies in
|
||||
a two inch wide strip immediately in front of the digitizing box. Pen
|
||||
positions within this two inch strip are interpreted as commands to
|
||||
the digitizer and are not sent to the GIP program. In general the
|
||||
only time users will need to use the device menu is when they tell
|
||||
GIP where the program menu lies in the digitizing area. This is done
|
||||
by first hitting ORIGIN in the device menu and then hitting the
|
||||
bottom left hand corner of the program menu. The program menu can
|
||||
hence be positioned anywhere in the active region but should be
|
||||
arranged parallel to the digitizer.
|
||||
|
||||
The user should try to hit the bands as near as possible to
|
||||
the centre of the lanes because the program tracks the lanes up the
|
||||
film using the pen positions. By using this tracking strategy the
|
||||
user only has to define the centres of the bottom of the lanes before
|
||||
starting to read the film. The program can correctly follow quite
|
||||
curved lanes and constantly checks that its lane centre coordinates
|
||||
look sensible. If the lane centres appear to be getting too close the
|
||||
program stops responding to the pen positions of bands and hence does
|
||||
not ring the bell. If this occurs users must hit the reset box in the
|
||||
menu and the program will request them to redefine the lane centres
|
||||
at the current reading position. Then they can continue reading. As a
|
||||
further safeguard the program will only respond to pen positions
|
||||
either in the menu or very close to the current reading position.
|
||||
|
||||
Running the gel reading program
|
||||
The autoradiograph should be firmly stuck down on the light box and
|
||||
the program started by typing GIP. It will ask the first question.
|
||||
" ? FILE OF FILE NAMES="
|
||||
Type the name for the file of file names and then follow the
|
||||
instructions.
|
||||
" HIT DIGITIZER MENU ORIGIN"
|
||||
" THEN PROGRAM MENU ORIGIN"
|
||||
" THEN HIT START IN PROGRAM MENU"
|
||||
If the bell does not sound after you hit start try hitting metric in
|
||||
the device menu (the program uses metric units, and some digitizers
|
||||
are set to default to use inches; hitting metric switches between
|
||||
the two).
|
||||
After the bell has sounded the program will give the default lane
|
||||
order.
|
||||
" LANE ORDER IS T C A G"
|
||||
" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
|
||||
If the lane order, reading from left to right is correct hit confirm
|
||||
in the program menu. If you are using a different order hit reset
|
||||
and you will be asked to define the lane order from left to right
|
||||
using the program menu (as follows).
|
||||
" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
|
||||
Hit the boxes in the menu that contain the symbols A,C,G,T in the
|
||||
left-right order of the lanes. The program will respond with the
|
||||
lane order as above and ask for confirmation. When this is received,
|
||||
the next task is to define the start positions of the next four
|
||||
lanes.
|
||||
" HIT START, THEN HIT (LEFT TO RIGHT)"
|
||||
" THE START POSITIONS FOR THE NEXT FOUR LANES"
|
||||
Hit the centres of the four lanes at a height level with the first
|
||||
band that is going to be read. The program will report the mean lane
|
||||
separations and ask for confirmation that they are correct.
|
||||
" MEAN LANE SEPARATION IS XX"
|
||||
" HIT CONFIRM TO CONTINUE"
|
||||
Users will become familiar with the values from their films and will
|
||||
spot any unusual numbers. Asking for confirmation allows users to
|
||||
try again if they had made a mistake, but generally the lane
|
||||
separation values can be ignored. Hit confirm, and the program will
|
||||
give the message
|
||||
" HIT START WHEN READY TO BEGIN READING"
|
||||
Hit start and the program will give the message
|
||||
" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
|
||||
Hit the bands, interpreting the sequence progressing up the film.
|
||||
If necessary use the uncertainty codes. If the pen stops responding
|
||||
hit reset and follow the instructions as above. When the sequence
|
||||
becomes unreadable hit stop and the program will ask for a file name
|
||||
for the gel reading just read.
|
||||
" ? FILE NAME FOR THIS GEL READING="
|
||||
Type the file name observing the rules about legal gel reading
|
||||
names. The program will ask if you wish to read another sequence.
|
||||
" TO ENTER ANOTHER GEL READING TYPE 1"
|
||||
To enter another type 1 and you will be back to the step of defining
|
||||
the lane order. Typing anything else will stop the program.
|
||||
|
||||
Running the microcomputer version of the gel reading program
|
||||
The microcomputer version of GIP is slightly different and is called
|
||||
GIPB. The BBC micro does not have the capacity to process the gel
|
||||
readings beyond the reading stage. This means that users of this
|
||||
program would need to transfer their gel readings from the micro to
|
||||
another machine using a terminal emulator. Transferring many files
|
||||
is tedious and so the microcomputer version of the gel reading
|
||||
program stores all the gel readings for each run of the program in a
|
||||
single file. This special file contains both sequences and file names
|
||||
and can be moved in a single transfer to another machine. Once on the
|
||||
other machine the single file must be split into separate gel reading
|
||||
files and a file of file names. This is done using the program
|
||||
BSPLIT. As far as using the microcomputer version of GIP, the only
|
||||
difference is that the first file name the program requests is not a
|
||||
file of file names, but a name for the single file to contain all the
|
||||
gel readings and their names.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
0
help/gip_menu
Normal file
0
help/gip_menu
Normal file
48
help/makefile
Normal file
48
help/makefile
Normal file
|
@ -0,0 +1,48 @@
|
|||
#
|
||||
# Make file for help files - this requires gmake on some systems.
|
||||
#
|
||||
PROGS = bap dap gip mep nip \
|
||||
nipf pip sap sip #mem
|
||||
|
||||
HELPS = bap_help dap_help gip_help mep_help nip_help \
|
||||
nipf_help pip_help sap_help sip_help #mem_help
|
||||
|
||||
MENUS = bap_menu dap_menu gip_menu mep_menu nip_menu \
|
||||
nipf_menu pip_menu sap_menu sip_menu #mem_menu
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
DOIT = rm -f $@_help $@_menu; ./runoff $?
|
||||
|
||||
bap: BAP.RNO
|
||||
$(DOIT)
|
||||
|
||||
dap: DAP.RNO
|
||||
$(DOIT)
|
||||
|
||||
gip: GIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
#mem: MEM.RNO
|
||||
# $(DOIT)
|
||||
|
||||
mep: MEP.RNO
|
||||
$(DOIT)
|
||||
|
||||
nip: NIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
nipf: NIPF.RNO
|
||||
$(DOIT)
|
||||
|
||||
pip: PIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
sap: SAP.RNO
|
||||
$(DOIT)
|
||||
|
||||
sip: SIP.RNO
|
||||
$(DOIT)
|
||||
|
||||
clean:
|
||||
rm -f $(HELPS) $(MENUS)
|
698
help/mem_help
Normal file
698
help/mem_help
Normal file
|
@ -0,0 +1,698 @@
|
|||
|
||||
@0. B 1 @MEP
|
||||
This is a program for analysing families of nucleotide sequences in
|
||||
order to find common motifs and potential binding sites. The ideas
|
||||
in this program were described in Staden, R. "Methods for
|
||||
discovering novel motifs in nucleic acid sequences". Computer
|
||||
Applications in the Biosciences, 5, 293-298, (1989).
|
||||
|
||||
The program can read sequences stored in either of two
|
||||
formats: 1) all sequences aligned in a single file; 2) all sequences
|
||||
in separate files and accessed through a file of file names.
|
||||
|
||||
The program contains functions that can answer several
|
||||
questions about a set of sequences:
|
||||
|
||||
Which words are most common?
|
||||
Which words occur in the most sequences?
|
||||
Which words contain the most information?
|
||||
Which words occur in equivalent positions in the sequences?
|
||||
Which words are inverted repeats?
|
||||
Which words occur on both strands of the sequences?
|
||||
Where are the inverted repeats?
|
||||
Where are the fuzzy words?
|
||||
|
||||
Most of the program is concerned with analysing what it terms
|
||||
"fuzzy words" within the set of sequences. The analysis is explained
|
||||
below. Note that the standard version of the program is limited to
|
||||
words of maximum length 8 letters, and a maximum fuzziness of 2.
|
||||
|
||||
The following analyses (preceded by their option numbers) are
|
||||
included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
|
||||
Some of these methods produce graphical results and so the
|
||||
program is generally used from a graphics terminal (a vdu on which
|
||||
lines and points can be drawn as well as characters).
|
||||
|
||||
The position of each of the plots is defined relative to a user's
|
||||
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||
for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength, where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
|
||||
The options for the program are accessed from 3 main menus:
|
||||
general, screen control and dictionary analysis. Both menus and
|
||||
options are selected by number.
|
||||
|
||||
The most important and novel part of the program is its use of
|
||||
"fuzzy dictionaries" and an information theory measure, to help show
|
||||
the most interesting motifs. Central to the method is the idea of a
|
||||
fuzzy dictionary of word frequencies. A dictionary of word
|
||||
frequencies is an ordered list of all the words in the sequences and
|
||||
a count of the number of times that they occur. A fuzzy dictionary
|
||||
is an equivalent list but which contains instead, for each word, a
|
||||
count of the number of times similar words occur in the sequences.
|
||||
We term words that are similar "relations". The fuzziness is defined
|
||||
by the number of letters in a word that are allowed to be different.
|
||||
So if we had a fuzziness of 1 we allow 1 letter to be different. For
|
||||
example, with a fuzziness of 1, the entry in the fuzzy dictionary
|
||||
for the word TTTTTT would contain a count of the number of times
|
||||
TTTTTT occurred plus the number of times all words differing by
|
||||
exactly one letter from TTTTTT occurred.
|
||||
|
||||
Once the fuzzy dictionary has been created we can examine it
|
||||
in several ways to find candidate control sequences. The simplest
|
||||
question we can ask is which word in the dictionary is the most
|
||||
common. Sometimes this simple criterion of "most common" may be
|
||||
adequate to discover a new motif but in general we would not expect
|
||||
it to be sufficient. For example some words will be common simply
|
||||
because of a base composition bias in the sequences being analysed.
|
||||
In addition a word can be the most frequent and yet not be "well
|
||||
defined". This last point is best explained by an example.
|
||||
|
||||
Suppose we were looking at two letter words and allowing one
|
||||
mismatch, and that there were 10 occurrences of TT and 5 of AC. We
|
||||
could align the 10 words that were one letter different from TT and
|
||||
the 5 that were related to AC. Then we could count the number of
|
||||
times each base occurred in each position for each of these two sets
|
||||
of words. Suppose we got the two base frequency tables shown below.
|
||||
TT AC
|
||||
T 6 4 T 1 0
|
||||
C 1 3 C 0 4
|
||||
A 1 2 A 4 1
|
||||
G 2 1 G 0 0
|
||||
|
||||
These tables show that although TT occurs (with one letter mismatch)
|
||||
more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
|
||||
is higher than those for TT at 6/10, 4/10. Hence we would say that
|
||||
AC was better defined than TT. Expressing this another way we would
|
||||
say that the definition of AC contained more information than that
|
||||
for TT. The program calculates the information content in a way that
|
||||
takes into account both the sequence composition and the level of
|
||||
definition of the motif.
|
||||
|
||||
Definitions
|
||||
|
||||
Here we deal only with the dictionary analysis. Suppose we
|
||||
are dealing with a set of sequences and are examining them for words
|
||||
that are six characters in length.
|
||||
|
||||
Dictionary Dw contains a count of the number of times each
|
||||
word occurs in the set of sequences. For example the entry for
|
||||
TTTTTT contains a value equal to the number of times the word TTTTTT
|
||||
occurs in the set of sequences.
|
||||
|
||||
Dictionary Ds contains a count of the number of different
|
||||
sequences in which each word occurs. For example if the entry for
|
||||
word TTTTTT contains the value 10, it denotes that the word TTTTTT
|
||||
occurs in ten different sequences. Unlike Dw it only counts words
|
||||
once for each sequence. For example if we had a set of 100
|
||||
sequences, the maximum possible value that Ds could take is 100, and
|
||||
this would only happen if a word occurred in every sequence. However
|
||||
for the same set of sequences, Dw could contain values greater than
|
||||
100, and this would show that a word had occurred more than once in
|
||||
at least one sequence.
|
||||
|
||||
From either of the two dictionaries Dw or Ds we can calculate
|
||||
a fuzzy dictionary Dm. For each word, the entry in the fuzzy
|
||||
dictionary Dm contains the sum of the dictionary values (taken from
|
||||
either Dw or Ds) for all words that differ from it by up to m
|
||||
letters. For example if m=2 the entry for TTTTTT contains the number
|
||||
of times that TTTTTT occurs in the dictionary, plus the counts for
|
||||
all words that differ from TTTTTT by 1 or 2 letters. Obviously the
|
||||
interpretation of the values in Dm depends on which of the two
|
||||
dictionaries Dw or Ds they were derived from. When derived from Dw
|
||||
the entry for any word in Dm gives the total number of times it, and
|
||||
its relations, occur in the set of sequences. When derived from Ds
|
||||
the entry for any word in Dm gives the total number of different
|
||||
sequences that contain a word and each of its relations.
|
||||
|
||||
Finally, from fuzzy dictionary Dm we can derive fuzzy
|
||||
dictionary Dh. All entries in Dh are zero except for the word(s),
|
||||
within each set of relations, that are most frequent. For example if
|
||||
TTTTTT occurred 20 times but had a relation that occurred more
|
||||
often, then the entry for TTTTTT would be zero. However if TTTTTT
|
||||
did not have a more frequently occurring relation, then the entry
|
||||
for TTTTTT would contain the value 20.
|
||||
@1. B 1 @Help
|
||||
This option gives online help. The user should select option numbers
|
||||
and the current documentation will be given. Note that option 0
|
||||
gives an introduction to the program, and that ? will get help from
|
||||
anywhere in the program. The following analyses (preceded by their
|
||||
option numbers) are included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
@2. B 1 @Quit
|
||||
This function stops the program.
|
||||
@3. B 1 @Read a new sequence.
|
||||
|
||||
It can read sequences stored in either of two formats: 1) all
|
||||
sequences aligned in a single file; 2) all sequences in separate
|
||||
files and accessed through a file of file names. Typical dialogue
|
||||
follows:
|
||||
|
||||
X 1 Read file of aligned sequences
|
||||
2 Use file of file names
|
||||
? 0,1,2 =
|
||||
|
||||
? File of aligned sequences=F1
|
||||
Number of files 88
|
||||
|
||||
@4. B 1 @Define active region
|
||||
For its analytic functions the program always works on a region of
|
||||
the sequence called the active region. When new sequences are read
|
||||
into the program the active region is automatically set to start at
|
||||
the beginning of the sequences and go up to the end of the longest
|
||||
one.
|
||||
@5. B 1 @List a sequence.
|
||||
The sequence can be listed with line lengths of 50 bases with each
|
||||
sequence numbered in the order in which they were read. Output can
|
||||
be directed to a disk file by first selecting disk output. Typical
|
||||
dialogue follows.
|
||||
|
||||
? Menu or option number=5
|
||||
|
||||
10 20 30 40 50
|
||||
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||
|
||||
60
|
||||
1 TACCCGTTTTT
|
||||
2 GCGTTTTTGT
|
||||
3 TCATACCATAAG
|
||||
4 TTTCATACC
|
||||
5 ATTGTGAGC
|
||||
6 TTCCGGCTCG
|
||||
7 GAAGAGAGT
|
||||
8 TCAGGTGT
|
||||
9 ATGAATG
|
||||
10 TAATTACG
|
||||
@6. B 1 @List a text file.
|
||||
Allows the user to have a text file displayed on the screen. It will
|
||||
appear one page at a time.
|
||||
@7. B 1 @Direct output to disk
|
||||
|
||||
Used to direct output that would normally appear on the screen
|
||||
to a file.
|
||||
|
||||
Select redirection of either text or graphics, and supply the
|
||||
name of the file that the output should be written to.
|
||||
|
||||
The results from the next options selected will not appear on
|
||||
the screen but will be written to the file. When option 7 is
|
||||
selected again the file will be closed and output will again appear
|
||||
on the screen.
|
||||
@10. B 1 @Clear graphics
|
||||
Clears the screen of both text and graphics.
|
||||
@11. B 1 @Clear text
|
||||
Clears only text from the screen.
|
||||
@12. B 1 @Draw a ruler.
|
||||
This option allows the user to draw a ruler or scale along the x
|
||||
axis of the screen to help identify the coordinates of points of
|
||||
interest. The user can define the position of the first amino acid
|
||||
to be marked (for example if the active region is 1501 to 8000, the
|
||||
user might wish to mark every 1000th amino acid starting at either
|
||||
1501 or 2000 - it depends if the user wishes to treat the active
|
||||
region as an independent unit with its own numbering starting at its
|
||||
left edge, or as part of the whole sequence). The user can also
|
||||
define the separation of the ticks on the scale and their height. If
|
||||
required the labelling routine can be used to add numbers to the
|
||||
ticks.
|
||||
@13. B 1 @Use crosshair.
|
||||
This function puts a steerable cross on the screen that can be used
|
||||
to find the coordinates of points in the sequence. The user can move
|
||||
the cross around using the directional keys; when he hits the space
|
||||
bar the program will print out the coordinates of the cross in
|
||||
sequence units and the option will be exited.
|
||||
|
||||
If instead, you hit a , the position will be displayed but the
|
||||
cross will remain on the screen.
|
||||
|
||||
If a letter s is hit the sequence around the cross hair is
|
||||
displayed and the cross remains on the screen.
|
||||
@14. B 1 @Reposition plots
|
||||
The position of each of the plots is defined relative to a user's
|
||||
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||
for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required. As
|
||||
all the plots start at the same position in x and have the same
|
||||
width, x0 and xlength are the same for all options. Generally users
|
||||
will only want to change the start level of the window y0 and its
|
||||
height ylength. This option allows users to change window positions
|
||||
whilst running the program. The routine prompts first for the
|
||||
number of the option that the user wishes to reposition; then for
|
||||
the y start and height; then for the x start and length. Note that
|
||||
changes to the x values affect all options. If the user types only
|
||||
carriage return for any value it will remain unchanged. The cross-
|
||||
hair can be used to choose suitable heights.
|
||||
@15. B 1 @Label a diagram
|
||||
This routine allows users to label any diagrams they have produced.
|
||||
They are asked to type in a label. When the user types carriage
|
||||
return to finish typing the label the cross-hair appears on the
|
||||
screen. The user can position it anywhere on the screen. If the user
|
||||
types R (for right justify) the label will be written on the diagram
|
||||
with its right end at the cross-hair position. If the user types L
|
||||
(for left justify) the label will be written on the diagram with its
|
||||
left end at the cross hair position. The cross-hair will then
|
||||
immediately reappear. The user may put the same label on another
|
||||
part of the diagram as before or if he hits the space bar he will be
|
||||
asked if he wishes to type in another label.
|
||||
@16. B 1 @Display a map.
|
||||
It is often convenient to plot a map alongside graphed analysis in
|
||||
order to indicate features within the sequence. This function allows
|
||||
users to draw maps using files arranged in the form of EMBL feature
|
||||
tables. Of course the EMBL tables are usually only used for nucleic
|
||||
acid sequence annotation but, as long as the features are written in
|
||||
the correct format, they can be employed by this routine. The map is
|
||||
composed of a line representing the sequence and then further lines
|
||||
denoting the endpoints of each feature the user identifies. The user
|
||||
is asked to define height at which the line representing the
|
||||
sequence should be drawn; then for the feature height; then for the
|
||||
features to plot.
|
||||
@17. B 1 @Search for strings
|
||||
Search for strings performs searches of all the sequences for
|
||||
selected words and shows which sequences they are found in. The user
|
||||
types in a word and defines the allowed number of mismatches. The
|
||||
results are listed or plotted. If listed the display includes the
|
||||
sequence number, the position in the sequence and the matching
|
||||
string. The results are plotted in the following way. The x axis of
|
||||
the plot represents the length of the aligned sequences and the y
|
||||
direction is divided into sufficient strips to accommodate each
|
||||
sequence. So if a match is found in the 3rd sequence at a position
|
||||
equivalent to halfway along the longest of the sequences then a
|
||||
short vertical line will be drawn at the midpoint of the 3rd strip.
|
||||
If the sequences are aligned it can be useful if the motifs happen
|
||||
to appear in related positions. For example see the original
|
||||
publication. Typical dialogue follows.
|
||||
|
||||
? Menu or option number=17
|
||||
X 1 Plot match positions
|
||||
2 Plot histogram of matches
|
||||
? 0,1,2 =
|
||||
? Word to search for=TTGACA
|
||||
? Minimum match (0-6) (6) =5
|
||||
? (y/n) (y) Plot results N
|
||||
2 35 TAGACA
|
||||
5 14 TTTACA
|
||||
6 37 TTTACA
|
||||
11 14 TAGACA
|
||||
14 14 TTGACA
|
||||
17 14 GTGACA
|
||||
17 22 TTAACA
|
||||
20 1 TTGACA
|
||||
@18. B 1 @Set strand
|
||||
Set strand allows the user to define which strand(s) of the
|
||||
sequences to analyse: input strand, complement of input, or both.
|
||||
@19. B 1 @Set composition
|
||||
Set composition gives the user three choices for setting the
|
||||
composition of the sequences for use in the calculation of the
|
||||
information content of words. The user can select the overall
|
||||
composition of the sequences as read, an even composition, or can
|
||||
type in any other 4 values.
|
||||
@20. B 1 @Set word length
|
||||
Set word length sets the length of word for which dictionaries will
|
||||
be made.
|
||||
@21. B 1 @Set number of mismatches
|
||||
Set number of mismatches sets the level of fuzziness for the
|
||||
creation of dictionary Dm.
|
||||
@22. B 1 @Show settings
|
||||
Show settings shows the current settings for all parameters
|
||||
associated with dictionary analysis. A typical display follows:
|
||||
? Menu or option number=22
|
||||
Current word length = 6
|
||||
Number of mismatches = 1
|
||||
Start position = 1
|
||||
End position = 63
|
||||
Input strand only
|
||||
Observed composition
|
||||
Dictionary Dw unmade
|
||||
Dictionary Ds unmade
|
||||
Dictionary Dm unmade
|
||||
Dictionary Dh unmade
|
||||
@23. B 1 @Make dictionary Dw
|
||||
Make dictionary Dw creates a dictionary that contains a count of
|
||||
the frequency of occurrence of each word in the collected sequences.
|
||||
@24. B 1 @Make dictionary Ds
|
||||
Make dictionary Ds creates a dictionary that contains a count of the
|
||||
number of different sequences that contain each word.
|
||||
@25. B 1 @Make dictionary Dm from Dw
|
||||
Make dictionary Dm from Dw creates a dictionary from dictionary Dw
|
||||
that contains the frequency of occurrence of each word (say X) in Dw
|
||||
plus the frequency of occurrence of each word in Dw that differs
|
||||
from X by up to m letters. Dm is called a fuzzy dictionary as it
|
||||
contains the frequencies of occurrence of all words plus the
|
||||
frequencies of all the words that are similar to them.
|
||||
@26. B 1 @Make dictionary Dm from Ds
|
||||
Make dictionary Dm from Ds creates a dictionary from dictionary Ds
|
||||
that contains the frequency of occurrence of each word (say X) in Ds
|
||||
plus the frequency of occurrence of each word in Ds that differs
|
||||
from X by up to m letters. Dm is called a fuzzy dictionary as it
|
||||
contains the frequencies of occurrence of all words plus the
|
||||
frequencies of all the words that are similar to them.
|
||||
@27. B 1 @Make dictionary Dh from Dm
|
||||
Make dictionary Dh creates a dictionary from dictionary Dm and
|
||||
whose entries are zero except for those words in any set of related
|
||||
words that are most frequent. It finds the dominant words in each
|
||||
set of relations and stores their counts.
|
||||
@28. B 1 @Examine dictionary Dm
|
||||
Examine dictionary Dm allows users to analyse the contents of
|
||||
dictionary Dm to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dm, and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=28
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAC 64 0.66460
|
||||
AAAAAA 90 0.64880
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTG 73 0.64070
|
||||
TTTTGT 63 0.63820
|
||||
TTTTTC 65 0.63810
|
||||
AAAATA 63 0.62670
|
||||
TATAAT 65 0.62510
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =2
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
AAAAAA 90 0.64880
|
||||
TTTTTG 73 0.64070
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTC 65 0.63810
|
||||
TATAAT 65 0.62510
|
||||
AAAAAC 64 0.66460
|
||||
TTTTGT 63 0.63820
|
||||
AAAATA 63 0.62670
|
||||
TTGACA 60 0.73850
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@29. B 1 @Examine dictionary Dh
|
||||
Examine dictionary Dh allows users to analyse the contents of
|
||||
dictionary Dh to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dh and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=29
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.6
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 4 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
TTTTTT 115 0.60630
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =.5
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =
|
||||
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@30. B 1 @Examine words in Dm
|
||||
Examine words in Dm allows users to analyse the contents of
|
||||
dictionary Dm at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
? Menu or option number=30
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=
|
||||
|
||||
@31. B 1 @Examine words in Dh
|
||||
Examine words in Dh allows users to analyse the contents of
|
||||
dictionary Dh at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=31
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=GGGGGG
|
||||
gggggg 0 0.6199890
|
||||
3 1 1 2 3 4
|
||||
1 3 1 2 2 1
|
||||
2 1 1 1 1 1
|
||||
11 12 14 12 11 11
|
||||
GGGGGG
|
||||
? Word to examine=
|
||||
|
||||
@32. B 1 @Save or restore a dictionary
|
||||
Save or restore dictionary allows users to write or read any
|
||||
dictionary to and from disk files. The user is asked to define the
|
||||
dictionary and file. The function is useful if the machine being
|
||||
used is very slow at calculating because the files can be handled
|
||||
quickly. However note that the files cannot be processed by any
|
||||
other program.
|
||||
@33. B 1 @Find inverted repeats
|
||||
Find inverted repeats performs searches for simple inverted repeat
|
||||
sequences in each sequence. They are defined by a range of loop
|
||||
sizes and a minimum number of potential basepairs. The results can
|
||||
be plotted or listed. The x axis of the plot represents the length
|
||||
of the aligned sequences and the y direction is divided into
|
||||
sufficient strips to accommodate each sequence. So if an inverted
|
||||
repeat is found in the 3rd sequence at a position equivalent to
|
||||
halfway along the longest of the sequences then a short vertical
|
||||
line will be drawn at the midpoint of the 3rd strip. Alternatively,
|
||||
if the results are listed, the potential hairpin loops are drawn
|
||||
out, with the sequence number and the position of the loop. Typical
|
||||
dialogue follows.
|
||||
|
||||
? Menu or option number=33
|
||||
Define the range of loop sizes
|
||||
? Minimum loop size (0-10) (3) =0
|
||||
? Maximum loop size (1-20) (3) =
|
||||
? Minimum number of basepairs (1-20) (6) =
|
||||
? (y/n) (y) Plot results N
|
||||
Searching
|
||||
|
||||
Sequence 3 34
|
||||
C
|
||||
G.T
|
||||
T-A
|
||||
A-T
|
||||
T.G
|
||||
T.G
|
||||
G.T
|
||||
ATCTTT TATTTCA
|
||||
33
|
||||
|
||||
Sequence 5 35
|
||||
T
|
||||
G.T
|
||||
T.G
|
||||
A-T
|
||||
T.G
|
||||
G.T
|
||||
C-G
|
||||
T.G
|
||||
TCCGGC AATTGTG
|
||||
34
|
||||
|
||||
|
||||
@ End of help
|
32
help/mem_menu
Normal file
32
help/mem_menu
Normal file
|
@ -0,0 +1,32 @@
|
|||
0 1 15 184 B MEP
|
||||
1 1 9304 37 B Help
|
||||
2 1 10465 2 B Quit
|
||||
3 1 10531 14 B Read a new sequence.
|
||||
4 1 10932 6 B Define active region
|
||||
5 1 11250 31 B List a sequence.
|
||||
6 1 12393 3 B List a text file.
|
||||
7 1 12525 12 B Direct output to disk
|
||||
10 1 12996 2 B Clear graphics
|
||||
11 1 13065 2 B Clear text
|
||||
12 1 13126 12 B Draw a ruler.
|
||||
13 1 13871 12 B Use crosshair.
|
||||
14 1 14459 34 B Reposition plots
|
||||
15 1 16611 12 B Label a diagram
|
||||
16 1 17394 12 B Display a map.
|
||||
17 1 18154 31 B Search for strings
|
||||
18 1 19507 3 B Set strand
|
||||
19 1 19672 6 B Set composition
|
||||
20 1 20013 3 B Set word length
|
||||
21 1 20131 3 B Set number of mismatches
|
||||
22 1 20256 14 B Show settings
|
||||
23 1 20718 3 B Make dictionary Dw
|
||||
24 1 20890 3 B Make dictionary Ds
|
||||
25 1 21055 7 B Make dictionary Dm from Dw
|
||||
26 1 21505 7 B Make dictionary Dm from Ds
|
||||
27 1 21955 5 B Make dictionary Dh from Dm
|
||||
28 1 22245 55 B Examine dictionary Dm
|
||||
29 1 24148 70 B Examine dictionary Dh
|
||||
30 1 26410 25 B Examine words in Dm
|
||||
31 1 27437 33 B Examine words in Dh
|
||||
32 1 28701 7 B Save or restore a dictionary
|
||||
33 1 29106 46 B Find inverted repeats
|
792
help/mep_help
Normal file
792
help/mep_help
Normal file
|
@ -0,0 +1,792 @@
|
|||
|
||||
@-1. TX 0 @General
|
||||
|
||||
@-2. T 0 @Screen control
|
||||
|
||||
@-2. X 0 @Screen
|
||||
|
||||
@-3. TX 0 @Dictionary analysis
|
||||
|
||||
@0. TX -1 @MEP
|
||||
|
||||
This is a program for analysing families of nucleotide
|
||||
sequences in order to find common motifs and potential binding
|
||||
sites. The ideas in this program were described in Staden, R.
|
||||
"Methods for discovering novel motifs in nucleic acid sequences".
|
||||
Computer Applications in the Biosciences, 5, 293-298, (1989).
|
||||
|
||||
The program can read sequences stored in either of two
|
||||
formats: 1) all sequences aligned in a single file; 2) all sequences
|
||||
in separate files and accessed through a file of file names.
|
||||
|
||||
The program contains functions that can answer several
|
||||
questions about a set of sequences:
|
||||
|
||||
Which words are most common?
|
||||
Which words occur in the most sequences?
|
||||
Which words contain the most information?
|
||||
Which words occur in equivalent positions in the sequences?
|
||||
Which words are inverted repeats?
|
||||
Which words occur on both strands of the sequences?
|
||||
Where are the inverted repeats?
|
||||
Where are the fuzzy words?
|
||||
|
||||
Most of the program is concerned with analysing what it terms
|
||||
"fuzzy words" within the set of sequences. The analysis is explained
|
||||
below. Note that the standard version of the program is limited to
|
||||
words of maximum length 8 letters, and a maximum fuzziness of 2.
|
||||
|
||||
The following analyses (preceded by their option numbers) are
|
||||
included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
|
||||
Some of these methods produce graphical results and so the
|
||||
program is generally used from a graphics terminal (a vdu on which
|
||||
lines and points can be drawn as well as characters).
|
||||
|
||||
The position of each of the plots is defined relative to a user's
|
||||
drawing board which has size 1-10,000 in x and 1-10,000 in y. Plots
|
||||
for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required.
|
||||
|
||||
The options for the program are accessed from 3 main menus:
|
||||
general, screen control and dictionary analysis. Both menus and
|
||||
options are selected by number.
|
||||
|
||||
The most important and novel part of the program is its use of
|
||||
"fuzzy dictionaries" and an information theory measure, to help show
|
||||
the most interesting motifs. Central to the method is the idea of a
|
||||
fuzzy dictionary of word frequencies. A dictionary of word
|
||||
frequencies is an ordered list of all the words in the sequences and
|
||||
a count of the number of times that they occur. A fuzzy dictionary
|
||||
is an equivalent list but which contains instead, for each word, a
|
||||
count of the number of times similar words occur in the sequences.
|
||||
We term words that are similar "relations". The fuzziness is defined
|
||||
by the number of letters in a word that are allowed to be different.
|
||||
So if we had a fuzziness of 1 we allow 1 letter to be different. For
|
||||
example, with a fuzziness of 1, the entry in the fuzzy dictionary
|
||||
for the word TTTTTT would contain a count of the numbers of times
|
||||
TTTTTT occurred plus the number of times all words differing by
|
||||
exactly one letter from TTTTTT occurred.
|
||||
|
||||
Once the fuzzy dictionary has been created we can examine it
|
||||
in several ways to find candidate control sequences. The simplest
|
||||
question we can ask is which word in the dictionary is the most
|
||||
common. Sometimes this simple criterion of "most common" may be
|
||||
adequate to discover a new motif but in general we would not expect
|
||||
it to be sufficient. For example some words will be common simply
|
||||
because of a base composition bias in the sequences being analysed.
|
||||
In addition a word can be the most frequent and yet not be "well
|
||||
defined". This last point is best explained by an example.
|
||||
|
||||
Suppose we were looking at two letter words and allowing one
|
||||
mismatch, and that there were 10 occurrences of TT and 5 of AC. We
|
||||
could align the 10 words that were one letter different from TT and
|
||||
the 5 that were related to AC. Then we could count the number of
|
||||
times each base occurred in each position for each of these two sets
|
||||
of words. Suppose we got the two base frequency tables shown below.
|
||||
TT AC
|
||||
T 6 4 T 1 0
|
||||
C 1 3 C 0 4
|
||||
A 1 2 A 4 1
|
||||
G 2 1 G 0 0
|
||||
|
||||
These tables show that although TT occurs (with one letter mismatch)
|
||||
more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
|
||||
is higher than those for TT at 6/10, 4/10. Hence we would say that
|
||||
AC was better defined than TT. Expressing this another way we would
|
||||
say that the definition of AC contained more information than that
|
||||
for TT. The program calculates the information content in a way that
|
||||
takes into account both the sequence composition and the level of
|
||||
definition of the motif.
|
||||
|
||||
Definitions
|
||||
|
||||
Here we deal only with the dictionary analysis. Suppose we
|
||||
are dealing with a set of sequences and are examining them for words
|
||||
that are six characters in length.
|
||||
|
||||
Dictionary Dw contains a count of the number of times each
|
||||
word occurs in the set of sequences. For example the entry for
|
||||
TTTTTT contains a value equal to the number of times the word TTTTTT
|
||||
occurs in the set of sequences.
|
||||
|
||||
Dictionary Ds contains a count of the number of different
|
||||
sequences in which each word occurs. For example if the entry for
|
||||
word TTTTTT contains the value 10, it denotes that the word TTTTTT
|
||||
occurs in ten different sequences. Unlike Dw it only counts words
|
||||
once for each sequence. For example if we had a set of 100
|
||||
sequences, the maximum possible value that Ds could take is 100, and
|
||||
this would only happen if a word occurred in every sequence. However
|
||||
for the same set of sequences, Dw could contain values greater than
|
||||
100, and this would show that a word had occurred more than once in
|
||||
at least one sequence.
|
||||
|
||||
From either of the two dictionaries Dw or Ds we can calculate
|
||||
a fuzzy dictionary Dm. For each word, the entry in the fuzzy
|
||||
dictionary Dm contains the sum of the dictionary values (taken from
|
||||
either Dw or Ds) for all words that differ from it by up to m
|
||||
letters. For example if m=2 the entry for TTTTTT contains the number
|
||||
of times that TTTTTT occurs in the dictionary, plus the counts for
|
||||
all words that differ from TTTTTT by 1 or 2 letters. Obviously the
|
||||
interpretation of the values in Dm depends on which of the two
|
||||
dictionaries Dw or Ds they were derived from. When derived from Dw
|
||||
the entry for any word in Dm gives the total number of times it, and
|
||||
its relations, occur in the set of sequences. When derived from Ds
|
||||
the entry for any word in Dm gives the total number of different
|
||||
sequences that contain a word and each of its relations.
|
||||
|
||||
Finally, from fuzzy dictionary Dm we can derive fuzzy
|
||||
dictionary Dh. All entries in Dh are zero except for the word(s),
|
||||
within each set of relations, that are most frequent. For example if
|
||||
TTTTTT occurred 20 times but had a relation that occurred more
|
||||
often, then the entry for TTTTTT would be zero. However if TTTTTT
|
||||
did not have a more frequently occurring relation, then the entry
|
||||
for TTTTTT would contain the value 20.
|
||||
@1. T 0 @Help
|
||||
|
||||
This option gives online help. The user should select option
|
||||
numbers and the current documentation will be given. Note that
|
||||
option 0 gives an introduction to the program, and that ? will get
|
||||
help from anywhere in the program. The following analyses (preceded
|
||||
by their option numbers) are included:
|
||||
? = Help
|
||||
! = Quit
|
||||
3 = Read new sequences
|
||||
4 = Redefine active region
|
||||
5 = List the sequences
|
||||
6 = List text file
|
||||
7 = Direct output to disk
|
||||
10 = Clear graphics
|
||||
11 = Clear text
|
||||
12 = Draw ruler
|
||||
13 = Use cross hair
|
||||
14 = Reset margins
|
||||
15 = Label diagram
|
||||
16 = Draw map
|
||||
17 = Search for strings
|
||||
18 = Set strand
|
||||
19 = Set composition
|
||||
20 = Set word length
|
||||
21 = Set number of mismatches
|
||||
22 = Show settings
|
||||
23 = Make dictionary Dw
|
||||
24 = Make dictionary Ds
|
||||
25 = Make fuzzy dictionary Dm from Dw
|
||||
26 = Make fuzzy dictionary Dm from Ds
|
||||
27 = Make fuzzy dictionary Dh from Dm
|
||||
28 = Examine fuzzy dictionary Dm
|
||||
29 = Examine fuzzy dictionary Dh
|
||||
30 = Examine words in Dm
|
||||
31 = Examine words in Dh
|
||||
32 = Save or restore a dictionary
|
||||
33 = Find inverted repeats
|
||||
@2. T 0 @Quit
|
||||
|
||||
This function stops the program.
|
||||
@3. TX 1 @Read a new sequence
|
||||
|
||||
It can read sequences stored in either of two formats: 1) all
|
||||
sequences aligned in a single file; 2) all sequences in separate
|
||||
files and accessed through a file of file names. Typical dialogue
|
||||
follows:
|
||||
|
||||
X 1 Read file of aligned sequences
|
||||
2 Use file of file names
|
||||
? 0,1,2 =
|
||||
|
||||
? File of aligned sequences=F1
|
||||
Number of files 88
|
||||
|
||||
@4. TX 1 @Define active region
|
||||
|
||||
For its analytic functions the program always works on a
|
||||
region of the sequence called the active region. When new sequences
|
||||
are read into the program the active region is automatically set to
|
||||
start at the beginning of the sequences and go up to the end of the
|
||||
longest one.
|
||||
@5. TX 1 @List a sequence
|
||||
|
||||
The sequence can be listed with line lengths of 50 bases with
|
||||
each sequence numbered in the order in which they were read. Output
|
||||
can be directed to a disk file by first selecting disk output.
|
||||
Typical dialogue follows.
|
||||
|
||||
? Menu or option number=5
|
||||
|
||||
10 20 30 40 50
|
||||
1 TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
|
||||
2 CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
|
||||
3 TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
|
||||
4 ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
|
||||
5 AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
|
||||
6 TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
|
||||
7 ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
|
||||
8 GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
|
||||
9 AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
|
||||
10 AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
|
||||
|
||||
60
|
||||
1 TACCCGTTTTT
|
||||
2 GCGTTTTTGT
|
||||
3 TCATACCATAAG
|
||||
4 TTTCATACC
|
||||
5 ATTGTGAGC
|
||||
6 TTCCGGCTCG
|
||||
7 GAAGAGAGT
|
||||
8 TCAGGTGT
|
||||
9 ATGAATG
|
||||
10 TAATTACG
|
||||
@6. TX 1 @List a text file
|
||||
|
||||
Allows the user to have a text file displayed on the screen.
|
||||
It will appear one page at a time.
|
||||
@7. TX 1 @Direct output to disk
|
||||
|
||||
Used to direct output that would normally appear on the screen
|
||||
to a file.
|
||||
|
||||
Select redirection of either text or graphics, and supply the
|
||||
name of the file that the output should be written to.
|
||||
|
||||
The results from the next options selected will not appear on
|
||||
the screen but will be written to the file. When option 7 is
|
||||
selected again the file will be closed and output will again appear
|
||||
on the screen.
|
||||
@10. TX 2 @Clear graphics
|
||||
|
||||
Clears the screen of both text and graphics.
|
||||
@11. TX 2 @Clear text
|
||||
|
||||
Clears only text from the screen.
|
||||
@12. TX 2 @Draw a ruler
|
||||
|
||||
This option allows the user to draw a ruler or scale along the
|
||||
x axis of the screen to help identify the coordinates of points of
|
||||
interest. The user can define the position of the first amino acid
|
||||
to be marked (for example if the active region is 1501 to 8000, the
|
||||
user might wish to mark every 1000th amino acid starting at either
|
||||
1501 or 2000 - it depends if the user wishes to treat the active
|
||||
region as an independent unit with its own numbering starting at its
|
||||
left edge, or as part of the whole sequence). The user can also
|
||||
define the separation of the ticks on the scale and their height. If
|
||||
required the labelling routine can be used to add numbers to the
|
||||
ticks.
|
||||
@13. TX 2 @Use crosshair
|
||||
|
||||
This function puts a steerable cross on the screen that can be
|
||||
used to find the coordinates of points in the sequence. The user can
|
||||
move the cross around using the directional keys; when he hits the
|
||||
space bar the program will print out the coordinates of the cross in
|
||||
sequence units and the option will be exited.
|
||||
|
||||
If instead, you hit a , the position will be displayed but the
|
||||
cross will remain on the screen.
|
||||
|
||||
If a letter s is hit the sequence around the cross hair is
|
||||
displayed and the cross remains on the screen.
|
||||
@14. TX 2 @Reposition plots
|
||||
|
||||
The positions of each of the plots is defined relative to a
|
||||
user's drawing board which has size 1-10,000 in x and 1-10,000 in y.
|
||||
Plots for each option are drawn in a window defined by x0,y0 and
|
||||
xlength,ylength. Where x0,y0 is the position of the bottom left hand
|
||||
corner of the window, and xlength is the width of the window and
|
||||
ylength the height of the window.
|
||||
--------------------------------------------------------- 10,000
|
||||
1 1
|
||||
1 -------------------------------------- ^ 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 1 1 ylength 1
|
||||
1 1 1 1 1
|
||||
1 1 1 1 1
|
||||
1 -------------------------------------- v 1
|
||||
1 x0,y0^ 1
|
||||
1 <---------------xlength--------------> 1
|
||||
--------------------------------------------------------- 1
|
||||
1 10,000
|
||||
|
||||
All values are in drawing board units (i.e. 1-10,000, 1-10,000).
|
||||
The default window positions are read from a file "MEPMARG" when the
|
||||
program is started. Users can have their own file if required. As
|
||||
all the plots start at the same position in x and have the same
|
||||
width, x0 and xlength are the same for all options. Generally users
|
||||
will only want to change the start level of the window y0 and its
|
||||
height ylength. This option allows users to change window positions
|
||||
whilst running the program. The routine prompts first for the
|
||||
number of the option that the user wishes to reposition; then for
|
||||
the y start and height; then for the x start and length. Note that
|
||||
changes to the x values affect all options. If the user types only
|
||||
carriage return for any value it will remain unchanged. The cross-
|
||||
hair can be used to choose suitable heights.
|
||||
@15. TX 2 @Label a diagram
|
||||
|
||||
This routine allows users to label any diagrams they have
|
||||
produced. They are asked to type in a label. When the user types
|
||||
carriage return to finish typing the label the cross-hair appears on
|
||||
the screen. The user can position it anywhere on the screen. If the
|
||||
user types R (for right justify) the label will be written on the
|
||||
diagram with its right end at the cross-hair position. If the user
|
||||
types L (for left justify) the label will be written on the diagram
|
||||
with its left end at the cross hair position. The cross-hair will
|
||||
then immediately reappear. The user may put the same label on
|
||||
another part of the diagram as before or if he hits the space bar he
|
||||
will be asked if he wishes to type in another label.
|
||||
@16. TX 2 @Display a map
|
||||
|
||||
It is often convenient to plot a map alongside graphed
|
||||
analysis in order to indicate features within the sequence. This
|
||||
function allows users to draw maps using files arranged in the form
|
||||
of EMBL feature tables. Of course the EMBL tables are usually only
|
||||
used for nucleic acid sequence annotation but, as long as the
|
||||
features are written in the correct format, they can be employed by
|
||||
this routine. The map is composed of a line representing the
|
||||
sequence and then further lines denoting the endpoints of each
|
||||
feature the user identifies. The user is asked to define height at
|
||||
which the line representing the sequence should be drawn; then for
|
||||
the feature height; then for the features to plot.
|
||||
@17. TX 1 @Search for strings
|
||||
|
||||
Search for strings performs searches of all the sequences for
|
||||
selected words and shows which sequences they are found in. The user
|
||||
types in a word and defines the allowed number of mismatches. The
|
||||
results are listed or plotted. If listed the display includes the
|
||||
sequence number, the position in the sequence and the matching
|
||||
string. The results are plotted in the following way. The x axis of
|
||||
the plot represents the length of the aligned sequences and the y
|
||||
direction is divided into sufficient strips to accommodate each
|
||||
sequence. So if a match is found in the 3rd sequence at a position
|
||||
equivalent to halfway along the longest of the sequences then a
|
||||
short vertical line will be drawn at the midpoint of the 3rd strip.
|
||||
If the sequences are aligned it can be useful if the motifs happen
|
||||
to appear in related positions. For example see the original
|
||||
publication. Typical dialogue follows.
|
||||
|
||||
? Menu or option number=17
|
||||
X 1 Plot match positions
|
||||
2 Plot histogram of matches
|
||||
? 0,1,2 =
|
||||
? Word to search for=TTGACA
|
||||
? Minimum match (0-6) (6) =5
|
||||
? (y/n) (y) Plot results N
|
||||
2 35 TAGACA
|
||||
5 14 TTTACA
|
||||
6 37 TTTACA
|
||||
11 14 TAGACA
|
||||
14 14 TTGACA
|
||||
17 14 GTGACA
|
||||
17 22 TTAACA
|
||||
20 1 TTGACA
|
||||
@18. TX 3 @Set strand
|
||||
|
||||
Set strand allows the user to define which strand(s) of the
|
||||
sequences to analyse: input strand, complement of input, or both.
|
||||
@19. TX 3 @Set composition
|
||||
|
||||
Set composition gives the user three choices for setting the
|
||||
composition of the sequences for use in the calculation of the
|
||||
information content of words. The user can select the overall
|
||||
composition of the sequences as read, an even composition, or can
|
||||
type in any other 4 values.
|
||||
@20. TX 3 @Set word length
|
||||
|
||||
Set word length sets the length of word for which dictionaries
|
||||
will be made.
|
||||
@21. TX 3 @Set number of mismatches
|
||||
|
||||
Set number of mismatches sets the level of fuzziness for the
|
||||
creation of dictionary Dm.
|
||||
@22. TX 3 @Show settings
|
||||
|
||||
Show settings shows the current settings for all parameters
|
||||
associated with dictionary analysis. A typical display follows:
|
||||
? Menu or option number=22
|
||||
Current word length = 6
|
||||
Number of mismatches = 1
|
||||
Start position = 1
|
||||
End position = 63
|
||||
Input strand only
|
||||
Observed composition
|
||||
Dictionary Dw unmade
|
||||
Dictionary Ds unmade
|
||||
Dictionary Dm unmade
|
||||
Dictionary Dh unmade
|
||||
@23. TX 3 @Make dictionary Dw
|
||||
|
||||
Make dictionary Dw creates a dictionary that contains a count
|
||||
of the frequency of occurrence of each word in the collected
|
||||
sequences.
|
||||
@24. TX 3 @Make dictionary Ds
|
||||
|
||||
Make dictionary Ds creates a dictionary that contains a count
|
||||
of the number of different sequences that contain each word.
|
||||
@25. TX 3 @Make dictionary Dm from Dw
|
||||
|
||||
Make dictionary Dm from Dw creates a dictionary from
|
||||
dictionary Dw that contains the frequency of occurrence of each word
|
||||
(say X) in Dw plus the frequency of occurrence of each word in Dw
|
||||
that differs from X by up to m letters. Dm is called a fuzzy
|
||||
dictionary as it contains the frequencies of occurrence of all words
|
||||
plus the frequencies of all the words that are similar to them.
|
||||
@26. TX 3 @Make dictionary Dm from Ds
|
||||
|
||||
Make dictionary Dm from Ds creates a dictionary from
|
||||
dictionary Ds that contains the frequency of occurrence of each word
|
||||
(say X) in Ds plus the frequency of occurrence of each word in Ds
|
||||
that differs from X by up to m letters. Dm is called a fuzzy
|
||||
dictionary as it contains the frequencies of occurrence of all words
|
||||
plus the frequencies of all the words that are similar to them.
|
||||
@27. TX 3 @Make dictionary Dh from Dm
|
||||
|
||||
Make dictionary Dh creates a dictionary from dictionary Dm
|
||||
and whose entries are zero except for those words in any set of
|
||||
related words that are most frequent. It finds the dominant words in
|
||||
each set of relations and stores their counts.
|
||||
@28. TX 3 @Examine fuzzy dictionary Dm
|
||||
|
||||
Examine dictionary Dm allows users to analyse the contents of
|
||||
dictionary Dm to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dm, and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=28
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAC 64 0.66460
|
||||
AAAAAA 90 0.64880
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTG 73 0.64070
|
||||
TTTTGT 63 0.63820
|
||||
TTTTTC 65 0.63810
|
||||
AAAATA 63 0.62670
|
||||
TATAAT 65 0.62510
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.62
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =2
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 9 Maximum information= 0.7385326
|
||||
AAAAAA 90 0.64880
|
||||
TTTTTG 73 0.64070
|
||||
GTTTTT 66 0.64300
|
||||
TTTTTC 65 0.63810
|
||||
TATAAT 65 0.62510
|
||||
AAAAAC 64 0.66460
|
||||
TTTTGT 63 0.63820
|
||||
AAAATA 63 0.62670
|
||||
TTGACA 60 0.73850
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@29. TX 3 @Examine fuzzy dictionary Dh
|
||||
|
||||
Examine dictionary Dh allows users to analyse the contents of
|
||||
dictionary Dh to find the most common words or those words that
|
||||
contain the most information. The user supplies a frequency or
|
||||
information cutoff and chooses to have the results sorted on either
|
||||
value. The program will find the top 100 words that achieve the
|
||||
cutoff values and present them to the user sorted as selected. The
|
||||
information content will be calculated from either Dw or Ds
|
||||
depending which was used to create Dh and using the current
|
||||
composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=29
|
||||
Looking for highest scoring words
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =60
|
||||
? Minimum information (0.00-1.00) (0.00) =.6
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 4 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
TTTTTT 115 0.60630
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =.5
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =50
|
||||
? Minimum information (0.00-1.00) (0.00) =
|
||||
|
||||
X 1 Sort on information
|
||||
2 Sort on word score
|
||||
? 0,1,2 =
|
||||
|
||||
? Maximum number to list (0-100) (100) =
|
||||
|
||||
The words are
|
||||
Total words= 8 Maximum information= 0.7385326
|
||||
TTGACA 60 0.73850
|
||||
TCTTGA 54 0.66080
|
||||
AAAAAA 90 0.64880
|
||||
TATAAT 65 0.62510
|
||||
ACTTTA 57 0.61960
|
||||
TTTTTT 115 0.60630
|
||||
AGTATA 51 0.60540
|
||||
TTATAA 55 0.59300
|
||||
The highest word score = 115
|
||||
? Minimum word score (0-115) (0) =!
|
||||
|
||||
@30. TX 3 @Examine words in Dm
|
||||
|
||||
Examine words in Dm allows users to analyse the contents of
|
||||
dictionary Dm at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
? Menu or option number=30
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=
|
||||
|
||||
@31. TX 3 @Examine words in Dh
|
||||
|
||||
Examine words in Dh allows users to analyse the contents of
|
||||
dictionary Dh at the level of individual words to find their
|
||||
frequency, information content, and to see their base frequency
|
||||
table. The user types in a word to examine and the program displays
|
||||
the values and table. The information content will be calculated
|
||||
from either Dw or Ds depending which was used to create Dm, and
|
||||
using the current composition setting. Typical dialogue follows:
|
||||
|
||||
? Menu or option number=31
|
||||
? Word to examine=TTGACA
|
||||
TtgacA 60 0.7385326
|
||||
56 56 6 7 5 11
|
||||
4 3 2 1 52 1
|
||||
1 4 2 53 3 48
|
||||
3 1 54 3 4 4
|
||||
TTGACA
|
||||
? Word to examine=TATAAT
|
||||
taTAat 65 0.6251902
|
||||
56 3 53 4 4 60
|
||||
6 1 5 5 5 3
|
||||
3 60 5 57 57 4
|
||||
4 5 6 3 3 2
|
||||
TATAAT
|
||||
? Word to examine=GGGGGG
|
||||
gggggg 0 0.6199890
|
||||
3 1 1 2 3 4
|
||||
1 3 1 2 2 1
|
||||
2 1 1 1 1 1
|
||||
11 12 14 12 11 11
|
||||
GGGGGG
|
||||
? Word to examine=
|
||||
|
||||
@32. TX 3 @Save or restore a dictionary
|
||||
|
||||
Save or restore dictionary allows users to write or read any
|
||||
dictionary to and from disk files. The user is asked to define the
|
||||
dictionary and file. The function is useful if the machine being
|
||||
used is very slow at calculating because the files can be handled
|
||||
quickly. However note that the files cannot be processed by any
|
||||
other program.
|
||||
@33. TX 1 @Find inverted repeats
|
||||
|
||||
Find inverted repeats performs searches for simple inverted
|
||||
repeat sequences in each sequence. They are defined by a range of
|
||||
loop sizes and a minimum number of potential basepairs. The results
|
||||
can be plotted or listed. The x axis of the plot represents the
|
||||
length of the aligned sequences and the y direction is divided into
|
||||
sufficient strips to accommodate each sequence. So if an inverted
|
||||
repeat is found in the 3rd sequence at a position equivalent to
|
||||
halfway along the longest of the sequences then a short vertical
|
||||
line will be drawn at the midpoint of the 3rd strip. Alternatively,
|
||||
if the results are listed, the potential hairpin loops are drawn
|
||||
out, with the sequence number and the position of the loop. Typical
|
||||
dialogue follows.
|
||||
|
||||
? Menu or option number=33
|
||||
Define the range of loop sizes
|
||||
? Minimum loop size (0-10) (3) =0
|
||||
? Maximum loop size (1-20) (3) =
|
||||
? Minimum number of basepairs (1-20) (6) =
|
||||
? (y/n) (y) Plot results N
|
||||
Searching
|
||||
|
||||
Sequence 3 34
|
||||
C
|
||||
G.T
|
||||
T-A
|
||||
A-T
|
||||
T.G
|
||||
T.G
|
||||
G.T
|
||||
ATCTTT TATTTCA
|
||||
33
|
||||
|
||||
Sequence 5 35
|
||||
T
|
||||
G.T
|
||||
T.G
|
||||
A-T
|
||||
T.G
|
||||
G.T
|
||||
C-G
|
||||
T.G
|
||||
TCCGGC AATTGTG
|
||||
34
|
||||
@ End of help
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
68
help/mep_menu
Normal file
68
help/mep_menu
Normal file
|
@ -0,0 +1,68 @@
|
|||
-1 0 22 2 T General
|
||||
-1 0 22 2 X General
|
||||
-2 0 51 2 T Screen control
|
||||
-2 0 72 2 X Screen
|
||||
-3 0 106 2 T Dictionary analysis
|
||||
-3 0 106 2 X Dictionary analysis
|
||||
0 -1 124 185 T MEP
|
||||
0 -1 124 185 X MEP
|
||||
1 0 9423 38 T Help
|
||||
2 0 10594 3 T Quit
|
||||
3 1 10667 14 T Read a new sequence
|
||||
3 1 10667 14 X Read a new sequence
|
||||
4 1 11069 7 T Define active region
|
||||
4 1 11069 7 X Define active region
|
||||
5 1 11396 32 T List a sequence
|
||||
5 1 11396 32 X List a sequence
|
||||
6 1 12548 4 T List a text file
|
||||
6 1 12548 4 X List a text file
|
||||
7 1 12690 12 T Direct output to disk
|
||||
7 1 12690 12 X Direct output to disk
|
||||
10 2 13162 3 T Clear graphics
|
||||
10 2 13162 3 X Clear graphics
|
||||
11 2 13239 3 T Clear text
|
||||
11 2 13239 3 X Clear text
|
||||
12 2 13307 13 T Draw a ruler
|
||||
12 2 13307 13 X Draw a ruler
|
||||
13 2 14053 13 T Use crosshair
|
||||
13 2 14053 13 X Use crosshair
|
||||
14 2 14643 35 T Reposition plots
|
||||
14 2 14643 35 X Reposition plots
|
||||
15 2 16797 13 T Label a diagram
|
||||
15 2 16797 13 X Label a diagram
|
||||
16 2 17589 13 T Display a map
|
||||
16 2 17589 13 X Display a map
|
||||
17 1 18384 32 T Search for strings
|
||||
17 1 18384 32 X Search for strings
|
||||
18 3 19739 4 T Set strand
|
||||
18 3 19739 4 X Set strand
|
||||
19 3 19906 7 T Set composition
|
||||
19 3 19906 7 X Set composition
|
||||
20 3 20249 4 T Set word length
|
||||
20 3 20249 4 X Set word length
|
||||
21 3 20374 4 T Set number of mismatches
|
||||
21 3 20374 4 X Set number of mismatches
|
||||
22 3 20501 15 T Show settings
|
||||
22 3 20501 15 X Show settings
|
||||
23 3 20965 5 T Make dictionary Dw
|
||||
23 3 20965 5 X Make dictionary Dw
|
||||
24 3 21152 4 T Make dictionary Ds
|
||||
24 3 21152 4 X Make dictionary Ds
|
||||
25 3 21326 8 T Make dictionary Dm from Dw
|
||||
25 3 21326 8 X Make dictionary Dm from Dw
|
||||
26 3 21787 8 T Make dictionary Dm from Ds
|
||||
26 3 21787 8 X Make dictionary Dm from Ds
|
||||
27 3 22248 6 T Make dictionary Dh from Dm
|
||||
27 3 22248 6 X Make dictionary Dh from Dm
|
||||
28 3 22551 56 T Examine fuzzy dictionary Dm
|
||||
28 3 22551 56 X Examine fuzzy dictionary Dm
|
||||
29 3 24462 71 T Examine fuzzy dictionary Dh
|
||||
29 3 24462 71 X Examine fuzzy dictionary Dh
|
||||
30 3 26726 26 T Examine words in Dm
|
||||
30 3 26726 26 X Examine words in Dm
|
||||
31 3 27755 34 T Examine words in Dh
|
||||
31 3 27755 34 X Examine words in Dh
|
||||
32 3 29021 8 T Save or restore a dictionary
|
||||
32 3 29021 8 X Save or restore a dictionary
|
||||
33 1 29428 45 T Find inverted repeats
|
||||
33 1 29428 45 X Find inverted repeats
|
4620
help/nip_help
Normal file
4620
help/nip_help
Normal file
File diff suppressed because it is too large
Load diff
156
help/nip_menu
Normal file
156
help/nip_menu
Normal file
|
@ -0,0 +1,156 @@
|
|||
-1 0 22 2 T General
|
||||
-1 0 22 2 X General
|
||||
-2 0 51 2 T Screen control
|
||||
-2 0 72 2 X Screen
|
||||
-3 0 118 2 T Statistical analysis of content
|
||||
-3 0 143 2 X Statistics
|
||||
-4 0 180 2 T Structures and repeats
|
||||
-4 0 205 2 X Structures
|
||||
-5 0 242 2 T Translation and codons
|
||||
-5 0 242 2 X Translation and codons
|
||||
-6 0 279 2 T Gene search by content
|
||||
-6 0 279 2 X Gene search by content
|
||||
-7 0 309 2 T General signals
|
||||
-7 0 309 2 X General signals
|
||||
-8 0 340 2 T Specific signals
|
||||
-8 0 340 2 X Specific signals
|
||||
0 -1 359 16 T NIP
|
||||
0 -1 359 16 X NIP
|
||||
1 0 1155 7 T Help
|
||||
1 0 1155 7 X Help
|
||||
2 0 1469 3 T Quit
|
||||
2 0 1469 3 X Quit
|
||||
3 1 1543 220 T Read a new sequence
|
||||
3 1 1543 220 X Read a new sequence
|
||||
4 1 11372 15 T Define active region
|
||||
4 1 11372 15 X Define active region
|
||||
5 1 12100 24 T List a sequence
|
||||
5 1 12100 24 X List a sequence
|
||||
6 1 13103 6 T List a text file.
|
||||
6 1 13103 6 X List a text file.
|
||||
7 1 13300 12 T Direct output to disk
|
||||
7 1 13300 12 X Direct output to disk
|
||||
8 1 13785 10 T Write active region to disk
|
||||
8 1 13785 10 X Write active region to disk
|
||||
9 1 14128 31 T Edit the sequence
|
||||
9 1 14128 31 X Edit the sequence
|
||||
10 2 15970 3 T Clear graphics
|
||||
10 2 15970 3 X Clear graphics
|
||||
11 2 16036 3 T Clear text
|
||||
11 2 16036 3 X Clear text
|
||||
12 2 16101 12 T Draw a ruler
|
||||
12 2 16101 12 X Draw a ruler
|
||||
13 2 16833 13 T Use crosshair
|
||||
13 2 16833 13 X Use crosshair
|
||||
14 2 17443 35 T Reposition plots
|
||||
14 2 17443 35 X Reposition plots
|
||||
15 2 19598 28 T Label a diagram
|
||||
15 2 19598 28 X Label a diagram
|
||||
16 2 20703 34 T Display a map
|
||||
16 2 20703 34 X Display a map
|
||||
17 1 22073 599 T Search for restriction enzymes
|
||||
17 1 22073 599 X Search for restriction enzymes
|
||||
18 7 46675 105 T Compare a short sequence
|
||||
18 1 46675 105 T Compare a short sequence
|
||||
18 7 46675 105 X Compare a short sequence
|
||||
18 1 46675 105 X Compare a short sequence
|
||||
19 7 49650 106 T Compare a short sequence using a score matrix
|
||||
19 7 49650 106 X Compare a short sequence using a score matrix
|
||||
20 7 53349 230 T Search for a motif using a weight matrix
|
||||
20 7 53349 230 X Search for a motif using a weight matrix
|
||||
21 3 63267 4 T Count base composition
|
||||
21 3 63267 4 X Count base composition
|
||||
22 3 63440 14 T Count dinucleotide frequencies
|
||||
22 3 63440 14 X Count dinucleotide frequencies
|
||||
23 5 64100 179 T Count codons and amino acids
|
||||
23 3 64100 179 T Count codons and amino acids
|
||||
23 5 64100 179 X Count codons and amino acids
|
||||
23 3 64100 179 X Count codons and amino acids
|
||||
24 3 72137 57 T Plot base composition
|
||||
24 3 72137 57 X Plot base composition
|
||||
25 3 73213 23 T Plot local deviations in base composition
|
||||
25 3 73213 23 X Plot local deviations in base composition
|
||||
26 3 74495 23 T Plot local deviations from dinucleotide composition
|
||||
26 3 74495 23 X Plot local deviations from dinucleotide composition
|
||||
27 3 75793 23 T Plot local deviations from trinucleotide composition
|
||||
27 3 75793 23 X Plot local deviations from trinucleotide composition
|
||||
28 5 77065 18 T Calculate codon constraint
|
||||
28 5 77065 18 X Calculate codon constraint
|
||||
59 3 77869 12 T Plot negentropy
|
||||
59 3 77869 12 X Plot negentropy
|
||||
30 4 78454 74 T Search for hairpin loops
|
||||
30 4 78454 74 X Search for hairpin loops
|
||||
31 4 80321 23 T Search for long range inverted repeats
|
||||
31 4 80321 23 X Search for long range inverted repeats
|
||||
32 4 81157 37 T Search for repeats
|
||||
32 4 81157 37 X Search for repeats
|
||||
33 4 82467 12 T Search for z dna (total ry, yr)
|
||||
33 4 82467 12 X Search for z dna (total ry, yr)
|
||||
34 4 82984 12 T Search for z dna (runs of ry, yr)
|
||||
34 4 82984 12 X Search for z dna (runs of ry, yr)
|
||||
35 4 83623 15 T Search for z dna (best phased value)
|
||||
35 4 83623 15 X Search for z dna (best phased value)
|
||||
36 4 84350 92 T Local similarity or complementarity search
|
||||
36 4 84350 92 X Local similarity or complementarity search
|
||||
37 5 87778 39 T Set genetic code
|
||||
37 5 87778 39 X Set genetic code
|
||||
38 4 89050 74 T Examine repeats
|
||||
38 3 89050 74 T Examine repeats
|
||||
39 5 91670 286 T Translate and list in upto six phases
|
||||
39 5 91670 286 X Translate and list in upto six phases
|
||||
40 5 103780 134 T Translate and write the protein sequence to disk
|
||||
40 5 103780 134 X Translate and write the protein sequence to disk
|
||||
41 5 108198 71 T Calculate and write codon table to disk
|
||||
41 5 108198 71 X Calculate and write codon table to disk
|
||||
42 6 111525 132 T Codon usage method
|
||||
42 6 111525 132 X Codon usage method
|
||||
43 6 118508 182 T Positional base preference method.
|
||||
43 6 118508 182 X Positional base preference method.
|
||||
44 6 127924 39 T Uneven positional base frequencies.
|
||||
44 6 127924 39 X Uneven positional base frequencies.
|
||||
45 6 130287 33 T Codon improbability on base composition
|
||||
45 6 130287 33 X Codon improbability on base composition
|
||||
46 6 132146 28 T Codon improbability on amino acid composition
|
||||
46 6 132146 28 X Codon improbability on amino acid composition
|
||||
47 6 133744 14 T Shepherd RNY preference method
|
||||
47 6 133744 14 X Shepherd RNY preference method
|
||||
48 6 134410 30 T Ficketts method
|
||||
48 6 134410 30 X Ficketts method
|
||||
49 6 136094 139 T tRNA gene search.
|
||||
49 6 136094 139 X tRNA gene search.
|
||||
50 7 141894 4 T Plot start codons
|
||||
50 7 141894 4 X Plot start codons
|
||||
51 7 142027 4 T Plot stop codons
|
||||
51 7 142027 4 X Plot stop codons
|
||||
52 7 142188 4 T Plot stop codons on the complementary strand
|
||||
52 7 142188 4 X Plot stop codons on the complementary strand
|
||||
53 7 142365 4 T Plot stop codons on both strands
|
||||
53 7 142365 4 X Plot stop codons on both strands
|
||||
54 5 142536 45 T Search for longest open reading frames
|
||||
54 5 142536 45 X Search for longest open reading frames
|
||||
55 8 144437 67 T Search for E. coli promoter (general)
|
||||
55 8 144437 67 X Search for E. coli promoter (general)
|
||||
56 8 148004 4 T Search for E. coli promoter (general) strand
|
||||
56 8 148004 4 X Search for E. coli promoter (general) strand
|
||||
57 8 148210 4 T Search for E. coli promoter sequences. (-35 and -10)
|
||||
57 8 148210 4 X Search for E. coli promoter sequences. (-35 and -10)
|
||||
58 8 148405 44 T Search for procaryotic ribosome binding sites
|
||||
58 8 148405 44 X Search for procaryotic ribosome binding sites
|
||||
29 1 150862 4 T Reverse and complement the sequence
|
||||
29 1 150862 4 X Reverse and complement the sequence
|
||||
60 7 151001 142 T Search using a dinucleotide weight matrix
|
||||
60 7 151001 142 X Search using a dinucleotide weight matrix
|
||||
61 8 157292 31 T Search for eukaryotic ribosome binding sites
|
||||
61 8 157292 31 X Search for eukaryotic ribosome binding sites
|
||||
62 8 158730 56 T Search for splice junctions
|
||||
62 8 158730 56 X Search for splice junctions
|
||||
63 7 162089 7 T Search using a weight matrix (complementary)
|
||||
63 7 162089 7 X Search using a weight matrix (complementary)
|
||||
64 3 162471 36 T Plot observed-expected word frequencies
|
||||
64 3 162471 36 X Plot observed-expected word frequencies
|
||||
65 9 164175 5 T Search for polya sites
|
||||
65 9 164175 5 X Search for polya sites
|
||||
66 1 164369 4 T Interconvert t and u
|
||||
66 1 164369 4 X Interconvert t and u
|
||||
67 7 164520 797 T Search for patterns of motifs
|
||||
67 7 164520 797 X Search for patterns of motifs
|
132
help/nipf_help
Normal file
132
help/nipf_help
Normal file
|
@ -0,0 +1,132 @@
|
|||
|
||||
@-1. TX 0 @General
|
||||
|
||||
@-2. TX 0 @Screen control
|
||||
|
||||
@-3. TX 0 @Statistical analysis
|
||||
|
||||
@-1. TX 0 @General
|
||||
|
||||
@-2. TX 0 @Screen control
|
||||
|
||||
@-3. TX 0 @Statistical analysis
|
||||
|
||||
@0. TX -1 @NIPF
|
||||
|
||||
@1. TX 1 @ Help
|
||||
|
||||
@2. TX 1 @ Quit
|
||||
|
||||
@3. TX 1 @ Read new sequence
|
||||
|
||||
@4. TX 1 @ Redefine active region
|
||||
|
||||
@5. TX 1 @ List the sequence
|
||||
|
||||
@6. TX 1 @ List a text file
|
||||
|
||||
@7. TX 1 @ Direct output to disk
|
||||
|
||||
@8. TX 1 @ Write active sequence to disk
|
||||
|
||||
@9. TX 1 @ List a translation
|
||||
|
||||
@32. TX 1 @ List showing base differences
|
||||
|
||||
@37. TX 1 @ List showing translation
|
||||
|
||||
@33. TX 1 @ List showing amino acid differences
|
||||
|
||||
@10. TX 2 @ Clear graphics
|
||||
|
||||
@11. TX 2 @ Clear text
|
||||
|
||||
@12. TX 2 @ Draw a ruler
|
||||
|
||||
@13. TX 2 @ Use cross hair
|
||||
|
||||
@14. TX 2 @ Reset margins
|
||||
|
||||
@15. TX 2 @ Label diagram
|
||||
|
||||
@16. TX 2 @ Display a map
|
||||
|
||||
@17. TX 3 @ Set comparison mode
|
||||
|
||||
@18. TX 3 @ Set sort mode
|
||||
|
||||
@21. TX 3 @ Count base changes
|
||||
|
||||
@22. TX 3 @ Count codon changes
|
||||
|
||||
@23. TX 3 @ Count genetic events
|
||||
|
||||
@24. TX 3 @ Show table of base changes
|
||||
|
||||
@36. TX 3 @ Show table of expressed base changes
|
||||
|
||||
@39. TX 3 @ Show table of silent base changes
|
||||
|
||||
@38. TX 3 @ Estimate mutation rate
|
||||
|
||||
@25. TX 3 @ Plot base changes
|
||||
|
||||
@26. TX 3 @ Plot expressed changes per base
|
||||
|
||||
@27. TX 3 @ Plot silent changes per base
|
||||
|
||||
@28. TX 3 @ Count expressed changes per base
|
||||
|
||||
@29. TX 3 @ Count silent changes per base
|
||||
|
||||
@30. TX 3 @ Count changed amino acids
|
||||
|
||||
@31. TX 3 @ Plot amino acid variability
|
||||
|
||||
@ end of help
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
84
help/nipf_menu
Normal file
84
help/nipf_menu
Normal file
|
@ -0,0 +1,84 @@
|
|||
-1 0 23 2 T General
|
||||
-1 0 23 2 X General
|
||||
-2 0 53 2 T Screen control
|
||||
-2 0 53 2 X Screen control
|
||||
-3 0 89 2 T Statistical analysis
|
||||
-3 0 89 2 X Statistical analysis
|
||||
-1 0 112 2 T General
|
||||
-1 0 112 2 X General
|
||||
-2 0 142 2 T Screen control
|
||||
-2 0 142 2 X Screen control
|
||||
-3 0 178 2 T Statistical analysis
|
||||
-3 0 178 2 X Statistical analysis
|
||||
0 -1 198 2 T NIPF
|
||||
0 -1 198 2 X NIPF
|
||||
1 1 217 2 T Help
|
||||
1 1 217 2 X Help
|
||||
2 1 236 2 T Quit
|
||||
2 1 236 2 X Quit
|
||||
3 1 268 2 T Read new sequence
|
||||
3 1 268 2 X Read new sequence
|
||||
4 1 305 2 T Redefine active region
|
||||
4 1 305 2 X Redefine active region
|
||||
5 1 337 2 T List the sequence
|
||||
5 1 337 2 X List the sequence
|
||||
6 1 368 2 T List a text file
|
||||
6 1 368 2 X List a text file
|
||||
7 1 404 2 T Direct output to disk
|
||||
7 1 404 2 X Direct output to disk
|
||||
8 1 448 2 T Write active sequence to disk
|
||||
8 1 448 2 X Write active sequence to disk
|
||||
9 1 481 2 T List a translation
|
||||
9 1 481 2 X List a translation
|
||||
32 1 525 2 T List showing base differences
|
||||
32 1 525 2 X List showing base differences
|
||||
37 1 564 2 T List showing translation
|
||||
37 1 564 2 X List showing translation
|
||||
33 1 614 2 T List showing amino acid differences
|
||||
33 1 614 2 X List showing amino acid differences
|
||||
10 2 643 2 T Clear graphics
|
||||
10 2 643 2 X Clear graphics
|
||||
11 2 668 2 T Clear text
|
||||
11 2 668 2 X Clear text
|
||||
12 2 695 2 T Draw a ruler
|
||||
12 2 695 2 X Draw a ruler
|
||||
13 2 724 2 T Use cross hair
|
||||
13 2 724 2 X Use cross hair
|
||||
14 2 752 2 T Reset margins
|
||||
14 2 752 2 X Reset margins
|
||||
15 2 780 2 T Label diagram
|
||||
15 2 780 2 X Label diagram
|
||||
16 2 808 2 T Display a map
|
||||
16 2 808 2 X Display a map
|
||||
17 3 842 2 T Set comparison mode
|
||||
17 3 842 2 X Set comparison mode
|
||||
18 3 870 2 T Set sort mode
|
||||
18 3 870 2 X Set sort mode
|
||||
21 3 903 2 T Count base changes
|
||||
21 3 903 2 X Count base changes
|
||||
22 3 937 2 T Count codon changes
|
||||
22 3 937 2 X Count codon changes
|
||||
23 3 972 2 T Count genetic events
|
||||
23 3 972 2 X Count genetic events
|
||||
24 3 1013 2 T Show table of base changes
|
||||
24 3 1013 2 X Show table of base changes
|
||||
36 3 1064 2 T Show table of expressed base changes
|
||||
36 3 1064 2 X Show table of expressed base changes
|
||||
39 3 1112 2 T Show table of silent base changes
|
||||
39 3 1112 2 X Show table of silent base changes
|
||||
38 3 1149 2 T Estimate mutation rate
|
||||
38 3 1149 2 X Estimate mutation rate
|
||||
25 3 1181 2 T Plot base changes
|
||||
25 3 1181 2 X Plot base changes
|
||||
26 3 1227 2 T Plot expressed changes per base
|
||||
26 3 1227 2 X Plot expressed changes per base
|
||||
27 3 1270 2 T Plot silent changes per base
|
||||
27 3 1270 2 X Plot silent changes per base
|
||||
28 3 1317 2 T Count expressed changes per base
|
||||
28 3 1317 2 X Count expressed changes per base
|
||||
29 3 1361 2 T Count silent changes per base
|
||||
29 3 1361 2 X Count silent changes per base
|
||||
30 3 1401 2 T Count changed amino acids
|
||||
30 3 1401 2 X Count changed amino acids
|
||||
31 3 1443 2 T Plot amino acid variability
|
||||
31 3 1443 2 X Plot amino acid variability
|
2244
help/pip_help
Normal file
2244
help/pip_help
Normal file
File diff suppressed because it is too large
Load diff
80
help/pip_menu
Normal file
80
help/pip_menu
Normal file
|
@ -0,0 +1,80 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 117 2 T Statistical analysis of content
|
||||
-3 0 142 2 X Statistics
|
||||
-4 0 179 2 T Structures and repeats
|
||||
-4 0 204 2 X Structures
|
||||
-5 0 225 2 T Search
|
||||
-5 0 225 2 X Search
|
||||
0 -1 243 76 T PIP
|
||||
0 -1 243 76 X PIP
|
||||
1 0 3546 8 T Help
|
||||
1 0 3546 8 X Help
|
||||
2 0 3889 3 T Quit
|
||||
2 0 3889 3 X Quit
|
||||
3 1 3962 220 T Read a new sequence
|
||||
3 1 3962 220 X Read a new sequence
|
||||
4 1 13792 12 T Redefine active region
|
||||
4 1 13792 12 X Redefine active region
|
||||
5 1 14480 33 T List a sequence
|
||||
5 1 14480 33 X List a sequence
|
||||
6 1 15941 4 T List a text file
|
||||
6 1 15941 4 X List a text file
|
||||
7 1 16083 12 T Direct output to disk
|
||||
7 1 16083 12 X Direct output to disk
|
||||
8 1 16567 7 T Write active region to disk
|
||||
8 1 16567 7 X Write active region to disk
|
||||
9 1 16922 26 T Edit the sequence
|
||||
9 1 16922 26 X Edit the sequence
|
||||
10 2 18386 3 T Clear graphics
|
||||
10 2 18386 3 X Clear graphics
|
||||
11 2 18463 3 T Clear text
|
||||
11 2 18463 3 X Clear text
|
||||
12 2 18531 13 T Draw a ruler
|
||||
12 2 18531 13 X Draw a ruler
|
||||
13 2 19278 13 T Use cross hair
|
||||
13 2 19278 13 X Use cross hair
|
||||
14 2 19865 35 T Reset margins
|
||||
14 2 19865 35 X Reset margins
|
||||
15 2 22019 13 T Label a diagram
|
||||
15 2 22019 13 X Label a diagram
|
||||
16 2 22811 13 T Display a map
|
||||
16 2 22811 13 X Display a map
|
||||
17 5 23611 254 T Short sequence search
|
||||
17 1 23611 254 T Short sequence search
|
||||
17 5 23611 254 X Short sequence search
|
||||
17 1 23611 254 X Short sequence search
|
||||
18 5 34012 57 T Compare a sequence
|
||||
18 1 34012 57 T Compare a sequence
|
||||
18 5 34012 57 X Compare a sequence
|
||||
18 1 34012 57 X Compare a sequence
|
||||
19 5 35654 69 T Compare a sequence using a score matrix
|
||||
19 1 35654 69 T Compare a sequence using a score matrix
|
||||
19 5 35654 69 X Compare a sequence using a score matrix
|
||||
19 1 35654 69 X Compare a sequence using a score matrix
|
||||
20 5 37587 214 T Search for a motif using a weight matrix
|
||||
20 5 37587 214 X Search for a motif using a weight matrix
|
||||
21 3 46771 20 T Calculate amino acid composition
|
||||
21 3 46771 20 X Calculate amino acid composition
|
||||
22 4 47655 20 T Plot hydrophobicity
|
||||
22 3 47655 20 T Plot hydrophobicity
|
||||
22 4 47655 20 X Plot hydrophobicity
|
||||
22 3 47655 20 X Plot hydrophobicity
|
||||
23 4 48439 19 T Plot charge
|
||||
23 3 48439 19 T Plot charge
|
||||
23 4 48439 19 X Plot charge
|
||||
23 3 48439 19 X Plot charge
|
||||
24 4 48953 72 T Plot robson prediction
|
||||
24 4 48953 72 X Plot robson prediction
|
||||
26 4 51912 32 T Draw a helix wheel
|
||||
26 4 51912 32 X Draw a helix wheel
|
||||
25 4 53561 36 T Plot hydrophobic moment
|
||||
25 3 53561 36 T Plot hydrophobic moment
|
||||
25 4 53561 36 X Plot hydrophobic moment
|
||||
25 3 53561 36 X Plot hydrophobic moment
|
||||
27 1 55101 87 T Back translate to dna
|
||||
27 1 55101 87 X Back translate to dna
|
||||
28 5 59337 809 T Search for patterns of motifs
|
||||
28 5 59337 809 X Search for patterns of motifs
|
1848
help/sap_help
Normal file
1848
help/sap_help
Normal file
File diff suppressed because it is too large
Load diff
76
help/sap_menu
Normal file
76
help/sap_menu
Normal file
|
@ -0,0 +1,76 @@
|
|||
-1 0 21 2 T General
|
||||
-1 0 21 2 X General
|
||||
-2 0 50 2 T Screen control
|
||||
-2 0 71 2 X Screen
|
||||
-3 0 98 2 T Modification
|
||||
-3 0 98 2 X Modification
|
||||
0 -1 116 379 T SAP
|
||||
0 -1 116 379 X SAP
|
||||
17 1 19213 18 T Screen against restriction enzymes
|
||||
17 1 19213 18 X Screen against restriction enzymes
|
||||
18 1 20256 22 T Screen against vector
|
||||
18 1 20256 22 X Screen against vector
|
||||
20 2 21583 113 T Auto assemble
|
||||
20 2 21583 113 X Auto assemble
|
||||
28 1 27744 42 T Highlight disagreements
|
||||
28 1 27744 42 X Highlight disagreements
|
||||
32 3 30106 22 T Extract gel readings
|
||||
32 3 30106 22 X Extract gel readings
|
||||
1 0 31209 3 T Help
|
||||
1 0 31209 3 X Help
|
||||
2 0 31277 5 T Help
|
||||
2 0 31277 5 X Help
|
||||
3 1 31470 175 T Open a database
|
||||
3 1 31470 175 X Open a database
|
||||
4 3 40550 64 T Edit
|
||||
4 3 40550 64 X Edit
|
||||
9 3 43796 40 T Screen edit
|
||||
9 3 43796 40 X Screen edit
|
||||
5 1 45923 45 T Display a contig
|
||||
5 1 45923 45 X Display a contig
|
||||
6 1 48409 6 T List a text file
|
||||
6 1 48409 6 X List a text file
|
||||
8 1 48667 94 T Calculate a consensus
|
||||
8 1 48667 94 X Calculate a consensus
|
||||
25 1 53186 41 T Show relationships
|
||||
25 1 53186 41 X Show relationships
|
||||
21 3 55121 99 T Enter new gel reading
|
||||
21 3 55121 99 X Enter new gel reading
|
||||
23 3 60131 11 T Complement a contig
|
||||
23 3 60131 11 X Complement a contig
|
||||
22 3 60644 70 T Join contigs
|
||||
22 3 60644 70 X Join contigs
|
||||
24 1 64235 11 T Copy the database
|
||||
24 1 64235 11 X Copy the database
|
||||
19 1 64781 41 T Check database
|
||||
19 1 64781 41 X Check database
|
||||
29 1 66799 82 T Examine quality
|
||||
29 1 66799 82 X Examine quality
|
||||
26 3 70617 92 T Alter relationships
|
||||
26 3 70617 92 X Alter relationships
|
||||
27 1 75377 17 T Set display parameters
|
||||
27 1 75377 17 X Set display parameters
|
||||
30 3 76245 48 T Auto edit a contig
|
||||
30 3 76245 48 X Auto edit a contig
|
||||
10 2 78721 3 T Clear graphics
|
||||
10 2 78721 3 X Clear graphics
|
||||
11 2 78786 3 T Clear text
|
||||
11 2 78786 3 X Clear text
|
||||
12 2 78851 12 T Draw a ruler.
|
||||
12 2 78851 12 X Draw a ruler.
|
||||
14 2 79585 38 T Reposition plots
|
||||
14 2 79585 38 X Reposition plots
|
||||
15 2 81933 28 T Label a diagram
|
||||
15 2 81933 28 X Label a diagram
|
||||
16 2 83039 27 T Display a map.
|
||||
16 2 83039 27 X Display a map.
|
||||
7 1 84014 12 T Redirect output
|
||||
7 1 84014 12 X Redirect output
|
||||
13 2 84485 41 T Use crosshair
|
||||
13 2 84485 41 X Use crosshair
|
||||
33 2 86611 11 T Plot single contig
|
||||
33 2 86611 11 X Plot single contig
|
||||
34 2 87312 9 T Plot all contigs
|
||||
34 2 87312 9 X Plot all contigs
|
||||
31 3 87884 9 T Type in gel readings
|
||||
31 3 87884 9 X Type in gel readings
|
1254
help/sip_help
Normal file
1254
help/sip_help
Normal file
File diff suppressed because it is too large
Load diff
78
help/sip_menu
Normal file
78
help/sip_menu
Normal file
|
@ -0,0 +1,78 @@
|
|||
-1 0 22 2 T General
|
||||
-1 0 22 2 X General
|
||||
-2 0 51 2 T Screen control
|
||||
-2 0 72 2 X Screen
|
||||
-3 0 101 2 T Set parameters
|
||||
-3 0 101 2 X Set parameters
|
||||
-4 0 126 2 T Comparison
|
||||
-4 0 126 2 X Comparison
|
||||
0 -1 144 208 T SIP
|
||||
0 -1 144 208 X SIP
|
||||
1 0 12690 39 T Help
|
||||
1 0 12690 39 X Help
|
||||
2 0 13755 3 T Quit
|
||||
2 0 13755 3 X Quit
|
||||
3 1 13828 220 T Read a new sequence
|
||||
3 1 13828 220 X Read a new sequence
|
||||
4 1 23656 10 T Define active region
|
||||
4 1 23656 10 X Define active region
|
||||
5 1 24191 16 T List a sequence
|
||||
5 1 24191 16 X List a sequence
|
||||
6 1 25001 4 T List a text file
|
||||
6 1 25001 4 X List a text file
|
||||
7 1 25143 12 T Direct output to disk
|
||||
7 1 25143 12 X Direct output to disk
|
||||
8 1 25627 4 T Write active region to disk
|
||||
8 1 25627 4 X Write active region to disk
|
||||
9 1 25764 5 T Edit the sequences
|
||||
9 1 25764 5 X Edit the sequences
|
||||
10 2 25944 3 T Clear graphics
|
||||
10 2 25944 3 X Clear graphics
|
||||
11 2 26021 3 T Clear text
|
||||
11 2 26021 3 X Clear text
|
||||
12 2 26089 15 T Draw a ruler
|
||||
12 2 26089 15 X Draw a ruler
|
||||
13 2 26869 54 T Use cross hair
|
||||
13 2 26869 54 X Use cross hair
|
||||
14 2 28754 29 T Reposition plots
|
||||
14 2 28754 29 X Reposition plots
|
||||
15 2 30429 13 T Label a diagram
|
||||
15 2 30429 13 X Label a diagram
|
||||
16 2 31213 7 T Display a map
|
||||
16 2 31213 7 X Display a map
|
||||
17 4 31596 19 T Apply identities algorithm
|
||||
17 4 31596 19 X Apply identities algorithm
|
||||
18 4 32260 81 T Apply proportional algorithm
|
||||
18 4 32260 81 X Apply proportional algorithm
|
||||
19 4 36686 42 T List matching spans
|
||||
19 4 36686 42 X List matching spans
|
||||
20 3 37569 16 T Set span length
|
||||
20 3 37569 16 X Set span length
|
||||
21 3 38560 13 T Set proportional score
|
||||
21 3 38560 13 X Set proportional score
|
||||
22 3 39251 6 T Set identities score
|
||||
22 3 39251 6 X Set identities score
|
||||
23 3 39544 79 T Calculate expected scores
|
||||
23 3 39544 79 X Calculate expected scores
|
||||
24 3 43148 90 T Calculate observed scores
|
||||
24 3 43148 90 X Calculate observed scores
|
||||
25 3 46152 26 T Show current parameter settings
|
||||
25 3 46152 26 X Show current parameter settings
|
||||
27 2 46802 5 T Draw a /
|
||||
27 2 46802 5 X Draw a /
|
||||
26 4 46991 57 T Quick scan
|
||||
26 4 46991 57 X Quick scan
|
||||
28 4 49883 90 T Align sequences
|
||||
28 4 49883 90 X Align sequences
|
||||
29 1 55133 4 T Complement the sequences
|
||||
29 1 55133 4 X Complement the sequences
|
||||
30 3 55256 9 T Switch main diagonal
|
||||
30 3 55256 9 X Switch main diagonal
|
||||
31 3 55755 8 T Switch identities
|
||||
31 3 55755 8 X Switch identities
|
||||
32 3 56202 17 T change score matrix
|
||||
32 3 56202 17 X change score matrix
|
||||
33 3 56884 16 T Set number of sd's for Quickscan
|
||||
33 3 56884 16 X Set number of sd's for Quickscan
|
||||
34 3 57767 13 T Set gap penalities
|
||||
34 3 57767 13 X Set gap penalities
|
132
help/splitp_help
Normal file
132
help/splitp_help
Normal file
|
@ -0,0 +1,132 @@
|
|||
|
||||
Preparing the PROSITE protein motif library for use by
|
||||
the Staden programs
|
||||
|
||||
Introduction
|
||||
|
||||
A library of protein motifs (in our terminology, because
|
||||
they include variable gaps, some would be called patterns) has
|
||||
recently become available from Amos Bairoch, Departement de
|
||||
Biochimie Medicale, University of Geneva. Currently it contains 317
|
||||
patterns/motifs and arrives on tape or cdrom in two files: a .dat
|
||||
file and a .doc file. There is also a user documentation file
|
||||
prosite.usr. Here I outline what is required to prepare the
|
||||
PROSITE library for use by our programs.
|
||||
|
||||
Three programs need to be run: SPLITP1, SPLITP2, and
|
||||
SPLITP3.
|
||||
|
||||
Outline of the PROSITE files
|
||||
|
||||
A typical entry in the .dat file is shown below.
|
||||
|
||||
ID 2FE2S_FERREDOXIN; PATTERN.
|
||||
AC PS00197;
|
||||
DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
|
||||
DE 2Fe-2S ferredoxins, iron-sulfur binding region signature.
|
||||
PA C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
|
||||
NR /RELEASE=14,15409;
|
||||
NR /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
|
||||
NR /FALSE_NEG=5(5);
|
||||
CC /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
|
||||
CC /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
|
||||
DR P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
|
||||
DR P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
|
||||
DR P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
|
||||
DO PDOC00175;
|
||||
//
|
||||
|
||||
Each entry has an accession number (here PS00197), a
|
||||
pattern definition (here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a
|
||||
documentation file cross reference (here PDOC00175). This
|
||||
pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of
|
||||
STA, not P, C.
|
||||
|
||||
We need to convert all of these patterns into our pattern
|
||||
definitions (as membership of a set, with the appropriate gap
|
||||
ranges) and write each into a separate pattern file with
|
||||
corresponding "membership of a set" weight matrices. Each pattern
|
||||
file is named accession_number.pat (here PS00197.PAT). The
|
||||
corresponding matrix files are accession_number.wtsa,
|
||||
accession_number.wtsb, etc for however many are needed (here
|
||||
PS00197.WTSA and PS00197.WTSB): two are needed because of the
|
||||
variable gap.
|
||||
|
||||
In addition we can optionally split the .dat and .doc
|
||||
files into separate files, one for each entry, with names
|
||||
accession_number.dat and accession_number.doc. Also we create an
|
||||
index for the library prosite.lis, which gives a one line
|
||||
description of each pattern, and ends with the pattern file and
|
||||
documentation file numbers. The start of the file is shown below.
|
||||
|
||||
N-glycosylation site. 00001,00001
|
||||
Glycosaminoglycan attachment site. 00002,00002
|
||||
Tyrosine sulfatation site. 00003,00003
|
||||
cAMP- and cGMP-dependent protein kinase phosphorylation site. 00004,00004
|
||||
|
||||
So the name of the pattern file for Glycosaminoglycan attachment
|
||||
site is PS00002.PAT, and for the documentation file PDOC00002.DOC.
|
||||
|
||||
Finally we create a file of file names for all the
|
||||
patterns in the library.
|
||||
|
||||
To use the complete PROSITE library from program pip,
|
||||
select "pattern searcher" and choose the option "use file of
|
||||
pattern file names", and give the file name prosite.nam. For any
|
||||
matches found, the accession number and pattern title will be
|
||||
displayed.
|
||||
|
||||
Running the conversion programs
|
||||
|
||||
Only SPLITP3 is necessary for using the library. The
|
||||
other programs only make the original files marginally easier to
|
||||
browse through and produce an index.
|
||||
|
||||
SPLITP1 splits the prosite.dat file to create a separate
|
||||
file for each entry. Each file is automatically named
|
||||
PSentry_number.dat. In addition it creates an index for the
|
||||
library (see above).
|
||||
|
||||
SPLITP2 performs the same operation for the Prosite.doc
|
||||
file, except that no index is created. Files are named
|
||||
PSentry_number.doc.
|
||||
|
||||
SPLITP3 creates a separate pattern file and weight matrix
|
||||
files for each prosite entry from the file prosite.dat. Pattern
|
||||
files are named PSentry_number.pat, weight matrix files
|
||||
PSentry_number.wtsa, PSentry_number.wtsb, etc. The pattern title
|
||||
is the one line description of the motif. SPLITP3 also creates a
|
||||
file of file names. Notice that it will ask for a path name so
|
||||
that the path can be included in the file of file names. This is
|
||||
the path to the directory in which the pattern files are stored.
|
||||
|
||||
Notes
|
||||
|
||||
Obviously the use of files of file names is a general
|
||||
solution, and anybody could now create their own set of
|
||||
interesting patterns for screening, or a subset of prosite.nam,
|
||||
etc.
|
||||
|
||||
Note that 5 of the Bairoch motifs contained the symbols >
|
||||
or < which means that the motifs must appear exactly at the N or
|
||||
C termini of the sequences. Currently our methods have no
|
||||
mechanism for such definitions and, for example, KDEL motifs will
|
||||
be permitted to occur anywhere throughout a sequence.
|
||||
|
||||
Also, of course, the library does not have to be used
|
||||
solely for performing mass screenings: each individual entry can
|
||||
be used as a single pattern by giving the name of its .pat file -
|
||||
e.g. pathname/ps00002.pat. In addition more sophisticated users will
|
||||
wish to copy pattern files and weight matrices into their own
|
||||
directories and modify them. For example the cutoff scores are
|
||||
probably chosen to be quite high in order to reduce the number of
|
||||
false positives, and some users might wish to lower them.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
81
help/staden.references
Normal file
81
help/staden.references
Normal file
|
@ -0,0 +1,81 @@
|
|||
|
||||
References with further information about the methods
|
||||
|
||||
Staden, R. Nucl. Acid Res. 8, 817-825 (1980)
|
||||
A computer program to search for tRNA genes. (NIP)
|
||||
Staden, R. Nucl. Acid Res. 8, 3673-3694 (1980)
|
||||
A new computer method for the storage and manipulation
|
||||
of DNA gel reading data. (SAP).
|
||||
Staden, R. Nucl. Acid Res. 10, 2951-2961 (1982)
|
||||
An interactive graphics program for comparing and
|
||||
aligning nucleic acid and amino acid sequences.
|
||||
(SIP).
|
||||
Staden, R. Nucl. Acid Res. 10, 4731-4751 (1982)
|
||||
Automation of the computer handling of gel reading data
|
||||
produced by the shotgun method of DNA sequencing.(SAP)
|
||||
Staden, R. and McLachlan, A. D. Nucl. Acid Res. 10,
|
||||
141-156 (1982)
|
||||
Codon preference and its use in identifying protein
|
||||
coding regions in long DNA sequences. (NIP)
|
||||
Staden, R. Nucl. Acid Res. 12, 499-503 (1984)
|
||||
A computer program to enter DNA gel reading data into a
|
||||
computer. (GIP)
|
||||
Staden, R. Nucl. Acid Res. 12, 551-567 (1984)
|
||||
Measurements of the effects that coding for a protein
|
||||
has on a DNA sequence and their use for finding
|
||||
genes. (NIP: positional base preferences, uneven
|
||||
positional base frequencies)
|
||||
Staden, R. Nucl. Acid Res. 12, 505-519 (1984)
|
||||
Computer methods to locate signals in nucleic acid
|
||||
sequences. NIP: promoters, ribosome binding
|
||||
sites, intron/exon junctions.
|
||||
McLachlan A D, Staden R and Boswell D R, Nucl. Acid Res.
|
||||
12, 9567-9575 (1984)
|
||||
Measure of strength of codon preference. (NIP)
|
||||
Staden R, Computer methods to locate genes and signals in
|
||||
nucleic acid sequences, Genetic Engineering: Principles
|
||||
and Methods Vol. 7, Edited by J. K. Setlow and A.
|
||||
Hollaender, Plenum Publishing Corp. 1985. (NIP)
|
||||
Staden R Nucl. Acid. Res. 14, 217-231 (1986)
|
||||
The current status and portability of our sequence
|
||||
handling software. Summary for May 1985.
|
||||
Staden R "Computer Handling of DNA sequencing projects" in
|
||||
Nucleic acid and protein sequence analysis, A practical
|
||||
approach, 173-217. Edited by M.J.Bishop and C.J.Rawlings,
|
||||
IRL press (1987). (SAP)
|
||||
Staden R, Methods to define and locate patterns of motifs in
|
||||
sequences. CABIOS 4 53-60 (1988). (NIP, PIP,
|
||||
NIPL, PIPL)
|
||||
Staden R, Methods for calculating the probabilities of finding
|
||||
patterns in sequences. CABIOS 5 89-96 (1989). (NIP, PIP,
|
||||
NIPL, PIPL)
|
||||
Staden R, "Methods for discovering novel motifs in nucleic acid
|
||||
sequences". CABIOS 5, 293-298, (1989). (MEP)
|
||||
Staden R, Methods to search for patterns in protein and nucleic
|
||||
acid sequences. In Doolittle, R,R (ed), Methods in
|
||||
Enzymology, 183, Academic Press, San Diego, CA, 193-211.
|
||||
(1990) (NIP, NIPL, PIP, PIPL)
|
||||
Staden R, Finding protein coding regions in genomic sequences.
|
||||
In Doolittle, R,R (ed), Methods in Enzymology, 183,
|
||||
Academic Press, San Diego, CA, 163-180. (1990) (NIP)
|
||||
Gleeson T J and Staden R, An X windows and UNIX implementation
|
||||
of our sequence analysis package. CABIOS 7 398 (1991)
|
||||
Staden R, Screening protein and nucleic acid sequences against
|
||||
libraries of patterns. DNA Sequence, in press (NIP, PIP,
|
||||
SPLITP1, SPLITP2, SPLITP3, PROSITE)
|
||||
Dear S and Staden R, A sequence assembly and editing program for
|
||||
efficient management of large projects. Nucleic Acids
|
||||
Research 19 3907-3911 (1991) (XDAP)
|
||||
Staden R and Dear S, Indexing the sequence libraries: Software
|
||||
providing a common indexing system for all the standard
|
||||
sequence libraries. DNA Sequence 3, 99-105 (1992).
|
||||
Dear S and Staden R, A standard file format for data from DNA
|
||||
sequencing instruments. DNA Sequence 3, 107-110 (1992)
|
||||
Gleeson T and Hillier L, A trace display and editing program
|
||||
for data from fluorescence based sequencing machines.
|
||||
Nucleic Acids Research 19 6481-6483 (1991) (TED)
|
||||
Staden R, Staden package update. Genome News 13 12-13 (1993)
|
||||
|
||||
|
||||
|
||||
|
184
help/staden_help
Normal file
184
help/staden_help
Normal file
|
@ -0,0 +1,184 @@
|
|||
|
||||
Introduction to the Staden sequence analysis package and its
|
||||
user interface
|
||||
|
||||
The package contains the following programs:
|
||||
|
||||
GIP Gel input program
|
||||
SAP Sequence assembly program
|
||||
NIP Nucleotide interpretation program
|
||||
PIP Protein interpretation program
|
||||
SIP Similarity investigation program
|
||||
MEP Motif exploration program
|
||||
NIPL Nucleotide interpretation program (library)
|
||||
PIPL Protein interpretation program (library)
|
||||
SIPL Similarity investigation program (library)
|
||||
|
||||
GIP uses a digitiser for entry of DNA sequences from
|
||||
autoradiographs.
|
||||
SAP handles everything relating to assembling gel readings in order
|
||||
to produce a consensus sequence. It can also deal with families of
|
||||
protein sequences.
|
||||
NIP provides functions for analysing and interpreting individual
|
||||
nucleotide sequences.
|
||||
PIP provides functions for analysing and interpreting individual
|
||||
protein sequences.
|
||||
MEP analyses families of nucleotide sequences to help discover new
|
||||
motifs.
|
||||
NIPL performs pattern searches on nucleotide sequence libraries.
|
||||
PIPL performs pattern searches on protein sequence libraries.
|
||||
SIP provides functions for comparing and aligning pairs of protein
|
||||
or nucleotide sequences.
|
||||
SIPL searches nucleotide and protein sequence libraries for entries
|
||||
similar to probe sequences.
|
||||
|
||||
|
||||
Documentation
|
||||
|
||||
As is explained below, the programs SAP, NIP, PIP, SIP and MEP
|
||||
have online help, and the help files have the names: HELPSAP,
|
||||
HELPNIP, HELPPIP, HELPSIP, HELPMEP. These files can be displayed on
|
||||
the screen or printed using the appropriate commands. Currently the
|
||||
help for the other programs is also contained in these files. For
|
||||
example help for NIPL is in HELPNIP. This file is called HELPSTADEN.
|
||||
|
||||
Sequence formats
|
||||
|
||||
The shotgun sequencing program SAP deals only with simple text
|
||||
files for gel readings, and is a self-contained system. However as
|
||||
there is still no single agreed format for finished sequences or for
|
||||
libraries of sequences, the other programs in the package can read
|
||||
data that is stored in several ways.
|
||||
|
||||
The analytical programs can read individual sequences stored
|
||||
in the following formats: Staden, EMBL, Genbank, PIR (also known as
|
||||
NBRF), and GCG, but for storing whole libraries we use only PIR
|
||||
format. In addition these programs can perform a number of simple
|
||||
operations using libraries stored in this format. They can extract
|
||||
entries by entry name, can search titles for keywords, can search
|
||||
the whole of the annotation files for keywords, and can extract
|
||||
annotations for any named entry. We reformat all sequence libraries
|
||||
into PIR format. Currently we have NBRF, EMBL, SWISSPROT and VECBASE
|
||||
libraries in PIR format.
|
||||
|
||||
The library searching programs operate only on sequences
|
||||
stored in PIR format.
|
||||
|
||||
The analytical programs will operate with uppercase or
|
||||
lowercase sequence characters. In addition T and U are equivalent.
|
||||
SAP uses uppercase letters for original gel readings and lowercase
|
||||
letters for characters that are corrected by the automatic editor.
|
||||
Programs NIP and PIP use IUB symbols for redundancy in back
|
||||
translations and for sequence searches. The symbols are shown
|
||||
below.
|
||||
|
||||
|
||||
NC-IUB SYMBOLS
|
||||
|
||||
A,C,G,T
|
||||
R (A,G) 'puRine'
|
||||
Y (T,C) 'pYrimidine'
|
||||
W (A,T) 'Weak'
|
||||
S (C,G) 'Strong'
|
||||
M (A,C) 'aMino'
|
||||
K (G,T) 'Keto'
|
||||
H (A,T,C) 'not G'
|
||||
B (G,C,T) 'not A'
|
||||
V (G,A,C) 'not T'
|
||||
D (G,A,T) 'not C'
|
||||
N (G,A,C,T) 'aNy'
|
||||
|
||||
|
||||
The user interface
|
||||
|
||||
The user interface is common to all programs. It consists of a
|
||||
set of menus and a uniform way of presenting choices and obtaining
|
||||
input from the user. This section describes: the menu system; how
|
||||
options are selected and other choices made; how values are
|
||||
supplied to the program; how help is obtained, and how to escape
|
||||
from any part of a program. In addition it gives information about
|
||||
saving results in files and the use of graphics for presenting
|
||||
results.
|
||||
|
||||
Menus
|
||||
|
||||
Each program has several menus and numerous options. Each menu
|
||||
or option has a unique number that is used to identify it. Menu
|
||||
numbers are distinguished from option numbers by being preceded by
|
||||
the letter m (or M, all programs make no distinction between upper
|
||||
and lower case letters). With the exception of some parts of program
|
||||
SAP, the menus are not hierarchical, rather the options they each
|
||||
contain are simply lists of related functions and their identifying
|
||||
numbers. Therefore options can be selected independently of the menu
|
||||
that is currently being shown on the screen, and the menus are
|
||||
simply memory aids. All options and menus are selected by typing
|
||||
their option number when the programs present the prompt
|
||||
|
||||
"? Menu or option number =".
|
||||
|
||||
To select a menu type its number preceded by the letter M. To
|
||||
select an option type its number. If you type only "return" you
|
||||
will get menu m0 which is simply a list of menus. If you select an
|
||||
option you will return to the current menu after the function is
|
||||
completed.
|
||||
|
||||
When you select an option, in many cases the program will
|
||||
immediately perform the operation selected without further dialogue.
|
||||
If you precede an option number by the letter d (e.g. D17), you will
|
||||
force the program to offer dialogue about the selected option before
|
||||
the function operates, hence allowing you to change the value of any
|
||||
of its parameters. If you precede an option number by the symbol ?
|
||||
(e.g. ?17), you will be given help on the option (here 17).
|
||||
|
||||
Where possible, equivalent or identical options have been
|
||||
given the same numbers in all programs, and so users quickly learn
|
||||
the numbers for the functions they employ most often.
|
||||
|
||||
Help
|
||||
|
||||
As mentioned above, help about each option can be obtained by
|
||||
preceding the option number by the symbol ? when you are presented
|
||||
with the prompt "? Menu or option number", but there are two further
|
||||
ways of obtaining help. Whenever the program asks a question you can
|
||||
respond by typing the symbol ? and you will receive information
|
||||
about the current option. In addition, option number 1 in all the
|
||||
programs will give help on all of a program's functions.
|
||||
|
||||
Quitting
|
||||
|
||||
To exit from any point in a program you type ! for quit. If a
|
||||
menu is on the screen this will stop the program, otherwise you will
|
||||
be returned to the last menu.
|
||||
|
||||
Other interactions
|
||||
|
||||
Questions are presented in a few restricted ways. In all
|
||||
cases typing only "return" in response to a question means yes, and
|
||||
typing N or n means no.
|
||||
|
||||
Obvious opposites such as "clear screen" and "keep picture"
|
||||
are presented with only the default shown. For example in this case
|
||||
the default is generally "keep picture" so the program will display:
|
||||
|
||||
"(y/n) (y) Keep picture"
|
||||
|
||||
and the picture will be retained if the user types anything
|
||||
other than N or n, (in which case the screen will be cleared).
|
||||
|
||||
Where there are choices that are not obvious opposites, or
|
||||
there are more than two choices, two further conventions are used:
|
||||
"radio buttons" and "check boxes".
|
||||
|
||||
Radio buttons are used when only one of a number of choices
|
||||
can be made at any one time. The choices are presented arranged one
|
||||
above the other, each choice with a number for its selection, and
|
||||
the default choice marked with an X. For example in the restriction
|
||||
enzyme search routine the following choices are offered:
|
||||
|
||||
|
||||
Select output mode
|
||||
1 order results enzyme by enzyme
|
||||
2 order results by positon
|
||||
X 3 show only infrequent cutters
|
||||
4 show names above the sequence
|
||||
|
26
help/stadenp_help
Normal file
26
help/stadenp_help
Normal file
|
@ -0,0 +1,26 @@
|
|||
Standard Staden Programs
|
||||
|
||||
gip Gel input program
|
||||
sap Sequence assembly program
|
||||
(x)dap Sequence assembly program
|
||||
(x)nip Nucleotide interpretation program
|
||||
(x)pip Protein interpretation program
|
||||
(x)sip Similarity investigation program
|
||||
(x)mep Motif exploration program
|
||||
nipl Nucleotide interpretation program (library)
|
||||
pipl Protein interpretation program (library)
|
||||
sipl Similarity investigation program (library)
|
||||
Those with (x) have both tektronix (say nip) and x (say xnip) versions.
|
||||
Environment variables for help files
|
||||
HELPSAP sap
|
||||
HELPDAP dap
|
||||
HELPGIP gip
|
||||
HELPNIP nip
|
||||
HELPPIP pip
|
||||
HELPSIP sip
|
||||
HELPMEP mep
|
||||
HELPSTADEN Introduction and user interface
|
||||
e.g. to read HELPSTADEN type 'more $HELPSTADEN'
|
||||
|
||||
|
||||
|
168
help/ted.help
Normal file
168
help/ted.help
Normal file
|
@ -0,0 +1,168 @@
|
|||
Trace Editor Help
|
||||
-----------------
|
||||
|
||||
The ted trace editor is a prototype to allow the display and editing
|
||||
of traces from sequencing machines, and the simple editing of plain
|
||||
sequences. It runs under the X window system. It provides simultaneous
|
||||
display of traces and bases. The editing allows individual bases to be
|
||||
removed and new ones added, and also a range of bases at either end to
|
||||
be cut off. Currently, only ABI result files and plain sequences are
|
||||
accepted.
|
||||
|
||||
Only one trace can be edited at a time.
|
||||
|
||||
|
||||
Invocation
|
||||
----------
|
||||
|
||||
ted can be run from the command line by simply typing:
|
||||
|
||||
ted
|
||||
|
||||
It will come up with no sequence initially displayed. If provided with
|
||||
any arguments it does not understand, or invalid combinations of
|
||||
arguments, ted will exit with a message indicating its intended usage.
|
||||
ted accepts the standard X arguments allowing, for example, background
|
||||
colour or geometry to be specified. ted can accept an argument
|
||||
specifying an initial file to display. The key for this is the format
|
||||
of the file, for example:
|
||||
|
||||
ted -ABI {ABI format filename}
|
||||
ted -plain {plain format filename}
|
||||
|
||||
The file is then displayed at 50% magnification, with the caret
|
||||
initially positioned at the first base.
|
||||
|
||||
When an initial file is given, a base number of interest and/or a
|
||||
magnification can also be given, for example:
|
||||
|
||||
ted -ABI {ABI format file} -baseNum 280 -mag 30
|
||||
|
||||
or the bottom strand may be specified:
|
||||
|
||||
ted -ABI {ABI format file} -baseNum 280 -mag 30 -bottom 1
|
||||
or
|
||||
ted -ABI {ABI format file} -bottom 1
|
||||
|
||||
or a string of nucleotides on which to center the window:
|
||||
|
||||
ted -ABI {ABI format file} -astring 1
|
||||
or
|
||||
ted -ABI {ABI format file} -astring 1 -mag 30 -bottom 1
|
||||
|
||||
Options can be specified in any order.
|
||||
|
||||
An output filename can be specified in a similar manner:
|
||||
|
||||
ted -ABI inputfilename -output outputfilename
|
||||
|
||||
The default output filename is inputfilename.seq
|
||||
|
||||
If you are running the program on a remote machine, you must
|
||||
specify a display parameter:
|
||||
|
||||
ted -display machine_name:0.2
|
||||
|
||||
You can also specify the size of the opening window or
|
||||
other screen parameters by the following:
|
||||
|
||||
ted -geometry [{width}][x{height}][{+-}{xoff}[{+-}{yoff}]]
|
||||
[-fg {color}] [-bg {color}] [-bd {color}] [-bw {pixels}]
|
||||
|
||||
Displays
|
||||
--------
|
||||
|
||||
When running, ted displays the name of the file it is currently
|
||||
operating on (if any) and the original number of bases.
|
||||
|
||||
A so-called viewport presents four different synchronised views of
|
||||
part of the trace. The top one indicates the sequence indices - the
|
||||
first digit of the number is positioned over the base to which that
|
||||
number corresponds. Below this is a list of the bases as originally
|
||||
found in the file (this is the interpretation of the trace as made by
|
||||
the sequencing machine). Below this is the list of bases as edited by
|
||||
the user --- initially, if this file has not been edited in the past,
|
||||
this is identical to the list of original bases. However, if in a
|
||||
previous session the user has edited this sequence, the edited
|
||||
version of the sequence will appear in the edit window.
|
||||
The final display is of the traces produced by the sequencing
|
||||
machine for the four respective bases.
|
||||
|
||||
Two controls allow the view presented to be adjusted: both are
|
||||
horizontal sliders or scrollbars. The first affects the magnification
|
||||
at which the trace is viewed. The minimum magnification is such that
|
||||
the whole of the trace is visible within the viewport; when a trace is
|
||||
first input, this is the magnification used. The maximum magnification
|
||||
is such that bases are spaced out with several characters of space
|
||||
between them --- this should allow more than enough room for base
|
||||
insertions to be clearly visible. The second scrollbar is immediately
|
||||
above the viewport and allows the user to select which part of the
|
||||
trace is viewed. Both the sliders work in a similar way: the middle
|
||||
mouse button can be used to drag the thumb to any desired position,
|
||||
the left and right mouse buttons can be clicked within the scrollbar
|
||||
to indicate that paging up or down is desired. In the case of the
|
||||
viewport scrollbar, the amount of paging is determined by how far up
|
||||
the scrollbar the pointer is.
|
||||
|
||||
The whole ted window can be expanded and contracted (to an extent) by
|
||||
dragging the "grow-region" provided by whatever window manager is
|
||||
running. The viewport takes up all of this change in size.
|
||||
|
||||
Controls
|
||||
--------
|
||||
|
||||
ted has four buttons. "Quit" exits the program after first checking
|
||||
whether there is a sequence which has been edited and not saved.
|
||||
|
||||
"Help" pops up this window which has a scrollbar on the left allowing
|
||||
all the text to be viewed.
|
||||
|
||||
"Input" presents a dialogue which asks for the format and name of a
|
||||
file to be processed. The bases and (if this is not a plain format
|
||||
file) traces are read in and displayed for editing. The only
|
||||
conversion performed on bases is from 'N' to '-'.
|
||||
|
||||
"Output" presents a dialogue which asks for a filename into which the
|
||||
edited and clipped bases can be saved. The default value can be set
|
||||
on the command line using the "-output" keyword. No conversion of bases
|
||||
is performed on output.
|
||||
|
||||
ted operates in one of three editing modes, one of which is selected
|
||||
from three "radio buttons". The currently selected mode is
|
||||
highlighted.
|
||||
|
||||
Editing
|
||||
-------
|
||||
|
||||
In "Edit sequence" mode, the (lower) list of editable bases can be
|
||||
edited in much the same way as a text editor operates. A "caret" which
|
||||
is visible in the display of edited bases can be moved left and right
|
||||
with the cursor keys (these are sometimes called arrow keys and often
|
||||
appear on numeric keypads). It can also be positioned by clicking any
|
||||
button while the pointer is pointing into either of the list of bases
|
||||
or the traces. The DELETE key deletes the base immediately to the left
|
||||
of the caret. Any printing character can be inserted to the right of
|
||||
the caret by simply typing it. Inserted characters are placed halfway
|
||||
between their neighbours, or if a space is left by the deletion of a
|
||||
base originally there, its position is used. A base can thus be
|
||||
changed by deleting it and entering the new base.
|
||||
|
||||
Note that in the current version of ted the caret is not constrained
|
||||
to remain within the viewed part of the display and that editing can
|
||||
still continue while it is thus invisible. Such editing would probably
|
||||
only occur by accident.
|
||||
|
||||
ted provides a facility to define a cutoff at either end of the trace.
|
||||
A number of the leftmost bases (corresponding to the vector) and the
|
||||
rightmost bases (corresponding to the point where the data become
|
||||
unreliable) can be defined by setting the editor into "Adjust left
|
||||
cutoff" or "Adjust right cutoff" mode. In either of these modes, the
|
||||
pointer and mouse buttons can be used to indicate the cutoff point,
|
||||
and the cursor keys can be used to adjust this leftwards or
|
||||
rightwards. Initially, the cutoff regions are both empty. The cutoff
|
||||
regions are clearly indicated on the list of edited bases display and
|
||||
on the traces display by being drawn with a dimmed background.
|
||||
|
||||
When the sequence is written out, the list of edited bases, with both
|
||||
cutoff regions removed, is written. The output contains newlines
|
||||
for convenient formatting and always ends with one.
|
102
manl/staden.l
Normal file
102
manl/staden.l
Normal file
|
@ -0,0 +1,102 @@
|
|||
.TH staden 1L "November 1991" "MRC LMB" "LOCAL"
|
||||
.SH NAME
|
||||
staden, xstaden \- sequence analysis suite
|
||||
.SH DESCRIPTION
|
||||
.I staden
|
||||
is a suite of programs for sequence analysis. Currently available are
|
||||
.I mep,
|
||||
.I nip,
|
||||
.I pip,
|
||||
.I sap,
|
||||
.I sip,
|
||||
.I nipl,
|
||||
.I pipl,
|
||||
.RI "and " sipl.
|
||||
These all run under the SUN X11
|
||||
.I xterm
|
||||
Tektronix terminal emulator, but also work with the VT640 terminal
|
||||
and the VersaTermPro and MS-Kermit emulators if they login to a SUN.
|
||||
.PP
|
||||
.I xstaden
|
||||
is the same set of programs, named
|
||||
.I xmep,
|
||||
.I xnip,
|
||||
.I xpip,
|
||||
.I xsap,
|
||||
.I xdap,
|
||||
and
|
||||
.I xsip,
|
||||
which run directly under X providing a convenient user interface,
|
||||
including resizable output and pull-down menus. All these programs
|
||||
accept the standard X arguments. The library searching programs
|
||||
nipl, pipl and sipl are only available in xterm form.
|
||||
.PP
|
||||
Sequence library access is provided for the format as distributed
|
||||
on CDROM by EMBL. The CDROM contains the EMBL nucleotide library and
|
||||
the SWISSPROT protein library. The libraries can be left on the
|
||||
CDROM or transferred to hard disk.
|
||||
.PP
|
||||
The programs also provide an interface to the PROSITE protein motif
|
||||
library.
|
||||
.PP
|
||||
Some initialisation is required in order to use the package. csh users
|
||||
should insert the following in their .login files:
|
||||
.IP
|
||||
setenv STADENROOT /home/BioSW/staden
|
||||
.IP
|
||||
source $STADENROOT/staden.login
|
||||
.LP
|
||||
Users of the Bourne shell, sh, should insert the following in
|
||||
their .profile:
|
||||
.IP
|
||||
STADENROOT=/home/BioSW/staden
|
||||
.IP
|
||||
export STADENROOT
|
||||
.IP
|
||||
. $STADENROOT/staden.profile
|
||||
.LP
|
||||
These initialisations will alter your shell's search path so
|
||||
that it can find the program binaries, and other files that are
|
||||
required.
|
||||
.SH ENVIRONMENT
|
||||
The following environment variables may be set in the
|
||||
user's \fI .login\fP or \fI .profile\fP file:
|
||||
.TP 20
|
||||
.BI STADENROOT= /home/BioSW/staden
|
||||
This must be set in the user's initialisation.
|
||||
.TP 20
|
||||
.BI SEQEDT= editor
|
||||
Set the editor to be used by the package. The default is
|
||||
\fIemacs\fP.
|
||||
.SH FILES
|
||||
.PD 0
|
||||
.TP 30
|
||||
$STADENROOT/staden.login
|
||||
csh initialisation
|
||||
.TP 30
|
||||
$STADENROOT/staden.profile
|
||||
sh initialisation
|
||||
.TP 30
|
||||
$STADENROOT/tables
|
||||
Tables used by the programs
|
||||
.TP 30
|
||||
$STADENROOT/help
|
||||
Helpfiles used by the programs, documentation of the user interface
|
||||
and of each of the programs.
|
||||
.TP 30
|
||||
$STADENROOT/tables/SEQUENCELIBRARIES
|
||||
Defines the sequence libraries available, their file descriptors
|
||||
and the prompts to appear on the user's screen.
|
||||
.SH AUTHOR
|
||||
Rodger Staden, MRC Laboratory of Molecular Biology, Hills Rd., Cambridge,
|
||||
CB2 2QH, UK.
|
||||
.SH BUGS
|
||||
.PP
|
||||
When using the xterm programs and in graphics input mode,
|
||||
a carriage return should not be
|
||||
entered on its own but should be preceded by some other character,
|
||||
such as SPACE, COMMA or K. If a carriage return is entered on its
|
||||
own, some garbage will (relatively) harmlessly appear on the plot.
|
||||
.PP
|
||||
General comments on the package can be sent to
|
||||
\fI<rs@uk.ac.cam.mrc-lmb>\fP
|
107
manl/ted.l
Normal file
107
manl/ted.l
Normal file
|
@ -0,0 +1,107 @@
|
|||
.TH ted 1L "July 1991" "MRC LMB" "LOCAL"
|
||||
.SH NAME
|
||||
ted \- trace editor
|
||||
.SH SYNOPSIS
|
||||
.B ted
|
||||
[(
|
||||
.B -ABI\||\|-ALF\||\|-plain
|
||||
)
|
||||
.I tracefilename
|
||||
[
|
||||
.B -baseNum
|
||||
.I number
|
||||
]
|
||||
.B [
|
||||
.B -mag
|
||||
.I number
|
||||
( 1 to 100 )
|
||||
]
|
||||
.B [
|
||||
.B -bottom
|
||||
.I number
|
||||
(1(true) or 0(false))
|
||||
.B ]
|
||||
.B [
|
||||
.B -astring
|
||||
.I nucleotide-string
|
||||
]]
|
||||
.B [
|
||||
.B -enzyme
|
||||
.I 5' cutting sequence
|
||||
]
|
||||
.B [
|
||||
.B -raw
|
||||
.I filename
|
||||
(to be placed at head of xdap compatible .seq file)
|
||||
.B ]
|
||||
[
|
||||
.B -output
|
||||
.I outputfilename
|
||||
]
|
||||
|
||||
.SH DESCRIPTION
|
||||
.B ted
|
||||
is a simple prototype editor for traces produced from automatic
|
||||
sequencing machines. It allows the traces (from the ABI
|
||||
or ALF sequencing machines) produced to be
|
||||
displayed along with the machine's interpretation of these into
|
||||
bases and an initially identical sequence which can be edited
|
||||
by the user. A cutoff region can be defined at both ends. The
|
||||
edited and clipped list of bases can then be written out.
|
||||
.LP
|
||||
When initially run,
|
||||
.B ted
|
||||
displays the trace file
|
||||
.I tracefilename
|
||||
(if given) of the specified format centered on the base number
|
||||
.I baseNum
|
||||
(if given). If no file is provided,
|
||||
.B ted
|
||||
initially displays nothing.
|
||||
.LP
|
||||
The display consists of
|
||||
the control panel and the synchronized view of the base position
|
||||
information, original and edited sequence data,
|
||||
and graphical representation of the trace (with each nucleotide's trace
|
||||
being represented by a different color). The control
|
||||
panel allows the user to read in new trace files (in either
|
||||
bottom or top strand orientation)
|
||||
as well as to search for a string of nucleotides or a certain base position.
|
||||
The information button brings up signal strength and average spacing for
|
||||
ABI files.
|
||||
Scroll bars allow the user to adjust the magnification of or scroll through
|
||||
the sequence and trace data. The user may also choose to change the vertical
|
||||
magnification of the trace data. Further, sequence on the head (vector)
|
||||
or tail (uncertain data) of the sequence may be ``cutoff''
|
||||
using the adjust left and right cutoff buttons. Bases can be inserted,
|
||||
deleted, or replaced as with
|
||||
any ordinary word-processor in the sequence data window. Finally, the
|
||||
sequence may be written to an ascii file using the output button on
|
||||
the control panel. The output filename is specified in a dialogue,
|
||||
but a default value of inputfilename.seq is provided or the default value
|
||||
can be given with the
|
||||
.I outputfilename
|
||||
argument.
|
||||
.LP
|
||||
A simple help system is provided.
|
||||
.SH FILES
|
||||
.PD 0
|
||||
.TP 20
|
||||
.B ted.help
|
||||
Text provided in the help window.
|
||||
.TP
|
||||
.B /usr/lib/X11/app-defaults/Xted
|
||||
Default application resources.
|
||||
.SH ENVIRONMENT
|
||||
.TP 20
|
||||
.SB XFILESEARCHPATH
|
||||
Specifies the locations where
|
||||
.B ted.help
|
||||
is sought.
|
||||
If this is not defined,
|
||||
.B ted.help
|
||||
must be in the
|
||||
.B /usr/lib/X11/app-defaults
|
||||
directory.
|
||||
.SH AUTHORS
|
||||
Tim Gleeson, LaDeana Hillier, Simon Dear.
|
7
src/Misc/README
Normal file
7
src/Misc/README
Normal file
|
@ -0,0 +1,7 @@
|
|||
Miscellaneous Routines Simon Dear, 14 April 1992
|
||||
---------------------------------------------------------------
|
||||
|
||||
The source modules in this directory are for commonly used
|
||||
routines. The archive misc.a should be made before any
|
||||
other programs supplied on this tape.
|
||||
|
15
src/Misc/crash.c
Normal file
15
src/Misc/crash.c
Normal file
|
@ -0,0 +1,15 @@
|
|||
#include "misc.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h> /* varargs needed for v*printf() prototypes */
|
||||
|
||||
/*
** Report a fatal error and terminate.
**
** `format` is a printf-style format string; the remaining arguments
** are the values it refers to.  The formatted message is written to
** stderr and the process then exits with status 1.  Never returns.
*/
void crash (char* format,...)
{
    va_list ap;

    va_start(ap, format);
    vfprintf(stderr, format, ap);
    va_end(ap);

    exit(1);
}
|
14
src/Misc/date.c
Normal file
14
src/Misc/date.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
#include "misc.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/******************************************************************************/
|
||||
/*
|
||||
** Time and date calculations
|
||||
*/
|
||||
#include <time.h>
|
||||
/*
** Return the current local date and time as text.
**
** The result is whatever ctime() produces for the present moment, so
** it points at storage owned by the C library: do not free it, and
** expect it to be overwritten by later ctime()/asctime() calls.
*/
char *date_str()
{
    time_t now = time(NULL);

    return ctime(&now);
}
|
39
src/Misc/filenames.c
Normal file
39
src/Misc/filenames.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
#include "misc.h"
|
||||
#include <string.h>
|
||||
|
||||
char *fn_tail(char *fn)
/*
** Return file part (:t) of
** directory path
**
** The result points into `fn` itself, just past the last '/'.
** If `fn` contains no '/', `fn` is returned unchanged; if it ends
** in '/', the result is the empty string at its end.
*/
{
    /*
    ** strrchr() finds the last separator without ever forming a
    ** pointer before the start of the string, which a backward
    ** scan does for empty or slash-less names.
    */
    char *slash = strrchr(fn, '/');

    return (slash != NULL) ? slash + 1 : fn;
}
|
||||
|
||||
|
||||
void fn_toupper (char *s)
/*
** Upper-case the file name component of the path `s`, in place.
** Everything up to and including the last '/' is left untouched.
*/
{
    char *leaf = fn_tail(s);

    str_toupper(leaf);
}
|
||||
|
||||
|
||||
|
||||
void fn_tolower (char *s)
/*
** Lower-case the file name component of the path `s`, in place.
** Everything up to and including the last '/' is left untouched.
*/
{
    char *leaf = fn_tail(s);

    str_tolower(leaf);
}
|
41
src/Misc/files.c
Normal file
41
src/Misc/files.c
Normal file
|
@ -0,0 +1,41 @@
|
|||
#include "misc.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
/* Alliant's Concentrix <sys/stat.h> is hugely deficient */
|
||||
/* Define things we require in this program */
|
||||
/* Methinks S_IFMT and S_IFDIR aren't defined in POSIX */
|
||||
#ifndef S_ISDIR
|
||||
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
|
||||
#endif /*!S_ISDIR*/
|
||||
#ifndef S_ISREG
|
||||
#define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
|
||||
#endif /*!S_ISREG*/
|
||||
|
||||
/*
** Return non-zero if `fn` names a directory.
** Returns 0 if it does not, or if stat() fails on it.
*/
int is_directory(char * fn)
{
    struct stat info;

    return (stat(fn, &info) == 0) ? S_ISDIR(info.st_mode) : 0;
}
|
||||
|
||||
/*
** Return non-zero if `fn` names a regular file.
** Returns 0 if it does not, or if stat() fails on it.
*/
int is_file(char * fn)
{
    struct stat info;

    return (stat(fn, &info) == 0) ? S_ISREG(info.st_mode) : 0;
}
|
||||
|
||||
/*
** Return 1 if stat() succeeds on `fn` (whatever kind of object it
** is), 0 otherwise.
*/
int file_exists(char * fn)
{
    struct stat info;

    if (stat(fn, &info) != 0)
        return 0;
    return 1;
}
|
||||
|
||||
/*
** Return the size in bytes that stat() reports for `fn`, or 0 if
** stat() fails.  The size is narrowed to int on return, so a missing
** file and an empty one are indistinguishable here.
*/
int file_size(char * fn)
{
    struct stat info;

    return (stat(fn, &info) == 0) ? (int) info.st_size : 0;
}
|
||||
|
39
src/Misc/find.c
Normal file
39
src/Misc/find.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
#include "misc.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
** Locate `file`, first as given and then under each directory of a
** colon-separated search path.
**
**   file        name to look for
**   searchpath  directories separated by ':' (empty entries are
**               skipped); may be NULL, in which case only `file`
**               itself is tried
**   found       predicate called with each candidate path; a
**               non-zero return accepts the candidate
**
** Returns the accepted path, or NULL if no candidate was accepted.
** The result lives in a static buffer that is overwritten by the
** next call.  Candidates that would not fit in that buffer are never
** written to it: an over-long `file` accepted as given yields NULL,
** and an over-long "dir/file" combination is skipped.
*/
char *myfind(char *file, char* searchpath, int (*found) (char *) )
{
    static char wholePath[1024];
    const char *delimiters = ":";
    const char *dir;
    size_t fileLen;

    if (found(file)) {
        fileLen = strlen(file);
        if (fileLen >= sizeof wholePath)
            return NULL;
        /* memmove: the caller may pass back a previous result */
        memmove(wholePath, file, fileLen + 1);
        return wholePath;
    }

    if (searchpath == NULL)
        return NULL;

    fileLen = strlen(file);

    /*
    ** Walk the search path in place.  No copy is needed because
    ** nothing is written into it, and no strtok() state exists for
    ** the `found` callback to disturb.
    */
    dir = searchpath + strspn(searchpath, delimiters);
    while (*dir != '\0') {
        size_t dirLen = strcspn(dir, delimiters);

        /* need room for dir + '/' + file + terminating NUL */
        if (dirLen + 1 + fileLen < sizeof wholePath) {
            memcpy(wholePath, dir, dirLen);
            wholePath[dirLen] = '/';
            memcpy(wholePath + dirLen + 1, file, fileLen + 1);
            if (found(wholePath))
                return wholePath;
        }

        dir += dirLen;
        dir += strspn(dir, delimiters);
    }

    return NULL;
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue