init

2021-12-04 05:07:58 +00:00 · 2021-12-04 05:07:58 +00:00 · f8b7bfff1b
commit f8b7bfff1b
949 changed files with 253751 additions and 0 deletions
--- a/README.txt
+++ b/README.txt
@ -0,0 +1,296 @@
+                       General Information
+                   (Not for the faint hearted)
+
+			30 September 1992
+
+
+0. Introduction
+---------------
+
+This document contains information on the following subjects:
+
+   1. Installing the Staden Package on SPARCstations and DECstations
+   2. Installing the Staden Package on Other Machines
+   3. A Quick Guide to What's on the Release Tape
+   4. Overview of Data Flow During Sequence Assembly
+   5. Acknowledgements
+
+
+
+1. Installing the Staden Package on SPARCstations and DECstations
+-----------------------------------------------------------------
+
+We are endeavouring to make the installation of the Staden Package as
+quick and as easy as possible. In this current release we provide
+statically linked sparc and mips executables as well as all sources.
+
+To install the package:
+
+1) Create a new directory for the software. You may have to log on as
+superuser to do this.
+
+	% mkdir -p /home/BioSW/staden
+
+2) Place the distribution tape in the drive and download the package:
+
+   -sun-
+	% tar xvf /dev/rst0
+	...system messages...
+
+   -dec- 
+	% tar xvf /dev/rmt0h
+	...system messages...
+
+3) Users of the C Shell should add the following to their .login
+file:
+
+	setenv STADENROOT /home/BioSW/staden
+	source $STADENROOT/staden.login
+
+Users of the Bourne shell should add the following to their .profile
+file: 
+
+	STADENROOT=/home/BioSW/staden
+	export STADENROOT
+	. $STADENROOT/staden.profile
+
+
+4) When the user next logs onto the work station the required
+initialisation will automatically be performed, and the programs in
+the Staden package can be run. Refer to the help/*.MEM files for
+information on the various programs. (eg help on xdap is in
+help/DAP.MEM)
+
+
+2. Installing the Staden Package on Other Machines
+--------------------------------------------------
+
+This is a little more difficult as you will need to remake all the
+executables. Your system configuration may also mean that some changes
+will need to be made, though hopefully only to makefiles. We provide
+a script to aid installation (we hope!), but you may prefer to make
+all the components manually.
+
+To remake the Staden package you will require the following:
+	1) A Fortran77 compiler
+	2) An ANSI C compiler
+	3) X11 Release 4, including the Athena Widget libraries.
+
+Start by following steps 1 through 3 above, to unload the sources and
+perform initialisations. Read the rest of this document and the other
+help files. Look at the make files. Follow your nose!
+
+If you have any problems or successes porting our software to other
+platforms we would love to hear from you. We would also appreciate
+receiving your general comments on the package.
+
+Rodger Staden (principal author)
+  phone: +44 223 402389  email: rs@mrc-lmba.cam.ac.uk
+  post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
+Simon Dear:    
+  phone: +44 223 402266  email: sd@mrc-lmba.cam.ac.uk
+  post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
+James Bonfield:
+  phone: +44 223 402499  email: jkb@mrc-lmba.cam.ac.uk
+  post: MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, U.K.
+
+
+
+3. A Quick Guide to What's on the Release Tape
+----------------------------------------------
+
+The directory structure on this tape is very important. Once set up, the Staden
+package expects things to be in a predefined place. The root directory
+of the structure is referred to by the environment variable
+STADENROOT. Below this there should be at least the following:
+
+1) bin/
+All executable files and scripts should be in this directory.
+$STADENROOT/bin is added to the search path by the script staden.login
+(or staden.profile if you are using the Bourne Shell). Though you are
+not forced to keep programs here, we find it is the simplest place to
+keep them.
+
+2) help/
+All on-line help files are in this directory. Files of the form *.MEM
+or *.mem are formatted ascii files and can be printed for personal
+reference. The script staden.login sets up many environment variables
+that refer to files in this directory, as well as modifying
+XFILESEARCHPATH, which is used by X programs.
+
+3) manl/
+Local manual pages for ted and the staden package are in this directory. The
+environment variable MANPATH is modified in staden.login to search
+here too.
+
+4) staden.login and staden.profile
+These two files are scripts to set up environment variables required
+by the Staden package. C Shell users should source staden.login from
+their .login file, and Bourne Shell users should "source" staden.profile
+from their .profile file. See "Installing the Staden Package on
+SPARCstations and DECstations", Part 3.
+
+5) tables/
+Configuration files for the Staden package are in this directory.
+Various environment variables are set in staden.login to refer to
+files in this directory.
+
+Also of use are the following:
+
+doc/           - Miscellaneous documentation.
+userdata/      - Sample databases
+src/           - program sources
+ReleaseNotes   - Notes on this and future releases
+Staden_install - Installation script
+SequenceLibraries - Notes on the use and installation of sequence libraries
+
+
+Program Sources
+---------------
+
+All the program sources are found in the directories in $STADENROOT/src:
+
+0) Misc/
+Sources for a library of useful routines used by the staden package.
+** Should be made before the programs in staden/ **
+
+1) staden/
+Sources for the Staden suite: mep, xmep, nip, xnip, nipl, pip, xpip,
+pipl, sap (now superseded by dap), xsap (now superseded by xdap), sip,
+xsip, sipl, dap, xdap, splitp1, splitp2, splitp3, gip and convert_project.
+
+2) ted/
+Sources for the trace display and sequence editing program ted.
+
+3) abi/
+Sample scripts and programs for handling ABI 373A data files.
+
+4) alf/
+Sample scripts and programs for handling Pharmacia A.L.F. data files.
+
+Each directory has appropriate makefiles and README files.
+
+
+
+4. Overview of Data Flow During Sequence Assembly
+-------------------------------------------------
+
+During a sequence assembly project the data can enter the sequence
+assembly program from various routes (See Figure below).
+
+
+       
+             Fluorescent Based
+             Sequencing Machine
+                Chromatogram                      Autoradiogram
+
+	 ABI 373A     	Pharmacia A.L.F.                |  
+             |                 |                        |
+             |                 |                        |
+             |             alfsplit                     |
+             |                 |                        |
+             +--------+--------+                        |
+                      |                                 |
+                      |                                 |
+                     ted                              (gip)
+                      |                                 |
+                      +----------------+----------------+
+                                       |
+                                       |
+                                     xdap
+
+
+                 Figure 1: Data Flow Through The Staden Suite
+
+
+The Pharmacia A.L.F. data files in their original format consist of
+one file for the (up to 10) samples that were on the gel. The program
+alfsplit divides the file up so that each sample is in a file of
+its own. From then on each gel reading can be handled individually.
+Whether these files can be transferred back to the Compaq for
+reprocessing is unknown.
+
+All data from fluorescent based sequencing machines must pass through
+the trace editing program ted. Ted allows vector sequence at the
+5' end and unreliable data at the 3' end to be clipped. The sequence
+can be edited if desired, though we should stress that this is NOT
+RECOMMENDED when used in conjunction with xdap. Ted translates all
+Pharmacia A.L.F. uncertainty codes to a hyphen ("-") and outputs the
+clipped sequence, along with additional information on the position
+and content of cutoffs, to a file.
+
+People wanting to use xdap with ABI and Pharmacia files, but who have
+written their own trace clipping software should be aware that xdap
+requires information to be passed in the sequence file so that
+traces can be displayed. You may want to modify your software to be
+compatible with our file format. The file consists of five parts:
+
+	1) Cut off information (Optional).
+	Format is ";%6d%6d%6d%-4s%-16s", where
+	field 1 = total number of bases called
+	      2 = number of bases in the clipped sequence at the 5' end
+	      3 = number of bases in the sequence in this file
+	      4 = type of trace file.
+	          "ALF " - Pharmacia A.L.F.
+		  "ABI " - ABI 373A
+		  "SCF " - SCF
+		  "PLN " - Text only
+	      5 = name of trace file.
+
+	2) Content of the clipped sequence at the 5' end (Optional).
+	The sequence can extend over several lines. Each line must
+	begin with ";<" and should be less than 80 characters in
+	length.
+
+	3) Content of the clipped sequence at the 3' end (Optional).
+	The sequence can extend over several lines. Each line must
+	begin with ";>" and should be less than 80 characters in
+	length.
+
+	4) Initial tags for the sequence (Optional)
+	Format is: ";;%4s %6d %6d %s\n", where
+	field 1 = type of tag to be created (see $STADTABL/TAGDB)
+	      2 = position of tag
+	      3 = length of tag
+	      4 = annotation for tag (optional)
+	This feature is only available in the program xbap, which
+	at the time of writing is not yet being distributed with
+	the package.
+
+	5) The sequence, which can extend over several lines. Each
+	line should be less than 80 characters in length.
+
+Here is a sample file:
+
+;   660    55   450ABI a21d12.s1RES
+;<AGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCGGTTCCTTCTGG
+;<ATATC
+;>-GATAAGCTGATTTG-TTT-CCATTATGGC-GGTTTGAGCCTC-G-GGTC
+;>GACCACTCGGTGTGCCAGGAAGGGGTCTGAAATTGAATGGGTTATCACTA
+;>GGCGACGTTT--TTTTCAAATTCCGGGCTAAATTTTACGGC-GGA-CGGT
+;>TCCG-
+;;COMM      1     10 M13mp18 subclone
+CAAGACATTTTGAAATACTTGGAATACTGAATCCAAGATGTGGAACATTA
+GACATATCCGTGTGCTCAACAATCGACATTTGATCCACTGATGAAAATGT
+TCTTCGTTTAGAATTTCTCATAGCATCAGCCACTTTTGCATAATACTCGA
+TTGAAGGTTCATGGAAAAAGCTGCGTAGAAGGCATGTCATTGTGCTTACG
+AGCCATTTCGGATATCTTGTGAATTTAGCAGGAAGTTCTGTAACTGGTTG
+GAATTCAAATATATCAGTTCTTCTTCCTGGATCTCGTCCTTTTTGCACTA
+AAACCATTGCGATTGCATCCGGATTCTGAGTAAGAGCCACTACAGCTTTA
+TGATACAGGCTCTTGTTATTCCTTTCGTGCTCGAATGGGAACTTTCCAGT
+GGCACAAAAATATAGTGTACATCCCAGAGCCCATAGATCACATGTTCCGA
+
+
+
+5. Acknowledgements
+
+We would like to thank Applied Biosystems, Inc. and Pharmacia LKB
+Biotechnology for their cooperation in agreeing to our routines
+accessing the data files of their fluorescent sequencing machines.
+
+373A sequence data file formats are the exclusive property of Applied
+Biosystems, Inc.
+
+ALF sequence data file formats are the exclusive property of Pharmacia
+LKB Biotechnology, Inc.
+
--- a/190
+++ b/190
@ -0,0 +1,190 @@
+		Release Notes for Staden Package 1992.3
+		---------------------------------------
+
+
+	Installation guide
+	------------------
+
+The file doc/install.PS contains installation instructions.
+
+
+	Manual for the Staden Package
+	-----------------------------
+
+There is now a 135 page manual on the Staden Package. It is currently
+being distributed as a Word4 document on a Macintosh floppy disk.
+
+
+	Feedback and bug reports
+	------------------------
+
+We welcome comments and suggestions on all aspects of the package and are
+best contacted by email: rs@uk.ac.cam.mrc-lmb and sd@uk.ac.cam.mrc-lmb.
+All abnormal terminations are bugs and we would like to be told of them
+so they can be fixed. We recommend that you request an update at least once
+a year as the package is evolving very rapidly.
+
+Note: due to popular demand we have decided to release new routines earlier 
+than in the past so please report bugs. The documentation for additions may
+be sparser than before, or non-existent, but if there is something with which 
+you need help, email us.
+
+
+	Changes this release
+	--------------------
+
+
+	The assembly programs bap and xbap have several new functions:
+	1. Find single stranded regions and try to fill them with "hidden"
+	data from the adjacent readings.
+	2. Find single stranded regions (includes ends of contigs) and 
+	select primers and templates for double stranding them (joining
+	them).
+	3. Pre assembly screening for readings to find those that align
+	best. Optionally the hidden data can also be included in the
+	comparison (part of assembly function).
+	4. Find pairs of readings taken from opposite ends of the same
+	template (ie forward and reverse read pairs). List or plot their
+	positions.
+	5. A new function to check that readings have been assembled into
+	the correct positions. It aligns the hidden (previously termed "unused")
+	parts of readings with the consensus they overlap to see how well
+	they align. Poor alignments are reported.
+	6. During assembly each reading is now allowed to match up to 100
+	different places.
+
+	It might be guessed from the above that we are trying to improve our
+	ability to deal with the assembly of human data. Hence, also the next
+	addition.
+
+	A new experimental program (rep) for screening readings for Alu
+	sequences prior to assembly. The Alu containing segments are tagged
+	so they can be seen in the contig editor. A library of Alu sequences
+	is included in /tables/alus. The program is quite slow as it compares
+	each reading in both orientations with all of the Alu sequences (126
+	of them) in order to find the best match. Only time and more data will
+	tell how sensitive it is, and whether the current default score of 0.6
+	is "correct". BEWARE rep modifies the original reading files to include
+	the tag information. The only information is in /help/alu.help
+
+	A new program for extracting sets of sequences and their annotations
+	from the sequence libraries (lip). The only information is in
+	/help/lip.help
+
+	Changes to the xterm user interface. These routines have been completely
+	rewritten. One addition is that now ?? in response to a question will
+	allow the user to get help on any function in a program. help is also
+	improved in the x version.
+
+
+	Changes last release
+	--------------------
+
+
+	DAP, XDAP have been replaced by BAP and XBAP (see below)
+
+	A new function for examining repeats has been added to NIP
+
+	A new repeat search has been added to SIP
+
+	Some outputs have been changed to produce FASTA format files
+	instead of PIR.
+
+	MEP now allows searches for motifs in which any 8 out of a string
+	of 20 can be switched on.
+
+	The manual has been updated.
+
+        Keyword and author searches on sequence libraries
+
+	All programs that use the libraries can now perform author
+and keyword searches on all libraries (only nip did so before).
+
+	Postscript output
+
+	All graphics can now be saved to disk in postscript form by
+use of a sub-option in "Redirect output".
+	
+
+
+	Sequence assembly
+
+BAP, XBAP replace DAP and XDAP. A program to convert DAP databases to BAP
+databases (convert) is included. BAP databases can contain up to 8000 readings
+and a consensus of 500,000 bases. A minor edit and recompilation will allow
+up to 99,999 readings. The space is used more efficiently now as the databases
+grow as the number of readings increases. Reading names can be 16 characters
+in length. In addition:
+
+1) Assembly is 4 times as fast as in the DAP.
+
+2) Find internal joins is 5 times as fast and now brings up the join editor
+with the two contigs in the correct orientation and aligned.
+
+3) The assembly routines align pads better, plus a new automatic function can
+also be used to align them prior to editing.
+
+4) The contig editor has been greatly speeded up and its functionality
+has been enhanced.
+
+5) A routine for selecting oligos for primer walking is included. 
+
+6) A new routine allows batches of readings to be removed from a database. 
+
+7) We have also included routines for making SCF files, for getting the 
+sequence from SCF files, and one for marking the poor quality data in 
+readings. See the manual.
+
+	Sequence library formats
+
+	The standard sequence library indexing method is now that used on the
+EMBL CD-ROM. The libraries (EMBL nucleotide and SWISSPROT protein) can be
+left on the CD-ROM or copied to disk. We include in the package programs
+for creating this type of index for EMBL updates, PIR in codata format,
+NRL3D and GenBank. If the indexes are created all programs can read all
+these libraries. Programs and scripts for this task are contained in the
+directory indexseqlibs.
+	The keyword and author searches are particularly fast and the
+keyword index is based on ALL text in the files - not just the keywords.
+
+	Feature table formats
+
+	The programs now use the new feature table format common to EMBL
+and GenBank, but retain the old format for SWISSPROT which has not yet
+changed. 
+
+	For details of the above see file SequenceLibraries.
+
+	Pattern searches
+
+	Pipl and Nipl now have the facility to find only the best scoring
+match for each sequence. The prompt is "? report all matches", so typing
+only return means all matches will be shown and typing n means only the
+highest scoring will be reported. It is particularly useful when employed
+to create alignments. The corresponding help file has not been updated.
+Also to incorporate long unix file names the pattern files no longer include
+the annotation "filename".
+
+
+	Nip
+
+	Option 38 in nip "translate and list" has been removed as the
+more flexible routines of option 39 incorporate all its functionality. Many
+options that relate to feature tables have been modified but their help files
+are not yet up to date.
+
+
+        Vep
+
+	A program (vep) for automatic excising of vector (either
+sequencing vector or cosmid vector) sequences from readings is now
+included in the package.
+
+
+
+
+	Rodger Staden, Simon Dear, James Bonfield
+
+  
+
+
--- a/420
+++ b/420
@ -0,0 +1,420 @@
+	Notes on library handling
+	-------------------------
+
+Contents of this document:
+
+I)   Introduction
+II)  Details of file organisation and use
+III) Options currently available
+IV)  Installation guide
+V)   New feature table handling routines
+VI)  Indexing the sequence libraries
+
+
+		Section I Introduction
+		----------------------
+
+Available sequence libraries
+
+There are a number of different sequence libraries for nucleotide and protein:
+PIR, GenBank, EMBL, Swissprot, and the Japanese Databank. Even after all the
+years of their existence they still use different formats for their data. This
+provides tedious and unrewarding work for software developers. Recently EMBL
+and GenBank agreed a new and common way of writing their feature tables, which
+is a great help, although the rest of their format is different. Swissprot still
+uses the old embl style feature table format and PIR yet another.
+
+All the libraries distribute their data on magnetic tapes and EMBL and GenBank
+have started to distribute on cdrom. The EMBL cdrom also contains Swissprot.
+The GenBank and EMBL cdroms use different formats and have different contents.
+The EMBL cdrom has useful indexes sorted alphabetically: those for entry name
+and accession number, brief descriptions, keywords and freetext indexes are
+already available and others are expected. These indexes point to the data for
+each entry, and can be used to extract the data for any entry quickly.
+
+Moving to unix
+
+The VAX version of our package used PIR format which meant reformatting all
+libraries other than PIR into that format.  This required, at least
+temporarily, having space for two copies of the libraries, and quite a lot of
+cpu time. The software for doing this was provided by PIR, and is very VAX
+specific and hence will not run under unix. For the unix version of our package
+I have decided to use the EMBL cdrom format and its indexes as the primary
+format.  The current programs also support the use of PIR format libraries
+without indexes - ie just the sequence and annotation files.
+
+Indexing GenBank, EMBL updates, PIR and NRL3D
+
+We include programs to create indexes for the above libraries. See below and
+the README file in indexseqlibs. The programs can read all the above libraries
+once the indexes are created. The indexing programs index the data in its
+distributed form: WE DO NOT REFORMAT OR COPY THE LIBRARIES but simply create
+indexes to the original files. Obviously this saves a lot of disk space, and
+for those content to use only embl and swissprot from the cdrom, almost no disk
+space is required. We haven't tried it yet, but for genbank on cdrom, the only
+extra disk space required would be for the indexes.
+
+ ---------------------------------------------------------------------------
+
+		Section II Details of file organisation and use
+		-----------------------------------------------
+
+The following strategy has been used to try to deal with alternate
+and changing sequence library formats.
+
+1) libraries are described at several levels:
+
+	a) the top level file is a list of available libraries which contains:
+ 		the library type, the name of the file containing the name of
+		each library's individual files, and the prompt to appear on
+		the user's screen: LTYPE LOGNAM PROMPT
+
+	b) the file containing the names of the library's individual files
+		contains flags to define the file types: FTYPE LOGNAM
+
+	c) the individual library files
+
+
+
+2) library types handled: 
+
+	a) EMBL/SWISSPROT in distributed format with cdrom index format
+	   LTYPE = 'A'
+	b) GenBank in distributed format with cdrom index format LTYPE = 'C'
+	c) PIR/NRL3D in CODATA format with cdrom index format LTYPE = 'B'
+	d) PIR/NBRF .seq files can be read sequentially as "personal files
+	   in PIR format" and do not appear in the list of available libraries.
+	e) FASTA format files can be read sequentially as "personal files
+	   in FASTA format" and do not appear in the list of available
+	   libraries.
+
+3) EMBL, SWISSPROT and other libraries for which EMBL-style indexes have been
+created
+
+		current file types: 
+
+		A division.lookup
+		B entryname.index
+		C accession.target
+		D accession.hits
+                E brief description
+                F freetext.target
+                G freetext.hits
+                H author.target
+                I author.hits
+
+
+                   Library list
+level 1
+			|
+			|
+	        -----------------------------------------------------------
+		|			|			|
+	  lib 1 file list		lib 2 file list		lib 3 file list
+level 2
+		|			|
+ 	  --------		---------
+level 3
+	  file 1			file 1
+	  file 2			file 2
+	  .			.
+	  file n			file n
+
+ ---------------------------------------------------------------------------
+
+
+Example
+-------
+
+Level 1
+
+	File name: sequence.libs 
+	Environment variable: SEQUENCELIBRARIES
+	Contents:
+
+A EMBLFILES EMBL nucleotide library ! in cdrom format
+C GENBFILES GenBank nucleotide library!
+A SWISSFILES SWISSPROT protein library! in cdrom format
+B PIRFILES PIR protein library!
+B NRL3DFILES NRL3D protein library!
+
+	Notes: 
+
+The libraries have types A,B,C.  The logical names are EMBLFILES and
+SWISSFILES, etc and the prompts are 'EMBL nucleotide library' and
+'SWISSPROT protein library', etc.  Anything to the right of a ! is a comment.
+
+Level 2: the list of library files (using embl as an example)
+
+	File name: embl.files
+	Environment variable: EMBLFILES
+	Contents:
+
+A EMBLDIVPATH/embl_div.lkp
+B EMBLINDPATH/entrynam.idx
+C EMBLINDPATH/acnum.trg
+D EMBLINDPATH/acnum.hit
+E EMBLINDPATH/brief.idx
+F EMBLINDPATH/freetext.trg
+G EMBLINDPATH/freetext.hit
+H EMBLINDPATH/author.trg
+I EMBLINDPATH/author.hit
+
+
+Level 3: the sequence and annotation files (eg 15 for embl, 1 for swissprot).
+
+	Paths and file names:
+
+   EMBLPATH/bb.dat
+   EMBLPATH/fun.dat
+   EMBLPATH/inv.dat
+   EMBLPATH/mam.dat
+   EMBLPATH/org.dat
+   EMBLPATH/patent.dat
+   EMBLPATH/phg.dat
+   EMBLPATH/pln.dat
+   EMBLPATH/pri.dat
+   EMBLPATH/pro.dat
+   EMBLPATH/rod.dat
+   EMBLPATH/syn.dat
+   EMBLPATH/una.dat
+   EMBLPATH/vrl.dat
+   EMBLPATH/vrt.dat
+
+All files from the division lookup file down are exactly as they appear on the
+cdrom.  The division lookup file relates numbers stored in the indexes to
+actual division (or data) files stored on the disk. We rewrite it so the
+directory structure and file names can be chosen locally. Its format is
+I6,1x,A. An example is given below.
+
+	Division lookup file 
+
+	File name: STADTABL/embl_div.lkp
+	Environment variable path EMBLDIVPATH
+	Contents:
+
+     1 EMBLPATH/bb.dat
+     2 EMBLPATH/fun.dat
+     3 EMBLPATH/inv.dat
+     4 EMBLPATH/mam.dat
+     5 EMBLPATH/org.dat
+     6 EMBLPATH/patent.dat
+     7 EMBLPATH/phg.dat
+     8 EMBLPATH/pln.dat
+     9 EMBLPATH/pri.dat
+    10 EMBLPATH/pro.dat
+    11 EMBLPATH/rod.dat
+    12 EMBLPATH/syn.dat
+    13 EMBLPATH/una.dat
+    14 EMBLPATH/vrl.dat
+    15 EMBLPATH/vrt.dat
+ ---------------------------------------------------------------------------
+
+
+		Section III Options currently available
+		---------------------------------------
+
+Facilities currently offered in nip,pip,sip,nipl,pipl,sipl:
+
+	Get a sequence by knowing its entry name
+	Get a sequence's annotation by knowing its entry name
+	Get an entry name by knowing its accession number
+        Search the freetext index
+        Search the author index
+
+Facilities currently offered in nipl,pipl,sipl:
+
+	Search whole library
+	Search only a list of entry names
+	Search all but a list of entry names
+
+Outline of each type of operation
+
+Looking for an entry by name: the programs will open the library description
+file and read the names of its files and their file types. Then they will open
+the entrynam.idx file, and find the sequence offset, annotation offset and
+division number. Then open the division lookup file, find the file name for the
+division required, open that file, seek to the required byte and get the data.
+
+Looking for an entry by accession number: the programs will open the library
+description file and read the names of its files and their file types. Then
+they open the acnum.trg and acnum.hit files. The acnum.trg file is read to find
+the accession number and a pointer to the acnum.hit file and the number of
+hits.  That file is read and the corresponding entry names displayed. At
+present no further action is performed, although I expect to list out the
+titles for the entries found.
+
+Searching the whole of a library: the programs will open the library
+description file and read the names of its files and their file types. Then
+they open the division lookup file, read the names and numbers of the sequence
+files, open all of them, then open the entryname file. Then the library is
+processed sequentially by reading the entry names, their sequence offsets and
+division numbers from the entry names file, and then the sequence from the
+appropriate data file.
+
+Searching the whole of a library using a list of entry names to include: the
+programs will open the library description file and read the names of its files
+and their file types. Then they open the division lookup file, read the names
+and numbers of the sequence files, open all of them, then open the entryname
+file. Then the library is processed by reading the list of entry names and
+finding the names in the entry names file to get their sequence offsets and
+division numbers, and then the sequence from the appropriate data file. It will
+stop when it reaches the end of the list of entry names. The list of entry
+names can be in any order.
+
+Searching the whole of a library using a list of entry names to exclude: the
+programs will open the library description file and read the names of its files
+and their file types. Then they open the division lookup file, read the names
+and numbers of the sequence files, open all of them, then open the entryname
+file. Then the library is processed sequentially by reading the list of entry
+names, reading the next entry in the entry names file to make sure it does not
+match, then getting the sequence offsets and division numbers, and then the
+sequence from the appropriate data file. If the next name matches the name on
+the list of entry names, it will be skipped, and the next name to exclude read.
+If the list of excluded names is finished the rest of the library is searched
+sequentially. The list of entry names must be in the same order as those in the
+library (ie sorted alphabetically).
+
+Searching a whole library using a PIR format file is performed by reading it
+sequentially. If a list of entry names is used it must be in the same order as
+the entries in the library file.
+ ---------------------------------------------------------------------------
+
+
+
+
+		Section IV Installation guide
+		-----------------------------
+
+EMBL CDROM
+
+ The data can be left on the cdrom or copied to hard disk. The files
+staden.login and staden.profile source the files $STADTABL/libraries.config.csh
+and $STADTABL/libraries.config.sh respectively. Refer to these files to see what
+is required to install, add or move a sequence library that you want to be used
+by the programs.
+
+Other libraries (PIR, Genbank, EMBL updates)
+
+Create the indexes then edit the files that tell the programs where the data is
+stored.  The files staden.login and staden.profile source the file
+$STADTABL/libraries.config. Refer to this file to see what is required to
+install, add or move a sequence library that you want to be used by the
+programs.
+
+
+------------------------------------------------------------------------------
+
+
+		Section V New feature table handling facilities
+		-----------------------------------------------
+
+As mentioned above EMBL and GenBank have recently introduced new feature tables
+for annotating the sequences. They are a great improvement on the previous ones
+and, among other things, now permit correct translation of spliced genes.
+Various options within nip have been added or modified to take advantage of
+these changes.  The routine to translate DNA to protein and write the protein
+to disk now gives correct results for spliced genes. The routine to translate
+DNA to protein and display the two together now gives correct translations
+except for the amino acids spanning intron/exon junctions. The routine to plot
+maps from feature tables can use the new style. The open reading frame finding
+routine writes out its results in the new style. The routine that finds open
+reading frames and writes their translations to disk also writes a title in the
+form of a new style feature table entry. The feature table format output from
+the pattern searches in nip also uses the new style.
+
+ 
+
+----------------------------------------------------------------------------
+
+                Section VI Indexing the sequence libraries
+                --------------------------------------------
+
+We handle EMBL, SwissProt, and GenBank in their distributed format, plus
+PIR and NRL3D in codata format. All programs and scripts are in directory 
+indexseqlibs.
+
+Currently we produce entryname index, accession number index, freetext index,
+and brief index (brief index contains the entry name, the primary accession 
+number, the sequence length and an 80 character description).
+
+To produce any of the indexes requires the creation of several intermediate
+files and the indexing programs are written so that the intermediate files
+are the same for all libraries. This means that only the programs that read
+the distributed form of each library need to be unique to that library, and
+all the other processing programs can be used for all libraries.
+
+
+However, even though the indexes have the same format, programs (like nip)
+that read the libraries need to treat each library separately because their
+actual contents are written differently.
+
+Making the entry name index
+---------------------------
+
+Common program entryname2
+
+EMBL		emblentryname1
+SwissProt	emblentryname1
+
+GenBank		genbentryname1
+
+PIR		pirentryname1
+NRL3D		pirentryname1
+
+
+Making the accession number index
+---------------------------------
+
+Common programs access2 access3 access4
+
+EMBL		emblaccess1
+SwissProt	emblaccess1
+
+GenBank		genbaccess1
+
+PIR		piraccess1 piraccess2 
+NRL3D		No accession numbers
+
+Making the brief index
+----------------------
+
+Common program title2
+
+EMBL		embltitle1
+SwissProt	embltitle1
+
+GenBank		genbtitle1
+
+PIR		pirtitle1 pirtitle2 (pir3 has no accession numbers)
+NRL3D		pirtitle2
+
+Scripts
+-------
+
+emblentryname.script
+emblaccession.script
+embltitle.script
+
+swissentryname.script
+swissaccession.script
+swisstitle.script
+
+genbentryname.script
+genbaccession.script
+genbtitle.script
+
+pirentryname.script
+piraccession.script
+pirtitle.script
+
+nrl3dentryname.script
+nrl3dtitle.script
+
+
+ 
+
+
+
+
+
--- a/453
+++ b/453
@ -0,0 +1,453 @@
+#! /bin/csh -f
+#
+# staden_install - version 2.4
+#
+#	This is a prototype installation program.
+#
+# 9 March 1992
+#	Modified for installation on Sun, Alliant, etc
+#	No longer install 2rs
+#
+# 20 November 1992
+#	Now includes convert, cop, frog, getMCH and scf
+#
+# 25 November 1992
+#	SGI supported
+# 
+# 19 May 1993
+#	DEC Alpha, Solaris supported
+# 
+# Written by sd@uk.ac.cam.mrc-lmb
+#
+
+# Name this script was invoked as, reduced to its last path component.
+set prog = $0
+set prog = $prog:t
+
+# Target platform, one of: al sun dec sgi alpha solaris.
+# It is hard-wired here; deriving it from the script name is disabled:
+#set MACHINE = `echo $prog | sed 's/.*-//'`
+set MACHINE = alpha
+
+# Local (MRC-LMB) setup switch.  It is hard-wired to NO; deriving it from
+# the script name is disabled:
+#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
+set LOCAL = NO
+
+
+# Announce the platform and choose the make command for it.  Plain make is
+# the starting value; dec, sgi and alpha switch to gmake.  An unrecognised
+# platform ends the script with status 1.
+echo ""
+echo -n "Staden Package installation procedure - "
+set MAKE = "make -sk"
+switch (${MACHINE})
+	case "al":
+		echo "Alliant FX/2800 Concentrix version"
+		breaksw
+	case "sun":
+		echo "SunOS version"
+		breaksw
+	case "solaris":
+		echo "Solaris version"
+		breaksw
+	case "dec":
+		echo "DEC Ultrix (mips) version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "sgi":
+		echo "Silicon Graphics Iris version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "alpha":
+		echo "DEC Alpha OSF/1 version"
+		set MAKE = "gmake -sk"
+		breaksw
+	default:
+		echo "Panic. Unknown version"
+		exit 1
+endsw
+echo ""
+echo "* starting initialization...please wait."
+echo ""
+
+# Directory layout.  Outside the local setup everything is built in place,
+# executables go to bin, and make is pointed at the per-platform makefile.
+if ($LOCAL != "YES") then
+    set DIR_BINARIES = .
+    set DIR_PROGS = bin
+    set MAKE = "$MAKE -f makefile-${MACHINE}"
+else
+    set DIR_BINARIES = ${MACHINE}-binaries
+    set DIR_PROGS = ${MACHINE}-bin
+endif
+
+init:
+# Canonical answer values used by the questions.
+set YES = "YES"
+set NO = "NO"
+
+# Shell options: noclobber is switched off and filename expansion is
+# disabled for the rest of the script.
+unset noclobber
+set noglob
+
+# On interrupt, jump to the end_failure label.
+onintr end_failure
+
+# Commands used by the installation steps.  cp is the active install
+# command; install and mv are the disabled alternatives.
+set MKDIR = "mkdir"
+set CP = "cp -p"
+#set INSTALL = "install"
+#set INSTALL = "mv"
+set INSTALL = "cp"
+
+# Default answers: the package root is the current directory and every
+# program group is selected.
+set DEF_STADEN_ROOT = `pwd`
+
+set DEF_REQ_NONX = $YES
+set DEF_REQ_X = $YES
+set DEF_REQ_TED = $YES
+set DEF_REQ_MISC = $YES
+
+# Locations below the package root: the executables directory first, then
+# one source directory per component.
+set DIR_SRC = $DEF_STADEN_ROOT/src
+set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
+
+set DIR_MISC = $DIR_SRC/Misc
+set DIR_STADEN = $DIR_SRC/staden
+set DIR_BAP = $DIR_SRC/bap
+set DIR_OSP = $DIR_SRC/bap/osp-bits
+set DIR_TED = $DIR_SRC/ted
+
+set DIR_ABI = $DIR_SRC/abi
+set DIR_ALF = $DIR_SRC/alf
+set DIR_CONVERT = $DIR_SRC/convert
+set DIR_COP = $DIR_SRC/cop
+set DIR_FROG = $DIR_SRC/frog
+set DIR_GETMCH = $DIR_SRC/getMCH
+set DIR_SCF = $DIR_SRC/scf
+
+
+main:
+# Start of the interactive part: print the instructions, fix the package
+# root, then fall through into the questions.
+
+	
+preamble:
+	# Instructions shown once before the first question.
+	echo ""
+	echo ""
+	echo "* Please answer the following questions."
+	echo "  Default answers to questions are given in square brackets."
+	echo "  If you require help at any stage respond with a ? to the question."
+	echo ""
+
+ask_staden_root:
+	# No question is asked here; the default root is taken as the answer.
+	set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
+
+ask_require_nonx_progs:
+	# Ask whether the non-X programs should be built.  The reply is read
+	# from standard input and ANS_REQ_NONX ends up as YES or NO.
+	echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
+	set ANS_REQ_NONX = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_NONX" == "?") then
+		echo "* If you do not have X windows on your system you will require"
+		echo "  these.  However, you will require Tektronics terminal emulation."
+		echo "  If you do not require all of the non-X programs, you should abort"
+		echo "  and manually make the ones you require."
+		echo ""
+		goto ask_require_nonx_progs
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_NONX" == "") then
+		set ANS_REQ_NONX = $DEF_REQ_NONX
+	else if ("$ANS_REQ_NONX" =~ [yY]*) then
+		set ANS_REQ_NONX = $YES
+	else if ("$ANS_REQ_NONX" =~ [nN]*) then
+		set ANS_REQ_NONX = $NO
+	else
+		goto ask_require_nonx_progs
+	endif
+
+ask_require_x_progs:
+	# Ask whether the X programs should be built.  The reply is read from
+	# standard input and ANS_REQ_X ends up as YES or NO.
+	#   ?          print the help text and ask again
+	#   empty      take the default DEF_REQ_X
+	#   y... n...  YES or NO, first letter in either case
+	#   other      ask this same question again
+	echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
+	set ANS_REQ_X = $<
+	if ("$ANS_REQ_X" == "?") then
+		echo "* These are the programs that require X windows."
+		echo "  If you do not require all of the X programs, you should abort"
+		echo "  and manually make the ones you require."
+
+		echo ""
+		goto ask_require_x_progs
+	else if ("$ANS_REQ_X" != "") then
+		if ("$ANS_REQ_X" =~ [yY]*) then
+			set ANS_REQ_X=$YES
+		else if ("$ANS_REQ_X" =~ [nN]*) then
+			set ANS_REQ_X=$NO
+		else
+			# Re-ask this question.  The jump used to go to
+			# ask_require_nonx_progs, which asked the previous
+			# question again and overwrote the non-X answer.
+			goto ask_require_x_progs
+		endif
+	else
+		set ANS_REQ_X=$DEF_REQ_X
+	endif
+
+
+ask_require_ted:
+	# Ask whether the trace editor ted should be built.  The reply is read
+	# from standard input and ANS_REQ_TED ends up as YES or NO.
+	echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
+	set ANS_REQ_TED = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_TED" == "?") then
+		echo "* This is the trace editor program.  It allows you to look at"
+		echo "  traces obtained from automated fluorescent sequencing machines."
+		echo ""
+		goto ask_require_ted
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_TED" == "") then
+		set ANS_REQ_TED = $DEF_REQ_TED
+	else if ("$ANS_REQ_TED" =~ [yY]*) then
+		set ANS_REQ_TED = $YES
+	else if ("$ANS_REQ_TED" =~ [nN]*) then
+		set ANS_REQ_TED = $NO
+	else
+		goto ask_require_ted
+	endif
+
+
+
+ask_require_misc:
+	# Ask whether the other programs should be built.  The reply is read
+	# from standard input and ANS_REQ_MISC ends up as YES or NO.
+	echo -n "Compile other programs [$DEF_REQ_MISC]? "
+	set ANS_REQ_MISC = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_MISC" == "?") then
+		echo "* Other programs include:"
+		echo "    alfsplit"
+		echo "    getABISampleName"
+		echo ""
+		goto ask_require_misc
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_MISC" == "") then
+		set ANS_REQ_MISC = $DEF_REQ_MISC
+	else if ("$ANS_REQ_MISC" =~ [yY]*) then
+		set ANS_REQ_MISC = $YES
+	else if ("$ANS_REQ_MISC" =~ [nN]*) then
+		set ANS_REQ_MISC = $NO
+	else
+		goto ask_require_misc
+	endif
+
+
+
+time_taken_warning:
+	# Warn about the expected duration before the final confirmation is
+	# requested.  The word executables was misspelt in this message.
+	echo ""
+	echo "The installation procedure is now ready to start."
+	echo ""
+	echo "**** Warning:"
+	echo "    The installation will take considerable time to complete.  If you"
+	echo "    are installing the whole Staden Package from scratch it could"
+	echo "    take as long as an hour for all executables to be compiled and"
+	echo "    installed."
+	echo ""
+
+ask_goahead:
+	# Final confirmation before anything is built.
+	echo -n "Proceed with the installation [YES]? "
+	set ANSWER = $<
+
+	# A reply of ? explains the question and asks again.
+	if ("$ANSWER" == "?") then
+		echo "* Final confirmation to proceed with the installation.  Answer"
+		echo "  YES to proceed; otherwise, answer NO to abort the installation."
+		echo ""
+		goto ask_goahead
+	endif
+
+	# An empty reply means go ahead.  Otherwise a reply starting with n
+	# cancels, one starting with y continues, and anything else asks again.
+	if ("$ANSWER" != "") then
+		if ("$ANSWER" =~ [nN]*) goto chickens_exit
+		if ("$ANSWER" !~ [yY]*) goto ask_goahead
+	endif
+
+installation_proper:
+
+# Make sure the directory receiving the executables exists.
+
+	if (! -d $DIR_BIN) then
+		$MKDIR $DIR_BIN
+	endif
+
+	# Misc library: built when any of the non-X, X or other program
+	# groups was selected.
+	if ("$ANS_REQ_NONX" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Compiling miscellaneous library"
+
+		pushd $DIR_MISC > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		popd > /dev/null
+	endif
+
+	# Non-X programs: the staden directory, then osp-bits, then bap.
+	if ("$ANS_REQ_NONX" == "$YES") then
+		echo ""
+		echo "+ Installing non X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+		$MAKE nprogs lprogs
+		foreach exe (mep nip pip sap sapf sip splitp1 splitp2 splitp3 sethelp gip nipl pipl sipl dap nipf vep rep lip)
+			$INSTALL $exe $DIR_BIN
+		end
+		#$INSTALL convert_project $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE bap
+		$INSTALL bap $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# Trace editor.
+	if ("$ANS_REQ_TED" == "$YES") then
+		echo ""
+		echo "+ Installing Trace editor"
+
+		pushd $DIR_TED > /dev/null
+		cd $DIR_BINARIES
+		$MAKE ted
+		$INSTALL ted $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# X programs: the staden directory, then osp-bits, then xbap.
+	if ("$ANS_REQ_X" == "$YES") then
+		echo ""
+		echo "+ Installing X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+		$MAKE xprogs
+		foreach exe (xmep xnip xpip xsap xsip xdap)
+			$INSTALL $exe $DIR_BIN
+		end
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE xbap
+		$INSTALL xbap $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# Other programs, one source directory at a time.
+	if ("$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Installing miscellaneous programs"
+
+		pushd $DIR_ABI > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		$INSTALL getABISampleName $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_ALF > /dev/null
+		cd $DIR_BINARIES
+		$MAKE alfsplit
+		$INSTALL alfsplit $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_CONVERT > /dev/null
+		cd $DIR_BINARIES
+		$MAKE convert
+		$INSTALL convert $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_COP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		foreach exe (cop cop-bap)
+			$INSTALL $exe $DIR_BIN
+		end
+		popd > /dev/null
+
+		pushd $DIR_FROG > /dev/null
+		cd $DIR_BINARIES
+		$MAKE frog
+		$INSTALL frog $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_GETMCH > /dev/null
+		cd $DIR_BINARIES
+		$MAKE trace2seq
+		$INSTALL trace2seq $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_SCF > /dev/null
+		cd $DIR_BINARIES
+		$MAKE makeSCF
+		$INSTALL makeSCF $DIR_BIN
+		popd > /dev/null
+	endif
+
+
+installation_done:
+	# Closing message: report completion and print the login-file lines
+	# for csh and Bourne shell users.  The double-quoted lines substitute
+	# ANS_STADEN_ROOT; the single-quoted lines print STADENROOT unexpanded.
+	echo ""
+	echo "+ Installation completed"
+	echo ""
+
+	echo "  Some further initialisation is required in order to use the"
+	echo "  package.  csh users should insert the following in their .login"
+	echo "  files:"
+	echo "  "
+	echo "  	setenv STADENROOT $ANS_STADEN_ROOT"
+	echo '  	source $STADENROOT/staden.login'
+	echo "  "
+	echo "  Users of the Bourne shell, sh, should insert the following in"
+	echo "  their .profile:"
+	echo "  "
+	echo "  	STADENROOT=$ANS_STADEN_ROOT"
+	echo "  	export STADENROOT"
+	echo '  	. $STADENROOT/staden.profile'
+	echo "  "
+	echo "  These initialisations will alter the shell's search path so that"
+	echo "  it can find the programs in the STADEN Package"
+	echo "  "
+
+normal_exit:
+	# Reached by falling through after the closing message; status 0.
+	exit 0
+
+chickens_exit:
+	# Reached from ask_goahead when the user declines; status 0.
+	echo ""
+	echo "+ Installation cancelled"
+	echo ""
+
+	exit 0
+
+end_failure:
+	# Target of the onintr trap set in init: turn filename expansion back
+	# on, report the abort with the current date, and exit with status 1.
+	unset noglob
+	echo ""
+	echo "Aborted STADEN Package installation on `date`" 
+	echo ""
+	exit 1
+
--- a/453
+++ b/453
@ -0,0 +1,453 @@
+#! /bin/csh -f
+#
+# staden_install - version 2.4
+#
+#	This is a prototype installation program.
+#
+# 9 March 1992
+#	Modified for installation on Sun, Alliant, etc
+#	No longer install 2rs
+#
+# 20 November 1992
+#	Now includes convert, cop, frog, getMCH and scf
+#
+# 25 November 1992
+#	SGI supported
+# 
+# 19 May 1993
+#	DEC Alpha, Solaris supported
+# 
+# Written by sd@uk.ac.cam.mrc-lmb
+#
+
+# Name this script was invoked as, reduced to its last path component.
+set prog = $0
+set prog = $prog:t
+
+# Target platform, one of: al sun dec sgi alpha solaris.
+# It is hard-wired here; deriving it from the script name is disabled:
+#set MACHINE = `echo $prog | sed 's/.*-//'`
+set MACHINE = dec
+
+# Local (MRC-LMB) setup switch.  It is hard-wired to NO; deriving it from
+# the script name is disabled:
+#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
+set LOCAL = NO
+
+
+# Announce the platform and choose the make command for it.  Plain make is
+# the starting value; dec, sgi and alpha switch to gmake.  An unrecognised
+# platform ends the script with status 1.
+echo ""
+echo -n "Staden Package installation procedure - "
+set MAKE = "make -sk"
+switch (${MACHINE})
+	case "al":
+		echo "Alliant FX/2800 Concentrix version"
+		breaksw
+	case "sun":
+		echo "SunOS version"
+		breaksw
+	case "solaris":
+		echo "Solaris version"
+		breaksw
+	case "dec":
+		echo "DEC Ultrix (mips) version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "sgi":
+		echo "Silicon Graphics Iris version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "alpha":
+		echo "DEC Alpha OSF/1 version"
+		set MAKE = "gmake -sk"
+		breaksw
+	default:
+		echo "Panic. Unknown version"
+		exit 1
+endsw
+echo ""
+echo "* starting initialization...please wait."
+echo ""
+
+# Directory layout.  Outside the local setup everything is built in place,
+# executables go to bin, and make is pointed at the per-platform makefile.
+if ($LOCAL != "YES") then
+    set DIR_BINARIES = .
+    set DIR_PROGS = bin
+    set MAKE = "$MAKE -f makefile-${MACHINE}"
+else
+    set DIR_BINARIES = ${MACHINE}-binaries
+    set DIR_PROGS = ${MACHINE}-bin
+endif
+
+init:
+# Canonical answer values used by the questions.
+set YES = "YES"
+set NO = "NO"
+
+# Shell options: noclobber is switched off and filename expansion is
+# disabled for the rest of the script.
+unset noclobber
+set noglob
+
+# On interrupt, jump to the end_failure label.
+onintr end_failure
+
+# Commands used by the installation steps.  cp is the active install
+# command; install and mv are the disabled alternatives.
+set MKDIR = "mkdir"
+set CP = "cp -p"
+#set INSTALL = "install"
+#set INSTALL = "mv"
+set INSTALL = "cp"
+
+# Default answers: the package root is the current directory and every
+# program group is selected.
+set DEF_STADEN_ROOT = `pwd`
+
+set DEF_REQ_NONX = $YES
+set DEF_REQ_X = $YES
+set DEF_REQ_TED = $YES
+set DEF_REQ_MISC = $YES
+
+# Locations below the package root: the executables directory first, then
+# one source directory per component.
+set DIR_SRC = $DEF_STADEN_ROOT/src
+set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
+
+set DIR_MISC = $DIR_SRC/Misc
+set DIR_STADEN = $DIR_SRC/staden
+set DIR_BAP = $DIR_SRC/bap
+set DIR_OSP = $DIR_SRC/bap/osp-bits
+set DIR_TED = $DIR_SRC/ted
+
+set DIR_ABI = $DIR_SRC/abi
+set DIR_ALF = $DIR_SRC/alf
+set DIR_CONVERT = $DIR_SRC/convert
+set DIR_COP = $DIR_SRC/cop
+set DIR_FROG = $DIR_SRC/frog
+set DIR_GETMCH = $DIR_SRC/getMCH
+set DIR_SCF = $DIR_SRC/scf
+
+
+main:
+# Start of the interactive part: print the instructions, fix the package
+# root, then fall through into the questions.
+
+	
+preamble:
+	# Instructions shown once before the first question.
+	echo ""
+	echo ""
+	echo "* Please answer the following questions."
+	echo "  Default answers to questions are given in square brackets."
+	echo "  If you require help at any stage respond with a ? to the question."
+	echo ""
+
+ask_staden_root:
+	# No question is asked here; the default root is taken as the answer.
+	set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
+
+ask_require_nonx_progs:
+	# Ask whether the non-X programs should be built.  The reply is read
+	# from standard input and ANS_REQ_NONX ends up as YES or NO.
+	echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
+	set ANS_REQ_NONX = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_NONX" == "?") then
+		echo "* If you do not have X windows on your system you will require"
+		echo "  these.  However, you will require Tektronics terminal emulation."
+		echo "  If you do not require all of the non-X programs, you should abort"
+		echo "  and manually make the ones you require."
+		echo ""
+		goto ask_require_nonx_progs
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_NONX" == "") then
+		set ANS_REQ_NONX = $DEF_REQ_NONX
+	else if ("$ANS_REQ_NONX" =~ [yY]*) then
+		set ANS_REQ_NONX = $YES
+	else if ("$ANS_REQ_NONX" =~ [nN]*) then
+		set ANS_REQ_NONX = $NO
+	else
+		goto ask_require_nonx_progs
+	endif
+
+ask_require_x_progs:
+	# Ask whether the X programs should be built.  The reply is read from
+	# standard input and ANS_REQ_X ends up as YES or NO.
+	#   ?          print the help text and ask again
+	#   empty      take the default DEF_REQ_X
+	#   y... n...  YES or NO, first letter in either case
+	#   other      ask this same question again
+	echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
+	set ANS_REQ_X = $<
+	if ("$ANS_REQ_X" == "?") then
+		echo "* These are the programs that require X windows."
+		echo "  If you do not require all of the X programs, you should abort"
+		echo "  and manually make the ones you require."
+
+		echo ""
+		goto ask_require_x_progs
+	else if ("$ANS_REQ_X" != "") then
+		if ("$ANS_REQ_X" =~ [yY]*) then
+			set ANS_REQ_X=$YES
+		else if ("$ANS_REQ_X" =~ [nN]*) then
+			set ANS_REQ_X=$NO
+		else
+			# Re-ask this question.  The jump used to go to
+			# ask_require_nonx_progs, which asked the previous
+			# question again and overwrote the non-X answer.
+			goto ask_require_x_progs
+		endif
+	else
+		set ANS_REQ_X=$DEF_REQ_X
+	endif
+
+
+ask_require_ted:
+	# Ask whether the trace editor ted should be built.  The reply is read
+	# from standard input and ANS_REQ_TED ends up as YES or NO.
+	echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
+	set ANS_REQ_TED = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_TED" == "?") then
+		echo "* This is the trace editor program.  It allows you to look at"
+		echo "  traces obtained from automated fluorescent sequencing machines."
+		echo ""
+		goto ask_require_ted
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_TED" == "") then
+		set ANS_REQ_TED = $DEF_REQ_TED
+	else if ("$ANS_REQ_TED" =~ [yY]*) then
+		set ANS_REQ_TED = $YES
+	else if ("$ANS_REQ_TED" =~ [nN]*) then
+		set ANS_REQ_TED = $NO
+	else
+		goto ask_require_ted
+	endif
+
+
+
+ask_require_misc:
+	# Ask whether the other programs should be built.  The reply is read
+	# from standard input and ANS_REQ_MISC ends up as YES or NO.
+	echo -n "Compile other programs [$DEF_REQ_MISC]? "
+	set ANS_REQ_MISC = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_MISC" == "?") then
+		echo "* Other programs include:"
+		echo "    alfsplit"
+		echo "    getABISampleName"
+		echo ""
+		goto ask_require_misc
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_MISC" == "") then
+		set ANS_REQ_MISC = $DEF_REQ_MISC
+	else if ("$ANS_REQ_MISC" =~ [yY]*) then
+		set ANS_REQ_MISC = $YES
+	else if ("$ANS_REQ_MISC" =~ [nN]*) then
+		set ANS_REQ_MISC = $NO
+	else
+		goto ask_require_misc
+	endif
+
+
+
+time_taken_warning:
+	# Warn about the expected duration before the final confirmation is
+	# requested.  The word executables was misspelt in this message.
+	echo ""
+	echo "The installation procedure is now ready to start."
+	echo ""
+	echo "**** Warning:"
+	echo "    The installation will take considerable time to complete.  If you"
+	echo "    are installing the whole Staden Package from scratch it could"
+	echo "    take as long as an hour for all executables to be compiled and"
+	echo "    installed."
+	echo ""
+
+ask_goahead:
+	# Final confirmation before anything is built.
+	echo -n "Proceed with the installation [YES]? "
+	set ANSWER = $<
+
+	# A reply of ? explains the question and asks again.
+	if ("$ANSWER" == "?") then
+		echo "* Final confirmation to proceed with the installation.  Answer"
+		echo "  YES to proceed; otherwise, answer NO to abort the installation."
+		echo ""
+		goto ask_goahead
+	endif
+
+	# An empty reply means go ahead.  Otherwise a reply starting with n
+	# cancels, one starting with y continues, and anything else asks again.
+	if ("$ANSWER" != "") then
+		if ("$ANSWER" =~ [nN]*) goto chickens_exit
+		if ("$ANSWER" !~ [yY]*) goto ask_goahead
+	endif
+
+installation_proper:
+
+# Make sure the directory receiving the executables exists.
+
+	if (! -d $DIR_BIN) then
+		$MKDIR $DIR_BIN
+	endif
+
+	# Misc library: built when any of the non-X, X or other program
+	# groups was selected.
+	if ("$ANS_REQ_NONX" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Compiling miscellaneous library"
+
+		pushd $DIR_MISC > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		popd > /dev/null
+	endif
+
+	# Non-X programs: the staden directory, then osp-bits, then bap.
+	if ("$ANS_REQ_NONX" == "$YES") then
+		echo ""
+		echo "+ Installing non X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+		$MAKE nprogs lprogs
+		foreach exe (mep nip pip sap sapf sip splitp1 splitp2 splitp3 sethelp gip nipl pipl sipl dap nipf vep rep lip)
+			$INSTALL $exe $DIR_BIN
+		end
+		#$INSTALL convert_project $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE bap
+		$INSTALL bap $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# Trace editor.
+	if ("$ANS_REQ_TED" == "$YES") then
+		echo ""
+		echo "+ Installing Trace editor"
+
+		pushd $DIR_TED > /dev/null
+		cd $DIR_BINARIES
+		$MAKE ted
+		$INSTALL ted $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# X programs: the staden directory, then osp-bits, then xbap.
+	if ("$ANS_REQ_X" == "$YES") then
+		echo ""
+		echo "+ Installing X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+		$MAKE xprogs
+		foreach exe (xmep xnip xpip xsap xsip xdap)
+			$INSTALL $exe $DIR_BIN
+		end
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE xbap
+		$INSTALL xbap $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# Other programs, one source directory at a time.
+	if ("$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Installing miscellaneous programs"
+
+		pushd $DIR_ABI > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		$INSTALL getABISampleName $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_ALF > /dev/null
+		cd $DIR_BINARIES
+		$MAKE alfsplit
+		$INSTALL alfsplit $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_CONVERT > /dev/null
+		cd $DIR_BINARIES
+		$MAKE convert
+		$INSTALL convert $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_COP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		foreach exe (cop cop-bap)
+			$INSTALL $exe $DIR_BIN
+		end
+		popd > /dev/null
+
+		pushd $DIR_FROG > /dev/null
+		cd $DIR_BINARIES
+		$MAKE frog
+		$INSTALL frog $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_GETMCH > /dev/null
+		cd $DIR_BINARIES
+		$MAKE trace2seq
+		$INSTALL trace2seq $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_SCF > /dev/null
+		cd $DIR_BINARIES
+		$MAKE makeSCF
+		$INSTALL makeSCF $DIR_BIN
+		popd > /dev/null
+	endif
+
+
+installation_done:
+	# Closing message: report completion and print the login-file lines
+	# for csh and Bourne shell users.  The double-quoted lines substitute
+	# ANS_STADEN_ROOT; the single-quoted lines print STADENROOT unexpanded.
+	echo ""
+	echo "+ Installation completed"
+	echo ""
+
+	echo "  Some further initialisation is required in order to use the"
+	echo "  package.  csh users should insert the following in their .login"
+	echo "  files:"
+	echo "  "
+	echo "  	setenv STADENROOT $ANS_STADEN_ROOT"
+	echo '  	source $STADENROOT/staden.login'
+	echo "  "
+	echo "  Users of the Bourne shell, sh, should insert the following in"
+	echo "  their .profile:"
+	echo "  "
+	echo "  	STADENROOT=$ANS_STADEN_ROOT"
+	echo "  	export STADENROOT"
+	echo '  	. $STADENROOT/staden.profile'
+	echo "  "
+	echo "  These initialisations will alter the shell's search path so that"
+	echo "  it can find the programs in the STADEN Package"
+	echo "  "
+
+normal_exit:
+	# Reached by falling through after the closing message; status 0.
+	exit 0
+
+chickens_exit:
+	# Reached from ask_goahead when the user declines; status 0.
+	echo ""
+	echo "+ Installation cancelled"
+	echo ""
+
+	exit 0
+
+end_failure:
+	# Target of the onintr trap set in init: turn filename expansion back
+	# on, report the abort with the current date, and exit with status 1.
+	unset noglob
+	echo ""
+	echo "Aborted STADEN Package installation on `date`" 
+	echo ""
+	exit 1
+
--- a/453
+++ b/453
@ -0,0 +1,453 @@
+#! /bin/csh -f
+#
+# staden_install - version 2.4
+#
+#	This is a prototype installation program.
+#
+# 9 March 1992
+#	Modified for installation on Sun, Alliant, etc
+#	No longer install 2rs
+#
+# 20 November 1992
+#	Now includes convert, cop, frog, getMCH and scf
+#
+# 25 November 1992
+#	SGI supported
+# 
+# 19 May 1993
+#	DEC Alpha, Solaris supported
+# 
+# Written by sd@uk.ac.cam.mrc-lmb
+#
+
+# Name this script was invoked as, reduced to its last path component.
+set prog = $0
+set prog = $prog:t
+
+# Target platform, one of: al sun dec sgi alpha solaris.
+# It is hard-wired here; deriving it from the script name is disabled:
+#set MACHINE = `echo $prog | sed 's/.*-//'`
+set MACHINE = sgi
+
+# Local (MRC-LMB) setup switch.  It is hard-wired to NO; deriving it from
+# the script name is disabled:
+#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
+set LOCAL = NO
+
+
+# Announce the platform and choose the make command for it.  Plain make is
+# the starting value; dec, sgi and alpha switch to gmake.  An unrecognised
+# platform ends the script with status 1.
+echo ""
+echo -n "Staden Package installation procedure - "
+set MAKE = "make -sk"
+switch (${MACHINE})
+	case "al":
+		echo "Alliant FX/2800 Concentrix version"
+		breaksw
+	case "sun":
+		echo "SunOS version"
+		breaksw
+	case "solaris":
+		echo "Solaris version"
+		breaksw
+	case "dec":
+		echo "DEC Ultrix (mips) version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "sgi":
+		echo "Silicon Graphics Iris version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "alpha":
+		echo "DEC Alpha OSF/1 version"
+		set MAKE = "gmake -sk"
+		breaksw
+	default:
+		echo "Panic. Unknown version"
+		exit 1
+endsw
+echo ""
+echo "* starting initialization...please wait."
+echo ""
+
+# Directory layout.  Outside the local setup everything is built in place,
+# executables go to bin, and make is pointed at the per-platform makefile.
+if ($LOCAL != "YES") then
+    set DIR_BINARIES = .
+    set DIR_PROGS = bin
+    set MAKE = "$MAKE -f makefile-${MACHINE}"
+else
+    set DIR_BINARIES = ${MACHINE}-binaries
+    set DIR_PROGS = ${MACHINE}-bin
+endif
+
+init:
+# Canonical answer values used by the questions.
+set YES = "YES"
+set NO = "NO"
+
+# Shell options: noclobber is switched off and filename expansion is
+# disabled for the rest of the script.
+unset noclobber
+set noglob
+
+# On interrupt, jump to the end_failure label.
+onintr end_failure
+
+# Commands used by the installation steps.  cp is the active install
+# command; install and mv are the disabled alternatives.
+set MKDIR = "mkdir"
+set CP = "cp -p"
+#set INSTALL = "install"
+#set INSTALL = "mv"
+set INSTALL = "cp"
+
+# Default answers: the package root is the current directory and every
+# program group is selected.
+set DEF_STADEN_ROOT = `pwd`
+
+set DEF_REQ_NONX = $YES
+set DEF_REQ_X = $YES
+set DEF_REQ_TED = $YES
+set DEF_REQ_MISC = $YES
+
+# Locations below the package root: the executables directory first, then
+# one source directory per component.
+set DIR_SRC = $DEF_STADEN_ROOT/src
+set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
+
+set DIR_MISC = $DIR_SRC/Misc
+set DIR_STADEN = $DIR_SRC/staden
+set DIR_BAP = $DIR_SRC/bap
+set DIR_OSP = $DIR_SRC/bap/osp-bits
+set DIR_TED = $DIR_SRC/ted
+
+set DIR_ABI = $DIR_SRC/abi
+set DIR_ALF = $DIR_SRC/alf
+set DIR_CONVERT = $DIR_SRC/convert
+set DIR_COP = $DIR_SRC/cop
+set DIR_FROG = $DIR_SRC/frog
+set DIR_GETMCH = $DIR_SRC/getMCH
+set DIR_SCF = $DIR_SRC/scf
+
+
+main:
+# Start of the interactive part: print the instructions, fix the package
+# root, then fall through into the questions.
+
+	
+preamble:
+	# Instructions shown once before the first question.
+	echo ""
+	echo ""
+	echo "* Please answer the following questions."
+	echo "  Default answers to questions are given in square brackets."
+	echo "  If you require help at any stage respond with a ? to the question."
+	echo ""
+
+ask_staden_root:
+	# No question is asked here; the default root is taken as the answer.
+	set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
+
+ask_require_nonx_progs:
+	# Ask whether the non-X programs should be built.  The reply is read
+	# from standard input and ANS_REQ_NONX ends up as YES or NO.
+	echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
+	set ANS_REQ_NONX = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_NONX" == "?") then
+		echo "* If you do not have X windows on your system you will require"
+		echo "  these.  However, you will require Tektronics terminal emulation."
+		echo "  If you do not require all of the non-X programs, you should abort"
+		echo "  and manually make the ones you require."
+		echo ""
+		goto ask_require_nonx_progs
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_NONX" == "") then
+		set ANS_REQ_NONX = $DEF_REQ_NONX
+	else if ("$ANS_REQ_NONX" =~ [yY]*) then
+		set ANS_REQ_NONX = $YES
+	else if ("$ANS_REQ_NONX" =~ [nN]*) then
+		set ANS_REQ_NONX = $NO
+	else
+		goto ask_require_nonx_progs
+	endif
+
+ask_require_x_progs:
+	# Ask whether the X programs should be built.  The reply is read from
+	# standard input and ANS_REQ_X ends up as YES or NO.
+	#   ?          print the help text and ask again
+	#   empty      take the default DEF_REQ_X
+	#   y... n...  YES or NO, first letter in either case
+	#   other      ask this same question again
+	echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
+	set ANS_REQ_X = $<
+	if ("$ANS_REQ_X" == "?") then
+		echo "* These are the programs that require X windows."
+		echo "  If you do not require all of the X programs, you should abort"
+		echo "  and manually make the ones you require."
+
+		echo ""
+		goto ask_require_x_progs
+	else if ("$ANS_REQ_X" != "") then
+		if ("$ANS_REQ_X" =~ [yY]*) then
+			set ANS_REQ_X=$YES
+		else if ("$ANS_REQ_X" =~ [nN]*) then
+			set ANS_REQ_X=$NO
+		else
+			# Re-ask this question.  The jump used to go to
+			# ask_require_nonx_progs, which asked the previous
+			# question again and overwrote the non-X answer.
+			goto ask_require_x_progs
+		endif
+	else
+		set ANS_REQ_X=$DEF_REQ_X
+	endif
+
+
+ask_require_ted:
+	# Ask whether the trace editor ted should be built.  The reply is read
+	# from standard input and ANS_REQ_TED ends up as YES or NO.
+	echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
+	set ANS_REQ_TED = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_TED" == "?") then
+		echo "* This is the trace editor program.  It allows you to look at"
+		echo "  traces obtained from automated fluorescent sequencing machines."
+		echo ""
+		goto ask_require_ted
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_TED" == "") then
+		set ANS_REQ_TED = $DEF_REQ_TED
+	else if ("$ANS_REQ_TED" =~ [yY]*) then
+		set ANS_REQ_TED = $YES
+	else if ("$ANS_REQ_TED" =~ [nN]*) then
+		set ANS_REQ_TED = $NO
+	else
+		goto ask_require_ted
+	endif
+
+
+
+ask_require_misc:
+	# Ask whether the other programs should be built.  The reply is read
+	# from standard input and ANS_REQ_MISC ends up as YES or NO.
+	echo -n "Compile other programs [$DEF_REQ_MISC]? "
+	set ANS_REQ_MISC = $<
+
+	# A reply of ? prints the help text and asks again.
+	if ("$ANS_REQ_MISC" == "?") then
+		echo "* Other programs include:"
+		echo "    alfsplit"
+		echo "    getABISampleName"
+		echo ""
+		goto ask_require_misc
+	endif
+
+	# An empty reply takes the default.  A reply starting with y or n in
+	# either case becomes YES or NO.  Anything else asks again.
+	if ("$ANS_REQ_MISC" == "") then
+		set ANS_REQ_MISC = $DEF_REQ_MISC
+	else if ("$ANS_REQ_MISC" =~ [yY]*) then
+		set ANS_REQ_MISC = $YES
+	else if ("$ANS_REQ_MISC" =~ [nN]*) then
+		set ANS_REQ_MISC = $NO
+	else
+		goto ask_require_misc
+	endif
+
+
+
+time_taken_warning:
+	# Warn about the expected duration before the final confirmation is
+	# requested.  The word executables was misspelt in this message.
+	echo ""
+	echo "The installation procedure is now ready to start."
+	echo ""
+	echo "**** Warning:"
+	echo "    The installation will take considerable time to complete.  If you"
+	echo "    are installing the whole Staden Package from scratch it could"
+	echo "    take as long as an hour for all executables to be compiled and"
+	echo "    installed."
+	echo ""
+
+ask_goahead:
+	# Final confirmation before anything is built.
+	echo -n "Proceed with the installation [YES]? "
+	set ANSWER = $<
+
+	# A reply of ? explains the question and asks again.
+	if ("$ANSWER" == "?") then
+		echo "* Final confirmation to proceed with the installation.  Answer"
+		echo "  YES to proceed; otherwise, answer NO to abort the installation."
+		echo ""
+		goto ask_goahead
+	endif
+
+	# An empty reply means go ahead.  Otherwise a reply starting with n
+	# cancels, one starting with y continues, and anything else asks again.
+	if ("$ANSWER" != "") then
+		if ("$ANSWER" =~ [nN]*) goto chickens_exit
+		if ("$ANSWER" !~ [yY]*) goto ask_goahead
+	endif
+
+installation_proper:
+
+# Make sure the directory receiving the executables exists.
+
+	if (! -d $DIR_BIN) then
+		$MKDIR $DIR_BIN
+	endif
+
+	# Misc library: built when any of the non-X, X or other program
+	# groups was selected.
+	if ("$ANS_REQ_NONX" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Compiling miscellaneous library"
+
+		pushd $DIR_MISC > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		popd > /dev/null
+	endif
+
+	# Non-X programs: the staden directory, then osp-bits, then bap.
+	if ("$ANS_REQ_NONX" == "$YES") then
+		echo ""
+		echo "+ Installing non X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+		$MAKE nprogs lprogs
+		foreach exe (mep nip pip sap sapf sip splitp1 splitp2 splitp3 sethelp gip nipl pipl sipl dap nipf vep rep lip)
+			$INSTALL $exe $DIR_BIN
+		end
+		#$INSTALL convert_project $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE bap
+		$INSTALL bap $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# Trace editor.
+	if ("$ANS_REQ_TED" == "$YES") then
+		echo ""
+		echo "+ Installing Trace editor"
+
+		pushd $DIR_TED > /dev/null
+		cd $DIR_BINARIES
+		$MAKE ted
+		$INSTALL ted $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# X programs: the staden directory, then osp-bits, then xbap.
+	if ("$ANS_REQ_X" == "$YES") then
+		echo ""
+		echo "+ Installing X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+		$MAKE xprogs
+		foreach exe (xmep xnip xpip xsap xsip xdap)
+			$INSTALL $exe $DIR_BIN
+		end
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE xbap
+		$INSTALL xbap $DIR_BIN
+		popd > /dev/null
+	endif
+
+	# Other programs, one source directory at a time.
+	if ("$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Installing miscellaneous programs"
+
+		pushd $DIR_ABI > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		$INSTALL getABISampleName $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_ALF > /dev/null
+		cd $DIR_BINARIES
+		$MAKE alfsplit
+		$INSTALL alfsplit $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_CONVERT > /dev/null
+		cd $DIR_BINARIES
+		$MAKE convert
+		$INSTALL convert $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_COP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE all
+		foreach exe (cop cop-bap)
+			$INSTALL $exe $DIR_BIN
+		end
+		popd > /dev/null
+
+		pushd $DIR_FROG > /dev/null
+		cd $DIR_BINARIES
+		$MAKE frog
+		$INSTALL frog $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_GETMCH > /dev/null
+		cd $DIR_BINARIES
+		$MAKE trace2seq
+		$INSTALL trace2seq $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_SCF > /dev/null
+		cd $DIR_BINARIES
+		$MAKE makeSCF
+		$INSTALL makeSCF $DIR_BIN
+		popd > /dev/null
+	endif
+
+
+installation_done:
+	# Closing message: report completion and print the login-file lines
+	# for csh and Bourne shell users.  The double-quoted lines substitute
+	# ANS_STADEN_ROOT; the single-quoted lines print STADENROOT unexpanded.
+	echo ""
+	echo "+ Installation completed"
+	echo ""
+
+	echo "  Some further initialisation is required in order to use the"
+	echo "  package.  csh users should insert the following in their .login"
+	echo "  files:"
+	echo "  "
+	echo "  	setenv STADENROOT $ANS_STADEN_ROOT"
+	echo '  	source $STADENROOT/staden.login'
+	echo "  "
+	echo "  Users of the Bourne shell, sh, should insert the following in"
+	echo "  their .profile:"
+	echo "  "
+	echo "  	STADENROOT=$ANS_STADEN_ROOT"
+	echo "  	export STADENROOT"
+	echo '  	. $STADENROOT/staden.profile'
+	echo "  "
+	echo "  These initialisations will alter the shell's search path so that"
+	echo "  it can find the programs in the STADEN Package"
+	echo "  "
+
+normal_exit:
+	# Reached by falling through after the closing message; status 0.
+	exit 0
+
+chickens_exit:
+	# Reached from ask_goahead when the user declines; status 0.
+	echo ""
+	echo "+ Installation cancelled"
+	echo ""
+
+	exit 0
+
+end_failure:
+	# Target of the onintr trap set in init: turn filename expansion back
+	# on, report the abort with the current date, and exit with status 1.
+	unset noglob
+	echo ""
+	echo "Aborted STADEN Package installation on `date`" 
+	echo ""
+	exit 1
+
--- a/453
+++ b/453
@ -0,0 +1,453 @@
+#! /bin/csh -f
+#
+# staden_install - version 2.4
+#
+#	This is a prototype installation program.
+#
+# 9 March 1992
+#	Modified for installation on Sun, Alliant, etc
+#	No longer install 2rs
+#
+# 20 November 1992
+#	Now includes convert, cop, frog, getMCH and scf
+#
+# 25 November 1992
+#	SGI supported
+# 
+# 19 May 1993
+#	DEC Alpha, Solaris supported
+# 
+# Written by sd@uk.ac.cam.mrc-lmb
+#
+
+# prelim
+set prog = $0 ; set prog = $prog:t
+
+# Machines supported: al sun dec sgi alpha solaris
+#set MACHINE = `echo $prog | sed 's/.*-//'`
+set MACHINE = solaris
+
+# For local (MRC-LMB) setup only
+#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
+set LOCAL = NO
+
+
+echo ""
+echo -n "Staden Package installation procedure - "
+switch (${MACHINE})
+	case "al":
+		echo "Alliant FX/2800 Concentrix version"
+		set MAKE = "make -sk"
+		breaksw
+	case "sun":
+		echo "SunOS version"
+		set MAKE = "make -sk"
+		breaksw
+	case "dec":
+		echo "DEC Ultrix (mips) version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "sgi":
+		echo "Silicon Graphics Iris version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "alpha":
+		echo "DEC Alpha OSF/1 version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "solaris":
+		echo "Solaris version"
+		set MAKE = "make -sk"
+		breaksw
+	default:
+		echo "Panic. Unknown version"
+		exit 1
+endsw
+echo ""
+echo "* starting initialization...please wait."
+echo ""
+
+# Binary fork of source directory
+if ($LOCAL == "YES") then
+    set DIR_BINARIES = ${MACHINE}-binaries
+    set DIR_PROGS = ${MACHINE}-bin
+else
+    set DIR_BINARIES = .
+    set DIR_PROGS = bin
+    set MAKE = "$MAKE -f makefile-${MACHINE}"
+endif
+
+init:
+# Set useful shell variables
+set YES="YES";
+set NO="NO"
+
+# set/unset some .cshrc envs.
+unset noclobber
+set noglob
+
+# set interrupt trap
+onintr end_failure
+
+# Make dir command
+set MKDIR = "mkdir"
+
+# Copy command
+set CP = "cp -p"
+
+# Install command
+#set INSTALL = "install"
+#set INSTALL = "mv"
+set INSTALL = "cp"
+
+# Set up default responses
+set DEF_STADEN_ROOT = `pwd`
+
+set DEF_REQ_NONX = "$YES"
+set DEF_REQ_X = "$YES"
+set DEF_REQ_TED = "$YES"
+set DEF_REQ_MISC = "$YES"
+
+# directories
+set DIR_SRC = $DEF_STADEN_ROOT/src
+set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
+set DIR_MISC = $DIR_SRC/Misc
+set DIR_STADEN = $DIR_SRC/staden
+set DIR_TED = $DIR_SRC/ted
+set DIR_ABI = $DIR_SRC/abi
+set DIR_ALF = $DIR_SRC/alf
+set DIR_BAP = $DIR_SRC/bap
+set DIR_OSP = $DIR_SRC/bap/osp-bits
+set DIR_CONVERT = $DIR_SRC/convert
+set DIR_COP = $DIR_SRC/cop
+set DIR_FROG = $DIR_SRC/frog
+set DIR_GETMCH = $DIR_SRC/getMCH
+set DIR_SCF = $DIR_SRC/scf
+
+
+main:
+
+	
+preamble:
+	echo ""
+	echo ""
+	echo "* Please answer the following questions."
+	echo "  Default answers to questions are given in square brackets."
+	echo "  If you require help at any stage respond with a ? to the question."
+	echo ""
+
+ask_staden_root:
+	set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
+
+ask_require_nonx_progs: 
+	# Ask whether to build the non-X programs.  An empty reply selects the
+	# default shown in brackets, a reply of ? prints help and asks again,
+	# a reply starting with y or n sets the answer, and any other reply
+	# asks the question again.
+	echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
+	set ANS_REQ_NONX = $<
+	if ("$ANS_REQ_NONX" == "?") then
+		echo "* If you do not have X windows on your system you will require"
+		echo "  these.  However, you will require Tektronix terminal emulation."
+		echo "  If you do not require all of the non-X programs, you should abort"
+		echo "  and manually make the ones you require."
+		echo ""
+		goto ask_require_nonx_progs
+	else if ("$ANS_REQ_NONX" != "") then
+		if ("$ANS_REQ_NONX" =~ [yY]*) then
+			set ANS_REQ_NONX=$YES
+		else if ("$ANS_REQ_NONX" =~ [nN]*) then
+			set ANS_REQ_NONX=$NO
+		else
+			# Unrecognised reply: ask again.
+			goto ask_require_nonx_progs
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_NONX=$DEF_REQ_NONX
+	endif
+
+ask_require_x_progs:
+	# Ask whether to build the X programs.  An empty reply selects the
+	# default shown in brackets, a reply of ? prints help and asks again,
+	# a reply starting with y or n sets the answer, and any other reply
+	# asks this same question again.
+	echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
+	set ANS_REQ_X = $<
+	if ("$ANS_REQ_X" == "?") then
+		echo "* These are the programs that require X windows."
+		echo "  If you do not require all of the X programs, you should abort"
+		echo "  and manually make the ones you require."
+
+		echo ""
+		goto ask_require_x_progs
+	else if ("$ANS_REQ_X" != "") then
+		if ("$ANS_REQ_X" =~ [yY]*) then
+			set ANS_REQ_X=$YES
+		else if ("$ANS_REQ_X" =~ [nN]*) then
+			set ANS_REQ_X=$NO
+		else
+			# Unrecognised reply: re-ask the X question.  Jumping back to
+			# the non-X question would make the user answer it a second time.
+			goto ask_require_x_progs
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_X=$DEF_REQ_X
+	endif
+
+
+ask_require_ted:
+	# Ask whether to build the trace editor ted.  The reply is read from
+	# standard input and normalised into ANS_REQ_TED as $YES or $NO.
+	echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
+	set ANS_REQ_TED = $<
+	if ("$ANS_REQ_TED" == "?") then
+		# Help requested: explain, then ask again.
+		echo "* This is the trace editor program.  It allows you to look at"
+		echo "  traces obtained from automated fluorescent sequencing machines."
+		echo ""
+		goto ask_require_ted
+	else if ("$ANS_REQ_TED" != "") then
+		# Only the first character of the reply is significant.
+		if ("$ANS_REQ_TED" =~ [yY]*) then
+			set ANS_REQ_TED=$YES
+		else if ("$ANS_REQ_TED" =~ [nN]*) then
+			set ANS_REQ_TED=$NO
+		else
+			# Unrecognised reply: ask again.
+			goto ask_require_ted
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_TED=$DEF_REQ_TED
+	endif
+
+
+
+ask_require_misc:
+	# Ask whether to build the miscellaneous utility programs.  An empty
+	# reply selects the default, a reply of ? prints help and asks again,
+	# a reply starting with y or n sets the answer, and any other reply
+	# asks the question again.
+	echo -n "Compile other programs [$DEF_REQ_MISC]? "
+	set ANS_REQ_MISC = $<
+	if ("$ANS_REQ_MISC" == "?") then
+		# Keep this list in step with the programs installed by the
+		# miscellaneous section of the installation proper.
+		echo "* Other programs include:"
+		echo "    alfsplit"
+		echo "    convert"
+		echo "    cop and cop-bap"
+		echo "    frog"
+		echo "    getABISampleName"
+		echo "    makeSCF"
+		echo "    trace2seq"
+		echo ""
+		goto ask_require_misc
+	else if ("$ANS_REQ_MISC" != "") then
+		if ("$ANS_REQ_MISC" =~ [yY]*) then
+			set ANS_REQ_MISC=$YES
+		else if ("$ANS_REQ_MISC" =~ [nN]*) then
+			set ANS_REQ_MISC=$NO
+		else
+			# Unrecognised reply: ask again.
+			goto ask_require_misc
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_MISC=$DEF_REQ_MISC
+	endif
+
+
+
+time_taken_warning:
+	# Warn that the build can be slow before asking for final confirmation.
+	echo ""
+	echo "The installation procedure is now ready to start."
+	echo ""
+	echo "**** Warning:"
+	echo "    The installation will take considerable time to complete.  If you"
+	echo "    are installing the whole Staden Package from scratch it could"
+	echo "    take as long as an hour for all executables to be compiled and"
+	echo "    installed."
+	echo ""
+
+ask_goahead:
+	# Final confirmation.  There is no answer variable to set here: the
+	# script either jumps away or falls through to the installation.
+	echo -n "Proceed with the installation [YES]? " 
+	set ANSWER=$<
+	if ("$ANSWER" == "?") then
+		echo "* Final confirmation to proceed with the installation.  Answer"
+		echo "  YES to proceed; otherwise, answer NO to abort the installation."
+		echo ""
+		goto ask_goahead
+	else if ("$ANSWER" != "") then
+		if ("$ANSWER" =~ [nN]*) then
+			# Reply starts with n: cancel, which exits with status 0.
+			goto chickens_exit
+		else if ("$ANSWER" !~ [yY]*) then
+			# Neither y nor n: ask again.
+			goto ask_goahead
+		endif
+	endif
+	# An empty reply or one starting with y falls through and proceeds.
+
+installation_proper:
+
+# make binaries directory if it doesn't exist
+
+	if (! -d $DIR_BIN) then
+		$MKDIR $DIR_BIN
+	endif
+
+	if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
+		echo ""
+		echo "+ Compiling miscellaneous library"
+
+		pushd $DIR_MISC > /dev/null
+
+		cd $DIR_BINARIES
+	        $MAKE all
+
+		popd > /dev/null
+
+	endif
+
+        if ("$ANS_REQ_NONX" == "$YES") then
+		echo ""
+		echo "+ Installing non X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+	        $MAKE nprogs lprogs
+                $INSTALL mep $DIR_BIN
+		$INSTALL nip $DIR_BIN
+		$INSTALL pip $DIR_BIN
+		$INSTALL sap $DIR_BIN
+		$INSTALL sapf $DIR_BIN
+		$INSTALL sip $DIR_BIN
+		$INSTALL splitp1 $DIR_BIN
+		$INSTALL splitp2 $DIR_BIN
+		$INSTALL splitp3 $DIR_BIN
+		$INSTALL sethelp $DIR_BIN
+		$INSTALL gip $DIR_BIN
+		$INSTALL nipl $DIR_BIN
+		$INSTALL pipl $DIR_BIN
+		$INSTALL sipl $DIR_BIN
+		$INSTALL dap $DIR_BIN
+		$INSTALL nipf $DIR_BIN
+		$INSTALL vep $DIR_BIN
+		$INSTALL rep $DIR_BIN
+		$INSTALL lip $DIR_BIN
+		#$INSTALL convert_project $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null		
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+	        $MAKE bap
+                $INSTALL bap $DIR_BIN
+		popd > /dev/null
+
+	endif
+
+	if ("$ANS_REQ_TED" == "$YES") then
+		echo ""
+		echo "+ Installing Trace editor"
+
+		pushd $DIR_TED > /dev/null
+		cd $DIR_BINARIES
+                $MAKE ted
+		$INSTALL ted $DIR_BIN
+		popd > /dev/null
+	endif
+
+	if ("$ANS_REQ_X" == "$YES") then
+		echo ""
+		echo "+ Installing X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+                $MAKE xprogs
+		$INSTALL xmep $DIR_BIN
+		$INSTALL xnip $DIR_BIN
+		$INSTALL xpip $DIR_BIN
+		$INSTALL xsap $DIR_BIN
+		$INSTALL xsip $DIR_BIN
+		$INSTALL xdap $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null		
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+	        $MAKE xbap
+                $INSTALL xbap $DIR_BIN
+		popd > /dev/null
+
+
+	endif
+
+	if ("$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Installing miscellaneous programs"
+
+		pushd $DIR_ABI > /dev/null
+		cd $DIR_BINARIES
+                $MAKE all
+		$INSTALL getABISampleName $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_ALF > /dev/null
+		cd $DIR_BINARIES
+                $MAKE alfsplit
+		$INSTALL alfsplit $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_CONVERT > /dev/null
+		cd $DIR_BINARIES
+                $MAKE convert
+		$INSTALL convert $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_COP > /dev/null
+		cd $DIR_BINARIES
+                $MAKE all
+		$INSTALL cop $DIR_BIN
+		$INSTALL cop-bap $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_FROG > /dev/null
+		cd $DIR_BINARIES
+                $MAKE frog
+		$INSTALL frog $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_GETMCH > /dev/null
+		cd $DIR_BINARIES
+                $MAKE trace2seq
+		$INSTALL trace2seq $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_SCF > /dev/null
+		cd $DIR_BINARIES
+                $MAKE makeSCF
+		$INSTALL makeSCF $DIR_BIN
+		popd > /dev/null
+
+
+
+	endif
+
+
+installation_done:
+	echo ""
+	echo "+ Installation completed"
+	echo ""
+
+	echo "  Some further initialisation is required in order to use the"
+	echo "  package.  csh users should insert the following in their .login"
+	echo "  files:"
+	echo "  "
+	echo "  	setenv STADENROOT $ANS_STADEN_ROOT"
+	echo '  	source $STADENROOT/staden.login'
+	echo "  "
+	echo "  Users of the Bourne shell, sh, should insert the following in"
+	echo "  their .profile:"
+	echo "  "
+	echo "  	STADENROOT=$ANS_STADEN_ROOT"
+	echo "  	export STADENROOT"
+	echo '  	. $STADENROOT/staden.profile'
+	echo "  "
+	echo "  These initialisations will alter the shell's search path so that"
+	echo "  it can find the programs in the STADEN Package"
+	echo "  "
+
+normal_exit:
+	exit 0
+
+chickens_exit:
+	echo ""
+	echo "+ Installation cancelled"
+	echo ""
+
+	exit 0
+
+end_failure:
+	unset noglob
+	echo ""
+	echo "Aborted STADEN Package installation on `date`" 
+	echo ""
+	exit 1
+
--- a/453
+++ b/453
@ -0,0 +1,453 @@
+#! /bin/csh -f
+#
+# staden_install - version 2.4
+#
+#	This is a prototype installation program.
+#
+# 9 March 1992
+#	Modified for installation on Sun, Alliant, etc
+#	No longer install 2rs
+#
+# 20 November 1992
+#	Now includes convert, cop, frog, getMCH and scf
+#
+# 25 November 1992
+#	SGI supported
+# 
+# 19 May 1993
+#	DEC Alpha, Solaris supported
+# 
+# Written by sd@uk.ac.cam.mrc-lmb
+#
+
+# prelim
+set prog = $0 ; set prog = $prog:t
+
+# Machines supported: al sun dec sgi alpha solaris
+#set MACHINE = `echo $prog | sed 's/.*-//'`
+set MACHINE = sun
+
+# For local (MRC-LMB) setup only
+#set LOCAL = `echo $prog | awk '/local/{print "YES";exit;}{print "NO";}'`
+set LOCAL = NO
+
+
+echo ""
+echo -n "Staden Package installation procedure - "
+switch (${MACHINE})
+	case "al":
+		echo "Alliant FX/2800 Concentrix version"
+		set MAKE = "make -sk"
+		breaksw
+	case "sun":
+		echo "SunOS version"
+		set MAKE = "make -sk"
+		breaksw
+	case "dec":
+		echo "DEC Ultrix (mips) version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "sgi":
+		echo "Silicon Graphics Iris version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "alpha":
+		echo "DEC Alpha OSF/1 version"
+		set MAKE = "gmake -sk"
+		breaksw
+	case "solaris":
+		echo "Solaris version"
+		set MAKE = "make -sk"
+		breaksw
+	default:
+		echo "Panic. Unknown version"
+		exit 1
+endsw
+echo ""
+echo "* starting initialization...please wait."
+echo ""
+
+# Binary fork of source directory
+if ($LOCAL == "YES") then
+    set DIR_BINARIES = ${MACHINE}-binaries
+    set DIR_PROGS = ${MACHINE}-bin
+else
+    set DIR_BINARIES = .
+    set DIR_PROGS = bin
+    set MAKE = "$MAKE -f makefile-${MACHINE}"
+endif
+
+init:
+# Set useful shell variables
+set YES="YES";
+set NO="NO"
+
+# set/unset some .cshrc envs.
+unset noclobber
+set noglob
+
+# set interrupt trap
+onintr end_failure
+
+# Make dir command
+set MKDIR = "mkdir"
+
+# Copy command
+set CP = "cp -p"
+
+# Install command
+#set INSTALL = "install"
+#set INSTALL = "mv"
+set INSTALL = "cp"
+
+# Set up default responses
+set DEF_STADEN_ROOT = `pwd`
+
+set DEF_REQ_NONX = "$YES"
+set DEF_REQ_X = "$YES"
+set DEF_REQ_TED = "$YES"
+set DEF_REQ_MISC = "$YES"
+
+# directories
+set DIR_SRC = $DEF_STADEN_ROOT/src
+set DIR_BIN = $DEF_STADEN_ROOT/$DIR_PROGS
+set DIR_MISC = $DIR_SRC/Misc
+set DIR_STADEN = $DIR_SRC/staden
+set DIR_TED = $DIR_SRC/ted
+set DIR_ABI = $DIR_SRC/abi
+set DIR_ALF = $DIR_SRC/alf
+set DIR_BAP = $DIR_SRC/bap
+set DIR_OSP = $DIR_SRC/bap/osp-bits
+set DIR_CONVERT = $DIR_SRC/convert
+set DIR_COP = $DIR_SRC/cop
+set DIR_FROG = $DIR_SRC/frog
+set DIR_GETMCH = $DIR_SRC/getMCH
+set DIR_SCF = $DIR_SRC/scf
+
+
+main:
+
+	
+preamble:
+	echo ""
+	echo ""
+	echo "* Please answer the following questions."
+	echo "  Default answers to questions are given in square brackets."
+	echo "  If you require help at any stage respond with a ? to the question."
+	echo ""
+
+ask_staden_root:
+	set ANS_STADEN_ROOT = $DEF_STADEN_ROOT
+
+ask_require_nonx_progs: 
+	# Ask whether to build the non-X programs.  An empty reply selects the
+	# default shown in brackets, a reply of ? prints help and asks again,
+	# a reply starting with y or n sets the answer, and any other reply
+	# asks the question again.
+	echo -n "Compile all the non-X programs in the Staden Package [$DEF_REQ_NONX]? "
+	set ANS_REQ_NONX = $<
+	if ("$ANS_REQ_NONX" == "?") then
+		echo "* If you do not have X windows on your system you will require"
+		echo "  these.  However, you will require Tektronix terminal emulation."
+		echo "  If you do not require all of the non-X programs, you should abort"
+		echo "  and manually make the ones you require."
+		echo ""
+		goto ask_require_nonx_progs
+	else if ("$ANS_REQ_NONX" != "") then
+		if ("$ANS_REQ_NONX" =~ [yY]*) then
+			set ANS_REQ_NONX=$YES
+		else if ("$ANS_REQ_NONX" =~ [nN]*) then
+			set ANS_REQ_NONX=$NO
+		else
+			# Unrecognised reply: ask again.
+			goto ask_require_nonx_progs
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_NONX=$DEF_REQ_NONX
+	endif
+
+ask_require_x_progs:
+	# Ask whether to build the X programs.  An empty reply selects the
+	# default shown in brackets, a reply of ? prints help and asks again,
+	# a reply starting with y or n sets the answer, and any other reply
+	# asks this same question again.
+	echo -n "Compile all the X programs in the Staden Package [$DEF_REQ_X]? "
+	set ANS_REQ_X = $<
+	if ("$ANS_REQ_X" == "?") then
+		echo "* These are the programs that require X windows."
+		echo "  If you do not require all of the X programs, you should abort"
+		echo "  and manually make the ones you require."
+
+		echo ""
+		goto ask_require_x_progs
+	else if ("$ANS_REQ_X" != "") then
+		if ("$ANS_REQ_X" =~ [yY]*) then
+			set ANS_REQ_X=$YES
+		else if ("$ANS_REQ_X" =~ [nN]*) then
+			set ANS_REQ_X=$NO
+		else
+			# Unrecognised reply: re-ask the X question.  Jumping back to
+			# the non-X question would make the user answer it a second time.
+			goto ask_require_x_progs
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_X=$DEF_REQ_X
+	endif
+
+
+ask_require_ted:
+	# Ask whether to build the trace editor ted.  The reply is read from
+	# standard input and normalised into ANS_REQ_TED as $YES or $NO.
+	echo -n "Compile the trace editing program ted [$DEF_REQ_TED]? "
+	set ANS_REQ_TED = $<
+	if ("$ANS_REQ_TED" == "?") then
+		# Help requested: explain, then ask again.
+		echo "* This is the trace editor program.  It allows you to look at"
+		echo "  traces obtained from automated fluorescent sequencing machines."
+		echo ""
+		goto ask_require_ted
+	else if ("$ANS_REQ_TED" != "") then
+		# Only the first character of the reply is significant.
+		if ("$ANS_REQ_TED" =~ [yY]*) then
+			set ANS_REQ_TED=$YES
+		else if ("$ANS_REQ_TED" =~ [nN]*) then
+			set ANS_REQ_TED=$NO
+		else
+			# Unrecognised reply: ask again.
+			goto ask_require_ted
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_TED=$DEF_REQ_TED
+	endif
+
+
+
+ask_require_misc:
+	# Ask whether to build the miscellaneous utility programs.  An empty
+	# reply selects the default, a reply of ? prints help and asks again,
+	# a reply starting with y or n sets the answer, and any other reply
+	# asks the question again.
+	echo -n "Compile other programs [$DEF_REQ_MISC]? "
+	set ANS_REQ_MISC = $<
+	if ("$ANS_REQ_MISC" == "?") then
+		# Keep this list in step with the programs installed by the
+		# miscellaneous section of the installation proper.
+		echo "* Other programs include:"
+		echo "    alfsplit"
+		echo "    convert"
+		echo "    cop and cop-bap"
+		echo "    frog"
+		echo "    getABISampleName"
+		echo "    makeSCF"
+		echo "    trace2seq"
+		echo ""
+		goto ask_require_misc
+	else if ("$ANS_REQ_MISC" != "") then
+		if ("$ANS_REQ_MISC" =~ [yY]*) then
+			set ANS_REQ_MISC=$YES
+		else if ("$ANS_REQ_MISC" =~ [nN]*) then
+			set ANS_REQ_MISC=$NO
+		else
+			# Unrecognised reply: ask again.
+			goto ask_require_misc
+		endif
+	else
+		# Empty reply: take the default.
+		set ANS_REQ_MISC=$DEF_REQ_MISC
+	endif
+
+
+
+time_taken_warning:
+	# Warn that the build can be slow before asking for final confirmation.
+	echo ""
+	echo "The installation procedure is now ready to start."
+	echo ""
+	echo "**** Warning:"
+	echo "    The installation will take considerable time to complete.  If you"
+	echo "    are installing the whole Staden Package from scratch it could"
+	echo "    take as long as an hour for all executables to be compiled and"
+	echo "    installed."
+	echo ""
+
+ask_goahead:
+	# Final confirmation.  There is no answer variable to set here: the
+	# script either jumps away or falls through to the installation.
+	echo -n "Proceed with the installation [YES]? " 
+	set ANSWER=$<
+	if ("$ANSWER" == "?") then
+		echo "* Final confirmation to proceed with the installation.  Answer"
+		echo "  YES to proceed; otherwise, answer NO to abort the installation."
+		echo ""
+		goto ask_goahead
+	else if ("$ANSWER" != "") then
+		if ("$ANSWER" =~ [nN]*) then
+			# Reply starts with n: cancel, which exits with status 0.
+			goto chickens_exit
+		else if ("$ANSWER" !~ [yY]*) then
+			# Neither y nor n: ask again.
+			goto ask_goahead
+		endif
+	endif
+	# An empty reply or one starting with y falls through and proceeds.
+
+installation_proper:
+
+# make binaries directory if it doesn't exist
+
+	if (! -d $DIR_BIN) then
+		$MKDIR $DIR_BIN
+	endif
+
+	if ("$ANS_REQ_MISC" == "$YES" || "$ANS_REQ_X" == "$YES" || "$ANS_REQ_NONX" == "$YES" ) then
+		echo ""
+		echo "+ Compiling miscellaneous library"
+
+		pushd $DIR_MISC > /dev/null
+
+		cd $DIR_BINARIES
+	        $MAKE all
+
+		popd > /dev/null
+
+	endif
+
+        if ("$ANS_REQ_NONX" == "$YES") then
+		echo ""
+		echo "+ Installing non X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+	        $MAKE nprogs lprogs
+                $INSTALL mep $DIR_BIN
+		$INSTALL nip $DIR_BIN
+		$INSTALL pip $DIR_BIN
+		$INSTALL sap $DIR_BIN
+		$INSTALL sapf $DIR_BIN
+		$INSTALL sip $DIR_BIN
+		$INSTALL splitp1 $DIR_BIN
+		$INSTALL splitp2 $DIR_BIN
+		$INSTALL splitp3 $DIR_BIN
+		$INSTALL sethelp $DIR_BIN
+		$INSTALL gip $DIR_BIN
+		$INSTALL nipl $DIR_BIN
+		$INSTALL pipl $DIR_BIN
+		$INSTALL sipl $DIR_BIN
+		$INSTALL dap $DIR_BIN
+		$INSTALL nipf $DIR_BIN
+		$INSTALL vep $DIR_BIN
+		$INSTALL rep $DIR_BIN
+		$INSTALL lip $DIR_BIN
+		#$INSTALL convert_project $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null		
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+	        $MAKE bap
+                $INSTALL bap $DIR_BIN
+		popd > /dev/null
+
+	endif
+
+	if ("$ANS_REQ_TED" == "$YES") then
+		echo ""
+		echo "+ Installing Trace editor"
+
+		pushd $DIR_TED > /dev/null
+		cd $DIR_BINARIES
+                $MAKE ted
+		$INSTALL ted $DIR_BIN
+		popd > /dev/null
+	endif
+
+	if ("$ANS_REQ_X" == "$YES") then
+		echo ""
+		echo "+ Installing X programs"
+
+		pushd $DIR_STADEN > /dev/null
+		cd $DIR_BINARIES
+                $MAKE xprogs
+		$INSTALL xmep $DIR_BIN
+		$INSTALL xnip $DIR_BIN
+		$INSTALL xpip $DIR_BIN
+		$INSTALL xsap $DIR_BIN
+		$INSTALL xsip $DIR_BIN
+		$INSTALL xdap $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_OSP > /dev/null
+		cd $DIR_BINARIES
+		$MAKE
+		popd > /dev/null		
+
+		pushd $DIR_BAP > /dev/null
+		cd $DIR_BINARIES
+	        $MAKE xbap
+                $INSTALL xbap $DIR_BIN
+		popd > /dev/null
+
+
+	endif
+
+	if ("$ANS_REQ_MISC" == "$YES") then
+		echo ""
+		echo "+ Installing miscellaneous programs"
+
+		pushd $DIR_ABI > /dev/null
+		cd $DIR_BINARIES
+                $MAKE all
+		$INSTALL getABISampleName $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_ALF > /dev/null
+		cd $DIR_BINARIES
+                $MAKE alfsplit
+		$INSTALL alfsplit $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_CONVERT > /dev/null
+		cd $DIR_BINARIES
+                $MAKE convert
+		$INSTALL convert $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_COP > /dev/null
+		cd $DIR_BINARIES
+                $MAKE all
+		$INSTALL cop $DIR_BIN
+		$INSTALL cop-bap $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_FROG > /dev/null
+		cd $DIR_BINARIES
+                $MAKE frog
+		$INSTALL frog $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_GETMCH > /dev/null
+		cd $DIR_BINARIES
+                $MAKE trace2seq
+		$INSTALL trace2seq $DIR_BIN
+		popd > /dev/null
+
+		pushd $DIR_SCF > /dev/null
+		cd $DIR_BINARIES
+                $MAKE makeSCF
+		$INSTALL makeSCF $DIR_BIN
+		popd > /dev/null
+
+
+
+	endif
+
+
+installation_done:
+	echo ""
+	echo "+ Installation completed"
+	echo ""
+
+	echo "  Some further initialisation is required in order to use the"
+	echo "  package.  csh users should insert the following in their .login"
+	echo "  files:"
+	echo "  "
+	echo "  	setenv STADENROOT $ANS_STADEN_ROOT"
+	echo '  	source $STADENROOT/staden.login'
+	echo "  "
+	echo "  Users of the Bourne shell, sh, should insert the following in"
+	echo "  their .profile:"
+	echo "  "
+	echo "  	STADENROOT=$ANS_STADEN_ROOT"
+	echo "  	export STADENROOT"
+	echo '  	. $STADENROOT/staden.profile'
+	echo "  "
+	echo "  These initialisations will alter the shell's search path so that"
+	echo "  it can find the programs in the STADEN Package"
+	echo "  "
+
+normal_exit:
+	exit 0
+
+chickens_exit:
+	echo ""
+	echo "+ Installation cancelled"
+	echo ""
+
+	exit 0
+
+end_failure:
+	unset noglob
+	echo ""
+	echo "Aborted STADEN Package installation on `date`" 
+	echo ""
+	exit 1
+
--- a/Version-1993.0.7
+++ b/Version-1993.0.7
@ -0,0 +1,91 @@
+Wed Jul 7
+    *Version-1993.0.7*
+    New xbap and ted.
+    Can use Ctrl as well as Meta to shift cutoffs in contig editor.
+    Code to read in ABI traces now robust to ABI problem files, where
+    called base order is not base position order.
+
+Thu Jul 1
+    *Version-1993.0.6*
+    New xbap and bap, to fix bugs.
+    Break Contig was sometimes not recalculating consensus length correctly.
+    Contig Edit was truncating reading name lengths at 10 characters.
+
+Thu Jun 16
+    *Version-1993.0.5*
+    New xbap and bap executables. RS changed assembly in bap so that
+    when entry is not permitted the program asks for the percentage
+    mismatch - this allows display of alignments for all levels of
+    mismatch.
+
+Mon Jun 14 14:54:43 BST 1993
+    *Version-1993.0.4*
+    Bug in xdap. It was compiled with xbap's edUtils.h by mistake.
+
+Fri Jun 11 17:50:13 BST 1993
+    *Version-1993.0.3*
+    Bugs in bap/xbap fixed. New executables included.
+
+Thu Jun  3 13:53:38 BST 1993
+    *Version-1993.0.2*
+    Bugs in bap/xbap fixed. New executables included.
+
+Thu May 20 14:45:38 BST 1993
+    *Version-1993.0.1*
+    Changes to makefiles and Staden_install
+
+Fri Mar  5 11:27:22 GMT 1993
+    *Version-1993.0*
+    Now for DEC Alpha and Solaris
+    bap/xbap now includes double stranding and auto-creation of oligos
+
+Tue Jan 26 11:54:36 GMT 1993
+    *Version-1992.3.1*
+    Bug fixes
+    1. indexseqlibs/genbentryname1.c
+    2. convert bugs + new programs
+
+Mon Nov 23 13:50:39 WET 1992
+    *Version-1992.3*
+    Includes bap/xbap and utility programs
+
+
+Wed Sep 30 11:18:09 BST 1992
+    *Version-1992.2.1*
+    Source changes since last release
+	bug fixes to postscript output, sequence library programs
+    New sun and dec executables
+
+
+Thu Aug 27 15:27:05 BST 1992
+
+    *Version-1992.2*
+    
+
+Mon Jul 27 13:01:37 WET 1992
+
+    *Version-1992.1.3*
+    Miscellaneous bug fixes and enhancements
+    New sun and dec executables
+
+
+Tue Jun 16 16:07:41 BST 1992
+
+    *Version-1992.1.2*
+    Sun sparc executables now linked with cc and not gcc.
+    New makefile-sun files
+    New sources for hitNtrg.c and freetext4.c (indexseqlibs), and
+    tagU2.c (staden)
+
+
+Wed May 27 17:12:36 BST 1992
+
+    *Version-1992.1.1*
+    Inclusion of vep (vector excision program), plus minor changes and bug fixes
+
+
+Tue May 26 11:10:28 WET 1992
+
+    *Version-1992.1*
+    This version includes the port to DEC Ultrix (mips)
+
--- a/bin/alfsplit
+++ b/bin/alfsplit
--- a/bin/bap
+++ b/bin/bap
--- a/bin/convert
+++ b/bin/convert
--- a/bin/cop
+++ b/bin/cop
--- a/bin/cop-bap
+++ b/bin/cop-bap
--- a/bin/dap
+++ b/bin/dap
--- a/bin/frog
+++ b/bin/frog
--- a/bin/getABISampleName
+++ b/bin/getABISampleName
--- a/bin/gip
+++ b/bin/gip
--- a/bin/lip
+++ b/bin/lip
--- a/bin/makeSCF
+++ b/bin/makeSCF
--- a/bin/mep
+++ b/bin/mep
--- a/bin/nip
+++ b/bin/nip
--- a/bin/nipf
+++ b/bin/nipf
--- a/bin/nipl
+++ b/bin/nipl
--- a/bin/pip
+++ b/bin/pip
--- a/bin/pipl
+++ b/bin/pipl
--- a/bin/rep
+++ b/bin/rep
--- a/bin/sap
+++ b/bin/sap
--- a/bin/sapf
+++ b/bin/sapf
--- a/bin/sethelp
+++ b/bin/sethelp
--- a/bin/sip
+++ b/bin/sip
--- a/bin/sipl
+++ b/bin/sipl
--- a/bin/splitp1
+++ b/bin/splitp1
--- a/bin/splitp2
+++ b/bin/splitp2
--- a/bin/splitp3
+++ b/bin/splitp3
--- a/bin/ted
+++ b/bin/ted
--- a/bin/trace2seq
+++ b/bin/trace2seq
--- a/bin/vep
+++ b/bin/vep
--- a/bin/xbap
+++ b/bin/xbap
--- a/bin/xbap.1
+++ b/bin/xbap.1
--- a/bin/xdap
+++ b/bin/xdap
--- a/bin/xmep
+++ b/bin/xmep
--- a/bin/xnip
+++ b/bin/xnip
--- a/bin/xpip
+++ b/bin/xpip
--- a/bin/xsap
+++ b/bin/xsap
--- a/bin/xsip
+++ b/bin/xsip
--- a/doc/Converting_Sap_Databases
+++ b/doc/Converting_Sap_Databases
@ -0,0 +1,32 @@
+Converting Sap Databases To Be Used With XDAP          SD  10 July 1991
+=======================================================================
+
+The sequence assembly programmes dap and xdap are based on the programs
+sap and xsap, with major modifications. For a concise summary of the
+new features I refer you to Rodger and my paper, "A sequence assembly
+and editing program for efficient management of large projects"
+(Nucleic Acids Research, in press)
+
+The need for storing extra information in project databases has
+resulted in the creation of two files. For users who wish to use old
+(sap) databases with xdap, additional files must be created to use all
+the new features. The program 'convert_project' does this.  It is
+interactive, and asks you for names of relevant files, version numbers
+etc. Here is a sample program dialogue:
+
+
+    % convert_project
+    Database conversion program
+    Converts *.RD? file to *.TG? and *.CC? files
+
+    Project name ? test
+    Version ? 0
+    Conversion completed.
+
+
+Further, please ensure that the file TAGDB is in your project
+directory. Copies can be found in $STADTABL.  Alternatively ensure that
+the environment TAGDB variable is set to $STADTABL/TAGDB
+
+    setenv TAGDB $STADTABL/TAGDB
+
--- a/doc/README
+++ b/doc/README
@ -0,0 +1,30 @@
+Processing and printing LaTeX sources
+-------------------------------------
+
+Given a source file src.tex, run LaTeX to generate the bibliographic
+references:
+
+	latex src
+
+Now run BibTeX to search the bibliography for them:
+
+	bibtex src
+
+Now run LaTeX twice, first to pick up the references, second to bind
+forward references:
+
+	latex src
+	latex src
+
+This will have generated a src.dvi output file. Now we convert this
+to PostScript:
+
+	dvi2ps src.dvi >src.ps
+
+Now we can print this out:
+
+	lpr src.ps
+
+Most of the above is only necessary if you are building something from
+scratch, but it's best to go through it anyway until you fully
+understand how LaTeX works.
--- a/doc/gip-menu.PS
+++ b/doc/gip-menu.PS
@ -0,0 +1,131 @@
+%!
+/cm {28.2 mul} def
+/BOXSIZE 2 cm def
+
+/boxcen
+{
+% move to centre of box
+BOXSIZE mul 2 div BOXSIZE 2 div rmoveto
+exch
+% move back by correct amount to ensure letter is in centre of box
+dup stringwidth
+pop 2 div neg % halve & neg x offset
+% y offset appears to be zero! - so use constant 'square' char (eg X)
+(X) stringwidth pop 2 div neg
+} def
+
+/letter
+{
+dup BOXSIZE mul 0 rlineto
+0 BOXSIZE rlineto
+dup BOXSIZE mul neg 0 rlineto
+0 BOXSIZE neg rlineto
+closepath
+gsave
+dup boxcen rmoveto
+show
+stroke
+grestore
+BOXSIZE mul 0 rmoveto
+} def
+
+/nextline {0 BOXSIZE neg rmoveto} def
+
+/line
+{
+gsave
+1 letter
+1 letter
+1 letter
+1 letter
+grestore
+nextline
+} def
+
+/Times-Roman findfont 50 scalefont setfont
+newpath
+5 setlinewidth
+200 650 translate
+0 0 moveto
+%2 setlinecap
+
+gsave
+(A) (G) (C) (T) line
+(3) (4) (1) (2) line
+(B) (H) (D) (V) line
+(M) (N) (K) (L) line
+(-) (X) (Y) (R) line
+(8) (7) (6) (5) line
+/Times-Roman findfont 25 scalefont setfont
+gsave
+(DELETE) 2 letter
+(RESET) 2 letter
+grestore
+nextline
+/Times-Roman findfont 35 scalefont setfont
+gsave
+(STOP) 4 letter
+grestore
+nextline
+gsave
+(START) 4 letter
+grestore
+nextline
+gsave
+(CONFIRM) 4 letter
+grestore
+nextline
+% yukky from here on
+gsave
+0 BOXSIZE rmoveto
+1 cm 0 rlineto stroke
+grestore
+(ORIGIN) dup 4 boxcen rmoveto show pop
+(ORIGIN) stringwidth neg exch neg exch rmoveto
+(X) stringwidth exch 2 div rmoveto
+-5 0 rmoveto
+2 setlinewidth
+-45 21 rlineto
+6 0 rlineto
+-6 0 rmoveto
+0 -6 rlineto
+stroke
+grestore
+2 setlinewidth
+0 BOXSIZE 1.4 mul rmoveto
+6 6 rlineto
+-6 -6 rmoveto
+6 -6 rlineto
+-6 6 rmoveto
+80 0 rlineto
+5 -6 rmoveto
+/Times-Roman findfont 30 scalefont setfont
+(8 cm) show
+5 6 rmoveto
+76 0 rlineto
+-6 6 rlineto
+6 -6 rmoveto
+-6 -6 rlineto
+stroke
+0 0 moveto
+BOXSIZE .4 mul neg BOXSIZE rmoveto
+currentpoint translate
+newpath
+0 0 moveto
+90 rotate
+-6 6 rlineto
+6 -6 rmoveto
+-6 -6 rlineto
+6 6 rmoveto
+-244 0 rlineto
+-84 0 rmoveto
+0 -6 rmoveto
+(20 cm) show
+0 6 rmoveto
+-84 0 rmoveto
+-227 0 rlineto
+6 6 rlineto
+-6 -6 rmoveto
+6 -6 rlineto
+stroke
+showpage
--- a/doc/install.PS
+++ b/doc/install.PS
--- a/doc/install.tex
+++ b/doc/install.tex
@ -0,0 +1,172 @@
+\documentstyle[a4,11pt]{article}
+
+\title{Installing the Staden Package}
+\author{Simon Dear}
+\date{21 May 1993}
+
+
+
+\begin{document}
+\maketitle
+
+
+
+\section{Introduction}
+
+On the accompanying tape you will find executables for 
+one of SunOS 4.x, Sun
+Solaris 2.x, DEC Ultrix, DEC OSF/1 and Silicon Graphics SGI operating systems.
+Also there are sources for all the programs in the Staden package.
+Programs in the package are:
+\begin{description}
+
+\item[mep and xmep] Motif exploration program.
+\item[nip and xnip] Nucleotide interpretation program.
+\item[nipl] Nucleotide interpretation program (library).
+Searches nucleotide libraries for patterns of motifs.
+\item[pip and xpip] Protein interpretation program.
+\item[pipl] Protein interpretation program (library).
+Searches protein libraries for patterns of motifs.
+\item[sip and xsip] Similarity investigation program.
+\item[sipl] Similarity investigation program (library).
+Compares a probe protein or nucleic acid sequence against
+a library of sequences.
+\item[sap and xsap] The original sequence assembly program.
+\item[bap and xbap] Our latest, most advanced sequence assembly program.
+\item[dap and xdap] An obsolete assembly program, superseded by {\em bap}.
+\item[lip] Library interface program.
+\item[rep] Repeat examination program.
+\item[ted] X windows utility for displaying and editing
+fluorescent sequencing machine traces.
+\item[splitp1, splitp2 and splitp3] Refer to help/SPLITP.MEM.
+\item[sethelp] Builds online help files.
+\item[gip] Gel input program.
+\item[convert] Converts between {\em xdap\/} and {\em xbap\/} databases.
+\item[cop and cop-bap] Checks completed {\em xdap\/} and {\em xbap\/}
+databases for editing errors.
+\item[trace2seq] Extracts sequence from trace files.
+\item[getABISampleName] Extracts sample names from ABI trace files.
+\item[makeSCF] Converts existing trace files to the compact
+SCF format.
+\item[alfsplit] Splits the Pharmacia A.L.F. gel
+file into multiple files, one for each sample.
+\item[frog] Relabels lanes in ABI trace files.
+\item[+ numerous scripts (including {\em squirrel (v1.4)\/})]
+
+\end{description}
+
+
+\section{Requirements}
+
+You will need a tape drive to read the software off the distribution
+tape (QIC-150, TK50, or Exabyte). You will also need a large amount of
+disk storage to accommodate the whole package. For release
+version-1993.0, requirements were
+31Mb (SunOS 4.x),
+36Mb (Sun Solaris 2.x),
+30Mb (DEC Ultrix),
+37Mb (DEC OSF/1)
+and
+27Mb (Silicon Graphics SGI).
+
+
+To compile the Staden package you will require:
+\begin{itemize}
+\item An ANSI C compiler.
+\item A FORTRAN-77 compiler.
+\item X11 (Release 4 or 5).
+\item GNU make (except with SunOS and Solaris 2.x.)
+\end{itemize}
+
+\section{Installation}
+
+To install the package,
+\begin{enumerate}
+\item Create a directory where you would like the software to be
+placed. You may have to be superuser to do this.
+      \begin{verbatim} mkdir /home/Staden\end{verbatim}
+\item Change to this directory.
+      \begin{verbatim} cd /home/Staden\end{verbatim}
+\item Place the tape into the tape unit.
+\item Extract the software off the distribution tape (NOTE: the device name may be
+different on your machine):
+      \begin{verbatim} tar xvf /dev/rst0\end{verbatim}
+\item C shell users should set the environment variable {\bf STADENROOT}
+to be the directory where the package is installed and source the file
+{\em staden.login} found there. This is best done by adding lines to their
+{\em .login} file:
+\begin{verbatim}
+    setenv STADENROOT /home/Staden
+    source $STADENROOT/staden.login
+\end{verbatim}
+Users of the Bourne shell, sh, should similarly add lines to their {\em .profile} file:
+\begin{verbatim}  
+    STADENROOT=/home/Staden
+    export STADENROOT
+    . $STADENROOT/staden.profile
+\end{verbatim}
+
+The startup routines set environment variables and modify the shell's
+search path so that it can find the programs in the Staden Package.
+When users next log on to the system, they will be able to use the
+programs.
+
+\end{enumerate}
+
+
+\section {Installation on Unsupported Platforms}
+
+Install the software as you would for a supported machine.  You will
+need to remake all executables.  The script {\em Staden\_install} can
+be used to help recompile the package. A large number of
+assumptions have been made, and you may need to change the makefiles
+to suit your system.
+
+The sources have been organised into subdirectories of the directory
+{\bf src}. In {\bf Misc} are routines common to many programs. They
+should be made first.  In {\bf staden} are all the programs of the
+Staden suite ({\em mep}, {\em nip}, {\em pip}, {\em sap}, {\em sip},
+{\em dap}, {\em gip}, {\em vep}, {\em lip} and {\em rep}) with the
+exception of {\em bap}.  Code for our latest sequence assembly program
+{\em bap} is in directories {\bf bap} and {\bf bap/osp-bits}.  Make
+the objects in {\bf staden} first, then the ones in {\bf
+bap/osp-bits}, and finally the ones in {\bf bap}. In {\bf ted} is the
+trace editing program.
+
+
+\section {Other Software Provided}
+
+Other software and scripts can be found in the {\bf alf\/}, {\bf
+abi\/}, {\bf cop\/}, {\bf getMCH\/}, {\bf scf\/}, {\bf frog\/} and {\bf
+scripts}
+directories.
+Each directory contains documentation describing the programs
+contained.
+
+Since release version-1993.0 we have distributed the {\em squirrel (v1.4)}
+package. Please read the disclaimer that accompanies this software.
+Additional sources and scripts can be found in {\bf expGetSeq}, {\bf vepe},
+{\bf newted} and {\bf squirrel-1.4} directories.
+
+Many scripts (including {\em squirrel}) and filters were developed at the MRC-LMB for
+{\bf INTERNAL USE ONLY}.
+We are aware that people elsewhere will want to develop
+similar software.
+We include them in the Staden Package merely as {\bf EXAMPLES} of
+what has been achieved elsewhere.
+{\bf THESE SCRIPTS WILL NOT WORK ON YOUR SYSTEM WITHOUT MODIFICATION.}
+
+\section {When All Else Fails...}
+If you have any problems please contact the authors,
+\mbox{Rodger Staden}
+\mbox{(\em rs@mrc-lmba.cam.ac.uk\/)},
+\mbox{Simon Dear}
+\mbox{(\em sd@mrc-lmba.cam.ac.uk\/)}
+and
+\mbox{James Bonfield}
+\mbox{(\em jkb@mrc-lmba.cam.ac.uk\/)},
+by email or by writing to us at:
+MRC Laboratory of Molecular Biology, Hills Road, Cambridge, \mbox{CB2 2QH}, U.K.
+We also welcome general comments on the package.
+
+\end{document}
--- a/doc/manual.rtf
+++ b/doc/manual.rtf
--- a/doc/ted.PS
+++ b/doc/ted.PS
--- a/doc/ted.tex
+++ b/doc/ted.tex
@ -0,0 +1,213 @@
+\documentstyle[12pt]{article}
+
+\title{A trace display and editing program for data from fluorescence based 
+sequencing machines}
+\author{Timothy Gleeson \and LaDeana Hillier}
+
+\begin{document}
+\maketitle
+\section*{}
+\subsection*{}
+\subsubsection*{ABSTRACT}
+
+``Ted'' ({\em T}race {\em ed}itor) 
+is a graphical editor for sequence and trace data from automated 
+fluorescence sequencing machines.  It provides facilities 
+for viewing sequence and trace data (in top or bottom strand 
+orientation), for editing the base sequence,  for 
+automated or manual trimming of the head (vector) and tail 
+(uncertain data) from the sequence, for vertical and horizontal trace 
+scaling, for keeping a history of sequence editing, and for output of 
+the edited sequence.  Ted has been used extensively in the C. 
+elegans genome sequencing project,
+both as a stand-alone program and integrated into 
+the Staden sequence assembly package, and  has 
+greatly aided in the efficiency 
+and accuracy of sequence editing.  It runs in the X 
+windows environment on Sun workstations and is available from the 
+authors.  Ted currently supports sequence and trace data from the ABI 
+373A and Pharmacia A.L.F. sequencers.
+
+\subsubsection*{INTRODUCTION}
+	Time involved in sequence editing is extensive, and anything easing 
+that burden will improve the efficiency of any major sequencing 
+project.  Having sequence and trace data available online in easily
+manipulable form is invaluable. Ted (a Trace-EDitor) was developed to 
+fill this role in the C. elegans genome 
+sequencing project [1]. 
+
+\subsubsection*{METHODS}
+
+{\em Computing Design and Implementation.}
+When designing ted, we had a number of specific computing goals 
+in mind including portability and adaptability.  For portability, we 
+chose to write ted in ANSI C using the X windowing system and the 
+Xaw toolkit.  X provides basic capabilities for the creation and use 
+of windows, and the toolkit contains a number of pre-packaged 
+components, such as the ``sliders'' used for scrolling. X also allows 
+site, user and per-run defaults to be set.  Adaptability is also an 
+important goal since we are providing a new function to 
+research groups who are constantly adding new requirements.  
+
+	Stylistically, we have followed an ``Abstract Data Type''
+discipline.  In this discipline, a program is split into a number of 
+modules which provide separate, well-defined functions.  We 
+separate the interface of a module from its implementation.  For 
+example, a unified internal sequence format is used.  This can store 
+a varying amount of information.  However, there is a clear and 
+simple interface by which the rest of the program accesses this 
+module.  Such a style is not well supported by C, but its adoption has 
+been very successful.  The addition of new sequencing machines, and 
+thus new external data formats, may cause some changes in the 
+internal representation of the sequence but should not affect  
+the rest of the program.
+
+	Ted accepts a large number of optional command line arguments,
+many of which can also be specified as system defaults. This
+supports a mode of working whereby ted is invoked not directly by the
+user but instead by a script or another application which supplies
+arguments appropriate to the editing task.
+
+
+{\em Graphical Interface.}
+Ted currently accepts data from two fluorescence based sequencing
+machines, the Pharmacia A.L.F. and the ABI 373A.
+The sequencing machine data consists of 
+four traces of fluorescence levels together with the machine's 
+interpretation, which is a sequence of bases.  
+Ted displays 
+the traces and the machine-generated base list.  
+A second, initially identical, list of bases is provided for correction 
+by the user.
+
+	Ted has an X windows based 
+graphical interface. The trace file
+can either be input from the command line or by 
+clicking on the INPUT button after the program has been invoked.  
+Other parameters which the user may specify on the
+command line include: the output 
+file name; a base position or sequence string on which the trace is 
+to be centered;  a default trace magnification;  a 5' vector sequence 
+for automated elimination of the sequence head (vector); top or 
+bottom strand orientation; or any of the usual X-window parameters (e.g. 
+display, geometry...).
+
+	The graphics display (Figure 1) consists of the control 
+panel, the base position information, the original and edited sequence 
+data, and the graphical representation of the trace.  The user may 
+begin by using the control panel INPUT button to input a new trace 
+file at which time the user selects whether to view the sequence
+and trace in top or bottom strand orientation.
+The trace file is displayed and, if a 5' vector sequence has been 
+specified on the command line, the program attempts to select a 
+cutoff point corresponding to the vector sequence at the ``head'' of the 
+trace file.  The bases beyond the ``cutoff'' point are  
+displayed on a shaded background.  The user may modify the cutoff 
+position by clicking on the ``Adj left cut'' button and clicking on the 
+position of the desired cutoff.  Similarly, the user may adjust the 
+right cutoff of the sequence (chosen by starting at the 5' end of the 
+sequence and looking for the first occurrence when 2 out of 5 bases 
+are 'N') by scrolling along the sequence to that point, clicking on the 
+``Adj right cut'' button, and clicking on the appropriate base.  
+Automation of the ``cutoff'' process is optional; the user may compile 
+the program with that feature turned ``off.'' 
+
+	Clicking on the ``Edit seq'' button allows the user to enter the edit 
+mode.  The ``Search'' button can be used to skip from ``problem'' to 
+``problem'' (i.e., ambiguity to ambiguity) or to look for runs of 
+identical bases (e.g., TTTT) which are often mis-called by
+the machine software.
+
+  Bases can be inserted, deleted, or replaced as with
+any ordinary word-processor.  In difficult-to-read areas,  
+the trace may be horizontally or vertically scaled by dragging or 
+clicking on the magnification scroll bar or by clicking on the 
+vertical scaling buttons (``Scale down'', ``Scale up''), respectively.  
+Finally, the edited sequence is saved to an ascii file using the 
+``Output'' button.  A history of the editing session can also be saved
+along with the sequence. 
+The ``Quit'' button is used 
+to exit the program.  When reinvoking ted on an edited trace file the 
+edited base sequence, rather than the original sequence, is shown in 
+the edited base window.  The user may invoke ted by calling in any one 
+of the previous editing sessions.   
+
+
+\subsubsection*{APPLICATIONS AND CONCLUSIONS}
+
+	In the C. elegans genome sequencing project, data from the ABI or 
+A.L.F. sequencing machines' computers are transferred to Sun 
+workstations.  
+The user invokes a Unix shell script that calls ted systematically 
+on each of the new set of trace files creating a set of sequence files.
+The sequence files that are deemed to be of acceptable quality
+are then entered into the sequence 
+assembly program xdap [2] where the sequences are assembled into 
+contigs.  Portions of the ted trace-editor have been incorporated 
+into the xdap ``trace manager,''  which is used in 
+conjunction with the contig editor to view sets of aligned traces 
+at sites of discrepancies in the aligned sequences.  
+
+	Ted is also used at the stage of choosing oligo primers for the 
+``walking'' stage of the sequencing project.  It can be invoked directly 
+from the oligo selection program, osp [3], to allow examination
+of the trace data in the region of the primers so that  
+integrity of the sequence data can be verified.
+
+	Currently, no other programs are known to be available 
+which support editing of the ABI trace data. 
+Further, the modular design of the program should allow
+support for new types of sequencing machines, with new data 
+formats, to be implemented in a straightforward fashion.  
+
+
+\subsubsection*{AVAILABILITY}
+	Ted is freely available from the authors or from Rodger Staden and
+Simon Dear (MRC Laboratory of Molecular Biology, Hills Road, Cambridge,
+UK, CB2 2QH) for use on Sun workstations running X-windows (or OpenLook).
+
+
+\subsubsection*{ACKNOWLEDGMENTS}
+	The authors would like to thank all members of the C. elegans
+sequencing project with special thanks to the following people:
+John Sulston, Bob Waterston,  
+Phil Green, Rick Wilson, Richard Durbin, Simon Dear, and Rodger Staden 
+for their helpful suggestions for improvements in the ted interface 
+and for their parts in the development of ted.  This work was 
+supported by the Medical Research Council and NIH grant R01-HG00136.
+
+\subsubsection*{REFERENCES}
+
+1. Waterston, R., Sulston, J., et al. (1991), in preparation.
+
+2. Dear, S. and Staden, R. (1991) Nuc. Acids Res.,  in press.
+
+3. Hillier, L. and Green, P. (1991) submitted.
+
+
+{\bf Figure 1 legend.}
+
+Figure 1 shows a ``screen dump'' of the ted graphical interface.  
+The display consists of
+the control panel and the synchronized view of the base position
+information, original and edited sequence data, 
+and graphical representation of the trace (with each nucleotide's trace
+ being represented
+by a different color).  The control
+panel allows the user to read in new trace files (in either
+bottom or top strand orientation)
+as well as to search for a string of nucleotides or a certain base position.
+Scroll bars allow the user to adjust the magnification of or scroll through
+the sequence and trace data.  The user may also choose to change the vertical
+magnification of the trace data.  Further, sequence on the head (vector)
+or tail (uncertain data) of the sequence may be ``cutoff'' 
+using the adjust left and right cutoff buttons. Bases can be inserted, 
+deleted, or replaced as with
+any ordinary word-processor in the sequence data window. Finally, the
+sequence may be written to an ascii file using the output button on
+the control panel.
+
+\end{document}
+
+
+
--- a/help/BAP.RNO
+++ b/help/BAP.RNO
--- a/help/DAP.RNO
+++ b/help/DAP.RNO
--- a/help/GIP.RNO
+++ b/help/GIP.RNO
@ -0,0 +1,205 @@
+.NPA
+.left margin1
+.CENTER
+GIP
+.LEFT MARGIN1
+.PARA
+A digitizer is
+  a  two  dimensional  surface
+which is such that if a special pen is pressed onto it, the pen's 
+coordinates can be recorded by a computer.
+These coordinates
+          can be interpreted by a program. 
+.para
+The digitizing device we use works by the pen emitting a high frequency 
+sound which is picked up by two microphones positioned at the rear of the 
+working area. The pen position is determined by triangulation and the 
+digitizing device sends the coordinates to the computer. As no special 
+surface is required the device can conveniently be positioned on a light 
+box giving the sequencer an unobscured view of the autoradiographs.
+.LEFT MARGIN1
+The digitizer
+             is called a GRAPHBAR MODEL GP7 made by
+             Science Accessories Corp,
+             970 Kings Highway West,
+             Southport,
+             Connecticut 06490,
+             USA.
+
+.para
+               The program uses a menu to allow the user to select commands or
+          to  enter  the  uncertainty  codes  for  areas  of  the gel that are
+          difficult to interpret.  A menu is simply a series of boxes drawn on
+          the  digitizing surface  that  each  contain  a  command or
+          uncertainty code.  When the user puts the pen down in these  special
+          regions  the program interprets the coordinates as commands and acts
+          appropriately. A copy of the menu should have been sent to you.
+It should be stuck down on the surface of the 
+light box in the digitizing area. For convenience it is best to position it 
+to the right of the digitizing area, but in practice as long as 
+its top 
+edge is parallel to the digitizer box, it can be put anywhere in the active 
+region.
+.sk1
+.left margin1
+ Entering gel readings using a digitizer
+.left margin1
+.para
+The autoradiograph should be stuck down on the light box with the lanes 
+running, as near as is 
+possible, at right angles to the digitizer. To read
+an autoradiograph placed on the light box
+the user  need  only  define the positions of
+the four sequencing lanes and the bases
+          to which they correspond and then use  the  pen  to  point  to  each
+          successive  band progressing up the gel.  The program examines the
+          coordinates of each pen position to see in which of the  four  
+lanes
+          it  lies  and  assigns  the  corresponding  base to be stored in the
+          computer.  Each time the pen tip is depressed to point to a position
+          on  the  surface of the digitizer the program sounds the bell on the
+          terminal (a different sound for each of the four bases on the 
+microcomputer version of the program)
+ to indicate to the user that a point has been recorded.  As
+          the  sequence  is read the program displays it on the screen.
+
+
+.para
+               The program uses a menu 
+to allow the user to select commands or
+          to  enter  the  uncertainty  codes  for  areas  of  the gel that are
+          difficult to interpret.  A menu is simply a series of boxes drawn on
+          the  digitizing surface  that  each  contain  a  command or
+          uncertainty code.  When the user puts the pen down in these  special
+          regions  the program interprets the coordinates as commands and acts
+          appropriately.     As    well    as    the     uncertainty     codes
+          A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8  the  following commands are
+          included in the menu:  DELETE removes the last character  from 
+the  sequence;
+          RESET allows the lane centres to be redefined;
+START  means  begin  the  next
+          stage  of  the  procedure;  STOP means stop the current stage in the
+          procedure;  CONFIRM means confirm that the last command  or  set  of
+          coordinates  are  correct.
+.para
+The digitizing device also has a menu of its own. This lies in a two inch wide 
+strip immediately in front of the digitizing box. Pen positions within this 
+two inch strip are interpreted as commands to the digitizer and are not 
+sent to the GIP program. In general the only time users will need to use 
+the device menu is when they tell GIP where the program menu lies in the 
+digitizing area. This is done by first hitting ORIGIN in the device menu 
+and then hitting the bottom left hand corner of the program menu. The 
+program menu can hence be positioned anywhere in the active region but 
+should be arranged parallel to the digitizer.
+.para
+The user should try to hit the bands as near as possible to the centre of 
+the lanes because the program tracks the lanes up the film using the pen 
+positions. By using this tracking strategy the user only has to define the 
+centres of the bottom of the lanes before starting to read the film. The 
+program can correctly follow quite curved lanes and constantly checks that 
+its lane centre coordinates look sensible. If the lane centres appear to be 
+getting too close the program stops responding to the pen positions of 
+bands and hence does not ring the bell. If this occurs users must hit the
+reset box in the menu and the program will request them to redefine the 
+lane centres at the current reading position. Then they can continue 
+reading. As a further safeguard the program will only respond to pen 
+positions either in the menu or very close to the current reading position.
+.sk1
+.left margin1
+ Running the gel reading program
+.left margin1
+The autoradiograph should be firmly stuck down on the light box and the 
+program started by typing GIP. It will ask the first question.
+.left margin2
+" ? FILE OF FILE NAMES="
+.left margin2
+Type the name for the file of file names and then follow the instructions.
+.left margin2
+" HIT DIGITIZER MENU ORIGIN"
+.left margin2
+" THEN PROGRAM MENU ORIGIN"
+.left margin2
+" THEN HIT START IN PROGRAM MENU"
+.left margin2
+If the bell does not sound after you hit start try hitting metric in the 
+device menu (the program uses metric units, and some digitizers are set to 
+default to use inches; hitting metric switches between the two).
+.left margin2
+After the bell has sounded the program will give the default lane order.
+.left margin2
+" LANE ORDER IS T C A G"
+.left margin2
+" IF CORRECT HIT CONFIRM, ELSE HIT RESET"
+.left margin2
+If the lane order, reading from left to right, is correct hit confirm in the 
+program menu. If you are using a different order hit reset and you will be 
+asked to define the lane order from left to right using the program menu 
+(as follows).
+.left margin2
+" DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
+.left margin2
+Hit the boxes in the menu that contain the symbols A,C,G,T in the 
+left-right order of the lanes. The program will respond with the lane order 
+as above and ask for confirmation. When this is received, the next task is 
+to define the start positions of the next four lanes.
+.left margin2
+" HIT START, THEN HIT (LEFT TO RIGHT)"
+.left margin2
+" THE START POSITIONS FOR THE NEXT FOUR LANES"
+.left margin2
+Hit the centres of the four lanes at a height level with the first band 
+that is going to be read. The program will report the mean lane separations
+and ask for confirmation that they are correct.
+.left margin2
+" MEAN LANE SEPARATION IS XX"
+.left margin2
+" HIT CONFIRM TO CONTINUE"
+.left margin2
+Users will become familiar with the values from their films and will spot 
+any unusual numbers.
+Asking for confirmation allows users to try again if they had made a 
+mistake, but generally the lane separation values can be ignored.
+Hit confirm, and the program will give the message
+.left margin2
+" HIT START WHEN READY TO BEGIN READING"
+.left margin2
+Hit start and the program will give the message
+.left margin2
+" HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
+.left margin2
+Hit the bands, interpreting the sequence progressing 
+up the film. If necessary use the uncertainty codes. If the pen stops 
+responding hit reset and follow the instructions as above. When the 
+sequence becomes unreadable hit stop and the program will ask for a file 
+name for the gel reading just read.
+.left margin2
+" ? FILE NAME FOR THIS GEL READING="
+.left margin2
+Type the file name observing the rules about legal gel readings names.
+The program will ask if you wish 
+to read another sequence.
+.left margin2
+" TO ENTER ANOTHER GEL READING TYPE 1"
+.left margin2
+To enter another type 1 and you will be back to the step of defining the 
+lane order. Typing anything else will stop the program.
+.left margin1
+.sk1
+Running the microcomputer version of the gel reading program
+.left margin1
+The microcomputer version of GIP is slightly different and is called 
+GIPB. The BBC micro 
+does not have the capacity to process the gel readings beyond the reading 
+stage.
+This means that users of this program
+would need to transfer their gel readings from the micro to another machine 
+using a terminal emulator. Transferring many files is tedious and so the 
+microcomputer version of the gel reading program stores all the gel 
+readings for each run of the program in a single file. This special 
+file contains both sequences and file names and can be moved in a single 
+transfer to another machine. Once on the other machine the single file must 
+be split into separate gel reading files and a file of file names. This is 
+done using the program BSPLIT. As far as using the microcomputer version
+of GIP, the only difference is that the first file name the program 
+requests is not a file of file names, but a name for the single file to 
+contain all the gel readings and their names.
--- a/help/MEP.RNO
+++ b/help/MEP.RNO
@ -0,0 +1,859 @@
+.NPA
+.SP 1
+.left margin1
+@-1. TX  0 @General
+.sp
+@-2. T   0 @Screen control
+.sp
+@-2. X   0 @Screen
+.sp
+@-3. TX  0 @Dictionary analysis
+.sp
+@0. TX  -1 @MEP
+.left margin2
+.para
+This is a program  for analysing families of nucleotide sequences in order 
+to find common motifs and potential binding sites.
+The ideas in this program were described in Staden, R.  "Methods 
+for discovering novel motifs in nucleic acid sequences". 
+Computer Applications in the Biosciences, 5, 293-298, (1989).
+.PARA
+The program can read 
+sequences stored in either of two formats: 1) all sequences aligned in a 
+single file; 2) all sequences in separate files and accessed through a file 
+of file names.
+.PARA
+The program contains functions that can answer several questions 
+about a set of sequences:
+.SK1
+.left margin2
+ Which words are most common?
+.left margin2
+ Which words occur in the most sequences?
+.left margin2
+ Which words contain the most information?
+.left margin2
+ Which words occur in equivalent positions in the sequences?
+.left margin2
+ Which words are inverted repeats?
+.left margin2
+ Which words occur on both strands of the sequences?
+.left margin2
+ Where are the inverted repeats?
+.left margin2
+ Where are the fuzzy words?
+.para
+ Most of the program is 
+concerned with analysing 
+what it terms "fuzzy 
+words" within the set of sequences. The analysis is explained 
+below. Note that the standard version of the program is limited 
+to words of maximum length 8 letters, and a maximum fuzziness 
+of 2.
+.para
+The following analyses (preceded by their option numbers) are included:
+.lit
+  ? = Help
+  ! = Quit
+  3 = Read new sequences
+  4 = Redefine active region
+  5 = List the sequences
+  6 = List text file
+  7 = Direct output to disk
+ 10 = Clear graphics
+ 11 = Clear text
+ 12 = Draw ruler
+ 13 = Use cross hair
+ 14 = Reset margins
+ 15 = Label diagram
+ 16 = Draw map
+ 17 = Search for strings
+ 18 = Set strand
+ 19 = Set composition
+ 20 = Set word length
+ 21 = Set number of mismatches
+ 22 = Show settings
+ 23 = Make dictionary Dw
+ 24 = Make dictionary Ds
+ 25 = Make fuzzy dictionary Dm from Dw
+ 26 = Make fuzzy dictionary Dm from Ds
+ 27 = Make fuzzy dictionary Dh from Dm
+ 28 = Examine fuzzy dictionary Dm
+ 29 = Examine fuzzy dictionary Dh
+ 30 = Examine words in Dm
+ 31 = Examine words in Dh
+ 32 = Save or restore a dictionary
+ 33 = Find inverted repeats
+.end lit
+.para
+Some of these methods produce graphical 
+ results 
+and so the 
+program is generally used from a graphics terminal (a vdu on which lines 
+and points can be drawn as well as characters). 
+.para
+.LEFT MARGIN2
+The position of each of the plots is defined relative to a user's drawing 
+board which has size 1-10,000 in x and 1-10,000 in y.
+Plots for
+each option are drawn in a window defined by x0,y0 and xlength,ylength. 
+Where x0,y0 is the position of the bottom left hand corner of the window,
+  and xlength is the width of the window and ylength the 
+height of the window.
+.lit
+   --------------------------------------------------------- 10,000
+   1                                                       1
+   1       --------------------------------------   ^      1
+   1       1                                    1   1      1
+   1       1                                    1   1      1
+   1       1                                    1 ylength  1
+   1       1                                    1   1      1
+   1       1                                    1   1      1
+   1       --------------------------------------   v      1
+   1  x0,y0^                                               1
+   1       <---------------xlength-------------->          1
+   ---------------------------------------------------------      1
+   1                                                   10,000
+
+.end lit
+All values are in drawing board units (i.e. 1-10,000, 1-10,000).
+The default window positions are read from a file "MEPMARG" when the 
+program is started. Users can have their own file if required.
+.para
+The options for the program are accessed from 3 main menus: general, screen 
+control and dictionary analysis.
+Both menus and options are selected by number.
+.para
+The most important and novel part of the program is its use of "fuzzy 
+dictionaries" and an information theory measure, to help show the most 
+interesting motifs.
+
+  Central to the method is the idea of a fuzzy dictionary of word 
+frequencies. A dictionary of word frequencies is an ordered list of 
+all the words in the sequences and a count of the number of times 
+that they occur. A fuzzy dictionary is an equivalent list but which 
+contains instead, for each word, a count of the number of times 
+similar words occur in the sequences. We term words that are 
+similar "relations". The fuzziness is defined by the number of 
+letters in a word that are allowed to be different. So if we had a 
+fuzziness of 1 we allow 1 letter to be different. For example, with 
+a fuzziness of 1, the entry in the fuzzy dictionary for the word 
+TTTTTT would contain a count of the numbers of times TTTTTT 
+occurred plus the number of times all words differing by exactly 
+one letter from TTTTTT occurred.   
+.para
+   Once the fuzzy dictionary has been created we can examine it in 
+several ways to find candidate control sequences. The simplest 
+question we can ask is which word in the dictionary is the most 
+common.  Sometimes this simple criterion of "most common" may 
+be adequate to discover a new motif but in general we would not 
+expect it to be sufficient. For example some words will be common 
+simply because of a base composition bias in the sequences being 
+analysed. In addition a word can be the most frequent and yet not 
+be "well defined". This last point is best explained by an example.
+.para
+   Suppose we were looking at  two letter words and allowing one 
+mismatch, and that there were 10 occurrences of TT and 5 of AC. 
+We could align the 10 words that were one letter different from TT 
+and the 5 that were  related to AC. Then we could count the 
+number of times each base occurred in each position for each of 
+these two sets of words. Suppose we got the two base frequency 
+tables shown below.
+.lit
+   TT                  AC
+       T 6 4               T 1 0
+       C 1 3               C 0 4
+       A 1 2               A 4 1
+       G 2 1               G 0 0
+
+.end lit
+These tables show that although TT occurs (with one letter 
+mismatch) more often than AC, the ratio of base frequencies for 
+AC at 4/5, 4/5 is higher than those for TT at 6/10, 4/10. Hence we 
+would say that AC was better defined than TT.
+Expressing this another way we would say that the definition of AC 
+contained more information than that for TT. The program 
+calculates the information content in a way that takes into account 
+both the sequence composition and the level of definition of the 
+motif.
+.para
+Definitions
+
+.para
+Here we deal only with the dictionary analysis.
+Suppose we are dealing with a set of 
+sequences and are examining them for words that are six 
+characters in length.
+
+.para
+Dictionary Dw contains a count of the number of times each word 
+occurs in the set of sequences. For example the entry for TTTTTT 
+contains a value equal to the number of times the word TTTTTT 
+occurs in the set of sequences.
+
+.para
+Dictionary Ds contains a count of the number of different sequences in 
+which each word occurs. For example if the entry for word TTTTTT 
+contains the value 10, it denotes that the word TTTTTT occurs in ten 
+different sequences. Unlike Dw it only counts words once for each 
+sequence. For example if we had a set of 100 sequences, the maximum 
+possible value that Ds could take is 100, and this would only happen if 
+a word occurred in every sequence. However for the same set of 
+sequences, Dw could contain values greater than 100, and this would 
+show that a word had occurred more than once in at least one 
+sequence.
+
+.para
+From either of the two dictionaries Dw or Ds we can calculate a fuzzy 
+dictionary Dm. For each word, the entry in the fuzzy dictionary Dm 
+contains the sum of the dictionary values (taken from either Dw or Ds) 
+for all words that differ from it by up to m letters. For example if m=2 
+the entry for TTTTTT contains the number of times that TTTTTT 
+occurs in the dictionary, plus the counts for all words that differ from 
+TTTTTT by 1 or 2 letters. 
+Obviously the interpretation of the values in Dm depends on which of 
+the two dictionaries Dw or Ds they were derived from. When derived 
+from Dw the entry for any word in Dm gives the total number of 
+times it, and its relations, occur in the set of sequences. When derived 
+from Ds the entry for any word in Dm gives the total number of 
+different sequences that contain a word and each of its relations.
+
+.para
+Finally, from fuzzy dictionary Dm we can derive fuzzy dictionary Dh. 
+All entries in Dh are zero except for the word(s), within each set of 
+relations, that are most frequent. For example if TTTTTT occurred 20 
+times but had a relation that occurred more often, then the entry for 
+TTTTTT would be zero. However if TTTTTT did not have a more 
+frequently occurring relation, then the entry for TTTTTT would 
+contain the value 20. 
+
+.LEFT MARGIN1
+@1. T 0 @Help
+.LEFT MARGIN2
+.para
+This option gives online help. The user should select option numbers and
+the current documentation will be given. Note that option 0 gives an
+introduction to the program, and that ? will get help from anywhere in 
+the 
+program.
+The following analyses (preceded by their option numbers) are included:
+.lit
+  ? = Help
+  ! = Quit
+  3 = Read new sequences
+  4 = Redefine active region
+  5 = List the sequences
+  6 = List text file
+  7 = Direct output to disk
+ 10 = Clear graphics
+ 11 = Clear text
+ 12 = Draw ruler
+ 13 = Use cross hair
+ 14 = Reset margins
+ 15 = Label diagram
+ 16 = Draw map
+ 17 = Search for strings
+ 18 = Set strand
+ 19 = Set composition
+ 20 = Set word length
+ 21 = Set number of mismatches
+ 22 = Show settings
+ 23 = Make dictionary Dw
+ 24 = Make dictionary Ds
+ 25 = Make fuzzy dictionary Dm from Dw
+ 26 = Make fuzzy dictionary Dm from Ds
+ 27 = Make fuzzy dictionary Dh from Dm
+ 28 = Examine fuzzy dictionary Dm
+ 29 = Examine fuzzy dictionary Dh
+ 30 = Examine words in Dm
+ 31 = Examine words in Dh
+ 32 = Save or restore a dictionary
+ 33 = Find inverted repeats
+.end lit
+.left margin1
+@2. T 0 @Quit
+.left margin2
+.para
+This function stops the program.
+.left margin1
+@3. TX 1 @Read a new sequence
+.LEFT MARGIN2
+.para
+It can read 
+sequences stored in either of two formats: 1) all sequences aligned in a 
+single file; 2) all sequences in separate files and accessed through a file 
+of file names. Typical dialogue follows:
+.lit
+ 
+X 1 Read file of aligned sequences
+  2 Use file of file names
+? 0,1,2 =
+ 
+? File of aligned sequences=F1
+Number of files           88
+
+.end lit
+.left margin1
+@4. TX 1 @Define active region
+.LEFT MARGIN2
+.para
+For its analytic functions 
+the program always works on a region of the sequence called the active 
+region. When  new sequences are read into the program the active region is 
+automatically set to start at the beginning of the sequences and go
+up to the end of the longest one.
+.left margin1
+@5. TX 1 @List a sequence
+.LEFT MARGIN2
+.para
+The sequence can be listed with line lengths of 50 bases with each sequence 
+numbered in the order in which they were read.
+Output can be directed to a disk file by 
+first selecting disk output. Typical dialogue follows.
+.lit
+
+? Menu or option number=5
+
+              10        20        30        40        50
+   1  TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
+   2  CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
+   3  TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
+   4  ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
+   5  AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
+   6  TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
+   7  ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
+   8  GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
+   9  AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
+  10  AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
+
+              60
+   1  TACCCGTTTTT
+   2  GCGTTTTTGT
+   3  TCATACCATAAG
+   4  TTTCATACC
+   5  ATTGTGAGC
+   6  TTCCGGCTCG
+   7  GAAGAGAGT
+   8  TCAGGTGT
+   9  ATGAATG
+  10  TAATTACG
+.end lit
+.left margin1
+@6. TX 1 @List a text file
+.LEFT MARGIN2
+.para
+Allows the user to have a text file displayed on the screen. It will appear 
+one page at a time.
+.left margin1
+@7. TX 1 @Direct output to disk
+.LEFT MARGIN2
+.para
+Used to direct output that would normally appear on the screen to a file. 
+.para
+Select redirection of either text or graphics, and 
+supply the name of the file that the output should be written to.
+.para
+ The results from the next options selected will not appear on the screen 
+but will be written to the file. When option 7 is selected again
+the file will be 
+closed and output will again appear on the screen.
+.left margin1
+@10. TX 2 @Clear graphics
+.LEFT MARGIN2
+.para
+ Clears the screen of both text and graphics.
+.left margin1
+@11. TX 2 @Clear text
+.LEFT MARGIN2
+.para
+ Clears only text from the screen.
+.left margin1
+@12. TX 2 @Draw a ruler
+.LEFT MARGIN2
+.para
+This option
+allows the user to draw a ruler or scale along the x axis of the screen to 
+help identify the coordinates of points of interest. The user can define 
+the position of the first base to be marked (for example if the 
+active 
+region is 1501 to 8000, the user might wish to mark every 1000th 
+base
+starting at either 1501 or 2000 - it depends if the user wishes to treat 
+the active region as an independent unit with its own numbering starting 
+at 
+its left edge, or as part of the whole sequence). The user can also define 
+the separation of the ticks on the scale and their height. If required the 
+labelling routine can be used to add numbers to the ticks.
+.left margin1
+@13. TX 2 @Use crosshair
+.LEFT MARGIN2
+.para
+This function puts
+a steerable cross on the screen that can be used to find the 
+coordinates of points in the sequence. The user can move the cross 
+around using the directional keys; when he hits the space bar the 
+program will print out the coordinates of the cross in sequence units and 
+the option will be exited.
+.para
+If instead, 
+you hit a , the position will be displayed but the cross will remain on 
+the screen.
+.para
+If a letter s is hit the sequence around the cross hair is displayed and 
+the cross remains on the screen.
+.left margin1
+@14. TX 2 @Reposition plots
+.LEFT MARGIN2
+.para
+The position of each of the plots is defined relative to a user's drawing 
+board which has size 1-10,000 in x and 1-10,000 in y.
+Plots for
+each option are drawn in a window defined by x0,y0 and xlength,ylength. 
+Where x0,y0 is the position of the bottom left hand corner of the window,
+  and xlength is the width of the window and ylength the 
+height of the window.
+.lit
+   --------------------------------------------------------- 10,000
+   1                                                       1
+   1       --------------------------------------   ^      1
+   1       1                                    1   1      1
+   1       1                                    1   1      1
+   1       1                                    1 ylength  1
+   1       1                                    1   1      1
+   1       1                                    1   1      1
+   1       --------------------------------------   v      1
+   1  x0,y0^                                               1
+   1       <---------------xlength-------------->          1
+   ---------------------------------------------------------      1
+   1                                                   10,000
+
+.end lit
+All values are in drawing board units (i.e. 1-10,000, 1-10,000).
+The default window positions are read from a file "MEPMARG" when the 
+program is started. Users can have their own file if required.
+As all the plots start 
+at the same position in x and have the same width, x0 and xlength are the 
+same for all options. Generally users will only want to change the start 
+level of the window y0 and its height ylength. 
+ This option 
+allows users to change window positions whilst running the program.
+The routine prompts first for the number of the option that the user 
+wishes 
+to reposition; then for the y start and height; then for the x start and 
+length. Note that changes to the x values affect all options. If the user 
+types only carriage return for any value it will remain unchanged. 
+The cross-hair can be used to choose suitable heights.
+.LEFT MARGIN1
+@15. TX 2 @Label a diagram
+.LEFT MARGIN2
+.para
+This routine allows users to label any diagrams they have produced. They 
+are asked to type in a label. When the user types carriage return to finish 
+typing the label the cross-hair appears on the screen. The user can 
+position it anywhere on the screen. If the user types R (for right justify)
+ the label will be 
+written on the diagram with its right end at the cross-hair position. 
+If the user types L (for left justify) the label will be written on the 
+diagram with its left end at the cross hair position.
+The 
+cross-hair will then immediately reappear. The user may put the same 
+label 
+on another part of the diagram as before or if he hits the space bar he 
+will be asked if he wishes to type in another label.
+.left margin1
+@16. TX 2 @Display a map
+.LEFT MARGIN2
+.para
+It is often convenient to plot a map alongside graphed analysis in order 
+to 
+indicate features within the sequence. This function allows users to 
+draw 
+maps using files arranged in the form of EMBL feature tables. Of course 
+the 
+EMBL tables are usually only used for nucleic acid sequence annotation 
+but, 
+as long as the features are written in the correct format, they can be 
+employed by this routine. The map is composed of a line representing the 
+sequence and then further lines denoting the endpoints of each feature 
+the 
+user identifies. The user is asked to define the height at which the line 
+representing the sequence should be drawn; then for the feature height; 
+then for the features to plot.
+.left margin1
+@17. TX 1 @Search for strings
+.left margin2
+.para
+Search for strings
+performs searches of all the sequences for selected words and 
+shows which sequences they are found in. The user types in a word and 
+defines the allowed number of mismatches. The results are listed or 
+plotted. If listed the display includes the sequence number, the position 
+in the sequence and the matching string.
+The results are plotted in the 
+following way. The x axis of the plot represents the length of the aligned
+sequences and the y direction is divided into sufficient strips to accommodate
+each sequence. So if a match is found in the 3rd sequence at a position
+equivalent to halfway along the longest of the sequences then a short 
+vertical line will be drawn at the midpoint of the 3rd strip. If the sequences
+are aligned it can be useful if the motifs happen to appear in 
+related positions. For example see the original publication. Typical 
+dialogue follows.
+.lit
+
+? Menu or option number=17
+X 1 Plot match positions
+  2 Plot histogram of matches
+? 0,1,2 =
+? Word to search for=TTGACA
+? Minimum match (0-6) (6) =5
+? (y/n) (y) Plot results N
+     2    35 TAGACA
+     5    14 TTTACA
+     6    37 TTTACA
+    11    14 TAGACA
+    14    14 TTGACA
+    17    14 GTGACA
+    17    22 TTAACA
+    20     1 TTGACA
+.end lit
+.left margin1
+@18. TX 3 @Set strand
+.left margin2
+.para
+Set strand allows the user to define which strand(s) of the sequences to 
+analyse: input strand, complement of input, or both.
+.left margin1
+@19. TX 3 @Set composition
+.left margin2
+.para
+Set composition gives the user three choices for setting the composition 
+of the sequences for use in the calculation of the information content of
+words. The user can select the overall composition of the sequences as read,
+an even composition, or can type in any other 4 values.
+.left margin1
+@20. TX 3 @Set word length
+.left margin2
+.para
+Set word length sets the length of word for which dictionaries will be made.
+.left margin1
+@21. TX 3 @Set number of mismatches
+.left margin2
+.para
+Set number of mismatches sets the level of fuzziness for the creation of 
+dictionary Dm. 
+.left margin1
+@22. TX 3 @Show settings
+.left margin2
+.para
+Show settings shows the current settings for all parameters associated with 
+dictionary analysis. A typical display follows:
+.lit
+ ? Menu or option number=22
+ Current word length  =   6
+ Number of mismatches =   1
+ Start position       =     1
+ End position         =    63
+ Input strand only
+ Observed composition
+ Dictionary Dw unmade
+ Dictionary Ds unmade
+ Dictionary Dm unmade
+ Dictionary Dh unmade
+.end lit
+.left margin1
+@23. TX 3 @Make dictionary Dw
+.left margin2
+.para
+Make dictionary Dw creates a dictionary that contains a count  of the
+frequency of occurrence of each word in the collected sequences.
+.left margin1
+@24. TX 3 @Make dictionary Ds
+.left margin2
+.para
+Make dictionary Ds creates a dictionary that contains a count of the
+number of different sequences that contain each word.
+.left margin1
+@25. TX 3 @Make dictionary Dm from Dw
+.left margin2
+.para
+Make dictionary Dm  from Dw creates a dictionary from dictionary Dw that
+contains the frequency of occurrence of each word (say X) in Dw plus the
+frequency of occurrence of each word in Dw that differs from X by up to m 
+letters. Dm is called a fuzzy dictionary as it contains the frequencies of
+occurrence of all words plus the frequencies of all the words that are 
+similar to them.
+.left margin1
+@26. TX 3 @Make dictionary Dm from Ds
+.left margin2
+.para
+Make dictionary Dm  from Ds creates a dictionary from dictionary Ds that
+contains the frequency of occurrence of each word (say X) in Ds plus the
+frequency of occurrence of each word in Ds that differs from X by up to m 
+letters. Dm is called a fuzzy dictionary as it contains the frequencies of
+occurrence of all words plus the frequencies of all the words that are 
+similar to them.
+.left margin1
+@27. TX 3 @Make dictionary Dh from Dm
+.left margin2
+.para
+Make dictionary Dh  creates a dictionary from dictionary Dm and whose
+entries are zero except for those words in any set of related words that
+are most frequent. It finds the dominant words in each set of relations 
+and stores their counts.
+.left margin1
+@28. TX 3 @Examine fuzzy dictionary Dm
+.left margin2
+.para
+Examine dictionary Dm allows users to analyse the contents of dictionary
+Dm to find the most common words or those words that contain the most 
+information. The user supplies a frequency or information cutoff and chooses
+to have the results sorted on either value. The program will find the top 100
+words that achieve the cutoff values and present them to the user sorted
+as selected. The information content will be calculated from either Dw or Ds 
+depending which was used to create Dm, and using the current composition 
+setting. Typical dialogue follows:
+.lit
+
+? Menu or option number=28
+Looking for highest scoring words
+The highest word score =          115
+? Minimum word score (0-115) (0) =60
+? Minimum information (0.00-1.00) (0.00) =.62
+X 1 Sort on information
+  2 Sort on word score
+? 0,1,2 =
+ 
+? Maximum number to list (0-100) (100) =
+ 
+The words are
+ Total words=           9 Maximum information=  0.7385326
+TTGACA      60   0.73850
+AAAAAC      64   0.66460
+AAAAAA      90   0.64880
+GTTTTT      66   0.64300
+TTTTTG      73   0.64070
+TTTTGT      63   0.63820
+TTTTTC      65   0.63810
+AAAATA      63   0.62670
+TATAAT      65   0.62510
+The highest word score =          115
+? Minimum word score (0-115) (0) =60
+? Minimum information (0.00-1.00) (0.00) =.62
+X 1 Sort on information
+  2 Sort on word score
+? 0,1,2 =2
+? Maximum number to list (0-100) (100) =
+ 
+The words are
+ Total words=           9 Maximum information=  0.7385326
+AAAAAA      90   0.64880
+TTTTTG      73   0.64070
+GTTTTT      66   0.64300
+TTTTTC      65   0.63810
+TATAAT      65   0.62510
+AAAAAC      64   0.66460
+TTTTGT      63   0.63820
+AAAATA      63   0.62670
+TTGACA      60   0.73850
+The highest word score =          115
+? Minimum word score (0-115) (0) =!
+
+.end lit
+.left margin1
+@29. TX 3 @Examine fuzzy dictionary Dh
+.left margin2
+.para
+Examine dictionary Dh allows users to analyse the contents of dictionary  Dh
+to find the most common words or those words that contain the most 
+information. The user supplies a frequency or information cutoff and chooses 
+to have the results sorted on either value. The program will find the top 100
+words that achieve the cutoff values and present them to the user sorted as
+selected. The information content will be calculated from either Dw or Ds 
+depending which was used to create Dh and using the current composition 
+setting. Typical dialogue follows:
+.lit
+
+? Menu or option number=29
+Looking for highest scoring words
+The highest word score =          115
+? Minimum word score (0-115) (0) =60
+? Minimum information (0.00-1.00) (0.00) =.6
+X 1 Sort on information
+  2 Sort on word score
+? 0,1,2 =
+ 
+? Maximum number to list (0-100) (100) =
+ 
+The words are
+ Total words=           4 Maximum information=  0.7385326
+TTGACA      60   0.73850
+AAAAAA      90   0.64880
+TATAAT      65   0.62510
+TTTTTT     115   0.60630
+The highest word score =          115
+? Minimum word score (0-115) (0) =50
+? Minimum information (0.00-1.00) (0.00) =.5
+X 1 Sort on information
+  2 Sort on word score
+? 0,1,2 =
+ 
+? Maximum number to list (0-100) (100) =
+ 
+The words are
+ Total words=           8 Maximum information=  0.7385326
+TTGACA      60   0.73850
+TCTTGA      54   0.66080
+AAAAAA      90   0.64880
+TATAAT      65   0.62510
+ACTTTA      57   0.61960
+TTTTTT     115   0.60630
+AGTATA      51   0.60540
+TTATAA      55   0.59300
+The highest word score =          115
+? Minimum word score (0-115) (0) =50
+? Minimum information (0.00-1.00) (0.00) =
+ 
+X 1 Sort on information
+  2 Sort on word score
+? 0,1,2 =
+ 
+? Maximum number to list (0-100) (100) =
+ 
+The words are
+ Total words=           8 Maximum information=  0.7385326
+TTGACA      60   0.73850
+TCTTGA      54   0.66080
+AAAAAA      90   0.64880
+TATAAT      65   0.62510
+ACTTTA      57   0.61960
+TTTTTT     115   0.60630
+AGTATA      51   0.60540
+TTATAA      55   0.59300
+The highest word score =          115
+? Minimum word score (0-115) (0) =!
+
+.end lit
+.left margin1
+@30. TX 3 @Examine words in Dm
+.left margin2
+.para
+Examine words in Dm allows users to analyse the contents of dictionary Dm at the
+level of individual words to find their frequency, information content, and to
+see their base frequency table. The user types in a word to examine and the
+program displays the values and table. The information content will be 
+calculated from either Dw or Ds depending which was used to create Dm,
+and using the current composition setting. Typical dialogue follows:
+.lit
+? Menu or option number=30
+? Word to examine=TTGACA
+TtgacA            60  0.7385326
+    56    56     6     7     5    11
+     4     3     2     1    52     1
+     1     4     2    53     3    48
+     3     1    54     3     4     4
+TTGACA
+? Word to examine=TATAAT
+taTAat            65  0.6251902
+    56     3    53     4     4    60
+     6     1     5     5     5     3
+     3    60     5    57    57     4
+     4     5     6     3     3     2
+TATAAT
+? Word to examine=
+
+.end lit
+.left margin1
+@31. TX 3 @Examine words in Dh
+.left margin2
+.para
+Examine words in Dh allows users to analyse the contents of dictionary Dh at the
+level of individual words to find their frequency, information content, and to
+see their base frequency table. The user types in a word to examine and the
+program displays the values and table. The information content will be 
+calculated from either Dw or Ds depending which was used to create Dm,
+and using the current composition setting. Typical dialogue follows:
+.lit
+
+ ? Menu or option number=31
+? Word to examine=TTGACA
+TtgacA            60  0.7385326
+    56    56     6     7     5    11
+     4     3     2     1    52     1
+     1     4     2    53     3    48
+     3     1    54     3     4     4
+TTGACA
+? Word to examine=TATAAT
+taTAat            65  0.6251902
+    56     3    53     4     4    60
+     6     1     5     5     5     3
+     3    60     5    57    57     4
+     4     5     6     3     3     2
+TATAAT
+? Word to examine=GGGGGG
+gggggg             0  0.6199890
+     3     1     1     2     3     4
+     1     3     1     2     2     1
+     2     1     1     1     1     1
+    11    12    14    12    11    11
+GGGGGG
+? Word to examine=
+
+.end lit
+.left margin1
+@32. TX 3 @Save or restore a dictionary
+.left margin2
+.para
+Save or restore dictionary allows users to write or read any dictionary to 
+and from disk files. The user is asked to define the dictionary and file. The
+function is useful if the machine being used is very slow at calculating 
+because the files can be handled quickly. However note that the files 
+cannot be processed by any other program.
+.left margin1
+@33. TX 1 @Find inverted repeats
+.left margin2
+.para
+Find inverted repeats performs searches for simple inverted repeat sequences 
+in each sequence. They are defined by a range of loop sizes and a minimum 
+number of potential basepairs. The results can be plotted or listed. The x 
+axis of the plot represents the length of the aligned sequences and the y 
+direction is divided into sufficient strips to accommodate each sequence. 
+So if an inverted repeat is found in the 3rd sequence at a position equivalent
+to halfway along the longest of the sequences then a short vertical line will 
+be drawn at the midpoint of the 3rd strip. Alternatively, if the results are
+listed, the potential hairpin loops are drawn out, with the sequence number 
+and the position of the loop. Typical dialogue follows.
+.lit
+
+? Menu or option number=33
+Define the range of loop sizes
+? Minimum loop size (0-10) (3) =0
+? Maximum loop size (1-20) (3) =
+? Minimum number of basepairs (1-20) (6) =
+? (y/n) (y) Plot results N
+ Searching
+
+Sequence     3    34
+           C       
+          G.T      
+          T-A      
+          A-T      
+          T.G      
+          T.G      
+          G.T      
+     ATCTTT TATTTCA
+         33
+
+Sequence     5    35
+           T       
+          G.T      
+          T.G      
+          A-T      
+          T.G      
+          G.T      
+          C-G      
+          T.G      
+     TCCGGC AATTGTG
+         34
+.end lit
+.left margin1
+@ End of help
--- a/help/NIP.RNO
+++ b/help/NIP.RNO
--- a/help/NIPF.RNO
+++ b/help/NIPF.RNO
@ -0,0 +1,88 @@
+.NPA
+.SP 1
+.left margin1
+@-1. TX   0 @General
+.sp
+@-2. TX   0 @Screen control
+.sp
+@-3. TX   0 @Statistical analysis
+.sp
+@-1. TX   0 @General
+.sp
+@-2. TX   0 @Screen control
+.sp
+@-3. TX   0 @Statistical analysis
+.sp
+@0.  TX  -1 @NIPF
+.sp
+@1.  TX 1 @ Help
+.sp
+@2.  TX 1 @ Quit
+.sp
+@3.  TX 1 @ Read new sequence
+.sp
+@4.  TX 1 @ Redefine active region
+.sp
+@5.  TX 1 @ List the sequence
+.sp
+@6.  TX 1 @ List a text file
+.sp
+@7.  TX 1 @ Direct output to disk
+.sp
+@8.  TX 1 @ Write active sequence to disk
+.sp
+@9.  TX 1 @ List a translation
+.sp
+@32. TX 1 @ List showing base differences
+.sp
+@37. TX 1 @ List showing translation
+.sp
+@33. TX 1 @ List showing amino acid differences
+.sp
+@10. TX 2 @ Clear graphics
+.sp
+@11. TX 2 @ Clear text
+.sp
+@12. TX 2 @ Draw a ruler
+.sp
+@13. TX 2 @ Use cross hair
+.sp
+@14. TX 2 @ Reset margins
+.sp
+@15. TX 2 @ Label diagram
+.sp
+@16. TX 2 @ Display a map
+.sp
+@17. TX 3 @ Set comparison mode
+.sp
+@18. TX 3 @ Set sort mode
+.sp
+@21. TX 3 @ Count base changes
+.sp
+@22. TX 3 @ Count codon changes
+.sp
+@23. TX 3 @ Count genetic events
+.sp
+@24. TX 3 @ Show table of base changes
+.sp
+@36. TX 3 @ Show table of expressed base changes
+.sp
+@39. TX 3 @ Show table of silent base changes
+.sp
+@38. TX 3 @ Estimate mutation rate
+.sp
+@25. TX 3 @ Plot base changes
+.sp
+@26. TX 3 @ Plot expressed changes per base
+.sp
+@27. TX 3 @ Plot silent changes per base
+.sp
+@28. TX 3 @ Count expressed changes per base
+.sp
+@29. TX 3 @ Count silent changes per base
+.sp
+@30. TX 3 @ Count changed amino acids
+.sp
+@31. TX 3 @ Plot amino acid variability
+.sp
+@ end of help
--- a/help/PIP.RNO
+++ b/help/PIP.RNO
--- a/help/README
+++ b/help/README
@ -0,0 +1,38 @@
+    README file for help directory of staden package
+    -----------------------------------------------
+
+Should contain (at least) ProgramName_help where ProgramName is each of
+bap, dap, gip, mem, mep, nip, nipf, pip, sap, sip and also staden_help
+and stadenp_help.
+
+There are 3 main formats of file in this directory:
+
+PROGRAM.RNO:
+	This is the unformatted (runoff/nroff style) help for PROGRAM.
+	Any changes to the help should be performed on this file.
+
+program_help:
+	This is the online formatted help used by PROGRAM. It can also
+	be printed to produce hardcopy documentation.
+
+program_menu:
+	This is a file that describes the menus used in PROGRAM,
+	together with an index into the program_help file for the
+	online help. The format for each line is:
+	
+	<option number> <menu number> <program_help offset> <no. of
+	lines of help> <program type T(ext) or (X)windows> <option name>
+
+
+Exceptions to these are for the staden_help, stadenp_help, and
+splitp_help which do not have the relevant .RNO or _menu files. The
+file staden_help gives an introduction to the xterm user interface
+(written for vax and vms and so is out of date with the Unix
+versions).
+
+See the file splitp_help for information about the reformatting of the
+PROSITE motif library.
+
+Rebuild help files with the Unix command "make all". Ensure that the utility
+program sethelp is compiled and in the executables search path. The sources
+for the program sethelp are found in $STADENROOT/staden.
--- a/help/SAP.RNO
+++ b/help/SAP.RNO
--- a/help/SIP.RNO
+++ b/help/SIP.RNO
--- a/help/SPLITP.RNO
+++ b/help/SPLITP.RNO
@ -0,0 +1,125 @@
+.para
+Preparing the PROSITE protein motif library for use by the Staden programs
+.para
+Introduction
+.para
+A library of protein motifs (in our terminology, because they include 
+variable gaps, some would be called patterns) has recently become available 
+from Amos Bairoch, Departement de Biochimie Medicale, University of Geneva.
+Currently it contains 317 patterns/motifs and arrives on tape or cdrom
+in two files: 
+a .dat file and a .doc file. There is also a user documentation file 
+prosite.usr. Here I outline what is required to prepare the PROSITE library for
+use by our programs.
+.para
+Three programs need to be run: SPLITP1, SPLITP2, and SPLITP3.
+.PARA
+Outline of the PROSITE files
+.para
+ A typical entry in the .dat file is shown below.
+.lit
+
+ID   2FE2S_FERREDOXIN; PATTERN.
+AC   PS00197;
+DT   APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
+DE   2Fe-2S ferredoxins, iron-sulfur binding region signature.
+PA   C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
+NR   /RELEASE=14,15409;
+NR   /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
+NR   /FALSE_NEG=5(5);
+CC   /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
+CC   /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
+DR   P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
+DR   P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
+DR   P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
+DO   PDOC00175;
+//
+.end lit
+.para
+Each entry has an accession number (here PS00197), a pattern definition 
+(here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a documentation file 
+cross reference (here PDOC00175).
+This pattern means: C, gap of 1 or 2, any of STA, gap of 2, C, any of STA, 
+not P, C.
+.para
+  We need to convert all of these patterns into our pattern definitions 
+(as membership of a set, with the appropriate gap ranges) and write each 
+into a separate pattern file with corresponding "membership of a set" 
+weight matrices. Each 
+pattern file is named accession_number.pat (here PS00197.PAT). The 
+corresponding matrix files are accession_number.wtsa, 
+accession_number.wtsb, etc for however many are needed (here PS00197.WTSA 
+and PS00197.WTSB): two are needed because of the variable gap. 
+.para
+In addition we can optionally
+split the .dat and .doc files into separate files, one for each 
+entry, with names accession_number.dat and accession_number.doc. Also we
+create an index for the library prosite.lis, which 
+gives a one line description of each pattern, and ends with the pattern 
+file and documentation file numbers. The start of the file is shown below.
+.lit
+
+N-glycosylation site.                                                00001,00001
+Glycosaminoglycan attachment site.                                   00002,00002
+Tyrosine sulfatation site.                                           00003,00003
+cAMP- and cGMP-dependent protein kinase phosphorylation site.        00004,00004
+
+.end lit
+So the name of the pattern file for Glycosaminoglycan attachment site is 
+PS00002.PAT, and for the documentation file PDOC00002.DOC.
+.para
+Finally we
+create a file of file names for all the patterns in the library.
+.para
+To use the complete PROSITE library from program pip, select "pattern searcher"
+and choose the 
+option "use file of pattern file names", and give the file name 
+prosite.nam. For any matches found, the accession number and pattern title
+will be 
+displayed.
+
+.para
+Running the conversion programs
+.para
+
+Only SPLITP3 is necessary for using the library. The other programs
+ only make the
+original files marginally easier to browse through and produce an index.
+.para
+SPLITP1 splits the prosite.dat file to create a separate file for each 
+entry. Each file is automatically named PSentry_number.dat. In addition it 
+creates an index for the library (see above).
+.para
+SPLITP2 performs the same operation for the Prosite.doc file, except that 
+no index is created. Files are named PSentry_number.doc.
+.para
+SPLITP3 creates a separate pattern file and weight matrix files for each 
+prosite entry from the file prosite.dat. Pattern files are named 
+PSentry_number.pat, weight matrix files PSentry_number.wtsa, 
+PSentry_number.wtsb, etc. The pattern title is the one line description
+of the motif. SPLITP3 also creates a file of file names. Notice that it
+will ask for a path name so that the path can be included in the file of
+file names. This is the path to the directory in which the pattern files
+are stored.
+.para
+Notes
+.para
+Obviously the use of files of file names is a general solution, and anybody 
+could now create their own set of interesting patterns for screening, or a 
+subset of prosite.nam, etc.
+.para
+   Note that 5 of the Bairoch motifs contained the symbols > or <, which 
+mean that the motifs must appear exactly at the N or C termini of the 
+sequences. Currently our methods have no mechanism for such definitions and, 
+for example, KDEL motifs will be permitted to occur anywhere throughout 
+a sequence.
+
+.para
+Also, of course, the library does not have to be used solely for performing 
+mass screenings: each individual entry can be used as a single pattern by 
+giving the name of its .pat file - eg pathname/ps00002.pat
+In addition more sophisticated users will wish to copy pattern files and 
+weight matrices into their own directories and modify them. For example the 
+cutoff scores are probably chosen to be quite high in order to reduce the 
+number of false positives, and some users might wish to lower them.
+
--- a/help/STADEN.RNO
+++ b/help/STADEN.RNO
@ -0,0 +1,354 @@
+.npa
+.left margin2
+.para
+Introduction to the Staden sequence analysis package and its user interface
+.PARA
+The package contains the following programs:
+.lit
+
+  GIP     Gel input program
+  SAP     Sequence assemble program
+  NIP     Nucleotide interpretation program
+  PIP     Protein interpretation program
+  SIP     Similarity investigation program
+  MEP     Motif exploration program
+  NIPL    Nucleotide interpretation program (library)
+  PIPL    Protein interpretation program (library)
+  SIPL    Similarity investigation program (library)
+
+.end lit
+.left margin2
+GIP uses a digitiser for entry of DNA sequences from 
+autoradiographs.
+.left margin2
+SAP handles everything relating to assembling gel 
+readings in order to produce a consensus sequence. It can also deal with 
+families of protein sequences.
+.left margin2
+NIP provides functions for analysing and interpreting
+individual nucleotide sequences.
+.left margin2
+PIP provides functions for analysing and interpreting
+individual protein sequences.
+.left margin2
+MEP analyses families of nucleotide sequences to help discover new motifs.
+.left margin2
+NIPL performs pattern searches on nucleotide sequence libraries.
+.left margin2
+PIPL performs pattern searches on protein sequence libraries.
+.left margin2
+SIP provides functions for comparing and aligning 
+pairs of protein or nucleotide sequences.
+.left margin2
+SIPL searches nucleotide and protein sequence
+libraries for entries similar to probe sequences.
+.left margin2
+.sk1
+.para
+Documentation
+.para
+As is explained below, the 
+programs SAP, NIP, PIP, SIP and MEP have online help,
+and the help files have the names: HELPSAP, HELPNIP, HELPPIP, HELPSIP, 
+HELPMEP. These 
+files can be displayed on the screen or printed using the appropriate 
+commands. Currently the help for the other programs is also contained in 
+these files. For example help for NIPL is in HELPNIP. This file is called 
+HELPSTADEN.
+.para
+Sequence formats
+.para
+ The shotgun sequencing program SAP deals only with simple 
+text files for gel readings, and is a self-contained system.
+However as there is still no single agreed format
+ for finished sequences or for libraries of sequences,
+the other programs in the package can read data that is stored in several ways.
+.para
+The analytical programs can read individual sequences stored in the following
+formats: 
+Staden, EMBL, Genbank, PIR (also known as NBRF), and GCG, but for storing whole 
+libraries we use only PIR format. In addition 
+these programs can perform a number of 
+simple operations using libraries stored in this format. They can extract 
+entries by entry name, can search titles for keywords, can search the whole 
+of the annotation files for keywords, and can extract annotations for any 
+named entry.
+We reformat all sequence libraries into PIR format. Currently we 
+have NBRF, EMBL, SWISSPROT and VECBASE libraries in PIR format.
+.para
+The library searching programs operate only
+on sequences stored in PIR format.
+.para
+The analytical programs
+will operate with uppercase or lowercase sequence
+characters. In addition T and U are equivalent. SAP uses uppercase letters 
+for original gel readings and lowercase letters for characters that are 
+corrected by the automatic editor.
+Programs NIP  and PIP use IUB symbols for redundancy in back translations 
+and for sequence searches.
+The symbols are shown below.
+.LIT
+
+
+            NC-IUB SYMBOLS
+
+      A,C,G,T
+      R        (A,G)        'puRine'
+      Y        (T,C)        'pYrimidine'
+      W        (A,T)        'Weak'
+      S        (C,G)        'Strong'
+      M        (A,C)        'aMino'
+      K        (G,T)        'Keto'
+      H        (A,T,C)      'not G'
+      B        (G,C,T)      'not A'
+      V        (G,A,C)      'not T'
+      D        (G,A,T)      'not C'
+      N        (G,A,C,T)    'aNy'
+
+.end lit
+.PARA
+The user interface
+.PARA
+The user interface is common to all programs. 
+It consists of a set of menus and a uniform way
+ of presenting choices and obtaining input 
+from the user. This section describes: the 
+menu system; how options are selected and  other choices made; how values
+ are supplied to the program;  how help is obtained, and 
+how to escape from any part of a program. In addition it gives information 
+about saving results in files and the use of graphics for presenting 
+results.
+.para
+Menus
+.para
+Each program has several menus and numerous options. 
+Each menu or option has a unique number that is used to 
+identify it. Menu numbers are distinguished from 
+option numbers by being preceded by the letter 
+m (or M, all programs make no distinction between 
+upper and lower case letters). With the exception of 
+some parts of program SAP, the menus are not hierarchical, 
+rather the options they each contain are simply lists of 
+related functions and their identifying numbers. 
+Therefore options can be selected independently
+ of the menu that is currently being shown on the 
+screen,  and the menus are simply memory aids. 
+All options and menus are selected by typing their 
+option number when the programs present the prompt 
+.para
+  "? Menu or option number =". 
+.para
+ To select a menu type its number preceded by 
+the letter M. To select an option type its number.
+If you type only "return" you will get menu m0 
+which is simply a list of menus. If you select an 
+option you will return to the current menu after the function is completed.
+.para
+ When you select an option, in many cases the 
+program will immediately perform the operation 
+selected without further dialogue.  If you precede an option 
+number by the letter d (e.g. D17), you 
+will force the program to offer dialogue about the selected option 
+before the function operates, 
+hence allowing you to change the value of any of its parameters.  If 
+you precede an option number by the symbol ? (e.g. ?17), 
+you will be given help on the option (here 17). 
+.para
+Where possible, equivalent or identical options have been given the same
+numbers in all programs, and so users quickly learn the numbers for 
+the functions they employ most often.
+.para
+Help
+.para
+As mentioned above, help about each option can be obtained by 
+preceding the option number  by the symbol ? when you are presented 
+with the prompt "? Menu or option number", but there are two further 
+ways of obtaining help. Whenever the program asks a question 
+you can respond by typing the symbol ? and you will receive information 
+about the current option. In addition, option number 1 
+in all the programs will give help on all of a program's functions.
+.para
+Quitting
+.para
+To exit from any point in a program you type ! for quit. 
+If a menu is on the screen this will stop the program, otherwise 
+you will be returned to the last menu.
+.Para
+Other interactions
+.para
+Questions are  presented in a few restricted ways. 
+In all cases typing only "return" in response to a question means 
+yes, and typing N or n means no.
+.para
+Obvious opposites such as "clear screen" and "keep picture" 
+are presented with only the default shown. For example 
+in this case the default is generally "keep picture" so the 
+program will display: 
+.para
+"(y/n) (y) Keep picture"
+.para
+and the picture will be retained if the user types anything other than N or 
+n, (in which case the screen will be cleared).
+.para
+Where there are choices that are not obvious opposites, or 
+there are more than two choices, two further conventions are used: 
+"radio buttons" and "check boxes".
+.para
+
+Radio buttons are used when only one of a number of choices can be 
+made at any one time. The choices are presented arranged one above the 
+other, each choice with a number for its selection, and the default 
+choice marked with an X. For example in the restriction 
+enzyme search routine the following choices are offered:
+.para
+.lit
+
+   Select output mode
+   1 order results enzyme by enzyme
+   2 order results by positon
+ X 3 show only infrequent cutters
+   4 show names above the sequence
+ ? Selection (1-4) (3) =
+
+.end lit
+Any single option can be selected by typing the option number, 
+and the default option, (here shown as 3), is also obtained by 
+typing only "return". Again help can be obtained by typing ? and 
+you can quit by typing !.
+.para
+Check boxes are used when any number of a set of choices can be 
+made (i.e. the choices are not exclusive). Choices are 
+made by typing choice numbers.  Each choice can be considered 
+as a switch whose setting is reversed when it is selected. Choices that are 
+currently switched on are marked with an X. 
+The user quits from making selections by typing only 
+"return". For example in the routine that plots base composition 
+you can plot the frequencies of any combination of bases, e.g. only 
+A, or A+T, or A+T+G  etc. 
+The following check box is offered to the user:
+.lit
+
+  X 1 T
+    2 C
+  X 3 A
+    4 G
+  ? Selection (1-4) () =
+
+.END LIT
+As shown this will plot the A+T composition. To switch off  T 
+you select 1, to switch on  C you select 2, etc, to quit, 
+having set the bases required you type only "return".
+.para
+Input of numerical values
+.para
+All input of integer or decimal numbers is presented in a 
+standard way with the allowed range shown in brackets and the default 
+value also in brackets. For example:
+.para
+ ? span (5-31) (11) =
+.para
+In this example you could type any number between 5 and 31, 
+or "return" only, or ! or ? (see above). Any other input will cause the 
+program to ask the question again. Typing only "return" gives the default 
+value (here 11).
+.para
+Use of the bell
+.para
+The programs use the bell to indicate that a task is completed. 
+This allows users to read textual results before they are scrolled up off 
+the screen, or to look at a plot before it is scrolled over by the menus. 
+When the bell sounds, the programs will wait
+ until return is typed. You can quit from these points by typing ! but 
+no help is available.
+.para
+Printing and saving results in files
+.para
+A few of the functions in the programs automatically  write their textual
+results 
+to disk files,  but for most functions you can choose whether results
+appear on the terminal screen or go to a file. This applies to both text 
+and graphical results.
+For these functions 
+the normal, or default, place for results to 
+appear is on the screen, and users need to decide before the 
+function is selected if they want to redirect the results to a file. 
+In all programs, option number 7, "Direct output to disk" gives control 
+over whether results appear on the screen or go to a file. When a program 
+is started results will be sent to the screen. If option 7 is selected 
+users will be given the choice of redirecting either text or graphics to a 
+file. The program will then ask users to supply a file name. From that 
+point on all results will be sent to the file until option 7 is selected again, 
+in which case the "redirection file" will be closed, and results will start 
+to appear on the screen.
+.para
+If these files contain textual results they can be looked at 
+from within the programs 
+by using option 6, "List a text file". Once you leave the program 
+you can use an appropriate system command to print the files. 
+There is no function within the programs to direct files to a printer.
+.para
+The converse of the above is also possible. That 
+is, it is possible to redirect results that would normally go to file, 
+so that they appear instead on the screen. This is often useful as a way 
+of checking results before saving them in a file. On a VAX using 
+VMS you do this by typing TT: for the name of the file that the 
+program would create. TT: is what VMS calls the screen.
+.para
+Use of graphics
+.para
+The analytical programs including NIP, PIP and SIP present the results of 
+many of their analyses graphically. The position at which the results for 
+any function appear on the screen is defined relative to a notional user's 
+"drawing board" of dimension 10,000 by 10,000. This drawing board fills the 
+screen and results are drawn in windows defined using symbols x0,y0 and 
+xlength,ylength, 
+where x0,y0 is the position of the bottom left hand corner of the window,
+  and xlength is the width of the window and ylength the 
+height of the window.
+.lit
+
+   --------------------------------------------------------- 10,000
+   1                                                       1
+   1       --------------------------------------   ^      1
+   1       1                                    1   1      1
+   1       1                                    1   1      1
+   1       1                                    1 ylength  1
+   1       1                                    1   1      1
+   1       1                                    1   1      1
+   1       --------------------------------------   v      1
+   1  x0,y0^                                               1
+   1       <---------------xlength-------------->          1
+   ---------------------------------------------------------      1
+   1                                                   10,000
+
+.end lit
+.para
+ The window positions for each option are read from a file 
+when a program is started. If required individual users could have their
+own set of plot positions, and also the positions
+ can be redefined from within the 
+programs using option number 14.
+.para
+For those analyses that draw continuous lines to represent results 
+(for example a plot of base composition) the user is asked to supply the 
+"Plot interval". All the analyses produce a value for every point along the 
+sequence but often it is unnecessary to actually plot the 
+values for all the points.
+The plot interval is simply the distance between the points 
+shown on the screen. If the user selects a plot interval of 1, every point 
+will be plotted; a plot interval of 3 will show every third point. It is a 
+way of speeding up the analyses.
+.para
+Saving graphics
+.para
+Many terminals are not capable of dumping their screen contents to a 
+file for subsequent printing. One convenient way of obtaining hard copy 
+of graphical results is to use a micro computer as a terminal. On 
+the Macintosh we use the terminal emulator 
+versatermPro. This allows graphics to be saved as 
+Macintosh files that can be annotated and printed using 
+Macdraw and other painting programs. 
+.para
+Alternatively graphics can be redirected to a file and printed using a 
+laser printer with tektronix capability (see 
+"Printing and saving results in files").
--- a/help/bap_help
+++ b/help/bap_help
--- a/help/bap_menu
+++ b/help/bap_menu
@ -0,0 +1,84 @@
+-1 0 21 2 T General
+-1 0 21 2 X General
+-2 0 50 2 T Screen control
+-2 0 71 2 X Screen
+-3 0 98 2 T Modification
+-3 0 98 2 X Modification
+0 -1 116 332 T BAP
+0 -1 116 332 X BAP
+17 1 17434 18 T Screen against enzymes
+17 1 17434 18 X Screen against enzymes
+18 1 18477 23 T Screen against vector
+18 1 18477 23 X Screen against vector
+20 3 19859 121 T Auto assemble
+20 3 19859 121 X Auto assemble
+28 1 26426 43 T Highlight disagreements
+28 1 26426 43 X Highlight disagreements
+32 3 28846 17 T Extract gel readings
+32 3 28846 17 X Extract gel readings
+1 0 29607 3 T Help
+1 0 29607 3 X Help
+2 0 29676 5 T Quit
+2 0 29676 5 X Quit
+3 1 29869 230 T Open a database
+3 1 29869 230 X Open a database
+4 3 41499 320 T Edit contig
+4 3 41499 320 X Edit contig
+5 1 56688 43 T Display a contig
+5 1 56688 43 X Display a contig
+6 1 58990 6 T List a text file
+6 1 58990 6 X List a text file
+8 1 59248 93 T Calculate a consensus
+8 1 59248 93 X Calculate a consensus
+25 1 63707 41 T Show relationships
+25 1 63707 41 X Show relationships
+23 3 65650 11 T Complement a contig
+23 3 65650 11 X Complement a contig
+22 3 66173 59 T Join contigs
+22 3 66173 59 X Join contigs
+24 1 69194 11 T Copy the database
+24 1 69194 11 X Copy the database
+19 1 69740 43 T Check database
+19 1 69740 43 X Check database
+29 1 71898 82 T Examine quality
+29 1 71898 82 X Examine quality
+26 3 75715 84 T Alter relationships
+26 3 75715 84 X Alter relationships
+27 1 79641 17 T Set display parameters
+27 1 79641 17 X Set display parameters
+30 3 80503 7 T Shuffle pads
+30 3 80503 7 X Shuffle pads
+10 2 80866 3 T Clear graphics
+10 2 80866 3 X Clear graphics
+11 2 80931 3 T Clear text
+11 2 80931 3 X Clear text
+12 2 80996 12 T Draw a ruler.
+12 2 80996 12 X Draw a ruler.
+14 2 81730 38 T Reposition plots
+14 2 81730 38 X Reposition plots
+15 2 84069 28 T Label a diagram
+15 2 84069 28 X Label a diagram
+16 2 85174 3 T Display a map
+16 2 85174 3 X Display a map
+7 1 85228 12 T Redirect output
+7 1 85228 12 X Redirect output
+13 2 85731 43 T Use crosshair
+13 2 85731 43 X Use crosshair
+33 2 87876 12 T Plot single contig
+33 2 87876 12 X Plot single contig
+34 2 88578 10 T Plot all contigs
+34 2 88578 10 X Plot all contigs
+31 3 89160 21 T Disassemble readings
+31 3 89160 21 X Disassemble readings
+35 3 90372 94 T Find internal joins
+35 1 90372 94 T Find internal joins
+35 3 90372 94 X Find internal joins
+35 1 90372 94 X Find internal joins
+36 3 96201 30 T Double strand
+36 3 96201 30 X Double strand
+37 3 97555 64 T Auto-select oligos
+37 3 97555 64 X Auto-select oligos
+38 1 100421 30 T Check assembly
+38 1 100421 30 X Check assembly
+39 1 102178 90 T Find read pairs
+39 1 102178 90 X Find read pairs
--- a/help/dap_help
+++ b/help/dap_help
--- a/help/dap_menu
+++ b/help/dap_menu
@ -0,0 +1,79 @@
+-1 0 21 2 T General
+-1 0 21 2 X General
+-2 0 50 2 T Screen control
+-2 0 71 2 X Screen
+-3 0 98 2 T Modification
+-3 0 98 2 X Modification
+0 -1 116 351 T SAP
+0 -1 116 351 X SAP
+17 1 18801 18 T Screen against enzymes
+17 1 18801 18 X Screen against enzymes
+18 1 19844 22 T Screen against vector
+18 1 19844 22 X Screen against vector
+20 3 21171 113 T Auto assemble
+20 3 21171 113 X Auto assemble
+28 1 27332 42 T Highlight disagreements
+28 1 27332 42 X Highlight disagreements
+32 3 29694 22 T Extract gel readings
+32 3 29694 22 X Extract gel readings
+1 0 30797 3 T Help
+1 0 30797 3 X Help
+2 0 30866 5 T Quit
+2 0 30866 5 X Quit
+3 1 31059 237 T Open a database
+3 1 31059 237 X Open a database
+4 3 43258 239 T Edit contig
+4 3 43258 239 X Edit contig
+9 3 54180 42 T Screen edit
+5 1 56376 45 T Display a contig
+5 1 56376 45 X Display a contig
+6 1 58862 6 T List a text file
+6 1 58862 6 X List a text file
+8 1 59120 93 T Calculate a consensus
+8 1 59120 93 X Calculate a consensus
+25 1 63651 41 T Show relationships
+25 1 63651 41 X Show relationships
+21 3 65587 101 T Enter new gel reading
+21 3 65587 101 X Enter new gel reading
+23 3 70677 11 T Complement a contig
+23 3 70677 11 X Complement a contig
+22 3 71200 63 T Join contigs
+22 3 71200 63 X Join contigs
+24 1 74467 11 T Copy the database
+24 1 74467 11 X Copy the database
+19 1 75013 41 T Check database
+19 1 75013 41 X Check database
+29 1 77032 82 T Examine quality
+29 1 77032 82 X Examine quality
+26 3 80849 101 T Alter relationships
+26 3 80849 101 X Alter relationships
+27 1 86065 17 T Set display parameters
+27 1 86065 17 X Set display parameters
+30 3 86933 48 T Auto edit a contig
+30 3 86933 48 X Auto edit a contig
+10 2 89409 3 T Clear graphics
+10 2 89409 3 X Clear graphics
+11 2 89474 3 T Clear text
+11 2 89474 3 X Clear text
+12 2 89539 12 T Draw a ruler.
+12 2 89539 12 X Draw a ruler.
+14 2 90273 38 T Reposition plots
+14 2 90273 38 X Reposition plots
+15 2 92612 28 T Label a diagram
+15 2 92612 28 X Label a diagram
+16 2 93717 27 T Display a map
+16 2 93717 27 X Display a map
+7 1 94692 12 T Redirect output
+7 1 94692 12 X Redirect output
+13 2 95163 43 T Use crosshair
+13 2 95163 43 X Use crosshair
+33 2 97308 12 T Plot single contig
+33 2 97308 12 X Plot single contig
+34 2 98010 10 T Plot all contigs
+34 2 98010 10 X Plot all contigs
+31 3 98592 12 T Type in gel readings
+31 3 98592 12 X Type in gel readings
+35 3 99223 92 T Find internal joins
+35 1 99223 92 T Find internal joins
+35 3 99223 92 X Find internal joins
+35 1 99223 92 X Find internal joins
--- a/help/gip_help
+++ b/help/gip_help
@ -0,0 +1,198 @@
+                                  GIP
+
+        A digitizer is a  two  dimensional  surface which is such that
+ if  a  special  pen  is pressed onto it, the pen's coordinates can be
+ recorded by a computer.  These coordinates can be  interpreted  by  a
+ program.
+
+        The digitizing device we use works by the pen emitting a  high
+ frequency  sound  which is picked up by two microphones positioned at
+ the rear of the working area.  The  pen  position  is  determined  by
+ triangulation  and the digitizing device sends the coordinates to the
+ computer.  As  no  special  surface  is  required  the   device   can
+ conveniently  be  positioned  on  a light box giving the sequencer an
+ unobscured view of the autoradiographs.
+ The digitizer  is  called  a  GRAPHBAR  MODEL  GP7  made  by  Science
+ Accessories  Corp,  970  Kings  Highway  West, Southport, Connecticut
+ 06490, USA.
+
+        The program uses a menu to allow the user to  select  commands
+ or  to   enter  the  uncertainty  codes  for  areas  of  the gel that
+ are difficult to interpret.  A menu is simply a series of boxes drawn
+ on  the   digitizing  surface   that   each   contain   a  command or
+ uncertainty code.  When the user puts the pen down in these   special
+ regions   the program interprets the coordinates as commands and acts
+ appropriately. A copy of the menu should have been sent to  you.   It
+ should  be  stuck  down  on  the  surface  of  the  light  box in the
+ digitizing area. For convenience it is best to  position  it  to  the
+ right of the digitizing area, but in practice as long as its top edge
+ is parallel to the digitizer box, it  can  be  put  anywhere  in  the
+ active region.
+
+ Entering gel readings using a digitizer
+
+        The autoradiograph should be stuck down on the light box  with
+ the  lanes  running,  as  near as is possible, at right angles to the
+ digitizer. To read an autoradiograph placed on the light box the user
+ need  only  define the positions of the four sequencing lanes and the
+ bases to which they correspond and then use  the  pen  to  point   to
+ each  successive   band progressing up the gel.  The program examines
+ the coordinates of each pen position to see in  which  of  the   four
+ lanes  it   lies  and  assigns  the  corresponding  base to be stored
+ in the computer.  Each time the pen tip is depressed to  point  to  a
+ position  on   the   surface  of the digitizer the program sounds the
+ bell on the terminal (a different sound for each of the four bases on
+ the  microcomputer  version  of  the program) to indicate to the user
+ that a point has been  recorded.   As  the   sequence   is  read  the
+ program displays it on the screen.
+
+        The program uses a menu to allow the user to  select  commands
+ or  to   enter  the  uncertainty  codes  for  areas  of  the gel that
+ are difficult to interpret.  A menu is simply a series of boxes drawn
+ on  the   digitizing  surface   that   each   contain   a  command or
+ uncertainty code.  When the user puts the pen down in these   special
+ regions   the program interprets the coordinates as commands and acts
+ appropriately.     As    well    as    the     uncertainty      codes
+ A,C,G,T,1,2,3,4,B,D,H,V,R,Y,X,-,5,6,7,8   the  following commands are
+ included in the menu:  DELETE removes the last  character   from  the
+ sequence; RESET allows the lane centres to be redefined; START  means
+ begin  the  next stage  of  the   procedure;   STOP  means  stop  the
+ current  stage in the procedure;  CONFIRM means confirm that the last
+ command  or  set  of coordinates  are  correct.
+
+        The digitizing device also has a menu of its own. This lies in
+ a two inch wide strip immediately in front of the digitizing box. Pen
+ positions within this two inch strip are interpreted as  commands  to
+ the  digitizer  and  are  not sent to the GIP program. In general the
+ only time users will need to use the device menu is  when  they  tell
+ GIP  where the program menu lies in the digitizing area. This is done
+ by first hitting ORIGIN in the  device  menu  and  then  hitting  the
+ bottom  left  hand  corner  of the program menu. The program menu can
+ hence be positioned anywhere in  the  active  region  but  should  be
+ arranged parallel to the digitizer.
+
+        The user should try to hit the bands as near  as  possible  to
+ the  centre  of the lanes because the program tracks the lanes up the
+ film using the pen positions. By using  this  tracking  strategy  the
+ user only has to define the centres of the bottom of the lanes before
+ starting to read the film. The program  can  correctly  follow  quite
+ curved  lanes  and constantly checks that its lane centre coordinates
+ look sensible. If the lane centres appear to be getting too close the
+ program stops responding to the pen positions of bands and hence does
+ not ring the bell. If this occurs users must hit the reset box in the
+ menu  and  the program will request them to redefine the lane centres
+ at the current reading position. Then they can continue reading. As a
+ further  safeguard  the  program  will  only respond to pen positions
+ either in the menu or very close to the current reading position.
+
+ Running the gel reading program
+ The autoradiograph should be firmly stuck down on the light  box  and
+ the program started by typing GIP. It will ask the first question.
+  " ? FILE OF FILE NAMES="
+  Type the name for the  file  of  file  names  and  then  follow  the
+  instructions.
+  " HIT DIGITIZER MENU ORIGIN"
+  " THEN PROGRAM MENU ORIGIN"
+  " THEN HIT START IN PROGRAM MENU"
+  If the bell does not sound after you hit start try hitting metric in
+  the  device menu (the program uses metric units, and some digitizers
+  are set to default to use inches; hitting  metric  switches  between
+  the two).
+  After the bell has sounded the program will give  the  default  lane
+  order.
+  " LANE ORDER IS T C A G"
+  " IF CORRECT HIT CONFIRM, ELSE HIT RESET"
+  If the lane order, reading from left to right is correct hit confirm
+  in  the  program  menu. If you are using a different order hit reset
+  and you will be asked to define the lane order from  left  to  right
+  using the program menu (as follows).
+  " DEFINE LANE ORDER (LEFT TO RIGHT) USING MENU"
+  Hit the boxes in the menu that contain the symbols  A,C,G,T  in  the
+  left-right  order  of  the  lanes. The program will respond with the
+  lane order as above and ask for confirmation. When this is received,
+  the  next  task  is  to  define the start positions of the next four
+  lanes.
+  " HIT START, THEN HIT (LEFT TO RIGHT)"
+  " THE START POSITIONS FOR THE NEXT FOUR LANES"
+  Hit the centres of the four lanes at a height level with  the  first
+  band that is going to be read. The program will report the mean lane
+  separations and ask for confirmation that they are correct.
+  " MEAN LANE SEPARATION IS XX"
+  " HIT CONFIRM TO CONTINUE"
+  Users will become familiar with the values from their films and will
+  spot  any  unusual numbers.  Asking for confirmation allows users to
+  try again if they  had  made  a  mistake,  but  generally  the  lane
+  separation values can be ignored.  Hit confirm, and the program will
+  give the message
+  " HIT START WHEN READY TO BEGIN READING"
+  Hit start and the program will give the message
+  " HIT BANDS, UNCERTAINTY CODES, RESET OR STOP"
+  Hit the bands, interpreting  the sequence progressing up  the  film.
+  If  necessary use the uncertainty codes. If the pen stops responding
+  hit reset and follow the instructions as above.  When  the  sequence
+  becomes unreadable hit stop and the program will ask for a file name
+  for the gel reading just read.
+  " ? FILE NAME FOR THIS GEL READING="
+  Type the file name observing the  rules  about  legal  gel  readings
+  names.  The program will ask if you wish to read another sequence.
+  " TO ENTER ANOTHER GEL READING TYPE 1"
+  To enter another type 1 and you will be back to the step of defining
+  the lane order. Typing anything else will stop the program.
+
+ Running the microcomputer version of the gel reading program
+ The microcomputer version of GIP is slightly different and is  called
+ GIPB.  The  BBC  micro  does not have the capacity to process the gel
+ readings beyond the reading stage.  This means  that  users  of  this
+ program  would  need to transfer their gel readings from the micro to
+ another machine using a terminal emulator.  Transferring  many  files
+ is  tedious  and  so  the  microcomputer  version  of the gel reading
+ program stores all the gel readings for each run of the program in  a
+ single file. This special file contains both sequences and file names
+ and can be moved in a single transfer to another machine. Once on the
+ other machine the single file must be split into separate gel reading
+ files and a file of file  names.  This  is  done  using  the  program
+ BSPLIT.  As  far  as using the microcomputer version of GIP, the only
+ difference is that the first file name the program requests is not  a
+ file of file names, but a name for the single file to contain all the
+ gel readings and their names.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/help/gip_menu
+++ b/help/gip_menu
--- a/help/makefile
+++ b/help/makefile
@ -0,0 +1,48 @@
+# Make file for help files - this requires gmake on some systems.
+# Each program target deletes its <prog>_help and <prog>_menu, then runs ./runoff
+# on its out-of-date .RNO source ($?). "clean" deletes all HELPS and MENUS files.
+PROGS = bap       dap       gip       mep         nip      \
+	nipf      pip       sap       sip        #mem
+
+HELPS = bap_help  dap_help  gip_help  mep_help    nip_help \
+	nipf_help pip_help  sap_help  sip_help   #mem_help
+
+MENUS = bap_menu  dap_menu  gip_menu  mep_menu    nip_menu \
+	nipf_menu pip_menu  sap_menu  sip_menu   #mem_menu
+
+all:	$(PROGS)
+
+DOIT = rm -f $@_help $@_menu; ./runoff $?
+
+bap:	BAP.RNO
+	$(DOIT)
+
+dap:	DAP.RNO
+	$(DOIT)
+
+gip:	GIP.RNO
+	$(DOIT)
+
+#mem:	MEM.RNO
+#	$(DOIT)
+
+mep:	MEP.RNO
+	$(DOIT)
+
+nip:	NIP.RNO
+	$(DOIT)
+
+nipf:	NIPF.RNO
+	$(DOIT)
+
+pip:	PIP.RNO
+	$(DOIT)
+
+sap:	SAP.RNO
+	$(DOIT)
+
+sip:	SIP.RNO
+	$(DOIT)
+
+clean:
+	rm -f $(HELPS) $(MENUS)
--- a/help/mem_help
+++ b/help/mem_help
@ -0,0 +1,698 @@
+
+ @0. B 1 @MEP
+  This is a program  for analysing families of nucleotide sequences in
+  order  to find common motifs and potential binding sites.  The ideas
+  in  this  program  were  described  in  Staden,  R.   "Methods   for
+  discovering  novel  motifs  in  nucleic  acid  sequences".  Computer
+  Applications in the Biosciences, 5, 293-298, (1989).
+
+        The program  can  read  sequences  stored  in  either  of  two
+  formats: 1) all sequences aligned in a single file; 2) all sequences
+  in separate files and accessed through a file of file names.
+
+        The  program  contains  functions  that  can  answer   several
+  questions about a set of sequences:
+
+  Which words are most common?
+  Which words occur in the most sequences?
+  Which words contain the most information?
+  Which words occur in equivalent positions in the sequences?
+  Which words are inverted repeats?
+  Which words occur on both strands of the sequences?
+  Where are the inverted repeats?
+  Where are the fuzzy words?
+
+        Most of the program is concerned with analysing what it  terms
+  "fuzzy words" within the set of sequences. The analysis is explained
+  below. Note that the standard version of the program  is limited  to
+  words of maximum length 8 letters, and a maximum fuzziness of 2.
+
+        The following analyses (preceded by their option numbers)  are
+  included:
+    ? = Help
+    ! = Quit
+    3 = Read new sequences
+    4 = Redefine active region
+    5 = List the sequences
+    6 = List text file
+    7 = Direct output to disk
+   10 = Clear graphics
+   11 = Clear text
+   12 = Draw ruler
+   13 = Use cross hair
+   14 = Reset margins
+   15 = Label diagram
+   16 = Draw map
+   17 = Search for strings
+   18 = Set strand
+   19 = Set composition
+   20 = Set word length
+   21 = Set number of mismatches
+   22 = Show settings
+   23 = Make dictionary Dw
+   24 = Make dictionary Ds
+   25 = Make fuzzy dictionary Dm from Dw
+   26 = Make fuzzy dictionary Dm from Ds
+   27 = Make fuzzy dictionary Dh from Dm
+   28 = Examine fuzzy dictionary Dm
+   29 = Examine fuzzy dictionary Dh
+   30 = Examine words in Dm
+   31 = Examine words in Dh
+   32 = Save or restore a dictionary
+   33 = Find inverted repeats
+
+        Some of these methods produce graphical  results  and  so  the
+  program  is  generally used from a graphics terminal (a vdu on which
+  lines and points can be drawn as well as characters).
+
+  The position of each of the plots is defined relative  to  a  user's
+  drawing board which has size 1-10,000 in x and 1-10,000 in y.  Plots
+  for each  option  are  drawn  in  a  window  defined  by  x0,y0  and
+  xlength,ylength. Where x0,y0 is the position of the bottom left hand
+  corner of the window, and xlength is the width  of  the  window  and
+  ylength the height of the window.
+     --------------------------------------------------------- 10,000
+     1                                                       1
+     1       --------------------------------------   ^      1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       1                                    1 ylength  1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       --------------------------------------   v      1
+     1  x0,y0^                                               1
+     1       <---------------xlength-------------->          1
+     ---------------------------------------------------------      1
+     1                                                   10,000
+
+  All values are in drawing board  units  (i.e.  1-10,000,  1-10,000).
+  The default window positions are read from a file "MEPMARG" when the
+  program is started. Users can have their own file if required.
+
+        The options for the program are accessed from  3  main  menus:
+  general,  screen  control  and  dictionary analysis.  Both menus and
+  options are selected by number.
+
+        The most important and novel part of the program is its use of
+  "fuzzy dictionaries" and an information theory measure, to help show
+  the most interesting motifs.  Central to the method is the idea of a
+  fuzzy   dictionary   of  word  frequencies.  A  dictionary  of  word
+  frequencies is an ordered list of all the words in the sequences and
+  a  count  of the number of times that they occur. A fuzzy dictionary
+  is an equivalent list but which contains instead, for each  word,  a
+  count  of  the number of times similar words occur in the sequences.
+  We term words that are similar "relations". The fuzziness is defined
+  by the number of letters in a word that are allowed to be different.
+  So if we had a fuzziness of 1 we allow 1 letter to be different. For
+  example,  with  a  fuzziness of 1, the entry in the fuzzy dictionary
+  for the word TTTTTT would contain a count of the  number   of  times
+  TTTTTT  occurred plus  the  number  of  times all words differing by
+  exactly one letter from TTTTTT occurred.
+
+        Once the fuzzy dictionary has been created we can  examine  it
+  in  several  ways  to find candidate control sequences. The simplest
+  question we can ask is which word in  the  dictionary  is  the  most
+  common.   Sometimes  this  simple  criterion of "most common" may be
+  adequate to discover a new motif but in general we would not  expect
+  it  to  be  sufficient. For example some words will be common simply
+  because of a base composition bias in the sequences being  analysed.
+  In  addition  a  word  can be the most frequent and yet not be "well
+  defined". This last point is best explained by an example.
+
+        Suppose we were looking at  two letter words and allowing  one
+  mismatch,  and  that there were 10 occurrences of TT and 5 of AC. We
+  could align the 10 words that were one letter different from TT  and
+  the  5  that  were  related to AC. Then we could count the number of
+  times each base occurred in each position for each of these two sets
+  of words. Suppose we got the two base frequency tables shown below.
+     TT                  AC
+         T 6 4               T 1 0
+         C 1 3               C 0 4
+         A 1 2               A 4 1
+         G 2 1               G 0 0
+
+  These tables show that although TT occurs (with one letter mismatch)
+  more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
+  is higher than those for TT at 6/10, 4/10. Hence we would  say  that
+  AC was better defined than TT.  Expressing this another way we would
+  say that the definition of AC contained more information  than  that
+  for TT. The program calculates the information content in a way that
+  takes into account both the sequence composition and  the  level  of
+  definition of the motif.
+
+        Definitions
+
+        Here we deal only with the dictionary  analysis.   Suppose  we
+  are dealing with a set of sequences and are examining them for words
+  that are six characters in length.
+
+        Dictionary Dw contains a count of the  number  of  times  each
+  word  occurs  in  the  set  of  sequences. For example the entry for
+  TTTTTT contains a value equal to the number of times the word TTTTTT
+  occurs in the set of sequences.
+
+        Dictionary Ds contains a count  of  the  number  of  different
+  sequences  in  which  each word occurs. For example if the entry for
+  word TTTTTT contains the value 10, it denotes that the  word  TTTTTT
+  occurs  in  ten  different sequences. Unlike Dw it only counts words
+  once for each  sequence.  For  example  if  we  had  a  set  of  100
+  sequences, the maximum possible value that Ds could take is 100, and
+  this would only happen if a word occurred in every sequence. However
+  for  the same set of sequences, Dw could contain values greater than
+  100, and this would show that a word had occurred more than once  in
+  at least one sequence.
+
+        From either of the two dictionaries Dw or Ds we can  calculate
+  a  fuzzy  dictionary  Dm.  For  each  word,  the  entry in the fuzzy
+  dictionary Dm contains the sum of the dictionary values (taken  from
+  either  Dw  or  Ds)  for  all  words  that differ from it by up to m
+  letters. For example if m=2 the entry for TTTTTT contains the number
+  of  times  that TTTTTT occurs in the dictionary, plus the counts for
+  all words that differ from TTTTTT by 1 or 2 letters.  Obviously  the
+  interpretation  of  the  values  in  Dm  depends on which of the two
+  dictionaries Dw or Ds they were derived from. When derived  from  Dw
+  the entry for any word in Dm gives the total number of times it, and
+  its relations, occur in the set of sequences. When derived  from  Ds
+  the  entry  for  any  word in Dm gives the total number of different
+  sequences that contain a word and each of its relations.
+
+        Finally,  from  fuzzy  dictionary  Dm  we  can  derive   fuzzy
+  dictionary  Dh.  All  entries in Dh are zero except for the word(s),
+  within each set of relations, that are most frequent. For example if
+  TTTTTT  occurred  20  times  but  had  a relation that occurred more
+  often, then the entry for TTTTTT would be zero.  However  if  TTTTTT
+  did  not  have  a more frequently occurring relation, then the entry
+  for TTTTTT would contain the value 20.
+ @1. B 1 @Help
+  This option gives online help. The user should select option numbers
+  and  the  current  documentation  will  be given. Note that option 0
+  gives an introduction to the program, and that ? will get help  from
+  anywhere  in the program.  The following analyses (preceded by their
+  option numbers) are included:
+    ? = Help
+    ! = Quit
+    3 = Read new sequences
+    4 = Redefine active region
+    5 = List the sequences
+    6 = List text file
+    7 = Direct output to disk
+   10 = Clear graphics
+   11 = Clear text
+   12 = Draw ruler
+   13 = Use cross hair
+   14 = Reset margins
+   15 = Label diagram
+   16 = Draw map
+   17 = Search for strings
+   18 = Set strand
+   19 = Set composition
+   20 = Set word length
+   21 = Set number of mismatches
+   22 = Show settings
+   23 = Make dictionary Dw
+   24 = Make dictionary Ds
+   25 = Make fuzzy dictionary Dm from Dw
+   26 = Make fuzzy dictionary Dm from Ds
+   27 = Make fuzzy dictionary Dh from Dm
+   28 = Examine fuzzy dictionary Dm
+   29 = Examine fuzzy dictionary Dh
+   30 = Examine words in Dm
+   31 = Examine words in Dh
+   32 = Save or restore a dictionary
+   33 = Find inverted repeats
+ @2. B 1 @Quit
+  This function stops the program.
+ @3. B 1 @Read a new sequence.
+
+        It can read sequences stored in either of two formats: 1)  all
+  sequences  aligned  in  a  single file; 2) all sequences in separate
+  files and accessed through a file of file  names.  Typical  dialogue
+  follows:
+
+  X 1 Read file of aligned sequences
+    2 Use file of file names
+  ? 0,1,2 =
+
+  ? File of aligned sequences=F1
+  Number of files           88
+
+ @4. B 1 @Define active region
+  For its analytic functions the program always works on a  region  of
+  the  sequence called the active region. When  new sequences are read
+  into the program the active region is automatically set to start  at
+  the  beginning  of the sequences and go up to the end of the longest
+  one.
+ @5. B 1 @List a sequence.
+  The sequence can be listed with line lengths of 50 bases  with  each
+  sequence  numbered in the order in which they were read.  Output can
+  be directed to a disk file by first selecting disk  output.  Typical
+  dialogue follows.
+
+  ? Menu or option number=5
+
+                10        20        30        40        50
+     1  TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
+     2  CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
+     3  TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
+     4  ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
+     5  AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
+     6  TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
+     7  ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
+     8  GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
+     9  AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
+    10  AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
+
+                60
+     1  TACCCGTTTTT
+     2  GCGTTTTTGT
+     3  TCATACCATAAG
+     4  TTTCATACC
+     5  ATTGTGAGC
+     6  TTCCGGCTCG
+     7  GAAGAGAGT
+     8  TCAGGTGT
+     9  ATGAATG
+    10  TAATTACG
+ @6. B 1 @List a text file.
+  Allows the user to have a text file displayed on the screen. It will
+  appear one page at a time.
+ @7. B 1 @Direct output to disk
+
+        Used to direct output that would normally appear on the screen
+  to a file.
+
+        Select redirection of either text or graphics, and supply  the
+  name of the file that the output should be written to.
+
+        The results from the next options selected will not appear  on
+  the  screen  but  will  be  written  to  the  file. When option 7 is
+  selected again the file will be closed and output will again  appear
+  on the screen.
+ @10. B 1 @Clear graphics
+  Clears the screen of both text and graphics.
+ @11. B 1 @Clear text
+  Clears only text from the screen.
+ @12. B 1 @Draw a ruler.
+  This option allows the user to draw a ruler or  scale  along  the  x
+  axis  of  the  screen  to help identify the coordinates of points of
+  interest.  The user can define the position of the  first  base
+  to  be marked (for example if the active region is 1501 to 8000, the
+  user  might  wish  to  mark  every 1000th base starting  at  either
+  1501  or  2000  -  it depends if the user wishes to treat the active
+  region as an independent unit with its own numbering starting at its
+  left  edge,  or  as  part  of the whole sequence). The user can also
+  define the separation of the ticks on the scale and their height. If
+  required  the  labelling  routine  can be used to add numbers to the
+  ticks.
+ @13. B 1 @Use crosshair.
+  This function puts a steerable cross on the screen that can be  used
+  to find the coordinates of points in the sequence. The user can move
+  the cross around using the directional keys; when he hits the  space
+  bar  the  program  will  print  out  the coordinates of the cross in
+  sequence units and the option will be exited.
+
+        If instead, you hit a , the position will be displayed but the
+  cross will remain on the screen.
+
+        If a letter s is hit the sequence around  the  cross  hair  is
+  displayed and the cross remains on the screen.
+ @14. B 1 @Reposition plots
+  The position of each of the plots is defined relative  to  a  user's
+  drawing board which has size 1-10,000 in x and 1-10,000 in y.  Plots
+  for each  option  are  drawn  in  a  window  defined  by  x0,y0  and
+  xlength,ylength. Where x0,y0 is the position of the bottom left hand
+  corner of the window, and xlength is the width  of  the  window  and
+  ylength the height of the window.
+     --------------------------------------------------------- 10,000
+     1                                                       1
+     1       --------------------------------------   ^      1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       1                                    1 ylength  1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       --------------------------------------   v      1
+     1  x0,y0^                                               1
+     1       <---------------xlength-------------->          1
+     ---------------------------------------------------------      1
+     1                                                   10,000
+
+  All values are in drawing board  units  (i.e.  1-10,000,  1-10,000).
+  The default window positions are read from a file "MEPMARG" when the
+  program is started. Users can have their own file if  required.   As
+  all  the  plots  start  at  the same position in x and have the same
+  width, x0 and xlength are the same for all options. Generally  users
+  will  only  want  to change the start level of the window y0 and its
+  height ylength. This option allows users to change window  positions
+  whilst  running  the  program.   The  routine  prompts first for the
+  number of the option that the user  wishes to reposition;  then  for
+  the  y  start and height; then for the x start and length. Note that
+  changes to the x values affect all options. If the user  types  only
+  carriage  return  for any value it will remain unchanged. The cross-
+  hair can be used to choose suitable heights.
+ @15. B 1 @Label a diagram
+  This routine allows users to label any diagrams they have  produced.
+  They  are  asked  to  type  in a label. When the user types carriage
+  return to finish typing the label  the  cross-hair  appears  on  the
+  screen. The user can position it anywhere on the screen. If the user
+  types R (for right justify) the label will be written on the diagram
+  with  its  right end at the cross-hair position. If the user types L
+  (for left justify) the label will be written on the diagram with its
+  left  end  at  the  cross  hair  position.  The cross-hair will then
+  immediately reappear. The user may put the  same  label  on  another
+  part of the diagram as before or if he hits the space bar he will be
+  asked if he wishes to type in another label.
+ @16. B 1 @Display a map.
+  It is often convenient to plot a map alongside graphed  analysis  in
+  order to indicate features within the sequence. This function allows
+  users to draw maps using files arranged in the form of EMBL  feature
+  tables.  Of course the EMBL tables are usually only used for nucleic
+  acid sequence annotation but, as long as the features are written in
+  the correct format, they can be employed by this routine. The map is
+  composed of a line representing the sequence and then further  lines
+  denoting the endpoints of each feature the user identifies. The user
+  is asked to define the height at which  the  line  representing  the
+  sequence  should be drawn; then for the feature height; then for the
+  features to plot.
+ @17. B 1 @Search for strings
+  Search for  strings  performs searches  of  all  the  sequences  for
+  selected words and shows which sequences they are found in. The user
+  types in a word and defines the allowed number  of  mismatches.  The
+  results  are  listed  or plotted. If listed the display includes the
+  sequence number, the position  in  the  sequence  and  the  matching
+  string.  The results are plotted in the following way. The x axis of
+  the plot represents the length of the aligned sequences  and  the  y
+  direction  is  divided  into  sufficient  strips to accommodate each
+  sequence. So if a match is found in the 3rd sequence at  a  position
+  equivalent  to  halfway  along  the  longest of the sequences then a
+  short vertical line will be drawn at the midpoint of the 3rd  strip.
+  If  the  sequences are aligned it can be useful if the motifs happen
+  to appear  in  related  positions.  For  example  see  the  original
+  publication. Typical dialogue follows.
+
+  ? Menu or option number=17
+  X 1 Plot match positions
+    2 Plot histogram of matches
+  ? 0,1,2 =
+  ? Word to search for=TTGACA
+  ? Minimum match (0-6) (6) =5
+  ? (y/n) (y) Plot results N
+       2    35 TAGACA
+       5    14 TTTACA
+       6    37 TTTACA
+      11    14 TAGACA
+      14    14 TTGACA
+      17    14 GTGACA
+      17    22 TTAACA
+      20     1 TTGACA
+ @18. B 1 @Set strand
+  Set strand  allows  the  user  to  define  which  strand(s)  of  the
+  sequences to analyse: input strand, complement of input, or both.
+ @19. B 1 @Set composition
+  Set composition  gives  the  user  three  choices  for  setting  the
+  composition  of  the  sequences  for  use  in the calculation of the
+  information content of  words.  The  user  can  select  the  overall
+  composition  of  the  sequences as read, an even composition, or can
+  type in any other 4 values.
+ @20. B 1 @Set word length
+  Set word length sets the length of word for which dictionaries  will
+  be made.
+ @21. B 1 @Set number of mismatches
+  Set number of  mismatches  sets  the  level  of  fuzziness  for  the
+  creation of dictionary Dm.
+ @22. B 1 @Show settings
+  Show  settings  shows the  current  settings  for   all   parameters
+  associated with dictionary analysis. A typical display follows:
+   ? Menu or option number=22
+   Current word length  =   6
+   Number of mismatches =   1
+   Start position       =     1
+   End position         =    63
+   Input strand only
+   Observed composition
+   Dictionary Dw unmade
+   Dictionary Ds unmade
+   Dictionary Dm unmade
+   Dictionary Dh unmade
+ @23. B 1 @Make dictionary Dw
+  Make dictionary Dw creates a dictionary that contains  a  count   of
+  the frequency of occurrence of each word in the collected sequences.
+ @24. B 1 @Make dictionary Ds
+  Make dictionary Ds creates a dictionary that contains a count of the
+  number of different sequences that contain each word.
+ @25. B 1 @Make dictionary Dm from Dw
+  Make dictionary Dm  from Dw creates a dictionary from dictionary  Dw
+  that contains the frequency of occurrence of each word (say X) in Dw
+  plus the frequency of occurrence of each word  in  Dw  that  differs
+  from  X  by  up  to m letters. Dm is called a fuzzy dictionary as it
+  contains the  frequencies  of  occurrence  of  all  words  plus  the
+  frequencies of all the words that are similar to them.
+ @26. B 1 @Make dictionary Dm from Ds
+  Make dictionary Dm  from Ds creates a dictionary from dictionary  Ds
+  that contains the frequency of occurrence of each word (say X) in Ds
+  plus the frequency of occurrence of each word  in  Ds  that  differs
+  from  X  by  up  to m letters. Dm is called a fuzzy dictionary as it
+  contains the  frequencies  of  occurrence  of  all  words  plus  the
+  frequencies of all the words that are similar to them.
+ @27. B 1 @Make dictionary Dh from Dm
+  Make dictionary Dh  creates a  dictionary  from  dictionary  Dm  and
+  whose  entries are zero except for those words in any set of related
+  words that are most frequent. It finds the dominant  words  in  each
+  set of relations and stores their counts.
+ @28. B 1 @Examine dictionary Dm
+  Examine dictionary Dm  allows  users  to  analyse  the  contents  of
+  dictionary  Dm  to  find  the  most common words or those words that
+  contain the most information.  The  user  supplies  a  frequency  or
+  information  cutoff and chooses to have the results sorted on either
+  value. The program will find the top  100  words  that  achieve  the
+  cutoff  values  and present them to the user sorted as selected. The
+  information  content  will  be  calculated  from  either  Dw  or  Ds
+  depending  which  was  used  to  create  Dm,  and  using the current
+  composition setting. Typical dialogue follows:
+
+  ? Menu or option number=28
+  Looking for highest scoring words
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =60
+  ? Minimum information (0.00-1.00) (0.00) =.62
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           9 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  AAAAAC      64   0.66460
+  AAAAAA      90   0.64880
+  GTTTTT      66   0.64300
+  TTTTTG      73   0.64070
+  TTTTGT      63   0.63820
+  TTTTTC      65   0.63810
+  AAAATA      63   0.62670
+  TATAAT      65   0.62510
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =60
+  ? Minimum information (0.00-1.00) (0.00) =.62
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =2
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           9 Maximum information=  0.7385326
+  AAAAAA      90   0.64880
+  TTTTTG      73   0.64070
+  GTTTTT      66   0.64300
+  TTTTTC      65   0.63810
+  TATAAT      65   0.62510
+  AAAAAC      64   0.66460
+  TTTTGT      63   0.63820
+  AAAATA      63   0.62670
+  TTGACA      60   0.73850
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =!
+
+ @29. B 1 @Examine dictionary Dh
+  Examine dictionary Dh  allows  users  to  analyse  the  contents  of
+  dictionary   Dh  to  find  the most common words or those words that
+  contain the most information.  The  user  supplies  a  frequency  or
+  information  cutoff and chooses to have the results sorted on either
+  value. The program will find the top  100  words  that  achieve  the
+  cutoff  values  and present them to the user sorted as selected. The
+  information  content  will  be  calculated  from  either  Dw  or  Ds
+  depending  which  was  used  to  create  Dh  and  using  the current
+  composition setting. Typical dialogue follows:
+
+  ? Menu or option number=29
+  Looking for highest scoring words
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =60
+  ? Minimum information (0.00-1.00) (0.00) =.6
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           4 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  AAAAAA      90   0.64880
+  TATAAT      65   0.62510
+  TTTTTT     115   0.60630
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =50
+  ? Minimum information (0.00-1.00) (0.00) =.5
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           8 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  TCTTGA      54   0.66080
+  AAAAAA      90   0.64880
+  TATAAT      65   0.62510
+  ACTTTA      57   0.61960
+  TTTTTT     115   0.60630
+  AGTATA      51   0.60540
+  TTATAA      55   0.59300
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =50
+  ? Minimum information (0.00-1.00) (0.00) =
+
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           8 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  TCTTGA      54   0.66080
+  AAAAAA      90   0.64880
+  TATAAT      65   0.62510
+  ACTTTA      57   0.61960
+  TTTTTT     115   0.60630
+  AGTATA      51   0.60540
+  TTATAA      55   0.59300
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =!
+
+ @30. B 1 @Examine words in Dm
+  Examine words  in  Dm  allows  users  to  analyse  the  contents  of
+  dictionary Dm  at  the  level  of  individual  words  to  find their
+  frequency, information content, and  to  see  their  base  frequency
+  table.  The user types in a word to examine and the program displays
+  the values and table. The information  content  will  be  calculated
+  from  either  Dw  or  Ds  depending which was used to create Dm, and
+  using the current composition setting. Typical dialogue follows:
+  ? Menu or option number=30
+  ? Word to examine=TTGACA
+  TtgacA            60  0.7385326
+      56    56     6     7     5    11
+       4     3     2     1    52     1
+       1     4     2    53     3    48
+       3     1    54     3     4     4
+  TTGACA
+  ? Word to examine=TATAAT
+  taTAat            65  0.6251902
+      56     3    53     4     4    60
+       6     1     5     5     5     3
+       3    60     5    57    57     4
+       4     5     6     3     3     2
+  TATAAT
+  ? Word to examine=
+
+ @31. B 1 @Examine words in Dh
+  Examine words  in  Dh  allows  users  to  analyse  the  contents  of
+  dictionary Dh  at  the  level  of  individual  words  to  find their
+  frequency, information content, and  to  see  their  base  frequency
+  table.  The user types in a word to examine and the program displays
+  the values and table. The information  content  will  be  calculated
+  from  either  Dw  or  Ds  depending which was used to create Dm, and
+  using the current composition setting. Typical dialogue follows:
+
+   ? Menu or option number=31
+  ? Word to examine=TTGACA
+  TtgacA            60  0.7385326
+      56    56     6     7     5    11
+       4     3     2     1    52     1
+       1     4     2    53     3    48
+       3     1    54     3     4     4
+  TTGACA
+  ? Word to examine=TATAAT
+  taTAat            65  0.6251902
+      56     3    53     4     4    60
+       6     1     5     5     5     3
+       3    60     5    57    57     4
+       4     5     6     3     3     2
+  TATAAT
+  ? Word to examine=GGGGGG
+  gggggg             0  0.6199890
+       3     1     1     2     3     4
+       1     3     1     2     2     1
+       2     1     1     1     1     1
+      11    12    14    12    11    11
+  GGGGGG
+  ? Word to examine=
+
+ @32. B 1 @Save or restore a dictionary
+  Save or restore  dictionary  allows  users  to  write  or  read  any
+  dictionary  to  and from disk files. The user is asked to define the
+  dictionary and file. The function is useful  if  the  machine  being
+  used  is  very  slow at calculating because the files can be handled
+  quickly. However note that the files  cannot  be  processed  by  any
+  other program.
+ @33. B 1 @Find inverted repeats
+  Find inverted repeats performs searches for simple  inverted  repeat
+  sequences  in  each  sequence.  They  are defined by a range of loop
+  sizes and a minimum number of potential basepairs. The  results  can
+  be  plotted  or listed. The x axis of the plot represents the length
+  of the aligned  sequences  and  the  y  direction  is  divided  into
+  sufficient  strips  to  accommodate each sequence. So if an inverted
+  repeat is found in the 3rd sequence  at  a  position  equivalent  to
+  halfway  along  the  longest  of the sequences then a short vertical
+  line will be drawn at the midpoint of the 3rd strip.  Alternatively,
+  if  the  results  are  listed, the potential hairpin loops are drawn
+  out, with the sequence number and the position of the loop.  Typical
+  dialogue follows.
+
+  ? Menu or option number=33
+  Define the range of loop sizes
+  ? Minimum loop size (0-10) (3) =0
+  ? Maximum loop size (1-20) (3) =
+  ? Minimum number of basepairs (1-20) (6) =
+  ? (y/n) (y) Plot results N
+   Searching
+
+  Sequence     3    34
+             C
+            G.T
+            T-A
+            A-T
+            T.G
+            T.G
+            G.T
+       ATCTTT TATTTCA
+           33
+
+  Sequence     5    35
+             T
+            G.T
+            T.G
+            A-T
+            T.G
+            G.T
+            C-G
+            T.G
+       TCCGGC AATTGTG
+           34
+
+
+ @ End of help
--- a/help/mem_menu
+++ b/help/mem_menu
@ -0,0 +1,32 @@
+0 1 15 184 B MEP
+1 1 9304 37 B Help
+2 1 10465 2 B Quit
+3 1 10531 14 B Read a new sequence.
+4 1 10932 6 B Define active region
+5 1 11250 31 B List a sequence.
+6 1 12393 3 B List a text file.
+7 1 12525 12 B Direct output to disk
+10 1 12996 2 B Clear graphics
+11 1 13065 2 B Clear text
+12 1 13126 12 B Draw a ruler.
+13 1 13871 12 B Use crosshair.
+14 1 14459 34 B Reposition plots
+15 1 16611 12 B Label a diagram
+16 1 17394 12 B Display a map.
+17 1 18154 31 B Search for strings
+18 1 19507 3 B Set strand
+19 1 19672 6 B Set composition
+20 1 20013 3 B Set word length
+21 1 20131 3 B Set number of mismatches
+22 1 20256 14 B Show settings
+23 1 20718 3 B Make dictionary Dw
+24 1 20890 3 B Make dictionary Ds
+25 1 21055 7 B Make dictionary Dm from Dw
+26 1 21505 7 B Make dictionary Dm from Ds
+27 1 21955 5 B Make dictionary Dh from Dm
+28 1 22245 55 B Examine dictionary Dm
+29 1 24148 70 B Examine dictionary Dh
+30 1 26410 25 B Examine words in Dm
+31 1 27437 33 B Examine words in Dh
+32 1 28701 7 B Save or restore a dictionary
+33 1 29106 46 B Find inverted repeats
--- a/help/mep_help
+++ b/help/mep_help
@ -0,0 +1,792 @@
+
+ @-1. TX  0 @General
+
+ @-2. T   0 @Screen control
+
+ @-2. X   0 @Screen
+
+ @-3. TX  0 @Dictionary analysis
+
+ @0. TX  -1 @MEP
+
+        This is  a  program   for  analysing  families  of  nucleotide
+  sequences  in  order  to  find  common  motifs and potential binding
+  sites.  The ideas in this  program  were  described  in  Staden,  R.
+  "Methods  for  discovering  novel motifs in nucleic acid sequences".
+  Computer Applications in the Biosciences, 5, 293-298, (1989).
+
+        The program  can  read  sequences  stored  in  either  of  two
+  formats: 1) all sequences aligned in a single file; 2) all sequences
+  in separate files and accessed through a file of file names.
+
+        The  program  contains  functions  that  can  answer   several
+  questions about a set of sequences:
+
+  Which words are most common?
+  Which words occur in the most sequences?
+  Which words contain the most information?
+  Which words occur in equivalent positions in the sequences?
+  Which words are inverted repeats?
+  Which words occur on both strands of the sequences?
+  Where are the inverted repeats?
+  Where are the fuzzy words?
+
+        Most of the program is concerned with analysing what it  terms
+  "fuzzy words" within the set of sequences. The analysis is explained
+  below. Note that the standard version of the programs is limited  to
+  words of maximum length 8 letters, and a maximum fuzziness of 2.
+
+        The following analyses (preceded by their option numbers)  are
+  included:
+    ? = Help
+    ! = Quit
+    3 = Read new sequences
+    4 = Redefine active region
+    5 = List the sequences
+    6 = List text file
+    7 = Direct output to disk
+   10 = Clear graphics
+   11 = Clear text
+   12 = Draw ruler
+   13 = Use cross hair
+   14 = Reset margins
+   15 = Label diagram
+   16 = Draw map
+   17 = Search for strings
+   18 = Set strand
+   19 = Set composition
+   20 = Set word length
+   21 = Set number of mismatches
+   22 = Show settings
+   23 = Make dictionary Dw
+   24 = Make dictionary Ds
+   25 = Make fuzzy dictionary Dm from Dw
+   26 = Make fuzzy dictionary Dm from Ds
+   27 = Make fuzzy dictionary Dh from Dm
+   28 = Examine fuzzy dictionary Dm
+   29 = Examine fuzzy dictionary Dh
+   30 = Examine words in Dm
+   31 = Examine words in Dh
+   32 = Save or restore a dictionary
+   33 = Find inverted repeats
+
+        Some of these methods produce graphical  results  and  so  the
+  program  is  generally used from a graphics terminal (a vdu on which
+  lines and points can be drawn as well as characters).
+
+  The positions of each of the plots is defined relative  to  a  users
+  drawing board which has size 1-10,000 in x and 1-10,000 in y.  Plots
+  for each  option  are  drawn  in  a  window  defined  by  x0,y0  and
+  xlength,ylength. Where x0,y0 is the position of the bottom left hand
+  corner of the window, and xlength is the width  of  the  window  and
+  ylength the height of the window.
+     --------------------------------------------------------- 10,000
+     1                                                       1
+     1       --------------------------------------   ^      1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       1                                    1 ylength  1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       --------------------------------------   v      1
+     1  x0,y0^                                               1
+     1       <---------------xlength-------------->          1
+     ---------------------------------------------------------      1
+     1                                                   10,000
+
+  All values are in drawing board  units  (i.e.  1-10,000,  1-10,000).
+  The default window positions are read from a file "MEPMARG" when the
+  program is started. Users can have their own file if required.
+
+        The options for the program are accessed from  3  main  menus:
+  general,  screen  control  and  dictionary analysis.  Both menus and
+  options are selected by number.
+
+        The most important and novel part of the program is its use of
+  "fuzzy dictionaries" and an information theory measure, to help show
+  the most interesting motifs.  Central to the method is the idea of a
+  fuzzy   dictionary   of  word  frequencies.  A  dictionary  of  word
+  frequencies is an ordered list of all the words in the sequences and
+  a  count  of the number of times that they occur. A fuzzy dictionary
+  is an equivalent list but which contains instead, for each  word,  a
+  count  of  the number of times similar words occur in the sequences.
+  We term words that are similar "relations". The fuzziness is defined
+  by the number of letters in a word that are allowed to be different.
+  So if we had a fuzziness of 1 we allow 1 letter to be different. For
+  example,  with  a  fuzziness of 1, the entry in the fuzzy dictionary
+  for the word TTTTTT would contain a count of the  numbers  of  times
+  TTTTTT  occurred plus the  number  of  times all words differing by
+  exactly one letter from TTTTTT occurred.
+
+        Once the fuzzy dictionary has been created we can  examine  it
+  in  several  ways  to find candidate control sequences. The simplest
+  question we can ask is which word in  the  dictionary  is  the  most
+  common.   Sometimes  this  simple  criterion of "most common" may be
+  adequate to discover a new motif but in general we would not  expect
+  it  to  be  sufficient. For example some words will be common simply
+  because of a base composition bias in the sequences being  analysed.
+  In  addition  a  word  can be the most frequent and yet not be "well
+  defined". This last point is best explained by an example.
+
+        Suppose we were looking at  two letter words and allowing  one
+  mismatch, and  that  there were 10 occurrences of TT and 5 of AC. We
+  could align the 10 words that were one letter different from TT  and
+  the  5  that  were  related to AC. Then we could count the number of
+  times each base occurred in each position for each of these two sets
+  of words. Suppose we got the two base frequency tables shown below.
+     TT                  AC
+         T 6 4               T 1 0
+         C 1 3               C 0 4
+         A 1 2               A 4 1
+         G 2 1               G 0 0
+
+  These tables show that although TT occurs (with one letter mismatch)
+  more often than AC, the ratio of base frequencies for AC at 4/5, 4/5
+  is higher than those for TT at 6/10, 4/10. Hence we would  say  that
+  AC was better defined than TT.  Expressing this another way we would
+  say that the definition of AC contained more information  than  that
+  for TT. The program calculates the information content in a way that
+  takes into account both the sequence composition and  the  level  of
+  definition of the motif.
+
+        Definitions
+
+        Here we deal only with the dictionary  analysis.   Suppose  we
+  are dealing with a set of sequences and are examining them for words
+  that are six characters in length.
+
+        Dictionary Dw contains a count of the  number  of  times  each
+  word  occurs  in  the  set  of  sequences. For example the entry for
+  TTTTTT contains a value equal to the number of times the word TTTTTT
+  occurs in the set of sequences.
+
+        Dictionary Ds contains a count  of  the  number  of  different
+  sequences  in  which  each word occurs. For example if the entry for
+  word TTTTTT contains the value 10, it denotes that the  word  TTTTTT
+  occurs  in  ten  different sequences. Unlike Dw it only counts words
+  once for each  sequence.  For  example  if  we  had  a  set  of  100
+  sequences, the maximum possible value that Ds could take is 100, and
+  this would only happen if a word occurred in every sequence. However
+  for  the same set of sequences, Dw could contain values greater than
+  100, and this would show that a word had occurred more than once  in
+  at least one sequence.
+
+        From either of the two dictionaries Dw or Ds we can  calculate
+  a  fuzzy  dictionary  Dm.  For  each  word,  the  entry in the fuzzy
+  dictionary Dm contains the sum of the dictionary values (taken  from
+  either  Dw  or  Ds)  for  all  words  that differ from it by up to m
+  letters. For example if m=2 the entry for TTTTTT contains the number
+  of  times  that TTTTTT occurs in the dictionary, plus the counts for
+  all words that differ from TTTTTT by 1 or 2 letters.  Obviously  the
+  interpretation  of  the  values  in  Dm  depends on which of the two
+  dictionaries Dw or Ds they were derived from. When derived  from  Dw
+  the entry for any word in Dm gives the total number of times it, and
+  its relations, occur in the set of sequences. When derived  from  Ds
+  the  entry  for  any  word in Dm gives the total number of different
+  sequences that contain a word and each of its relations.
+
+        Finally,  from  fuzzy  dictionary  Dm  we  can  derive   fuzzy
+  dictionary  Dh.  All  entries in Dh are zero except for the word(s),
+  within each set of relations, that are most frequent. For example if
+  TTTTTT  occurred  20  times  but  had  a relation that occurred more
+  often, then the entry for TTTTTT would be zero.  However  if  TTTTTT
+  did  not  have  a more frequently occurring relation, then the entry
+  for TTTTTT would contain the value 20.
+ @1. T 0 @Help
+
+        This option gives online help. The user should  select  option
+  numbers  and  the  current  documentation  will  be given. Note that
+  option 0 gives an introduction to the program, and that ?  will  get
+  help from anywhere in the program.  The following analyses (preceded
+  by their option numbers) are included:
+    ? = Help
+    ! = Quit
+    3 = Read new sequences
+    4 = Redefine active region
+    5 = List the sequences
+    6 = List text file
+    7 = Direct output to disk
+   10 = Clear graphics
+   11 = Clear text
+   12 = Draw ruler
+   13 = Use cross hair
+   14 = Reset margins
+   15 = Label diagram
+   16 = Draw map
+   17 = Search for strings
+   18 = Set strand
+   19 = Set composition
+   20 = Set word length
+   21 = Set number of mismatches
+   22 = Show settings
+   23 = Make dictionary Dw
+   24 = Make dictionary Ds
+   25 = Make fuzzy dictionary Dm from Dw
+   26 = Make fuzzy dictionary Dm from Ds
+   27 = Make fuzzy dictionary Dh from Dm
+   28 = Examine fuzzy dictionary Dm
+   29 = Examine fuzzy dictionary Dh
+   30 = Examine words in Dm
+   31 = Examine words in Dh
+   32 = Save or restore a dictionary
+   33 = Find inverted repeats
+ @2. T 0 @Quit
+
+        This function stops the program.
+ @3. TX 1 @Read a new sequence
+
+        It can read sequences stored in either of two formats: 1)  all
+  sequences  aligned  in  a  single file; 2) all sequences in separate
+  files and accessed through a file of file  names.  Typical  dialogue
+  follows:
+
+  X 1 Read file of aligned sequences
+    2 Use file of file names
+  ? 0,1,2 =
+
+  ? File of aligned sequences=F1
+  Number of files           88
+
+ @4. TX 1 @Define active region
+
+        For its analytic functions  the  program  always  works  on  a
+  region of the sequence called the active region. When  new sequences
+  are read into the program the active region is automatically set  to
+  start  at the beginning of the sequences and go up to the end of the
+  longest one.
+ @5. TX 1 @List a sequence
+
+        The sequence can be listed with line lengths of 50 bases  with
+  each sequence numbered in the order in which they were read.  Output
+  can be directed to a disk  file  by  first  selecting  disk  output.
+  Typical dialogue follows.
+
+  ? Menu or option number=5
+
+                10        20        30        40        50
+     1  TAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCA
+     2  CAAATAATCAATGTGGACTTTTCTGCCGTGATTATAGACACTTTTGTTAC
+     3  TAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTATT
+     4  ACTAATTTATTCCATGTCACACTTTTCGCATCTTTGTTATGCTATGGTTA
+     5  AGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGA
+     6  TAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGC
+     7  ACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCG
+     8  GGGGCAAGGAGGATGGAAAGAGGTTGCCGTATAAAGAAACTAGAGTCCGT
+     9  AGGGGGTGGAGGATTTAAGCCATCTCCTGATGACGCATAGTCAGCCCATC
+    10  AAAACGTCATCGCTTGCATTAGAAAGGTTTCTGGCCGACCTTATAACCAT
+
+                60
+     1  TACCCGTTTTT
+     2  GCGTTTTTGT
+     3  TCATACCATAAG
+     4  TTTCATACC
+     5  ATTGTGAGC
+     6  TTCCGGCTCG
+     7  GAAGAGAGT
+     8  TCAGGTGT
+     9  ATGAATG
+    10  TAATTACG
+ @6. TX 1 @List a text file
+
+        Allows the user to have a text file displayed on  the  screen.
+  It will appear one page at a time.
+ @7. TX 1 @Direct output to disk
+
+        Used to direct output that would normally appear on the screen
+  to a file.
+
+        Select redirection of either text or graphics, and supply  the
+  name of the file that the output should be written to.
+
+        The results from the next options selected will not appear  on
+  the  screen  but  will  be  written  to  the  file. When option 7 is
+  selected again the file will be closed and output will again  appear
+  on the screen.
+ @10. TX 2 @Clear graphics
+
+        Clears the screen of both text and graphics.
+ @11. TX 2 @Clear text
+
+        Clears only text from the screen.
+ @12. TX 2 @Draw a ruler
+
+        This option allows the user to draw a ruler or scale along the
+  x  axis  of the screen to help identify the coordinates of points of
+  interest. The user can define the position of the first  amino  acid
+  to  be marked (for example if the active region is 1501 to 8000, the
+  user might wish to mark every 1000th amino acid starting  at  either
+  1501  or  2000  -  it depends if the user wishes to treat the active
+  region as an independent unit with its own numbering starting at its
+  left  edge,  or  as  part  of the whole sequence). The user can also
+  define the separation of the ticks on the scale and their height. If
+  required  the  labelling  routine  can be used to add numbers to the
+  ticks.
+ @13. TX 2 @Use crosshair
+
+        This function puts a steerable cross on the screen that can be
+  used to find the coordinates of points in the sequence. The user can
+  move the cross around using the directional keys; when he  hits  the
+  space bar the program will print out the coordinates of the cross in
+  sequence units and the option will be exited.
+
+        If instead, you hit a , the position will be displayed but the
+  cross will remain on the screen.
+
+        If a letter s is hit the sequence around  the  cross  hair  is
+  displayed and the cross remains on the screen.
+ @14. TX 2 @Reposition plots
+
+        The positions of each of the plots is defined  relative  to  a
+  users  drawing board which has size 1-10,000 in x and 1-10,000 in y.
+  Plots for each option are drawn in a window  defined  by  x0,y0  and
+  xlength,ylength. Where x0,y0 is the position of the bottom left hand
+  corner of the window, and xlength is the width  of  the  window  and
+  ylength the height of the window.
+     --------------------------------------------------------- 10,000
+     1                                                       1
+     1       --------------------------------------   ^      1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       1                                    1 ylength  1
+     1       1                                    1   1      1
+     1       1                                    1   1      1
+     1       --------------------------------------   v      1
+     1  x0,y0^                                               1
+     1       <---------------xlength-------------->          1
+     ---------------------------------------------------------      1
+     1                                                   10,000
+
+  All values are in drawing board  units  (i.e.  1-10,000,  1-10,000).
+  The default window positions are read from a file "MEPMARG" when the
+  program is started. Users can have their own file if  required.   As
+  all  the  plots  start  at  the same position in x and have the same
+  width, x0 and xlength are the same for all options. Generally  users
+  will  only  want  to change the start level of the window y0 and its
+  height ylength. This option allows users to change window  positions
+  whilst  running  the  program.   The  routine  prompts first for the
+  number of the option that the user  wishes to reposition;  then  for
+  the  y  start and height; then for the x start and length. Note that
+  changes to the x values affect all options. If the user  types  only
+  carriage  return  for any value it will remain unchanged. The cross-
+  hair can be used to choose suitable heights.
+ @15. TX 2 @Label a diagram
+
+        This routine allows users to  label  any  diagrams  they  have
+  produced.  They  are  asked  to type in a label. When the user types
+  carriage return to finish typing the label the cross-hair appears on
+  the  screen. The user can position it anywhere on the screen. If the
+  user types R (for right justify) the label will be  written  on  the
+  diagram  with  its right end at the cross-hair position. If the user
+  types L (for left justify) the label will be written on the  diagram
+  with  its  left end at the cross hair position.  The cross-hair will
+  then immediately reappear. The  user  may  put  the  same  label  on
+  another part of the diagram as before or if he hits the space bar he
+  will be asked if he wishes to type in another label.
+ @16. TX 2 @Display a map
+
+        It is  often  convenient  to  plot  a  map  alongside  graphed
+  analysis  in  order  to  indicate features within the sequence. This
+  function allows users to draw maps using files arranged in the  form
+  of EMBL  feature  tables. Of course the EMBL tables are usually only
+  used for nucleic acid  sequence  annotation  but,  as  long  as  the
+  features  are written in the correct format, they can be employed by
+  this routine. The  map  is  composed  of  a  line  representing  the
+  sequence  and  then  further  lines  denoting  the endpoints of each
+  feature the user identifies. The user is asked to define  height  at
+  which  the  line representing the sequence should be drawn; then for
+  the feature height; then for the features to plot.
+ @17. TX 1 @Search for strings
+
+        Search for strings performs searches of all the sequences  for
+  selected words and shows which sequences they are found in. The user
+  types in a word and defines the allowed number  of  mismatches.  The
+  results  are  listed  or plotted. If listed the display includes the
+  sequence number, the position  in  the  sequence  and  the  matching
+  string.  The results are plotted in the following way. The x axis of
+  the plot represents the length of the aligned sequences  and  the  y
+  direction  is  divided  into  sufficient  strips to accommodate each
+  sequence. So if a match is found in the 3rd sequence at  a  position
+  equivalent  to  halfway  along  the  longest of the sequences then a
+  short vertical line will be drawn at the midpoint of the 3rd  strip.
+  If  the  sequences are aligned it can be useful if the motifs happen
+  to appear  in  related  positions.  For  example  see  the  original
+  publication. Typical dialogue follows.
+
+  ? Menu or option number=17
+  X 1 Plot match positions
+    2 Plot histogram of matches
+  ? 0,1,2 =
+  ? Word to search for=TTGACA
+  ? Minimum match (0-6) (6) =5
+  ? (y/n) (y) Plot results N
+       2    35 TAGACA
+       5    14 TTTACA
+       6    37 TTTACA
+      11    14 TAGACA
+      14    14 TTGACA
+      17    14 GTGACA
+      17    22 TTAACA
+      20     1 TTGACA
+ @18. TX 3 @Set strand
+
+        Set strand allows the user to define which strand(s)  of  the
+  sequences to analyse: input strand, complement of input, or both.
+ @19. TX 3 @Set composition
+
+        Set composition gives the user three choices for  setting  the
+  composition  of  the  sequences  for  use  in the calculation of the
+  information content of  words.  The  user  can  select  the  overall
+  composition  of  the  sequences as read, an even composition, or can
+  type in any other 4 values.
+ @20. TX 3 @Set word length
+
+        Set word length sets the length of word for which dictionaries
+  will be made.
+ @21. TX 3 @Set number of mismatches
+
+        Set number of mismatches sets the level of fuzziness  for  the
+  creation of dictionary Dm.
+ @22. TX 3 @Show settings
+
+        Show settings shows the current settings  for  all  parameters
+  associated with dictionary analysis. A typical display follows:
+   ? Menu or option number=22
+   Current word length  =   6
+   Number of mismatches =   1
+   Start position       =     1
+   End position         =    63
+   Input strand only
+   Observed composition
+   Dictionary Dw unmade
+   Dictionary Ds unmade
+   Dictionary Dm unmade
+   Dictionary Dh unmade
+ @23. TX 3 @Make dictionary Dw
+
+        Make dictionary Dw creates a dictionary that contains a  count
+  of  the  frequency  of  occurrence  of  each  word  in the collected
+  sequences.
+ @24. TX 3 @Make dictionary Ds
+
+        Make dictionary Ds creates a dictionary that contains a  count
+  of the number of different sequences that contain each word.
+ @25. TX 3 @Make dictionary Dm from Dw
+
+        Make  dictionary  Dm   from  Dw  creates  a  dictionary   from
+  dictionary Dw that contains the frequency of occurrence of each word
+  (say X) in Dw plus the frequency of occurrence of each  word  in  Dw
+  that  differs  from  X  by  up  to  m  letters. Dm is called a fuzzy
+  dictionary as it contains the frequencies of occurrence of all words
+  plus the frequencies of all the words that are similar to them.
+ @26. TX 3 @Make dictionary Dm from Ds
+
+        Make  dictionary  Dm   from  Ds  creates  a  dictionary   from
+  dictionary Ds that contains the frequency of occurrence of each word
+  (say X) in Ds plus the frequency of occurrence of each  word  in  Ds
+  that  differs  from  X  by  up  to  m  letters. Dm is called a fuzzy
+  dictionary as it contains the frequencies of occurrence of all words
+  plus the frequencies of all the words that are similar to them.
+ @27. TX 3 @Make dictionary Dh from Dm
+
+        Make dictionary Dh  creates a dictionary  from  dictionary  Dm
+  and  whose  entries  are  zero  except for those words in any set of
+  related words that are most frequent. It finds the dominant words in
+  each set of relations and stores their counts.
+ @28. TX 3 @Examine fuzzy dictionary Dm
+
+        Examine dictionary Dm allows users to analyse the contents  of
+  dictionary  Dm  to  find  the  most common words or those words that
+  contain the most information.  The  user  supplies  a  frequency  or
+  information  cutoff and chooses to have the results sorted on either
+  value. The program will find the top  100  words  that  achieve  the
+  cutoff  values  and present them to the user sorted as selected. The
+  information  content  will  be  calculated  from  either  Dw  or  Ds
+  depending  which  was  used  to  create  Dm,  and  using the current
+  composition setting. Typical dialogue follows:
+
+  ? Menu or option number=28
+  Looking for highest scoring words
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =60
+  ? Minimum information (0.00-1.00) (0.00) =.62
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           9 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  AAAAAC      64   0.66460
+  AAAAAA      90   0.64880
+  GTTTTT      66   0.64300
+  TTTTTG      73   0.64070
+  TTTTGT      63   0.63820
+  TTTTTC      65   0.63810
+  AAAATA      63   0.62670
+  TATAAT      65   0.62510
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =60
+  ? Minimum information (0.00-1.00) (0.00) =.62
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =2
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           9 Maximum information=  0.7385326
+  AAAAAA      90   0.64880
+  TTTTTG      73   0.64070
+  GTTTTT      66   0.64300
+  TTTTTC      65   0.63810
+  TATAAT      65   0.62510
+  AAAAAC      64   0.66460
+  TTTTGT      63   0.63820
+  AAAATA      63   0.62670
+  TTGACA      60   0.73850
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =!
+
+ @29. TX 3 @Examine fuzzy dictionary Dh
+
+        Examine dictionary Dh allows users to analyse the contents  of
+  dictionary   Dh  to  find  the most common words or those words that
+  contain the most information.  The  user  supplies  a  frequency  or
+  information  cutoff and chooses to have the results sorted on either
+  value. The program will find the top  100  words  that  achieve  the
+  cutoff  values  and present them to the user sorted as selected. The
+  information  content  will  be  calculated  from  either  Dw  or  Ds
+  depending  which  was  used  to  create  Dh  and  using  the current
+  composition setting. Typical dialogue follows:
+
+  ? Menu or option number=29
+  Looking for highest scoring words
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =60
+  ? Minimum information (0.00-1.00) (0.00) =.6
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           4 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  AAAAAA      90   0.64880
+  TATAAT      65   0.62510
+  TTTTTT     115   0.60630
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =50
+  ? Minimum information (0.00-1.00) (0.00) =.5
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           8 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  TCTTGA      54   0.66080
+  AAAAAA      90   0.64880
+  TATAAT      65   0.62510
+  ACTTTA      57   0.61960
+  TTTTTT     115   0.60630
+  AGTATA      51   0.60540
+  TTATAA      55   0.59300
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =50
+  ? Minimum information (0.00-1.00) (0.00) =
+
+  X 1 Sort on information
+    2 Sort on word score
+  ? 0,1,2 =
+
+  ? Maximum number to list (0-100) (100) =
+
+  The words are
+   Total words=           8 Maximum information=  0.7385326
+  TTGACA      60   0.73850
+  TCTTGA      54   0.66080
+  AAAAAA      90   0.64880
+  TATAAT      65   0.62510
+  ACTTTA      57   0.61960
+  TTTTTT     115   0.60630
+  AGTATA      51   0.60540
+  TTATAA      55   0.59300
+  The highest word score =          115
+  ? Minimum word score (0-115) (0) =!
+
+ @30. TX 3 @Examine words in Dm
+
+        Examine words in Dm allows users to analyse  the  contents  of
+  dictionary Dm  at  the  level  of  individual  words  to  find their
+  frequency, information content, and  to  see  their  base  frequency
+  table.  The user types in a word to examine and the program displays
+  the values and table. The information  content  will  be  calculated
+  from  either  Dw  or  Ds  depending which was used to create Dm, and
+  using the current composition setting. Typical dialogue follows:
+  ? Menu or option number=30
+  ? Word to examine=TTGACA
+  TtgacA            60  0.7385326
+      56    56     6     7     5    11
+       4     3     2     1    52     1
+       1     4     2    53     3    48
+       3     1    54     3     4     4
+  TTGACA
+  ? Word to examine=TATAAT
+  taTAat            65  0.6251902
+      56     3    53     4     4    60
+       6     1     5     5     5     3
+       3    60     5    57    57     4
+       4     5     6     3     3     2
+  TATAAT
+  ? Word to examine=
+
+ @31. TX 3 @Examine words in Dh
+
+        Examine words in Dh allows users to analyse  the  contents  of
+  dictionary Dh  at  the  level  of  individual  words  to  find their
+  frequency, information content, and  to  see  their  base  frequency
+  table.  The user types in a word to examine and the program displays
+  the values and table. The information  content  will  be  calculated
+  from  either  Dw  or  Ds  depending which was used to create Dm, and
+  using the current composition setting. Typical dialogue follows:
+
+   ? Menu or option number=31
+  ? Word to examine=TTGACA
+  TtgacA            60  0.7385326
+      56    56     6     7     5    11
+       4     3     2     1    52     1
+       1     4     2    53     3    48
+       3     1    54     3     4     4
+  TTGACA
+  ? Word to examine=TATAAT
+  taTAat            65  0.6251902
+      56     3    53     4     4    60
+       6     1     5     5     5     3
+       3    60     5    57    57     4
+       4     5     6     3     3     2
+  TATAAT
+  ? Word to examine=GGGGGG
+  gggggg             0  0.6199890
+       3     1     1     2     3     4
+       1     3     1     2     2     1
+       2     1     1     1     1     1
+      11    12    14    12    11    11
+  GGGGGG
+  ? Word to examine=
+
+ @32. TX 3 @Save or restore a dictionary
+
+        Save or restore dictionary allows users to write or  read  any
+  dictionary  to  and from disk files. The user is asked to define the
+  dictionary and file. The function is useful  if  the  machine  being
+  used  is  very  slow at calculating because the files can be handled
+  quickly. However note that the files  cannot  be  processed  by  any
+  other program.
+ @33. TX 1 @Find inverted repeats
+
+        Find inverted repeats performs searches  for  simple  inverted
+  repeat  sequences  in  each sequence. They are defined by a range of
+  loop sizes and a minimum number of potential basepairs. The  results
+  can  be  plotted  or  listed.  The x axis of the plot represents the
+  length of the aligned sequences and the y direction is divided  into
+  sufficient  strips  to  accommodate each sequence. So if an inverted
+  repeat is found in the 3rd sequence  at  a  position  equivalent  to
+  halfway  along  the  longest  of the sequences then a short vertical
+  line will be drawn at the midpoint of the 3rd strip.  Alternatively,
+  if  the  results  are  listed, the potential hairpin loops are drawn
+  out, with the sequence number and the position of the loop.  Typical
+  dialogue follows.
+
+  ? Menu or option number=33
+  Define the range of loop sizes
+  ? Minimum loop size (0-10) (3) =0
+  ? Maximum loop size (1-20) (3) =
+  ? Minimum number of basepairs (1-20) (6) =
+  ? (y/n) (y) Plot results N
+   Searching
+
+  Sequence     3    34
+             C
+            G.T
+            T-A
+            A-T
+            T.G
+            T.G
+            G.T
+       ATCTTT TATTTCA
+           33
+
+  Sequence     5    35
+             T
+            G.T
+            T.G
+            A-T
+            T.G
+            G.T
+            C-G
+            T.G
+       TCCGGC AATTGTG
+           34
+ @ End of help
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/help/mep_menu
+++ b/help/mep_menu
@ -0,0 +1,68 @@
+-1 0 22 2 T General
+-1 0 22 2 X General
+-2 0 51 2 T Screen control
+-2 0 72 2 X Screen
+-3 0 106 2 T Dictionary analysis
+-3 0 106 2 X Dictionary analysis
+0 -1 124 185 T MEP
+0 -1 124 185 X MEP
+1 0 9423 38 T Help
+2 0 10594 3 T Quit
+3 1 10667 14 T Read a new sequence
+3 1 10667 14 X Read a new sequence
+4 1 11069 7 T Define active region
+4 1 11069 7 X Define active region
+5 1 11396 32 T List a sequence
+5 1 11396 32 X List a sequence
+6 1 12548 4 T List a text file
+6 1 12548 4 X List a text file
+7 1 12690 12 T Direct output to disk
+7 1 12690 12 X Direct output to disk
+10 2 13162 3 T Clear graphics
+10 2 13162 3 X Clear graphics
+11 2 13239 3 T Clear text
+11 2 13239 3 X Clear text
+12 2 13307 13 T Draw a ruler
+12 2 13307 13 X Draw a ruler
+13 2 14053 13 T Use crosshair
+13 2 14053 13 X Use crosshair
+14 2 14643 35 T Reposition plots
+14 2 14643 35 X Reposition plots
+15 2 16797 13 T Label a diagram
+15 2 16797 13 X Label a diagram
+16 2 17589 13 T Display a map
+16 2 17589 13 X Display a map
+17 1 18384 32 T Search for strings
+17 1 18384 32 X Search for strings
+18 3 19739 4 T Set strand
+18 3 19739 4 X Set strand
+19 3 19906 7 T Set composition
+19 3 19906 7 X Set composition
+20 3 20249 4 T Set word length
+20 3 20249 4 X Set word length
+21 3 20374 4 T Set number of mismatches
+21 3 20374 4 X Set number of mismatches
+22 3 20501 15 T Show settings
+22 3 20501 15 X Show settings
+23 3 20965 5 T Make dictionary Dw
+23 3 20965 5 X Make dictionary Dw
+24 3 21152 4 T Make dictionary Ds
+24 3 21152 4 X Make dictionary Ds
+25 3 21326 8 T Make dictionary Dm from Dw
+25 3 21326 8 X Make dictionary Dm from Dw
+26 3 21787 8 T Make dictionary Dm from Ds
+26 3 21787 8 X Make dictionary Dm from Ds
+27 3 22248 6 T Make dictionary Dh from Dm
+27 3 22248 6 X Make dictionary Dh from Dm
+28 3 22551 56 T Examine fuzzy dictionary Dm
+28 3 22551 56 X Examine fuzzy dictionary Dm
+29 3 24462 71 T Examine fuzzy dictionary Dh
+29 3 24462 71 X Examine fuzzy dictionary Dh
+30 3 26726 26 T Examine words in Dm
+30 3 26726 26 X Examine words in Dm
+31 3 27755 34 T Examine words in Dh
+31 3 27755 34 X Examine words in Dh
+32 3 29021 8 T Save or restore a dictionary
+32 3 29021 8 X Save or restore a dictionary
+33 1 29428 45 T Find inverted repeats
+33 1 29428 45 X Find inverted repeats
--- a/help/nip_help
+++ b/help/nip_help
--- a/help/nip_menu
+++ b/help/nip_menu
@ -0,0 +1,156 @@
+-1 0 22 2 T General
+-1 0 22 2 X General
+-2 0 51 2 T Screen control
+-2 0 72 2 X Screen
+-3 0 118 2 T Statistical analysis of content
+-3 0 143 2 X Statistics
+-4 0 180 2 T Structures and repeats
+-4 0 205 2 X Structures
+-5 0 242 2 T Translation and codons
+-5 0 242 2 X Translation and codons
+-6 0 279 2 T Gene search by content
+-6 0 279 2 X Gene search by content
+-7 0 309 2 T General signals
+-7 0 309 2 X General signals
+-8 0 340 2 T Specific signals
+-8 0 340 2 X Specific signals
+0 -1 359 16 T NIP
+0 -1 359 16 X NIP
+1 0 1155 7 T Help
+1 0 1155 7 X Help
+2 0 1469 3 T Quit
+2 0 1469 3 X Quit
+3 1 1543 220 T Read a new sequence
+3 1 1543 220 X Read a new sequence
+4 1 11372 15 T Define active region
+4 1 11372 15 X Define active region
+5 1 12100 24 T List a sequence
+5 1 12100 24 X List a sequence
+6 1 13103 6 T List a text file.
+6 1 13103 6 X List a text file.
+7 1 13300 12 T Direct output to disk
+7 1 13300 12 X Direct output to disk
+8 1 13785 10 T Write active region to disk
+8 1 13785 10 X Write active region to disk
+9 1 14128 31 T Edit the sequence
+9 1 14128 31 X Edit the sequence
+10 2 15970 3 T Clear graphics
+10 2 15970 3 X Clear graphics
+11 2 16036 3 T Clear text
+11 2 16036 3 X Clear text
+12 2 16101 12 T Draw a ruler
+12 2 16101 12 X Draw a ruler
+13 2 16833 13 T Use crosshair
+13 2 16833 13 X Use crosshair
+14 2 17443 35 T Reposition plots
+14 2 17443 35 X Reposition plots
+15 2 19598 28 T Label a diagram
+15 2 19598 28 X Label a diagram
+16 2 20703 34 T Display a map
+16 2 20703 34 X Display a map
+17 1 22073 599 T Search for restriction enzymes
+17 1 22073 599 X Search for restriction enzymes
+18 7 46675 105 T Compare a short sequence
+18 1 46675 105 T Compare a short sequence
+18 7 46675 105 X Compare a short sequence
+18 1 46675 105 X Compare a short sequence
+19 7 49650 106 T Compare a short sequence using a score matrix
+19 7 49650 106 X Compare a short sequence using a score matrix
+20 7 53349 230 T Search for a motif using a weight matrix
+20 7 53349 230 X Search for a motif using a weight matrix
+21 3 63267 4 T Count base composition
+21 3 63267 4 X Count base composition
+22 3 63440 14 T Count dinucleotide frequencies
+22 3 63440 14 X Count dinucleotide frequencies
+23 5 64100 179 T Count codons and amino acids
+23 3 64100 179 T Count codons and amino acids
+23 5 64100 179 X Count codons and amino acids
+23 3 64100 179 X Count codons and amino acids
+24 3 72137 57 T Plot base composition
+24 3 72137 57 X Plot base composition
+25 3 73213 23 T Plot local deviations in base composition
+25 3 73213 23 X Plot local deviations in base composition
+26 3 74495 23 T Plot local deviations from dinucleotide composition
+26 3 74495 23 X Plot local deviations from dinucleotide composition
+27 3 75793 23 T Plot local deviations from trinucleotide composition
+27 3 75793 23 X Plot local deviations from trinucleotide composition
+28 5 77065 18 T Calculate codon constraint
+28 5 77065 18 X Calculate codon constraint
+59 3 77869 12 T Plot negentropy
+59 3 77869 12 X Plot negentropy
+30 4 78454 74 T Search for hairpin loops
+30 4 78454 74 X Search for hairpin loops
+31 4 80321 23 T Search for long range inverted repeats
+31 4 80321 23 X Search for long range inverted repeats
+32 4 81157 37 T Search for repeats
+32 4 81157 37 X Search for repeats
+33 4 82467 12 T Search for z dna (total ry, yr)
+33 4 82467 12 X Search for z dna (total ry, yr)
+34 4 82984 12 T Search for z dna (runs of ry, yr)
+34 4 82984 12 X Search for z dna (runs of ry, yr)
+35 4 83623 15 T Search for z dna (best phased value)
+35 4 83623 15 X Search for z dna (best phased value)
+36 4 84350 92 T Local similarity or complementarity search
+36 4 84350 92 X Local similarity or complementarity search
+37 5 87778 39 T Set genetic code
+37 5 87778 39 X Set genetic code
+38 4 89050 74 T Examine repeats
+38 3 89050 74 T Examine repeats
+39 5 91670 286 T Translate and list in upto six phases
+39 5 91670 286 X Translate and list in upto six phases
+40 5 103780 134 T Translate and write the protein sequence to disk
+40 5 103780 134 X Translate and write the protein sequence to disk
+41 5 108198 71 T Calculate and write codon table to disk
+41 5 108198 71 X Calculate and write codon table to disk
+42 6 111525 132 T Codon usage method
+42 6 111525 132 X Codon usage method
+43 6 118508 182 T Positional base preference method.
+43 6 118508 182 X Positional base preference method.
+44 6 127924 39 T Uneven positional base frequencies.
+44 6 127924 39 X Uneven positional base frequencies.
+45 6 130287 33 T Codon improbability on base composition
+45 6 130287 33 X Codon improbability on base composition
+46 6 132146 28 T Codon improbability on amino acid composition
+46 6 132146 28 X Codon improbability on amino acid composition
+47 6 133744 14 T Shepherd RNY preference method
+47 6 133744 14 X Shepherd RNY preference method
+48 6 134410 30 T Ficketts method
+48 6 134410 30 X Ficketts method
+49 6 136094 139 T tRNA gene search.
+49 6 136094 139 X tRNA gene search.
+50 7 141894 4 T Plot start codons
+50 7 141894 4 X Plot start codons
+51 7 142027 4 T Plot stop codons
+51 7 142027 4 X Plot stop codons
+52 7 142188 4 T Plot stop codons on the complementary strand
+52 7 142188 4 X Plot stop codons on the complementary strand
+53 7 142365 4 T Plot stop codons on both strands
+53 7 142365 4 X Plot stop codons on both strands
+54 5 142536 45 T Search for longest open reading frames
+54 5 142536 45 X Search for longest open reading frames
+55 8 144437 67 T Search for E. coli promoter (general)
+55 8 144437 67 X Search for E. coli promoter (general)
+56 8 148004 4 T Search for E. coli promoter (general) strand
+56 8 148004 4 X Search for E. coli promoter (general) strand
+57 8 148210 4 T Search for E. coli promoter sequences. (-35 and -10)
+57 8 148210 4 X Search for E. coli promoter sequences. (-35 and -10)
+58 8 148405 44 T Search for procaryotic ribosome binding sites
+58 8 148405 44 X Search for procaryotic ribosome binding sites
+29 1 150862 4 T Reverse and complement the sequence
+29 1 150862 4 X Reverse and complement the sequence
+60 7 151001 142 T Search using a dinucleotide weight matrix
+60 7 151001 142 X Search using a dinucleotide weight matrix
+61 8 157292 31 T Search for eukaryotic ribosome binding sites
+61 8 157292 31 X Search for eukaryotic ribosome binding sites
+62 8 158730 56 T Search for splice junctions
+62 8 158730 56 X Search for splice junctions
+63 7 162089 7 T Search using a weight matrix (complementary)
+63 7 162089 7 X Search using a weight matrix (complementary)
+64 3 162471 36 T Plot observed-expected word frequencies
+64 3 162471 36 X Plot observed-expected word frequencies
+65 9 164175 5 T Search for polya sites
+65 9 164175 5 X Search for polya sites
+66 1 164369 4 T Interconvert t and u
+66 1 164369 4 X Interconvert t and u
+67 7 164520 797 T Search for patterns of motifs
+67 7 164520 797 X Search for patterns of motifs
--- a/help/nipf_help
+++ b/help/nipf_help
@ -0,0 +1,132 @@
+
+ @-1. TX   0 @General
+
+ @-2. TX   0 @Screen control
+
+ @-3. TX   0 @Statistical analysis
+
+ @-1. TX   0 @General
+
+ @-2. TX   0 @Screen control
+
+ @-3. TX   0 @Statistical analysis
+
+ @0.  TX  -1 @NIPF
+
+ @1.  TX 1 @ Help
+
+ @2.  TX 1 @ Quit
+
+ @3.  TX 1 @ Read new sequence
+
+ @4.  TX 1 @ Redefine active region
+
+ @5.  TX 1 @ List the sequence
+
+ @6.  TX 1 @ List a text file
+
+ @7.  TX 1 @ Direct output to disk
+
+ @8.  TX 1 @ Write active sequence to disk
+
+ @9.  TX 1 @ List a translation
+
+ @32. TX 1 @ List showing base differences
+
+ @37. TX 1 @ List showing translation
+
+ @33. TX 1 @ List showing amino acid differences
+
+ @10. TX 2 @ Clear graphics
+
+ @11. TX 2 @ Clear text
+
+ @12. TX 2 @ Draw a ruler
+
+ @13. TX 2 @ Use cross hair
+
+ @14. TX 2 @ Reset margins
+
+ @15. TX 2 @ Label diagram
+
+ @16. TX 2 @ Display a map
+
+ @17. TX 3 @ Set comparison mode
+
+ @18. TX 3 @ Set sort mode
+
+ @21. TX 3 @ Count base changes
+
+ @22. TX 3 @ Count codon changes
+
+ @23. TX 3 @ Count genetic events
+
+ @24. TX 3 @ Show table of base changes
+
+ @36. TX 3 @ Show table of expressed base changes
+
+ @39. TX 3 @ Show table of silent base changes
+
+ @38. TX 3 @ Estimate mutation rate
+
+ @25. TX 3 @ Plot base changes
+
+ @26. TX 3 @ Plot expressed changes per base
+
+ @27. TX 3 @ Plot silent changes per base
+
+ @28. TX 3 @ Count expressed changes per base
+
+ @29. TX 3 @ Count silent changes per base
+
+ @30. TX 3 @ Count changed amino acids
+
+ @31. TX 3 @ Plot amino acid variability
+
+ @ end of help
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/help/nipf_menu
+++ b/help/nipf_menu
@ -0,0 +1,84 @@
+-1 0 23 2 T General
+-1 0 23 2 X General
+-2 0 53 2 T Screen control
+-2 0 53 2 X Screen control
+-3 0 89 2 T Statistical analysis
+-3 0 89 2 X Statistical analysis
+-1 0 112 2 T General
+-1 0 112 2 X General
+-2 0 142 2 T Screen control
+-2 0 142 2 X Screen control
+-3 0 178 2 T Statistical analysis
+-3 0 178 2 X Statistical analysis
+0 -1 198 2 T NIPF
+0 -1 198 2 X NIPF
+1 1 217 2 T Help
+1 1 217 2 X Help
+2 1 236 2 T Quit
+2 1 236 2 X Quit
+3 1 268 2 T Read new sequence
+3 1 268 2 X Read new sequence
+4 1 305 2 T Redefine active region
+4 1 305 2 X Redefine active region
+5 1 337 2 T List the sequence
+5 1 337 2 X List the sequence
+6 1 368 2 T List a text file
+6 1 368 2 X List a text file
+7 1 404 2 T Direct output to disk
+7 1 404 2 X Direct output to disk
+8 1 448 2 T Write active sequence to disk
+8 1 448 2 X Write active sequence to disk
+9 1 481 2 T List a translation
+9 1 481 2 X List a translation
+32 1 525 2 T List showing base differences
+32 1 525 2 X List showing base differences
+37 1 564 2 T List showing translation
+37 1 564 2 X List showing translation
+33 1 614 2 T List showing amino acid differences
+33 1 614 2 X List showing amino acid differences
+10 2 643 2 T Clear graphics
+10 2 643 2 X Clear graphics
+11 2 668 2 T Clear text
+11 2 668 2 X Clear text
+12 2 695 2 T Draw a ruler
+12 2 695 2 X Draw a ruler
+13 2 724 2 T Use cross hair
+13 2 724 2 X Use cross hair
+14 2 752 2 T Reset margins
+14 2 752 2 X Reset margins
+15 2 780 2 T Label diagram
+15 2 780 2 X Label diagram
+16 2 808 2 T Display a map
+16 2 808 2 X Display a map
+17 3 842 2 T Set comparison mode
+17 3 842 2 X Set comparison mode
+18 3 870 2 T Set sort mode
+18 3 870 2 X Set sort mode
+21 3 903 2 T Count base changes
+21 3 903 2 X Count base changes
+22 3 937 2 T Count codon changes
+22 3 937 2 X Count codon changes
+23 3 972 2 T Count genetic events
+23 3 972 2 X Count genetic events
+24 3 1013 2 T Show table of base changes
+24 3 1013 2 X Show table of base changes
+36 3 1064 2 T Show table of expressed base changes
+36 3 1064 2 X Show table of expressed base changes
+39 3 1112 2 T Show table of silent base changes
+39 3 1112 2 X Show table of silent base changes
+38 3 1149 2 T Estimate mutation rate
+38 3 1149 2 X Estimate mutation rate
+25 3 1181 2 T Plot base changes
+25 3 1181 2 X Plot base changes
+26 3 1227 2 T Plot expressed changes per base
+26 3 1227 2 X Plot expressed changes per base
+27 3 1270 2 T Plot silent changes per base
+27 3 1270 2 X Plot silent changes per base
+28 3 1317 2 T Count expressed changes per base
+28 3 1317 2 X Count expressed changes per base
+29 3 1361 2 T Count silent changes per base
+29 3 1361 2 X Count silent changes per base
+30 3 1401 2 T Count changed amino acids
+30 3 1401 2 X Count changed amino acids
+31 3 1443 2 T Plot amino acid variability
+31 3 1443 2 X Plot amino acid variability
--- a/help/pip_help
+++ b/help/pip_help
--- a/help/pip_menu
+++ b/help/pip_menu
@ -0,0 +1,80 @@
+-1 0 21 2 T General
+-1 0 21 2 X General
+-2 0 50 2 T Screen control
+-2 0 71 2 X Screen
+-3 0 117 2 T Statistical analysis of content
+-3 0 142 2 X Statistics
+-4 0 179 2 T Structures and repeats
+-4 0 204 2 X Structures
+-5 0 225 2 T Search
+-5 0 225 2 X Search
+0 -1 243 76 T PIP
+0 -1 243 76 X PIP
+1 0 3546 8 T Help
+1 0 3546 8 X Help
+2 0 3889 3 T Quit
+2 0 3889 3 X Quit
+3 1 3962 220 T Read a new sequence
+3 1 3962 220 X Read a new sequence
+4 1 13792 12 T Redefine active region
+4 1 13792 12 X Redefine active region
+5 1 14480 33 T List a sequence
+5 1 14480 33 X List a sequence
+6 1 15941 4 T List a text file
+6 1 15941 4 X List a text file
+7 1 16083 12 T Direct output to disk
+7 1 16083 12 X Direct output to disk
+8 1 16567 7 T Write active region to disk
+8 1 16567 7 X Write active region to disk
+9 1 16922 26 T Edit the sequence
+9 1 16922 26 X Edit the sequence
+10 2 18386 3 T Clear graphics
+10 2 18386 3 X Clear graphics
+11 2 18463 3 T Clear text
+11 2 18463 3 X Clear text
+12 2 18531 13 T Draw a ruler
+12 2 18531 13 X Draw a ruler
+13 2 19278 13 T Use cross hair
+13 2 19278 13 X Use cross hair
+14 2 19865 35 T Reset margins
+14 2 19865 35 X Reset margins
+15 2 22019 13 T Label a diagram
+15 2 22019 13 X Label a diagram
+16 2 22811 13 T Display a map
+16 2 22811 13 X Display a map
+17 5 23611 254 T Short sequence search
+17 1 23611 254 T Short sequence search
+17 5 23611 254 X Short sequence search
+17 1 23611 254 X Short sequence search
+18 5 34012 57 T Compare a sequence
+18 1 34012 57 T Compare a sequence
+18 5 34012 57 X Compare a sequence
+18 1 34012 57 X Compare a sequence
+19 5 35654 69 T Compare a sequence using a score matrix
+19 1 35654 69 T Compare a sequence using a score matrix
+19 5 35654 69 X Compare a sequence using a score matrix
+19 1 35654 69 X Compare a sequence using a score matrix
+20 5 37587 214 T Search for a motif using a weight matrix
+20 5 37587 214 X Search for a motif using a weight matrix
+21 3 46771 20 T Calculate amino acid composition
+21 3 46771 20 X Calculate amino acid composition
+22 4 47655 20 T Plot hydrophobicity
+22 3 47655 20 T Plot hydrophobicity
+22 4 47655 20 X Plot hydrophobicity
+22 3 47655 20 X Plot hydrophobicity
+23 4 48439 19 T Plot charge
+23 3 48439 19 T Plot charge
+23 4 48439 19 X Plot charge
+23 3 48439 19 X Plot charge
+24 4 48953 72 T Plot robson prediction
+24 4 48953 72 X Plot robson prediction
+26 4 51912 32 T Draw a helix wheel
+26 4 51912 32 X Draw a helix wheel
+25 4 53561 36 T Plot hydrophobic moment
+25 3 53561 36 T Plot hydrophobic moment
+25 4 53561 36 X Plot hydrophobic moment
+25 3 53561 36 X Plot hydrophobic moment
+27 1 55101 87 T Back translate to dna
+27 1 55101 87 X Back translate to dna
+28 5 59337 809 T Search for patterns of motifs
+28 5 59337 809 X Search for patterns of motifs
--- a/help/sap_help
+++ b/help/sap_help
--- a/help/sap_menu
+++ b/help/sap_menu
@ -0,0 +1,76 @@
+-1 0 21 2 T General
+-1 0 21 2 X General
+-2 0 50 2 T Screen control
+-2 0 71 2 X Screen
+-3 0 98 2 T Modification
+-3 0 98 2 X Modification
+0 -1 116 379 T SAP
+0 -1 116 379 X SAP
+17 1 19213 18 T Screen against restriction enzymes
+17 1 19213 18 X Screen against restriction enzymes
+18 1 20256 22 T Screen against vector
+18 1 20256 22 X Screen against vector
+20 2 21583 113 T Auto assemble
+20 2 21583 113 X Auto assemble
+28 1 27744 42 T Highlight disagreements
+28 1 27744 42 X Highlight disagreements
+32 3 30106 22 T Extract gel readings
+32 3 30106 22 X Extract gel readings
+1 0 31209 3 T Help
+1 0 31209 3 X Help
+2 0 31277 5 T Help
+2 0 31277 5 X Help
+3 1 31470 175 T Open a database
+3 1 31470 175 X Open a database
+4 3 40550 64 T Edit
+4 3 40550 64 X Edit
+9 3 43796 40 T Screen edit
+9 3 43796 40 X Screen edit
+5 1 45923 45 T Display a contig
+5 1 45923 45 X Display a contig
+6 1 48409 6 T List a text file
+6 1 48409 6 X List a text file
+8 1 48667 94 T Calculate a consensus
+8 1 48667 94 X Calculate a consensus
+25 1 53186 41 T Show relationships
+25 1 53186 41 X Show relationships
+21 3 55121 99 T Enter new gel reading
+21 3 55121 99 X Enter new gel reading
+23 3 60131 11 T Complement a contig
+23 3 60131 11 X Complement a contig
+22 3 60644 70 T Join contigs
+22 3 60644 70 X Join contigs
+24 1 64235 11 T Copy the database
+24 1 64235 11 X Copy the database
+19 1 64781 41 T Check database
+19 1 64781 41 X Check database
+29 1 66799 82 T Examine quality
+29 1 66799 82 X Examine quality
+26 3 70617 92 T Alter relationships
+26 3 70617 92 X Alter relationships
+27 1 75377 17 T Set display parameters
+27 1 75377 17 X Set display parameters
+30 3 76245 48 T Auto edit a contig
+30 3 76245 48 X Auto edit a contig
+10 2 78721 3 T Clear graphics
+10 2 78721 3 X Clear graphics
+11 2 78786 3 T Clear text
+11 2 78786 3 X Clear text
+12 2 78851 12 T Draw a ruler.
+12 2 78851 12 X Draw a ruler.
+14 2 79585 38 T Reposition plots
+14 2 79585 38 X Reposition plots
+15 2 81933 28 T Label a diagram
+15 2 81933 28 X Label a diagram
+16 2 83039 27 T Display a map.
+16 2 83039 27 X Display a map.
+7 1 84014 12 T Redirect output
+7 1 84014 12 X Redirect output
+13 2 84485 41 T Use crosshair
+13 2 84485 41 X Use crosshair
+33 2 86611 11 T Plot single contig
+33 2 86611 11 X Plot single contig
+34 2 87312 9 T Plot all contigs
+34 2 87312 9 X Plot all contigs
+31 3 87884 9 T Type in gel readings
+31 3 87884 9 X Type in gel readings
--- a/help/sip_help
+++ b/help/sip_help
--- a/help/sip_menu
+++ b/help/sip_menu
@ -0,0 +1,78 @@
+-1 0 22 2 T General
+-1 0 22 2 X General
+-2 0 51 2 T Screen control
+-2 0 72 2 X Screen
+-3 0 101 2 T Set parameters
+-3 0 101 2 X Set parameters
+-4 0 126 2 T Comparison
+-4 0 126 2 X Comparison
+0 -1 144 208 T SIP
+0 -1 144 208 X SIP
+1 0 12690 39 T Help
+1 0 12690 39 X Help
+2 0 13755 3 T Quit
+2 0 13755 3 X Quit
+3 1 13828 220 T Read a new sequence
+3 1 13828 220 X Read a new sequence
+4 1 23656 10 T Define active region
+4 1 23656 10 X Define active region
+5 1 24191 16 T List a sequence
+5 1 24191 16 X List a sequence
+6 1 25001 4 T List a text file
+6 1 25001 4 X List a text file
+7 1 25143 12 T Direct output to disk
+7 1 25143 12 X Direct output to disk
+8 1 25627 4 T Write active region to disk
+8 1 25627 4 X Write active region to disk
+9 1 25764 5 T Edit the sequences
+9 1 25764 5 X Edit the sequences
+10 2 25944 3 T Clear graphics
+10 2 25944 3 X Clear graphics
+11 2 26021 3 T Clear text
+11 2 26021 3 X Clear text
+12 2 26089 15 T Draw a ruler
+12 2 26089 15 X Draw a ruler
+13 2 26869 54 T Use cross hair
+13 2 26869 54 X Use cross hair
+14 2 28754 29 T Reposition plots
+14 2 28754 29 X Reposition plots
+15 2 30429 13 T Label a diagram
+15 2 30429 13 X Label a diagram
+16 2 31213 7 T Display a map
+16 2 31213 7 X Display a map
+17 4 31596 19 T Apply identities algorithm
+17 4 31596 19 X Apply identities algorithm
+18 4 32260 81 T Apply proportional algorithm
+18 4 32260 81 X Apply proportional algorithm
+19 4 36686 42 T List matching spans
+19 4 36686 42 X List matching spans
+20 3 37569 16 T Set span length
+20 3 37569 16 X Set span length
+21 3 38560 13 T Set proportional score
+21 3 38560 13 X Set proportional score
+22 3 39251 6 T Set identities score
+22 3 39251 6 X Set identities score
+23 3 39544 79 T Calculate expected scores
+23 3 39544 79 X Calculate expected scores
+24 3 43148 90 T Calculate observed scores
+24 3 43148 90 X Calculate observed scores
+25 3 46152 26 T Show current parameter settings
+25 3 46152 26 X Show current parameter settings
+27 2 46802 5 T Draw a /
+27 2 46802 5 X Draw a /
+26 4 46991 57 T Quick scan
+26 4 46991 57 X Quick scan
+28 4 49883 90 T Align sequences
+28 4 49883 90 X Align sequences
+29 1 55133 4 T Complement the sequences
+29 1 55133 4 X Complement the sequences
+30 3 55256 9 T Switch main diagonal
+30 3 55256 9 X Switch main diagonal
+31 3 55755 8 T Switch identities
+31 3 55755 8 X Switch identities
+32 3 56202 17 T change score matrix
+32 3 56202 17 X change score matrix
+33 3 56884 16 T Set number of sd's for Quickscan
+33 3 56884 16 X Set number of sd's for Quickscan
+34 3 57767 13 T Set gap penalities
+34 3 57767 13 X Set gap penalities
--- a/help/splitp_help
+++ b/help/splitp_help
@ -0,0 +1,132 @@
+
+        Preparing the PROSITE protein motif library  for  use  by
+the Staden programs
+
+        Introduction
+
+        A library of protein motifs (in our terminology,  because
+they  include  variable  gaps, some would be called patterns) has
+recently  become  available  from  Amos  Bairoch, Departement  de
+Biochimie Medicale, University of Geneva. Currently it contains 317
+patterns/motifs and arrives on tape or cdrom in two files: a .dat
+file  and  a  .doc  file. There is also a user documentation file
+prosite.usr. Here I outline  what  is  required  to  prepare  the
+PROSITE library for use by our programs.
+
+        Three programs need  to  be  run  SPLITP1,  SPLITP2,  and
+SPLITP3.
+
+        Outline of the PROSITE files
+
+        A typical entry in the .dat file is shown below.
+
+ID   2FE2S_FERREDOXIN; PATTERN.
+AC   PS00197;
+DT   APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
+DE   2Fe-2S ferredoxins, iron-sulfur binding region signature.
+PA   C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C.
+NR   /RELEASE=14,15409;
+NR   /TOTAL=69(69); /POSITIVE=63(63); /UNKNOWN=0(0); /FALSE_POS=6(6);
+NR   /FALSE_NEG=5(5);
+CC   /TAXO-RANGE=A?EP?; /MAX-REPEAT=1;
+CC   /SITE=1,iron_sulfur; /SITE=5,iron_sulfur; /SITE=8,iron_sulfur;
+DR   P15788, FER$APHHA , T; P00250, FER$APHSA , T; P00223, FER$ARCLA , T;
+DR   P00227, FER$BRANA , T; P07838, FER$BRYMA , T; P13106, FER$BUMFI , T;
+DR   P00247, FER$CHLFR , T; P07839, FER$CHLRE , T; P00222, FER$COLES , T;
+DO   PDOC00175;
+//
+
+        Each entry has an  accession  number  (here  PS00197),  a
+pattern definition (here C-x(1,2)-[STA]-x(2)-C-[STA]-{P}-C) and a
+documentation  file  cross  reference  (here  PDOC00175).    This
+pattern  means: C, gap of 1 or 2, any of STA, gap of 2, C, any of
+STA, not P, C.
+
+        We need to convert all of these patterns into our pattern
+definitions  (as  membership  of  a  set, with the appropriate gap
+ranges)  and  write  each  into  a  separate  pattern  file  with
+corresponding "membership of a set" weight matrices. Each pattern
+file  is  named  accession_number.pat  (here  PS00197.PAT).   The
+corresponding    matrix    files    are    accession_number.wtsa,
+accession_number.wtsb, etc for  however  many  are  needed  (here
+PS00197.WTSA  and  PS00197.WTSB):  two  are needed because of the
+variable gap.
+
+        In addition we can optionally split  the  .dat  and  .doc
+files  into  separate  files,  one  for  each  entry,  with names
+accession_number.dat and accession_number.doc. Also we create  an
+index  for  the  library  prosite.lis,  which  gives  a  one line
+description of each pattern, and ends with the pattern  file  and
+documentation file numbers. The start of the file is shown below.
+
+N-glycosylation site.                                                00001,00001
+Glycosaminoglycan attachment site.                                   00002,00002
+Tyrosine sulfatation site.                                           00003,00003
+cAMP- and cGMP-dependent protein kinase phosphorylation site.        00004,00004
+
+So the name of the pattern file for Glycosaminoglycan  attachment
+site is PS00002.PAT, and for the documentation file PDOC00002.DOC
+
+        Finally we create a  file  of  file  names  for  all  the
+patterns in the library.
+
+        To use the complete PROSITE  library  from  program  pip,
+select  "pattern  searcher"  and  choose  the option "use file of
+pattern file names", and give the file name prosite.nam. For any
+matches  found,  the  accession  number and pattern title will be
+displayed.
+
+        Running the conversion programs
+
+        Only SPLITP3 is necessary  for  using  the  library.  The
+other programs only make the original files marginally easier to
+browse through and produce an index.
+
+        SPLITP1 splits the prosite.dat file to create a  separate
+file   for   each   entry.   Each  file  is  automatically  named
+PSentry_number.dat. In addition  it  creates  an  index  for  the
+library (see above).
+
+        SPLITP2 performs the same operation for  the  Prosite.doc
+file,   except   that  no  index  is  created.  Files  are  named
+PSentry_number.doc.
+
+        SPLITP3 creates a separate pattern file and weight matrix
+files  for  each prosite entry from the file prosite.dat. Pattern
+files  are  named   PSentry_number.pat,   weight   matrix   files
+PSentry_number.wtsa,  PSentry_number.wtsb, etc. The pattern title
+is the one line description of the motif. SPLITP3 also creates  a
+file  of  file  names. Notice that it will ask for a path name so
+that the path can be included in the file of file names. This  is
+the path to the directory in which the pattern files are stored.
+
+        Notes
+
+        Obviously the use of files of file  names  is  a  general
+solution,   and  anybody  could  now  create  their  own  set  of
+interesting patterns for screening, or a subset  of  prosite.nam,
+etc.
+
+        Note that 5 of the Bairoch motifs contained the symbols >
+or  < which means that the motifs must appear exactly at the N or
+C termini  of  the  sequences.  Currently  our  methods  have  no
+mechanism for such definitions and, for example KDEL motifs, will
+be permitted to occur anywhere throughout a sequence.
+
+        Also, of course, the library does not  have  to  be  used
+solely  for performing mass screenings: each individual entry can
+be used as a single pattern by giving the name of its .pat file -
+eg pathname/ps00002.pat. In addition more sophisticated users will
+wish to copy pattern files and weight  matrices  into  their  own
+directories  and  modify  them. For example the cutoff scores are
+probably chosen to be quite high in order to reduce the number of
+false positives, and some users might wish to lower them.
+
+
+
+
+
+
+
+
+
--- a/help/staden.references
+++ b/help/staden.references
@ -0,0 +1,81 @@
+
+  References with further information about the methods
+ 
+            Staden, R.  Nucl.  Acid Res.  8, 817-825 (1980)
+                 A computer program to search for tRNA genes. (NIP)
+            Staden, R.  Nucl.  Acid Res.  8, 3673-3694 (1980)
+                 A new computer method for the storage and  manipulation
+                 of DNA gel reading data.  (SAP).
+            Staden, R.  Nucl.  Acid Res.  10, 2951-2961 (1982)
+                 An  interactive  graphics  program  for  comparing  and
+                 aligning   nucleic   acid  and  amino  acid  sequences.
+                 (SIP).
+            Staden, R.  Nucl.  Acid Res.  10, 4731-4751 (1982)
+                 Automation of the computer handling of gel reading data
+                 produced by the shotgun method of DNA sequencing.(SAP)
+            Staden, R.  and  McLachlan,  A.,D.   Nucl.   Acid  Res.   10
+                 141-156 (1982)
+                 Codon preference and its  use  in  identifying  protein
+                 coding regions in long DNA sequences. (NIP)
+            Staden, R.  Nucl.  Acid Res.  12, 499-503 (1984)
+                 A computer program to enter DNA gel reading data into a
+                 computer.  (GIP)
+            Staden, R.  Nucl.  Acid Res.  12, 551-567 (1984)
+                 Measurements of the effects that coding for  a  protein
+                 has  on  a  DNA  sequence and their use for finding
+                 genes.  (NIP:  positional base preferences, uneven
+                 positional base frequencies)
+            Staden, R.  Nucl.  Acid Res.  12, 505-519 (1984)
+                 Computer methods to  locate  signals  in  nucleic  acid
+                 sequences.    NIP:   promoters,  ribosome  binding
+                 sites, intron/exon junctions.
+            McLachlan A D, Staden R and Boswell D R,  Nucl.   Acid  Res.
+                 12, 9567-9575 (1984)
+                 Measure of strength of codon preference. (NIP)
+            Staden R, Computer methods to locate genes  and  signals  in
+                 nucleic acid sequences, Genetic Engineering: Principles
+                 and Methods Vol. 7, Edited  by  J. K. Setlow   and   A.
+                 Hollaender, Plenum Publishing Corp.  1985. (NIP)
+            Staden R Nucl. Acid. Res. 14, 217-231 (1986)
+                 The current status and portability of our sequence
+                 handling software. Summary for May 1985.
+            Staden R "Computer Handling of DNA sequencing projects" in
+                 Nucleic acid and protein sequence analysis, A practical
+                 approach, 173-217. Edited by M.J.Bishop and C.J.Rawlings,
+                 IRL press (1987). (SAP)
+            Staden R, Methods to define and locate patterns of motifs in
+                  sequences. CABIOS 4 53-60 (1988). (NIP, PIP,
+                  NIPL, PIPL)
+            Staden R, Methods for calculating the probabilities of finding
+                  patterns in sequences. CABIOS 5 89-96 (1989). (NIP, PIP,
+                  NIPL, PIPL)
+            Staden R,  "Methods for discovering novel motifs in nucleic acid
+                   sequences". CABIOS 5, 293-298, (1989). (MEP)
+            Staden R, Methods to search for patterns in protein and nucleic
+                  acid sequences. In Doolittle, R,R (ed), Methods in
+                  Enzymology, 183, Academic Press, San Diego, CA, 193-211.
+                  (1990) (NIP, NIPL, PIP, PIPL)
+            Staden R, Finding protein coding regions in genomic sequences.
+                  In Doolittle, R,R (ed), Methods in Enzymology, 183, 
+                  Academic Press, San Diego, CA, 163-180. (1990) (NIP)
+            Gleeson T J and Staden R, An X windows and UNIX implementation
+                  of our sequence analysis package. CABIOS 7 398 (1991)
+	    Staden R, Screening protein and nucleic acid sequences against
+                  libraries of patterns. DNA Sequence, in press (NIP, PIP,
+                  SPLITP1, SPLITP2, SPLITP3, PROSITE)
+            Dear S and Staden R, A sequence assembly and editing program for
+                   efficient management of large projects. Nucleic Acids
+                   Research 19 3907-3911 (1991) (XDAP)
+	    Staden R and Dear S, Indexing the sequence libraries: Software
+		   providing a common indexing system for all the standard
+		   sequence libraries. DNA Sequence 3, 99-105 (1992).
+            Dear S and Staden R, A standard file format for data from DNA
+		   sequencing instruments. DNA Sequence 3, 107-110 (1992)
+	    Gleeson T and Hillier L, A trace display and editing program
+		   for data from fluorescence based sequencing machines.
+		   Nucleic Acids Research 19 6481-6483 (1991) (TED)
+	    Staden R, Staden package update. Genome News 13 12-13 (1993)
+
+ 
+
+
--- a/help/staden_help
+++ b/help/staden_help
@ -0,0 +1,184 @@
+
+        Introduction to the Staden sequence analysis package  and  its
+  user interface
+
+        The package contains the following programs:
+
+    GIP     Gel input program
+    SAP     Sequence assembly program
+    NIP     Nucleotide interpretation program
+    PIP     Protein interpretation program
+    SIP     Similarity investigation program
+    MEP     Motif exploration program
+    NIPL    Nucleotide interpretation program (library)
+    PIPL    Protein interpretation program (library)
+    SIPL    Similarity investigation program (library)
+
+  GIP  uses  a   digitiser   for   entry   of   DNA   sequences   from
+  autoradiographs.
+  SAP handles everything relating to assembling gel readings in  order
+  to  produce  a consensus sequence. It can also deal with families of
+  protein sequences.
+  NIP provides functions for analysing  and  interpreting  individual
+  nucleotide sequences.
+  PIP provides functions for analysing  and  interpreting  individual
+  protein sequences.
+  MEP analyses families of nucleotide sequences to help  discover  new
+  motifs.
+  NIPL performs pattern searches on nucleotide sequence libraries.
+  PIPL performs pattern searches on protein sequence libraries.
+  SIP provides functions for comparing and aligning pairs  of  protein
+  or nucleotide sequences.
+  SIPL searches nucleotide and protein sequence libraries for  entries
+  similar to probe sequences.
+
+
+        Documentation
+
+        As is explained below, the programs SAP, NIP, PIP, SIP and MEP
+  have  online  help,  and  the  help  files  have the names: HELPSAP,
+  HELPNIP, HELPPIP, HELPSIP, HELPMEP. These files can be displayed  on
+  the  screen or printed using the appropriate commands. Currently the
+  help for the other programs is also contained in  these  files.  For
+  example help for NIPL is in HELPNIP. This file is called HELPSTADEN.
+
+        Sequence formats
+
+        The shotgun sequencing program SAP deals only with simple text
+  files  for gel readings, and is a self-contained system.  However as
+  there is still no single agreed format for finished sequences or for
+  libraries  of  sequences, the other programs in the package can read
+  data that is stored in several ways.
+
+        The analytical programs can read individual  sequences  stored
+  in  the following formats: Staden, EMBL, Genbank, PIR (also known as
+  NBRF), and GCG, but for storing whole  libraries  we  use  only  PIR
+  format.  In  addition  these programs can perform a number of simple
+  operations using libraries stored in this format. They  can  extract
+  entries  by  entry  name, can search titles for keywords, can search
+  the whole of the annotation files  for  keywords,  and  can  extract
+  annotations for any named entry.  We reformat all sequence libraries
+  into PIR format. Currently we have NBRF, EMBL, SWISSPROT and VECBASE
+  libraries in PIR format.
+
+        The library  searching  programs  operate  only  on  sequences
+  stored in PIR format.
+
+        The  analytical  programs  will  operate  with  uppercase   or
+  lowercase  sequence  characters. In addition T and U are equivalent.
+  SAP uses uppercase letters for original gel readings  and  lowercase
+  letters  for  characters that are corrected by the automatic editor.
+  Programs NIP  and  PIP  use  IUB  symbols  for  redundancy  in  back
+  translations  and  for  sequence  searches.   The  symbols are shown
+  below.
+
+
+              NC-IUB SYMBOLS
+
+        A,C,G,T
+        R        (A,G)        'puRine'
+        Y        (T,C)        'pYrimidine'
+        W        (A,T)        'Weak'
+        S        (C,G)        'Strong'
+        M        (A,C)        'aMino'
+        K        (G,T)        'Keto'
+        H        (A,T,C)      'not G'
+        B        (G,C,T)      'not A'
+        V        (G,A,C)      'not T'
+        D        (G,A,T)      'not C'
+        N        (G,A,C,T)    'aNy'
+
+
+        The user interface
+
+        The user interface is common to all programs. It consists of a
+  set  of  menus and a uniform way of presenting choices and obtaining
+  input from the user. This section describes: the  menu  system;  how
+  options  are  selected  and   other  choices  made;  how  values are
+  supplied to the program;  how help is obtained, and  how  to  escape
+  from  any  part of a program. In addition it gives information about
+  saving results in files and  the  use  of  graphics  for  presenting
+  results.
+
+        Menus
+
+        Each program has several menus and numerous options. Each menu
+  or  option  has  a  unique  number that is used to identify it. Menu
+  numbers are distinguished from option numbers by being  preceded  by
+  the  letter  m (or M, all programs make no distinction between upper
+  and lower case letters). With the exception of some parts of program
+  SAP,  the  menus  are  not hierarchical, rather the options they each
+  contain are simply lists of related functions and their  identifying
+  numbers. Therefore options can be selected independently of the menu
+  that is currently being shown on the  screen,   and  the  menus  are
+  simply  memory  aids.  All options and menus are selected by typing
+  their option number when the programs present the prompt
+
+        "? Menu or option number =".
+
+        To select a menu type its number preceded by the letter M.  To
+  select  an  option  type  its number.  If you type only "return" you
+  will get menu m0 which is simply a list of menus. If you  select  an
+  option  you  will  return  to the current menu after the function is
+  completed.
+
+        When you select an option, in  many  cases  the  program  will
+  immediately perform the operation selected without further dialogue.
+  If you precede an option number by the letter d (e.g. D17), you will
+  force the program to offer dialogue about the selected option before
+  the function operates, hence allowing you to change the value of any
+  of  its parameters.  If you precede an option number by the symbol ?
+  (e.g. ?17), you will be given help on the option (here 17).
+
+        Where possible, equivalent  or  identical  options  have  been
+  given  the  same numbers in all programs, and so users quickly learn
+  the numbers for the functions they employ most often.
+
+        Help
+
+        As mentioned above, help about each option can be obtained  by
+  preceding  the option number  by the symbol ? when you are presented
+  with the prompt "? Menu or option number", but there are two further
+  ways of obtaining help. Whenever the program asks a question you can
+  respond by typing the symbol ?  and  you  will  receive  information
+  about  the  current  option. In addition, option number 1 in all the
+  programs will give help on all of a program's functions.
+
+        Quitting
+
+        To exit from any point in a program you type ! for quit. If  a
+  menu is on the screen this will stop the program, otherwise you will
+  be returned to the last menu.
+
+        Other interactions
+
+        Questions are  presented in a  few  restricted  ways.  In  all
+  cases  typing only "return" in response to a question means yes, and
+  typing N or n means no.
+
+        Obvious opposites such as "clear screen"  and  "keep  picture"
+  are  presented with only the default shown. For example in this case
+  the default is generally "keep picture" so the program will display:
+
+        "(y/n) (y) Keep picture"
+
+        and the picture will be retained if the  user  types  anything
+  other than N or n, (in which case the screen will be cleared).
+
+        Where there are choices that are  not  obvious  opposites,  or
+  there  are  more than two choices, two further conventions are used:
+  "radio buttons" and "check boxes".
+
+        Radio buttons are used when only one of a  number  of  choices
+  can  be made at any one time. The choices are presented arranged one
+  above the other, each choice with a number for  its  selection,  and
+  the  default choice marked with an X. For example in the restriction
+  enzyme search routine the following choices are offered:
+
+
+           Select output mode
+     1 order results enzyme by enzyme
+     2 order results by positon
+   X 3 show only infrequent cutters
+     4 show names above the sequence
+   
--- a/help/stadenp_help
+++ b/help/stadenp_help
@ -0,0 +1,26 @@
+  Standard Staden Programs
+
+  gip        Gel input program
+  sap        Sequence assembly program
+  (x)dap     Sequence assembly program
+  (x)nip     Nucleotide interpretation program
+  (x)pip     Protein interpretation program
+  (x)sip     Similarity investigation program
+  (x)mep     Motif exploration program
+  nipl       Nucleotide interpretation program (library)
+  pipl       Protein interpretation program (library)
+  sipl       Similarity investigation program (library) 
+  Those with (x) have both tektronix (say nip) and x (say xnip) versions.
+  Environment variables for help files
+  HELPSAP    sap
+  HELPDAP    dap
+  HELPGIP    gip
+  HELPNIP    nip
+  HELPPIP    pip
+  HELPSIP    sip
+  HELPMEP    mep
+  HELPSTADEN Introduction and user interface
+  e.g. to read HELPSTADEN type  'more $HELPSTADEN'
+ 
+
+
--- a/help/ted.help
+++ b/help/ted.help
@ -0,0 +1,168 @@
+Trace Editor Help
+-----------------
+
+The ted trace editor is a prototype to allow the display and editing
+of traces from sequencing machines, and the simple editing of plain
+sequences. It runs under the X window system. It provides simultaneous
+display of traces and bases. The editing allows individual bases to be
+removed and new ones added, and also a range of bases at either end to
+be cutoff. Currently, only ABI result files and plain sequences are
+accepted.
+
+Only one trace can be edited at a time.
+
+
+Invocation
+----------
+
+ted can be run from the command line by simply typing:
+
+    ted
+
+It will come up with no sequence initially displayed. If provided with
+any arguments it does not understand, or invalid combinations of
+arguments, ted will exit with a message indicating its intended usage.
+ted accepts the standard X arguments allowing, for example, background
+colour or geometry to be specified. ted can accept an argument
+specifying an initial file to display. The key for this is the format
+of the file, for example:
+
+    ted -ABI {ABI format filename}
+    ted -plain {plain format filename}
+
+The file is then displayed at 50% magnification, with the caret 
+initially positioned at the first base.
+
+When an initial file is given, a base number of interest and/or a
+magnification can also be given, for example:
+
+    ted -ABI {ABI format file} -baseNum 280 -mag 30
+
+or the bottom strand may be specified:
+ 
+    ted -ABI {ABI format file} -baseNum 280 -mag 30 -bottom 1
+	or
+    ted -ABI {ABI format file} -bottom 1
+
+or a string of nucleotides on which to center the window:
+	
+    ted -ABI {ABI format file} -astring 1
+	or
+    ted -ABI {ABI format file} -astring 1 -mag 30 -bottom 1
+
+Options can be specified in any order.
+
+An output filename can be specified in a similar manner:
+
+    ted -ABI inputfilename -output outputfilename
+
+The default output filename is inputfilename.seq
+
+If you are running the program on a remote machine, you must
+specify a display parameter:
+
+    ted -display machine_name:0.2
+
+You can also specify the size of the opening window or
+other screen parameters by the following:
+
+    ted  -geometry [{width}][x{height}][{+-}{xoff}[{+-}{yoff}]]
+	   [-fg {color}] [-bg {color}] [-bd {color}] [-bw {pixels}]
+
+Displays
+--------
+
+When running, ted displays the name of the file it is currently
+operating on (if any) and the original number of bases.
+
+A so-called viewport presents four different synchronised views of
+part of the trace. The top one indicates the sequence indices - the
+first digit of the number is positioned over the base to which that
+number corresponds.  Below this is a list of the bases as originally
+found in the file (this is the interpretation of the trace as made by
+the sequencing machine). Below this is the list of bases as edited by
+the user --- initially, if this file has not been edited in the past,
+this is identical to the list of original bases.  However, if in a 
+previous session the user has edited this sequence, the edited
+version of the sequence will appear in the edit window. 
+The final display is of the traces produced by the sequencing
+machine for the four respective bases.
+
+Two controls allow the view presented to be adjusted: both are
+horizontal sliders or scrollbars. The first affects the magnification
+at which the trace is viewed. The minimum magnification is such that
+the whole of the trace is visible within the viewport; when a trace is
+first input, this is the magnification used. The maximum magnification
+is such that bases are spaced out with several characters of space
+between them --- this should allow more than enough room for base
+insertions to be clearly visible. The second scrollbar is immediately
+above the viewport and allows the user to select which part of the
+trace is viewed. Both the sliders work in a similar way: the middle
+mouse button can be used to drag the thumb to any desired position,
+the left and right mouse buttons can be clicked within the scrollbar
+to indicate that paging up or down is desired. In the case of the
+viewport scrollbar, the amount of paging is determined by how far up
+the scrollbar the pointer is.
+
+The whole ted window can be expanded and contracted (to an extent) by
+dragging the "grow-region" provided by whatever window manager is
+running. The viewport takes up all of this change in size.
+
+Controls
+--------
+
+ted has four buttons. "Quit" exits the program after first checking
+whether there is a sequence which has been edited and not saved.
+
+"Help" pops up this window which has a scrollbar on the left allowing
+all the text to be viewed.
+
+"Input" presents a dialogue which asks for the format and name of a
+file to be processed. The bases and (if this is not a plain format
+file) traces are read in and displayed for editing. The only
+conversion performed on bases is from 'N' to '-'.
+
+"Output" presents a dialogue which asks for a filename into which the
+edited and clipped bases can be saved. The default value can be set
+on the command line using the "-output" keyword. No conversion of bases
+is performed on output.
+
+ted operates in one of three editing modes, one of which is selected
+from three "radio buttons". The currently selected mode is
+highlighted.
+
+Editing
+-------
+
+In "Edit sequence" mode, the (lower) list of editable bases can be
+edited in much the same way as a text editor operates. A "caret" which
+is visible in the display of edited bases can be moved left and right
+with the cursor keys (these are sometimes called arrow keys and often
+appear on numeric keypads). It can also be positioned by clicking any
+button while the pointer is pointing into either of the list of bases
+or the traces. The DELETE key deletes the base immediately to the left
+of the caret. Any printing character can be inserted to the right of
+the caret by simply typing it. Inserted characters are placed halfway
+between their neighbours, or if a space is left by the deletion of a
+base originally there, its position is used. A base can thus be
+changed by deleting it and entering the new base.
+
+Note that in the current version of ted the caret is not constrained
+to remain within the viewed part of the display and that editing can
+still continue while it is thus invisible. Such editing would probably
+only occur by accident.
+
+ted provides a facility to define a cutoff at either end of the trace.
+A number of the leftmost bases (corresponding to the vector) and the
+rightmost bases (corresponding to the point where the data become
+unreliable) can be defined by setting the editor into "Adjust left
+cutoff" or "Adjust right cutoff" mode. In either of these modes, the
+pointer and mouse buttons can be used to indicate the cutoff point,
+and the cursor keys can be used to adjust this leftwards or
+rightwards. Initially, the cutoff regions are both empty. The cutoff
+regions are clearly indicated on the list of edited bases display and
+on the traces display by being drawn with a dimmed background.
+
+When the sequence is written out, the list of edited bases, with both
+cutoff regions removed, is written. The output contains newlines
+for convenient formatting and always ends with one.
--- a/manl/staden.l
+++ b/manl/staden.l
@ -0,0 +1,102 @@
+.TH staden 1L "November 1991" "MRC LMB" "LOCAL"
+.SH NAME
+staden, xstaden \- sequence analysis suite
+.SH DESCRIPTION
+.I staden
+is a suite of programs for sequence analysis. Currently available are
+.I mep,
+.I nip,
+.I pip,
+.I sap,
+.I sip,
+.I nipl,
+.I pipl,
+.I and sipl.
+These all run under the SUN X11
+.I xterm
+Tektronix terminal emulator, but also work with the VT640 terminal
+and the VersaTermPro and MS-Kermit emulators if they login to a SUN.
+.PP
+.I xstaden
+is the same set of programs, named
+.I xmep,
+.I xnip,
+.I xpip,
+.I xsap,
+.I xdap,
+and
+.I xsip,
+which run directly under X providing a convenient user interface,
+including resizable output and pull-down menus. All these programs
+accept the standard X arguments. The library searching programs
+nipl, pipl and sipl are only available in xterm form.
+.PP
+Sequence library access is provided for the format as distributed
+on CDROM by EMBL. The CDROM contains the EMBL nucleotide library and
+the SWISSPROT protein library. The libraries can be left on the
+CDROM or transferred to hard disk.
+.PP
+The programs also provide an interface to the PROSITE protein motif
+library.
+.PP
+Some initialisation is required in order to use the package. csh users
+should insert the following in their .login files:
+.IP
+ setenv STADENROOT /home/BioSW/staden
+.IP
+ source $STADENROOT/staden.login
+.LP
+Users of the Bourne shell, sh, should insert the following in
+their .profile:
+.IP
+ STADENROOT=/home/BioSW/staden
+.IP
+ export STADENROOT
+.IP
+ . $STADENROOT/staden.profile
+.LP
+These initialisations will alter your shell's search path so
+that it can find the program binaries, and other files that are
+required.
+.SH ENVIRONMENT
+The following environment variables may be set in the
+user's \fI .login\fP or \fI .profile\fP file:
+.TP 20
+.BI STADENROOT= /home/BioSW/staden
+This must be set in the user's initialisation.
+.TP 20
+.BI SEQEDT= editor
+Set the editor to be used by the package.  The default is
+\fIemacs\fP.
+.SH FILES
+.PD 0
+.TP 30
+$STADENROOT/staden.login
+csh initialisation
+.TP 30
+$STADENROOT/staden.profile
+sh initialisation
+.TP 30
+$STADENROOT/tables
+Tables used by the programs
+.TP 30
+$STADENROOT/help
+Helpfiles used by the programs, documentation of the user interface
+and of each of the programs.
+.TP 30
+$STADENROOT/tables/SEQUENCELIBRARIES
+Defines the sequence libraries available, their file descriptors
+and the prompts to appear on the user's screen.
+.SH AUTHOR
+Rodger Staden, MRC Laboratory of Molecular Biology, Hills Rd., Cambridge,
+CB2 2QH, UK.
+.SH BUGS
+.PP
+When using the xterm programs and in graphics input mode,
+a carriage return should not be
+entered on its own but should be preceded by some other character,
+such as SPACE, COMMA or K. If a carriage return is entered on its
+own, some garbage will (relatively) harmlessly appear on the plot.
+.PP
+General comments on the package can be sent to
+\fI<rs@uk.ac.cam.mrc-lmb>\fP 
--- a/manl/ted.l
+++ b/manl/ted.l
@ -0,0 +1,107 @@
+.TH ted 1L "July 1991" "MRC LMB" "LOCAL"
+.SH NAME
+ted \- trace editor
+.SH SYNOPSIS
+.B ted
+[(
+.B -ABI\||\|-ALF\||\|-plain
+)
+.I tracefilename
+[
+.B -baseNum
+.I number
+]
+.B [
+.B -mag
+.I number
+( 1 to 100 )
+] 
+.B [
+.B -bottom
+.I number
+(1(true) or 0(false))
+.B ]
+.B [
+.B -astring
+.I nucleotide-string
+]] 
+.B [
+.B -enzyme
+.I  5' cutting sequence
+] 
+.B [
+.B -raw
+.I  filename
+(to be placed at head of xdap compatible .seq file)
+.B ]
+[
+.B -output
+.I outputfilename
+]
+
+.SH DESCRIPTION
+.B ted
+is a simple prototype editor for traces produced from automatic
+sequencing machines. It allows the traces (from the ABI
+or ALF sequencing machines) produced to be
+displayed along with the machine's interpretation of these into
+bases and an initially identical sequence which can be edited
+by the user. A cutoff region can be defined at both ends. The
+edited and clipped list of bases can then be written out.
+.LP
+When initially run,
+.B ted
+displays the trace file
+.I tracefilename
+(if given) of the specified format centered on the base number
+.I baseNum
+(if given). If no file is provided,
+.B ted
+initially displays nothing.
+.LP
+The display consists of
+the control panel and the synchronized view of the base position
+information, original and edited sequence data, 
+and graphical representation of the trace (with each nucleotide's trace
+being represented by a different color).  The control
+panel allows the user to read in new trace files (in either
+bottom or top strand orientation)
+as well as to search for a string of nucleotides or a certain base position.
+The information button brings up signal strength and average spacing for
+ABI files.
+Scroll bars allow the user to adjust the magnification of or scroll through
+the sequence and trace data.  The user may also choose to change the vertical
+magnification of the trace data.  Further, sequence on the head (vector)
+or tail (uncertain data) of the sequence may be ``cutoff'' 
+using the adjust left and right cutoff buttons. Bases can be inserted, 
+deleted, or replaced as with
+any ordinary word-processor in the sequence data window. Finally, the
+sequence may be written to an ascii file using the output button on
+the control panel.  The output filename is specified in a dialogue, 
+but a default value of inputfilename.seq is provided or the default value
+can be given with the
+.I outputfilename
+argument.
+.LP
+A simple help system is provided.
+.SH FILES
+.PD 0
+.TP 20
+.B ted.help
+Text provided in the help window.
+.TP
+.B /usr/lib/X11/app-defaults/Xted
+Default application resources.
+.SH ENVIRONMENT
+.TP 20
+.SB XFILESEARCHPATH
+Specifies the locations where
+.B ted.help
+is sought.
+If this is not defined,
+.B ted.help
+must be in the
+.B /usr/lib/X11/app-defaults
+directory.
+.SH AUTHORS
+Tim Gleeson, LaDeana Hillier, Simon Dear.
--- a/src/Misc/README
+++ b/src/Misc/README
@ -0,0 +1,7 @@
+Miscellaneous Routines                Simon Dear, 14 April 1992
+---------------------------------------------------------------
+
+The source modules in this directory are for commonly used
+routines. The archive misc.a should be made before any
+other programs supplied on this tape.
+
--- a/src/Misc/crash.c
+++ b/src/Misc/crash.c
@ -0,0 +1,15 @@
+#include "misc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>  /* varargs needed for v*printf() prototypes */
+
+/*
+** Print a printf-style message on stderr, then terminate the
+** program with exit status 1. This routine does not return.
+*/
+void crash (char *format, ...)
+{
+    va_list ap;
+
+    va_start(ap, format);
+    (void) vfprintf(stderr, format, ap);
+    va_end(ap);
+
+    exit(1);
+}
--- a/src/Misc/date.c
+++ b/src/Misc/date.c
@ -0,0 +1,14 @@
+#include "misc.h"
+#include <stdio.h>
+
+/******************************************************************************/
+/*
+** Time and date calculations
+*/
+#include <time.h>
+/*
+** Return the current time formatted by ctime().
+** The string is whatever ctime() hands back, so it is owned by
+** the C library and must not be freed by the caller.
+*/
+char *date_str()
+{
+    time_t now = time(NULL);
+
+    return ctime(&now);
+}
--- a/src/Misc/filenames.c
+++ b/src/Misc/filenames.c
@ -0,0 +1,39 @@
+#include "misc.h"
+#include <string.h>
+
+char *fn_tail(char *fn)
+/*
+** Return file part (:t) of directory path: the text that
+** follows the last '/' in fn, or fn itself when fn holds
+** no '/'. The result points into fn; nothing is copied.
+*/
+{
+    /*
+    ** strrchr() finds the last '/' without ever stepping a
+    ** pointer to before the start of fn, which a hand-written
+    ** backward scan does for "" and for names with no '/'.
+    */
+    char *slash = strrchr(fn, '/');
+
+    return (slash != NULL) ? slash + 1 : fn;
+}
+
+
+void fn_toupper (char *s)
+/*
+** Convert file name to upper case,
+** ignoring directory path head:
+** str_toupper() is applied to fn_tail(s) only
+*/
+{
+    char *tail = fn_tail(s);
+
+    str_toupper(tail);
+}
+
+
+
+void fn_tolower (char *s)
+/*
+** Convert file name to lower case,
+** ignoring directory path head:
+** str_tolower() is applied to fn_tail(s) only
+*/
+{
+    char *tail = fn_tail(s);
+
+    str_tolower(tail);
+}
--- a/src/Misc/files.c
+++ b/src/Misc/files.c
@ -0,0 +1,41 @@
+#include "misc.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Alliant's Concentrix <sys/stat.h> is hugely deficient */
+/* Define things we require in this program              */
+/* Methinks S_IFMT and S_IFDIR aren't defined in POSIX   */
+#ifndef S_ISDIR
+#define S_ISDIR(m)      (((m)&S_IFMT) == S_IFDIR)
+#endif /*!S_ISDIR*/
+#ifndef S_ISREG
+#define S_ISREG(m)      (((m)&S_IFMT) == S_IFREG)
+#endif /*!S_ISREG*/
+
+/*
+** Return non-zero if fn names a directory.
+** Returns 0 when stat() fails on fn or fn is not a directory.
+*/
+int is_directory(char * fn)
+{
+    struct stat st;
+
+    if (stat(fn, &st) == 0) {
+        return S_ISDIR(st.st_mode);
+    }
+    return 0;
+}
+
+/*
+** Return non-zero if fn names a regular file.
+** Returns 0 when stat() fails on fn or fn is not a regular file.
+*/
+int is_file(char * fn)
+{
+    struct stat st;
+
+    if (stat(fn, &st) == 0) {
+        return S_ISREG(st.st_mode);
+    }
+    return 0;
+}
+
+/*
+** Return 1 if stat() succeeds on fn, 0 if it fails.
+*/
+int file_exists(char * fn)
+{
+    struct stat st;
+
+    if (stat(fn, &st) != 0) {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+** Return the st_size reported by stat() for fn, as an int.
+** Returns 0 when stat() fails, so a failure cannot be told
+** apart from an empty file. The size is converted to int on
+** return, so it is only meaningful when it fits in an int.
+*/
+int file_size(char * fn)
+{
+    struct stat st;
+
+    if (stat(fn, &st) == 0) {
+        return st.st_size;
+    }
+    return 0;
+}
+
--- a/src/Misc/find.c
+++ b/src/Misc/find.c
@ -0,0 +1,39 @@
+#include "misc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+** Locate file using the caller-supplied predicate found().
+**
+** file is tried first exactly as given. If found() rejects it and
+** searchpath is not NULL, each non-empty ':'-separated entry of
+** searchpath is tried in order as "<entry>/<file>". searchpath is
+** only read, never modified, and no memory is allocated.
+**
+** Returns a pointer to a static buffer holding the first name that
+** found() accepted (the buffer is overwritten by the next call), or
+** NULL if nothing was accepted. A name that would not fit in the
+** buffer is never tried or returned, rather than overrunning it.
+*/
+char *myfind(char *file, char* searchpath, int (*found) (char *) )
+{
+    static char wholePath[1024];
+    size_t fileLen;
+    char *dir;
+
+    if (found(file)) {
+        if (strlen(file) >= sizeof wholePath) {
+            return NULL;
+        }
+        strcpy(wholePath, file);
+        return wholePath;
+    }
+    if (searchpath == NULL) {
+        return NULL;
+    }
+
+    fileLen = strlen(file);
+    dir = searchpath;
+    while (*dir != '\0') {
+        size_t dirLen = strcspn(dir, ":");
+
+        /* empty entries ("::", leading or trailing ':') are skipped */
+        if (dirLen > 0 && dirLen + 1 + fileLen < sizeof wholePath) {
+            memcpy(wholePath, dir, dirLen);
+            wholePath[dirLen] = '/';
+            /* fileLen + 1 copies the terminating NUL as well */
+            memcpy(wholePath + dirLen + 1, file, fileLen + 1);
+            if (found(wholePath)) {
+                return wholePath;
+            }
+        }
+
+        /* step over this entry and the ':' that ended it, if any */
+        dir += dirLen;
+        if (*dir == ':') {
+            dir++;
+        }
+    }
+
+    return NULL;
+}
--- a/Show more
+++ b/Show more