init: extra

2023-04-16 13:07:57 +08:00 · 2023-04-16 13:07:57 +08:00 · 650d39cff9
commit 650d39cff9
parent a445d20c65
9 changed files with 2252 additions and 0 deletions
--- a/980
+++ b/980
@ -0,0 +1,980 @@
+||||||||||| ReadSeq supported formats   (revised 30Dec92)
+--------------------------------------------------------
+
+    -f[ormat=]Name Format name for output:
+         1. IG/Stanford           10. Olsen (in-only)
+         2. GenBank/GB            11. Phylip3.2
+         3. NBRF                  12. Phylip
+         4. EMBL                  13. Plain/Raw
+         5. GCG                   14. PIR/CODATA
+         6. DNAStrider            15. MSF
+         7. Fitch                 16. ASN.1
+         8. Pearson/Fasta         17. PAUP
+         9. Zuker (in-only)       18. Pretty (out-only)
+
+In general, output supports only minimal subsets of each format
+needed for sequence data exchanges.  Features, descriptions
+and other format-unique information are discarded.
+
+Users of Olsen multi sequence editor (VMS).  The Olsen format
+here is produced with the print command:
+  print/out=some.file
+Use Genbank output from readseq to produce a format that this
+editor can read, and use the command
+  load/genbank some.file
+Dan Davison has a VMS program that will convert to/from the
+Olsen native binary data format.  E-mail davison@uh.edu
+
+Warning: Phylip format input is now supported (30Dec92), however the
+auto-detection of Phylip format is very probabilistic and messy,
+especially distinguishing sequential from interleaved versions. It
+is not recommended that one use readseq to convert files from Phylip
+format to others unless essential.
+
+
+
+||||||||||| ReadSeq usage             (revised 11Nov91)
+--------------------------------------------------------
+
+A. determine file format:
+
+        short skiplines;  /* result: number of header lines to skip (or 0) */
+        short error;      /* error result or 0 */
+        short format;     /* resulting format code, see ureadseq.h */
+        char  *filename   = "Mysequence.file";
+
+        format = seqFileFormat( filename, &skiplines, &error);
+        if (error!=0) fail;
+
+B. read number and list of sequences (optional)
+        short numseqs;    /* resulting number of sequences found in file */
+        char  *seqlist;   /* list of sequence names, newline separated, 0 terminated */
+
+        seqlist = listSeqs( filename, skiplines, format, &numseqs, &error);
+        if (error==0)  display (seqlist);
+        free( seqlist);
+
+C.  read individual sequences as desired
+        short seqIndex;   /* sequence index #, or == kListSeqs for listSeqs equivalent */
+        long  seqlen;     /* length of seq */
+        char  seqid[256]; /* sequence name */
+        char  *seq;       /* sequence, 0 terminated, free when done */
+
+        seq = readSeq( seqIndex, filename, skiplines, format,
+                      &seqlen, &numseqs, &error, seqid);
+        if (error==0) manipulate(seq);
+        free(seq);
+
+D. write sequences as desired
+        int nlines;     /* number of lines of sequence written */
+        FILE* fout;     /* open file pointer (stdout or other) */
+        short outform;  /* output format, see ureadseq.h */
+
+        nlines = writeSeq( fout, seq, seqlen, format, outform, seqid);
+
+
+Note (30Dec92): There is various processing done by the main program (in readseq.c),
+  rather than just in the subroutines (in ureadseq.c).  Especially for interleaved
+  output formats, the writeSeq subroutine does not handle interleaving, nor some of
+  the formatting at the top and end of output files.  While seqFileFormat, listSeqs,
+  and readSeq subroutines are fairly self-contained, the writeSeq depends a lot on
+  auxiliary processing.  At some point, this may be revised so writeSeq is self-
+  contained.
+
+Note 2: The NCBI toolkit (ftp from ncbi.nlm.nih.gov) is needed for the ASN.1 format
+  reading (see ureadasn.c).  A bastard (but workable I hope) ASN.1 format is written
+  by writeSeq alone.
+
+
+
+|||||||||||  sequence formats....
+---------------------------------------------------
+
+stanford/IG
+;comments
+;...
+seq1 info
+abcd...
+efgh1 (or 2 = terminator)
+;another seq
+;....
+seq2 info
+abcd...1
+--- for e.g. ----
+;     Dro5s-T.Seq  Length: 120  April 6, 1989  21:22  Check: 9487  ..
+dro5stseq
+GCCAACGACCAUACCACGCUGAAUACAUCGGUUCUCGUCCGAUCACCGAAAUUAAGCAGCGUCGCGGGCG
+GUUAGUACUUAGAUGGGGGACCGCUUGGGAACACCGCGUGUUGUUGGCCU1
+
+;  TOIG of: Dro5srna.Seq  check: 9487  from: 1  to: 120
+---------------------------------------------------
+
+Genbank:
+LOCUS    seq1 ID..
+...
+ORIGIN ...
+123456789abcdefg....(1st 9 columns are formatting)
+     hijkl...
+//         (end of sequence)
+LOCUS     seq2 ID ..
+...
+ORIGIN
+      abcd...
+//
+---------------------------------------------------
+
+NBRF format: (from uwgcg ToNBRF)
+>DL;DRO5SRNA
+Iubio$Dua0:[Gilbertd.Gcg]Dro5srna.Seq;2 => DRO5SRNA
+
+      51  AAUUAAGCAG CGUCGCGGGC GGUUAGUACU UAGAUGGGGG ACCGCUUGGG
+     101  AACACCGCGU GUUGUUGGCC U
+
+---------------------------------------------------
+
+EMBL format
+ID345 seq1 id   (the 345 are spaces)
+... other info
+SQ345Sequence   (the 3,4,5 are spaces)
+abcd...
+hijk...
+//              (! this is proper end string: 12Oct90)
+ID    seq2 id
+...
+SQ   Sequence
+abcd...
+...
+//
+---------------------------------------------------
+
+UW GCG Format:
+comments of any form, up to ".." signal
+signal line has seq id, and " Check: ####   .."
+only 1 seq/file
+
+-- e.g. --- (GCG from GenBank)
+LOCUS       DROEST6      1819 bp ss-mRNA            INV       31-AUG-1987
+    ... much more ...
+ORIGIN      1 bp upstream of EcoRI site; chromosome BK9 region 69A1.
+
+INVERTEBRATE:DROEST6  Length: 1819  January 9, 1989  16:48  Check: 8008  ..
+
+       1  GAATTCGCCG GAGTGAGGAG CAACATGAAC TACGTGGGAC TGGGACTTAT
+
+      51  CATTGTGCTG AGCTGCCTTT GGCTCGGTTC GAACGCGAGT GATACAGATG
+
+
+---------------------------------------------------
+
+DNAStrider (Mac) = modified Stanford:
+; ### from DNA Strider  Friday, April 7, 1989   11:04:24 PM
+; DNA sequence  pBR322   4363  b.p. complete sequence
+;
+abcd...
+efgh
+//  (end of sequence)
+---------------------------------------------------
+
+Fitch format:
+Dro5srna.Seq
+ GCC AAC GAC CAU ACC ACG CUG AAU ACA UCG GUU CUC GUC CGA UCA CCG AAA UUA AGC AGC
+ GUC GCG GGC GGU UAG UAC UUA GAU GGG GGA CCG CUU GGG AAC ACC GCG UGU UGU UGG CCU
+Droest6.Seq
+ GAA TTC GCC GGA GTG AGG AGC AAC ATG AAC TAC GTG GGA CTG GGA CTT ATC ATT GTG CTG
+ AGC TGC CTT TGG CTC GGT TCG AAC GCG AGT GAT ACA GAT GAC CCT CTG TTG GTG CAG CTG
+---------------------------------------------------
+
+W.Pearson/Fasta format:
+>BOVPRL GenBank entry BOVPRL from omam file.  907 nucleotides.
+TGCTTGGCTGAGGAGCCATAGGACGAGAGCTTCCTGGTGAAGTGTGTTTCTTGAAATCAT
+
+---------------------------------------------------
+Phylip version 3.2 format (e.g., DNAML):
+
+   5   13 YF                (# seqs, #bases, YF)
+Alpha     AACGTGGCCAAAT
+          aaaagggccc...  (continued sp. alpha)
+Beta      AAGGTCGCCAAAC
+          aaaagggccc...  (continued sp. beta)
+Gamma     CATTTCGTCACAA
+          aaaagggccc...  (continued sp. Gamma)
+1234567890^-- bases must start in col 11, and run 'til #bases 
+        (spaces & newlines are okay)
+---------------------------------------------------
+Phylip version 3.3 format (e.g., DNAML):
+
+  5    42  YF             (# seqs, #bases, YF)
+Turkey    AAGCTNGGGC ATTTCAGGGT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+Chimp     AAACCCTTGC CGTTACGCTT
+Gorilla   AAACCCTTGC CGGTACGCTT
+1234567890^-- bases must start in col 11
+  !! this version interleaves the species -- contrary to
+     all other output formats.
+
+GAGCCCGGGC AATACAGGGT AT
+GAGCCGTGGC CGGGCACGGT AT
+ACAGGTTGGC CGTTCAGGGT AA
+AAACCGAGGC CGGGACACTC AT
+AAACCATTGC CGGTACGCTT AA
+
+---------------------------------------------------
+Phylip version 3.4 format (e.g., DNAML)
+-- Both Interleaved and sequential are permitted
+
+   5   13                (# seqs, #bases)
+Alpha     AACGTGGCCAAAT
+          aaaagggccc...  (continued sp. alpha)
+Beta      AAGGTCGCCAAAC
+          aaaagggccc...  (continued sp. beta)
+Gamma     CATTTCGTCACAA
+          aaaagggccc...  (continued sp. Gamma)
+1234567890^-- bases must start in col 11, and run 'til #bases 
+        (spaces, newlines and numbers are ignored)
+
+---------------------------------------------------
+Gary Olsen (multiple) sequence editor /print format:
+
+!---------------------
+!17Oct91 -- error in original copy of olsen /print format, shifted right 1 space
+! here is correct copy:
+  301  40 Tb.thiop  CGCAGCGAAA----------GCUNUGCUAAUACCGCAUA-CGnCCUG-----------------------------------------------------  Tb.thiop
+123456789012345678901
+  301  42 Rhc.purp  CGUAGCGAAA----------GUUACGCUAAUACCGCAUA-UUCUGUG-----------------------------------------------------  Rhc.purp
+
+  301  44 Rhc.gela  nnngnCGAAA----------GCCGGAUUAAUACCGCAUA-CGACCUA-----------------------------------------------------  Rhc.gela
+!---------------------
+
+ RNase P RNA components.  on 20-FEB-90 17:23:58
+
+    1 (E.c. pr ):  Base pairing in Escherichia coli RNase P RNA.
+    2 (chrom   ):  Chromatium
+      :
+   12 (B.brevis):  Bacillus brevis RNase P RNA, B. James.
+   13 ( 90% con):   90% conserved
+   14 (100% con):  100% conserved
+   15 (gram+ pr):  pairing
+
+1
+ RNase P RNA components.  on 20-FEB-90 17:23:58
+
+ Posi-   Sequence
+ tion:   identity:   Data:
+
+     1   1 E.c. pr      <<<<<<<<<< {{{{{{{{<<:<<<<<<<<<<^<<<<<<====>>>>  E.c. pr
+     1   2 chrom        GGAGUCGGCCAGACAGUCGCUUCCGUCCU------------------  chrom
+            :
+     1  12 B.brevis  AUGCAGGAAAUGCGGGUAGCCGCUGCCGCAAUCGUCU-------------  B.brevis
+1234567890123456789012 <! this should be 21 not 22,
+! this example must be inset on left by 1 space from olsen /print files !
+     1  13  90% con           G  C G  A  CGC GC               -    -      90% con
+     1  14 100% con                G  A  CGC                             100% con
+     1  15 gram+ pr     <<<<<<<<<< {{{{{{{{<<<<<<<<<<<<<===============  gram+ pr
+
+    60   1 E.c. pr   >>>>>>^>>^>>>>:>>    <<<^<<<< {{{{{                 E.c. pr
+    60   2 chrom     -----GGUG-ACGGGGGAGGAAAGUCCGG-GCUCCAU-------------  chrom
+    :       :
+    60  10 B.stearo  ----UU-CG-GCCGUAGAGGAAAGUCCAUGCUCGCACGGUGCUGAGAUGC  B.stearo
+
+
+---------------------------------------------------
+  GCG MSF format
+Title line
+
+picorna.msf  MSF: 100  Type: P  January 17, 1991  17:53  Check: 541
+..
+Name: Cb3              Len:   100  Check: 7009  Weight:  1.00
+Name: E                Len:   100  Check:   60  Weight:  1.00
+
+//
+
+   1                                                   50
+Cb3  ...gpvedai .......t.. aaigr..vad tvgtgptnse aipaltaaet
+  E  gvenae.kgv tentna.tad fvaqpvylpe .nqt...... kv.affynrs
+
+   51                                                 100
+
+Cb3  ghtsqvvpgd tmqtrhvkny hsrsestien flcrsacvyf teykn.....
+  E  ...spi.gaf tvks...... gs.lesgfap .fsngtc.pn sviltpgpqf
+
+---------------------------------------------------
+     PIR format
+This is NBRF-PIR MAILSERVER version 1.45
+Command-> get PIR3:A31391
+\\\
+ENTRY           A31391       #Type Protein
+TITLE           *Esterase-6 - Fruit fly (Drosophila melanogaster)
+
+DATE            03-Aug-1992 #Sequence 03-Aug-1992 #Text 03-Aug-1992
+PLACEMENT          0.0    0.0    0.0    0.0    0.0
+COMMENT         *This entry is not verified.
+SOURCE          Drosophila melanogaster
+
+REFERENCE
+   #Authors     Cooke P.H., Oakeshott J.G.
+   #Citation    submitted to GenBank, April 1989
+   #Reference-number A31391
+   #Accession   A31391
+   #Cross-reference GB:J04167
+
+SUMMARY       #Molecular-weight 61125  #Length 544  #Checksum  1679
+SEQUENCE
+                5        10        15        20        25        30
+      1 M N Y V G L G L I I V L S C L W L G S N A S D T D D P L L V
+     31 Q L P Q G K L R G R D N G S Y Y S Y E S I P Y A E P P T G D
+     61 L R F E A P E P Y K Q K W S D I F D A T K T P V A C L Q W D
+     91 Q F T P G A N K L V G E E D C L T V S V Y K P K N S K R N S
+    121 F P V V A H I H G G A F M F G A A W Q N G H E N V M R E G K
+    151 F I L V K I S Y R L G P L G F V S T G D R D L P G N Y G L K
+    181 D Q R L A L K W I K Q N I A S F G G E P Q N V L L V G H S A
+    211 G G A S V H L Q M L R E D F G Q L A R A A F S F S G N A L D
+    241 P W V I Q K G A R G R A F E L G R N V G C E S A E D S T S L
+    271 K K C L K S K P A S E L V T A V R K F L I F S Y V P F A P F
+    301 S P V L E P S D A P D A I I T Q D P R D V I K S G K F G Q V
+    331 P W A V S Y V T E D G G Y N A A L L L K E R K S G I V I D D
+    361 L N E R W L E L A P Y L L F Y R D T K T K K D M D D Y S R K
+    391 I K Q E Y I G N Q R F D I E S Y S E L Q R L F T D I L F K N
+    421 S T Q E S L D L H R K Y G K S P A Y A Y V Y D N P A E K G I
+    451 A Q V L A N R T D Y D F G T V H G D D Y F L I F E N F V R D
+    481 V E M R P D E Q I I S R N F I N M L A D F A S S D N G S L K
+    511 Y G E C D F K D N V G S E K F Q L L A I Y I D G C Q N R Q H
+    541 V E F P
+///
+\\\
+---------------------------------------------------
+PAUP format:
+The NEXUS Format
+
+Every block starts with "BEGIN blockname;" and ends with "END;".
+Each block is composed of one or more statements, each
+terminated by a semicolon (;).
+
+Comments may be included in NEXUS files by enclosing them within
+square brackets, as in "[This is a comment]."
+
+NEXUS-conforming files are identified by a "#NEXUS" directive at
+the very beginning of the file (line 1, column 1).  If the
+#NEXUS is omitted PAUP issues a warning but continues
+processing.
+
+NEXUS files are entirely free-format.  Blanks, tabs, and
+newlines may be placed anywhere in the file.  Unless RESPECTCASE
+is requested, commands and data may be entered in upper case,
+lower case, or a mixture of upper and lower case.
+
+The following conventions are used in the syntax descriptions of
+the various blocks.  Upper-case items are entered exactly as
+shown.  Lower-case items inside of angle brackets -- e.g., <x>
+-- represent items to be substituted by the user.  Items inside
+of square brackets -- e.g., [X] -- are optional.  Items inside
+of curly braces and separated by vertical bars -- e.g.,  { X | Y
+| Z } -- are mutually exclusive options.
+
+
+The DATA Block
+
+The DATA block contains the data matrix and other associated
+information.  Its syntax is:
+
+BEGIN DATA;
+DIMENSIONS NTAX=<number of taxa> NCHAR=<number of characters>;
+  [ FORMAT  [ MISSING=<missing-symbol> ]
+        [ LABELPOS={ LEFT | RIGHT } ]
+        [ SYMBOLS="<symbols-list>" ]
+        [ INTERLEAVE ]
+        [ MATCHCHAR=<match-symbol> ]
+        [ EQUATE="<symbol>=<expansion> [<symbol>=<expansion>...]" ]
+        [ TRANSPOSE ]
+        [ RESPECTCASE ]
+        [ DATATYPE = { STANDARD | DNA | RNA | PROTEIN } ]; ]
+        [ OPTIONS [ IGNORE={ INVAR | UNINFORM } ]
+        [ MSTAXA = { UNCERTAIN | POLYMORPH | VARIABLE } ]
+        [ ZAP = "<list of zapped characters>" ] ; ]
+  [ CHARLABELS <label_1> <label_2> ... <label_NCHAR> ; ]
+  [ TAXLABELS <label1_1> <label1_2> <label1_NTAX> ; ]
+  [ STATELABELS <currently ignored by PAUP> ; ]
+  MATRIX <data-matrix> ;
+  END;
+
+--- example PAUP file
+
+#NEXUS
+
+[!Brown et al. (1982) primate mitochondrial DNA]
+
+begin data;
+  dimensions ntax=5 nchar=896;
+  format datatype=dna matchchar=. interleave missing='-';
+  matrix
+[                              2                    4                    6            8                    ]
+[         1                    1                    1                    1            1                    ]
+human     aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcctcattactatt ctgcctagcaaactcaaact acgaacgcactcacagtcgc
+chimp     ................a.t. .c.................a ...............t.... ..................t. .t........c.........
+gorilla   ..................tg ....t.....t........a ........a......t.... .................... .......a..c.....c...
+orang     ................ac.. cc.....g..t.....t..a ..c........cc....g.. .................... .......a..c.....c...
+gibbon    ......t..a..t...ac.g .c.................a ..a..c..t..cc.g..... ......t............. .......a........c...
+
+[         8                    8                    8                    8            8              8     ]
+[         0                    2                    4                    6            8              9     ]
+[         1                    1                    1                    1            1              6     ]
+human     cttccccacaacaatattca tgtgcctagaccaagaagtt attatctcgaactgacactg agccacaacccaaacaaccc agctctccctaagctt
+chimp     t................... .a................c. ........a.....g..... ...a................ ................
+gorilla   ..................tc .a................c. ........a.g......... ...a.............tt. .a..............
+orang     ta....a...........t. .c.......ga......acc ..cg..a.a......tg... .a.a..c.....g...cta. .a.....a........
+gibbon    a..t.......t........ ....ac...........acc .....t..a........... .a.tg..........gctag .a..............
+  ;
+end;
+---------------------------------------------------
+
+
+
+
+
+
+|||||||||||  Sample SMTP mail header
+---------------------------------------------------
+
+- - - - - - - - -
+From GenBank-Retrieval-System@genbank.bio.net Sun Nov 10 17:28:56 1991
+Received: from genbank.bio.net by sunflower.bio.indiana.edu
+        (4.1/9.5jsm) id AA19328; Sun, 10 Nov 91 17:28:55 EST
+Received: by genbank.bio.net (5.65/IG-2.0)
+        id AA14458; Sun, 10 Nov 91 14:30:03 -0800
+Date: Sun, 10 Nov 91 14:30:03 -0800
+Message-Id: <9111102230.AA14458@genbank.bio.net>
+From: Database Server <GenBank-Retrieval-System@genbank.bio.net>
+To: gilbertd@sunflower.bio.indiana.edu
+Subject: Results of Query for drorna
+Status: R
+
+No matches on drorna.
+- - - - - -
+From GenBank-Retrieval-System@genbank.bio.net Sun Nov 10 17:28:49 1991
+Received: from genbank.bio.net by sunflower.bio.indiana.edu
+        (4.1/9.5jsm) id AA19323; Sun, 10 Nov 91 17:28:47 EST
+Received: by genbank.bio.net (5.65/IG-2.0)
+        id AA14461; Sun, 10 Nov 91 14:30:03 -0800
+Date: Sun, 10 Nov 91 14:30:03 -0800
+Message-Id: <9111102230.AA14461@genbank.bio.net>
+From: Database Server <GenBank-Retrieval-System@genbank.bio.net>
+To: gilbertd@sunflower.bio.indiana.edu
+Subject: Results of Query for droest6
+Status: R
+
+LOCUS       DROEST6      1819 bp ss-mRNA            INV       31-AUG-1987
+DEFINITION  D.melanogaster esterase-6 mRNA, complete cds.
+ACCESSION   M15961
+
+
+
+
+
+
+
+
+
+
+
+
+|||||||||||  GCG manual discussion of sequence symbols:
+---------------------------------------------------
+
+III_SEQUENCE_SYMBOLS
+
+
+     GCG programs allow all upper and lower  case  letters,  periods  (.),
+asterisks  (*),  pluses  (+),  ampersands  (&),  and ats (@) as symbols in
+biological sequences.  Nucleotide  symbols,  their  complements,  and  the
+standard  one-letter amino acid symbols are shown below in separate lists.
+The meanings of the symbols +, &, and @ have not  been  assigned  at  this
+writing (March, 1989).
+
+     GCG uses the  letter  codes  for  amino  acid  codes  and  nucleotide
+ambiguity    proposed    by    IUB    (Nomenclature    Committee,    1985,
+Eur. J. Biochem. 150; 1-5).  These codes are  compatible  with  the  codes
+used by the EMBL, GenBank, and NBRF data libraries.
+
+
+                               NUCLEOTIDES
+
+     The meaning of each symbol, its complement,  and  the  Cambridge  and
+Stanford  equivalents  are  shown below.  Cambridge files can be converted
+into GCG files and vice versa with the programs FROMSTADEN  and  TOSTADEN.
+IntelliGenetics  sequence  files  can  be interconverted with the programs
+FROMIG and TOIG.
+
+IUB/GCG      Meaning     Complement   Staden/Sanger  Stanford
+
+   A             A             T             A            A
+   C             C             G             C            C
+   G             G             C             G            G
+  T/U            T             A             T           T/U
+   M           A or C          K             5            J
+   R           A or G          Y             R            R
+   W           A or T          W             7            L
+   S           C or G          S             8            M
+   Y           C or T          R             Y            Y
+   K           G or T          M             6            K
+   V        A or C or G        B       not supported      N
+   H        A or C or T        D       not supported      N
+   D        A or G or T        H       not supported      N
+   B        C or G or T        V       not supported      N
+  X/N     G or A or T or C     X            -/X           N
+   .    not G or A or T or C   .       not supported      ?
+
+
+  The frame ambiguity codes used by Staden are not  supported  by  GCG
+and   are  translated  by  FROMSTADEN  as  the  lower  case  single  base
+equivalent.
+
+     Staden Code          Meaning              GCG
+
+         D                C or CC                c
+         V                T or TT                t
+         B                A or AA                a
+         H                G or GG                g
+         K                C or CX                c
+         L                T or TX                t
+         M                A or AX                a
+         N                G or GX                g
+
+
+                        AMINO ACIDS
+
+  Here is a list of the standard one-letter amino acid codes and their
+three-letter  equivalents.   The synonymous codons and their depiction in
+the IUB codes are shown.  You should recognize that the codons  following
+semicolons  (;)  are  not  sufficiently specific to define a single amino
+acid even though they represent the best possible back  translation  into
+the IUB codes!  All of the relationships in this list can be redefined by
+the user in a local data file described below.
+
+                                                      IUB
+Symbol 3-letter  Meaning      Codons                Depiction
+ A    Ala       Alanine      GCT,GCC,GCA,GCG         !GCX
+ B    Asp,Asn   Aspartic,
+                Asparagine   GAT,GAC,AAT,AAC         !RAY
+ C    Cys       Cysteine     TGT,TGC                 !TGY
+ D    Asp       Aspartic     GAT,GAC                 !GAY
+ E    Glu       Glutamic     GAA,GAG                 !GAR
+ F    Phe     Phenylalanine  TTT,TTC                 !TTY
+ G    Gly       Glycine      GGT,GGC,GGA,GGG         !GGX
+ H    His       Histidine    CAT,CAC                 !CAY
+ I    Ile       Isoleucine   ATT,ATC,ATA             !ATH
+ K    Lys       Lysine       AAA,AAG                 !AAR
+ L    Leu       Leucine      TTG,TTA,CTT,CTC,CTA,CTG
+!TTR,CTX,YTR;YTX
+ M    Met       Methionine   ATG                     !ATG
+ N    Asn       Asparagine   AAT,AAC                 !AAY
+ P    Pro       Proline      CCT,CCC,CCA,CCG         !CCX
+ Q    Gln       Glutamine    CAA,CAG                 !CAR
+ R    Arg       Arginine     CGT,CGC,CGA,CGG,AGA,AGG
+!CGX,AGR,MGR;MGX
+ S    Ser       Serine       TCT,TCC,TCA,TCG,AGT,AGC !TCX,AGY;WSX
+ T    Thr       Threonine    ACT,ACC,ACA,ACG         !ACX
+ V    Val       Valine       GTT,GTC,GTA,GTG         !GTX
+ W    Trp       Tryptophan   TGG                     !TGG
+ X    Xxx       Unknown                              !XXX
+ Y    Tyr       Tyrosine     TAT, TAC                !TAY
+ Z    Glu,Gln   Glutamic,
+                Glutamine    GAA,GAG,CAA,CAG         !SAR
+ *    End       Terminator   TAA, TAG, TGA           !TAR,TRA;TRR
+
+
+
+
+
+
+
+
+|||||||||||  docs from PSC on sequence formats:
+---------------------------------------------------
+
+
+          Nucleic Acid and Protein Sequence File Formats
+
+
+It will probably save you some time if you have your data in a usable
+format before you send it to us.  However, we do have the University of
+Wisconsin Genetics Computing Group programs running on our VAXen and
+this package includes several reformatting utilities.  Our programs
+usually recognize any of several standard formats, including GenBank,
+EMBL, NBRF, and MolGen/Stanford.  For the purposes of annotating an
+analysis we find the GenBank and EMBL formats most useful, particularly
+if you have already received an accession number from one of these
+organizations for your sequence.
+
+Our programs do not require that all of the line types available in
+GenBank, EMBL, or NBRF file formats be present for the file format to
+be recognized and processed.  The following pages outline the essential
+details required for correct processing of files by our programs.
+Additional information may be present but will generally be ignored.
+
+
+                      GenBank File Format
+
+File Header
+
+1.  The first line in the file must have "GENETIC SEQUENCE DATA BANK"
+    in spaces 20 through 46 (see LINE  1, below).
+2.  The next 8 lines may contain arbitrary text.  They are ignored but
+    are required to maintain the GenBank format (see LINE 2 - LINE 9).
+
+Sequence Data Entries
+
+3.  Each sequence entry in the file should have the following format.
+    a) first line:   Must have LOCUS in the first 5 spaces.  The
+                     genetic locus name or identifier must be in spaces
+                     13 - 22.  The length of the sequences is right
+                     justified in spaces 23 through 29 (see LINE  10).
+    b) second line:  Must have DEFINITION in the first 10 spaces.
+                     Spaces 13 - 80 are free form text to identify the
+                     sequence (see LINE  11).
+    c) third line:   Must have ACCESSION in the first 9 spaces.  Spaces
+                     13 - 18 must hold the primary accession number
+                     (see LINE  12).
+    d) fourth line:  Must have ORIGIN in the first 6 spaces.  Nothing
+                     else is required on this line, it indicates that
+                     the nucleic acid sequence begins on the next line
+                     (see LINE  13).
+    e) fifth line:   Begins the nucleotide sequence.  The first 9
+                     spaces of each sequence line may either be blank
+                     or may contain the position in the sequence of the
+                     first nucleotide on the line.  The next 66 spaces
+                     hold the nucleotide sequence in six blocks of ten
+                     nucleotides.  Each of the six blocks begins with a
+                     blank space followed by ten nucleotides.  Thus the
+                     first nucleotide is in space eleven of the line while
+                     the last is in space 75 (see LINE  14, LINE  15).
+    f) last line:    Must have // in the first 2 spaces to indicate
+                     termination of the sequence (see LINE  16).
+
+NOTE:  Multiple sequences may appear in each file.  To begin another
+       sequence go back to a) and start again.
+
+
+                         Example GenBank file
+
+
+LINE  1  :                   GENETIC SEQUENCE DATA BANK
+LINE  2  :
+LINE  3  :
+LINE  4  :
+LINE  5  :
+LINE  6  :
+LINE  7  :
+LINE  8  :
+LINE  9  :
+LINE 10  :LOCUS       L_Name     Length BP
+LINE 11  :DEFINITION  Describe the sequence any way you want
+LINE 12  :ACCESSION   Accession Number
+LINE 13  :ORIGIN
+LINE 14  :        1 acgtacgtac gtacgtacgt acgtacgtac gtacgtacgt a...
+LINE 15  :       61 acgt...
+LINE 16  ://
+
+
+
+                         EMBL File Format
+
+Unlike the GenBank file format the EMBL file format does not require
+a series of header lines.  Thus the first line in the file begins
+the first sequence entry of the file.
+
+1.  The first line of each sequence entry contains the two letters ID
+    in the first two spaces.  This is followed by the EMBL identifier
+    in spaces 6 through 14.  (See LINE  1).
+
+2.  The second line of each sequence entry has the two letters AC in
+    the first two spaces.  This is followed by the accession number in
+    spaces 6 through 11.  (See LINE  2).
+
+3.  The third line of each sequence entry has the two letters DE in the
+    first two spaces.  This is followed by a free form text definition
+    in spaces 6 through 72.  (See LINE  3).
+
+4.  The fourth line in each sequence entry has the two letters SQ in
+    the first two spaces.  This is followed by the length of the
+    sequence beginning at or after space 13.  After the sequence length
+    there is a blank space and the two letters BP.  (See LINE  4).
+
+5.  The nucleotide sequence begins on the fifth line of the sequence
+    entry.  Each line of sequence begins with four blank spaces. The
+    next 66 spaces hold the nucleotide sequence in six blocks of ten
+    nucleotides.  Each of the six blocks begins with a blank space
+    followed by ten nucleotides.  Thus the first nucleotide is in space
+    6 of the line while the last is in space 70.  (See LINE  5 -
+    LINE  6).
+
+6.  The last line of each sequence entry in the file is a terminator
+    line which has the two characters // in the first two spaces.
+    (See LINE  7).
+
+7.  Multiple sequences may appear in each file.  To begin another
+    sequence go back to item 1 and start again.
+
+
+                          Example EMBL file
+
+LINE  1  :ID   ID_name
+LINE  2  :AC   Accession number
+LINE  3  :DE   Describe the sequence any way you want
+LINE  4  :SQ          Length BP
+LINE  5  :     ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTA...
+LINE  6  :     ACGT...
+LINE  7  ://
+
+
+
+            NBRF (protein or nucleic acid) File Format
+
+1.  The first line of each sequence entry begins with a greater than
+  symbol, >.  This is immediately followed by the two character
+  sequence type specifier.  Space four must contain a semi-colon.
+  Beginning in space five is the sequence name or identification code
+  for the NBRF database.  The code is from four to six letters and
+  numbers.  (See LINE  1).
+
+!!!! >> add these to readseq
+          Specifier             Sequence type
+
+              P1                protein, complete
+              F1                protein, fragment
+              DL                DNA, linear
+              DC                DNA, circular
+              RL                RNA, linear
+              RC                RNA, circular
+              N1                functional RNA, other than tRNA
+              N3                tRNA
+
+2.  The second line of each sequence entry contains two kinds of
+  information.  First is the sequence name which is separated from
+  the organism or organelle name by the three character sequence
+  blank space, dash, blank space, " - ".  There is no special
+  character marking the beginning of this line.  (See LINE  2).
+
+3.  Either the amino acid or nucleic acid sequence begins on line three
+  and can begin in any space, including the first.  The sequence is
+  free format and may be interrupted by blanks for ease of reading.
+  Protein sequences may contain special punctuation to indicate
+  various indeterminacies in the sequence.  In the NBRF data files
+  all lines may be up to 500 characters long.  However some PSC
+  programs currently have a limit of 130 characters per line
+  (including blanks), and BitNet will not accept lines of over eighty
+  characters.  (See LINE  3, LINE  4, and LINE  5).
+
+  The last character in the sequence must be an asterisk, *.
+
+                       Example NBRF file
+
+ LINE  1  :>P1;CBRT
+ LINE  2  :Cytochrome b - Rat mitochondrion (SGC1)
+ LINE  3  :M T N I R K S H P L F K I I N H S F I D L P A P S
+ LINE  4  : VTHICRDVN Y GWL IRY
+ LINE  5  :TWIGGQPVEHPFIIIGQLASISYFSIILILMPISGIVEDKMLKWN*
+
+
+
+                MolGen/Stanford File Format
+
+1.  The first line in a sequence file is a comment line.  This line
+  begins with a semi-colon in the first space.  This line need
+  not be present.  If it is present it holds descriptive text.
+  There may be as many comment lines as desired at the start of the
+  sequence file.  (See LINE  1).
+
+2.  The second line must be present and contains an identifier or
+  name for the sequence in the first ten spaces.  (See LINE  2).
+
+3.  The sequence begins on the third line and occupies up to eighty
+  spaces.  Spaces may be included in the sequence for ease of
+  reading.  The sequence continues for as many lines as needed
+  and is terminated with a 1 or 2.  1 indicates a linear sequence
+  while 2 marks a circular sequence.  (See LINE  3 and LINE  4).
+
+                          Example MolGen/Stanford file
+
+LINE  1  :;  Describe the sequence any way you want
+LINE  2  :ECTRNAGLY2
+LINE  3  :ACGCACGTAC ACGTACGTAC   A C G T C C G T ACG TAC GTA CGT
+LINE  4  :  GCTTA   GG G C T A1
+
+
+
+
+|||||||||||  Phylip file format
+---------------------------------------------------
+
+        Phylip 3.3 File Format (DNA sequences)
+
+
+     The input and output formats for PROTPARS and for RESTML are described  in
+their  document  files.   In  general  their input formats are similar to those
+described here, except that the one-letter codes for data are specific to those
+programs  and  are  described in those document files.  Since the input formats
+for the eight DNA sequence programs apply to  all  eight,  they  are  described
+here.   Their  input  formats are standard: the data have A's, G's, C's and T's
+(or U's).  The first line of the input file contains the number of species  and
+the  number  of  sites.   As  with  the other programs, options information may
+follow this.  In the case of DNAML, DNAMLK,  and  DNADIST  an  additional  line
+(described  in  the  document file for these programs) may follow the first one.
+Following this, each species starts on a new line.  The first 10 characters  of
+that  line  are the species name.  There then follows the base sequence of that
+species, each character being one of the letters A, B, C, D, G, H, K, M, N,  O,
+R, S, T, U, V, W, X, Y, ?, or - (a period was also previously allowed but it is
+no longer allowed, because it sometimes is used in aligned sequences to mean
+"the  same  as  the  sequence  above").   Blanks  will  be ignored, and so will
+numerical digits.  This allows GENBANK and EMBL sequence  entries  to  be  read
+with minimum editing.
+
+     These characters can be  either  upper  or  lower  case.   The  algorithms
+convert  all  input  characters  to upper case (which is how they are treated).
+The characters constitute the IUPAC (IUB) nucleic acid code  plus  some  slight
+extensions.  They enable input of nucleic acid sequences taking full account of
+any ambiguities in the sequence.
+
+The sequences can continue over multiple lines; when this is done the sequences
+must  be  either  in  "interleaved"  format, similar to the output of alignment
+programs, or "sequential" format.  These are described  in  the  main  document
+file.   In sequential format all of one sequence is given, possibly on multiple
+lines, before the next starts.  In interleaved format the  first  part  of  the
+file  should  contain  the first part of each of the sequences, then possibly a
+line containing nothing but a carriage-return character, then the  second  part
+of  each  sequence, and so on.  Only the first parts of the sequences should be
+preceded by names.  Here is a hypothetical example of interleaved format:
+
+  5    42
+Turkey    AAGCTNGGGC ATTTCAGGGT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+Chimp     AAACCCTTGC CGTTACGCTT
+Gorilla   AAACCCTTGC CGGTACGCTT
+
+GAGCCCGGGC AATACAGGGT AT
+GAGCCGTGGC CGGGCACGGT AT
+ACAGGTTGGC CGTTCAGGGT AA
+AAACCGAGGC CGGGACACTC AT
+AAACCATTGC CGGTACGCTT AA
+
+while in sequential format the same sequences would be:
+
+  5    42
+Turkey    AAGCTNGGGC ATTTCAGGGT
+GAGCCCGGGC AATACAGGGT AT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+GAGCCGTGGC CGGGCACGGT AT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+ACAGGTTGGC CGTTCAGGGT AA
+Chimp     AAACCCTTGC CGTTACGCTT
+AAACCGAGGC CGGGACACTC AT
+Gorilla   AAACCCTTGC CGGTACGCTT
+AAACCATTGC CGGTACGCTT AA
+
+
+Note, of course, that a portion of a sequence like this:
+
+   300   AAGCGTGAAC GTTGTACTAA TRCAG
+
+is perfectly legal, assuming that the species name  has  gone  before,  and  is
+filled  out  to  full  length  by  blanks.  The above digits and blanks will be
+ignored, the sequence being taken as starting at the first base symbol (in this
+case an A).
+
+     The present versions of the programs may sometimes have difficulties  with
+the  blank  lines  between  groups of lines, and if so you might want to retype
+those lines, making sure that they have only a  carriage-return  and  no  blank
+characters on them, or you may perhaps have to eliminate them.  The symptoms of
+this problem are that the programs complain that the sequences are not properly
+aligned, and you can find no other cause for this complaint.
+
+------------------------------------------------
+
+
+|||||||||||  ASN.1 file format
+---------------------------------------------------
+
+
+ASN.1 -- see NCBI toolkit docs, source and examples (ncbi.nlm.nih.gov)
+
+Example asn.1 sequence file----
+
+Bioseq-set ::= {
+seq-set {
+  seq {
+    id { local id 1 } ,                 -- id essential
+    descr {  title "Dummy sequence data from nowhere"  } ,  -- optional
+    inst {                              -- inst essential
+      repr raw ,
+      mol dna ,
+      length 156 ,
+      topology linear ,
+      seq-data
+        iupacna "GAATTCATTTTTGAAACAAATCGACCTGACGACGGAATGGTACTCGAATTA
+TGGGCCAAAGGGTTTTATGGGACAAATTAATAGGTGTTCATTATATGCCACTTTCGGAGATTAGATACAGCAATGCAG
+TGGATTCAAAGCAATAGAGTTGTTCTT" 
+      } } ,
+
+        seq {
+          id { local id 2 } ,
+          descr {  title "Dummy sequence 2 data from somewhere else"  } ,
+          inst {
+                repr raw ,
+                mol dna ,
+                length 150 ,
+                topology linear ,
+                seq-data
+                  iupacna "TTTTTTTTTTTTGAAACAAATCGACCTGACGACGGAATGGTACTCGAATTA
+TGGGCCAAAGGGTTTTATGGGACAAATTAATAGGTGTTCATTATATGCCACTTTCGGAGATTAGATACAGCAATGCAG
+TGGATTCAAAGCAATAGAGTT" 
+            }
+          }
+        }
+      }
+
+
+partial ASN.1 description from toolkit
+
+Bioseq ::= SEQUENCE {
+    id SET OF Seq-id ,            -- equivalent identifiers
+    descr Seq-descr OPTIONAL , -- descriptors
+    inst Seq-inst ,            -- the sequence data
+    annot SET OF Seq-annot OPTIONAL }
+
+Seq-inst ::= SEQUENCE {            -- the sequence data itself
+    repr ENUMERATED {              -- representation class
+        not-set (0) ,              -- empty
+        virtual (1) ,              -- no seq data
+        raw (2) ,                  -- continuous sequence
+        seg (3) ,                  -- segmented sequence
+        const (4) ,                -- constructed sequence
+        ref (5) ,                  -- reference to another sequence
+        consen (6) ,               -- consensus sequence or pattern
+        map (7) ,                  -- ordered map (genetic, restriction)
+        other (255) } ,
+    mol ENUMERATED {               -- molecule class in living organism
+        not-set (0) ,              --   > cdna = rna
+        dna (1) ,
+        rna (2) ,
+        aa (3) ,
+        na (4) ,                   -- just a nucleic acid
+        other (255) } ,
+    length INTEGER OPTIONAL ,      -- length of sequence in residues
+    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
+    topology ENUMERATED {          -- topology of molecule
+        not-set (0) ,
+        linear (1) ,
+        circular (2) ,
+        tandem (3) ,               -- some part of tandem repeat
+        other (255) } DEFAULT linear ,
+    strand ENUMERATED {            -- strandedness in living organism
+        not-set (0) ,
+        ss (1) ,                   -- single strand
+        ds (2) ,                   -- double strand
+        mixed (3) ,
+        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
+    seq-data Seq-data OPTIONAL ,   -- the sequence
+    ext Seq-ext OPTIONAL ,         -- extensions for special types
+  hist Seq-hist OPTIONAL }       -- sequence history
+
+------------------------------------------------
--- a/Make.com
+++ b/Make.com
@ -0,0 +1,63 @@
+$!
+$!VAX-VMS cc make file for readseq
+$!
+$ echo := write sys$output
+$ if p1.eqs."TEST" then goto tests
+$
+$ echo "compiling readseq..."
+$ cc readseq, ureadseq
+$!
+$ echo "linking readseq..."
+$ link readseq, ureadseq, sys$library:vaxcrtl/lib
+$!
+$tests:
+$!
+$ echo "defining readseq symbol:"
+$ dd = f$environment("default")
+$ readseq :== $ 'dd'readseq.exe
+$ show symbol readseq
+$!
+$ echo ""
+$ echo "test for general read/write of all chars:"
+$ readseq -p alphabet.std -otest.alpha
+$ diff test.alpha alphabet.std
+$!
+$ echo ""
+$ echo "test for valid format conversions"
+$!
+$ readseq -v -p -f=ig   nucleic.std -otest.ig
+$ readseq -v -p -f=gb   test.ig     -otest.gb
+$ readseq -v -p -f=nbrf test.gb     -otest.nbrf
+$ readseq -v -p -f=embl test.nbrf   -otest.embl
+$ readseq -v -p -f=gcg  test.embl   -otest.gcg
+$ readseq -v -p -f=strider test.gcg -otest.strider
+$ readseq -v -p -f=fitch test.strider -otest.fitch
+$ readseq -v -p -f=fasta test.fitch -otest.fasta
+$ readseq -v -p -f=pir  test.fasta  -otest.pir
+$ readseq -v -p -f=ig   test.pir    -otest.ig-b
+$ diff test.ig test.ig-b
+$!
+$ echo ""
+$ echo "Test for multiple-sequence format conversions:"
+$ readseq -p -f=ig    multi.std   -otest.m-ig
+$ readseq -p -f=gb    test.m-ig   -otest.m-gb
+$ readseq -p -f=nbrf  test.m-gb   -otest.m-nbrf
+$ readseq -p -f=embl  test.m-nbrf -otest.m-embl
+$ readseq -p -f=fasta test.m-embl -otest.m-fasta
+$ readseq -p -f=pir   test.m-fasta -otest.m-pir
+$ readseq -p -f=msf   test.m-pir  -otest.m-msf
+$ readseq -p -f=paup  test.m-msf  -otest.m-paup
+$ readseq -p -f=ig    test.m-paup -otest.m-ig-b
+$ diff test.m-ig test.m-ig-b
+$ echo ""
+$ echo "Expect differences in the header lines due to"
+$ echo "different format headers.  If any sequence lines"
+$ echo "differ, or if checksums differ, there is a problem."
+$!
+$! #cleanup
+$! delete test.*;
+$ echo "-----------"
+$ echo ""
+$ echo "To clean up test files, command me:"
+$ echo "  DELETE test.*;"
+$!
--- a/Make.ncbi
+++ b/Make.ncbi
@ -0,0 +1,109 @@
+#
+# Unix Makefile for readseq
+# to use, command me:
+#  %  make       -- or --
+#  %  make CC=your-c-compiler-name
+#
+
+# pick an ANSI C compiler (the default Sun CC is not ANSI)
+CC=gcc  # Gnu C Compiler
+#CC=cc  # SGI Irix
+#CC=vcc # some DEC Ultrix
+
+CFLAGS=
+#CFLAGS= -DSMALLCHECKSUM  # if you prefer to use a GCG-standard 13 bit checksum
+#    instead of a full 32 bit checksum. This may enhance compatibility w/ GCG software
+
+SOURCES= readseq.c ureadseq.c ureadseq.h ureadasn.c
+DOCS= Readme readseq.help Formats Stdfiles Makefile Make.com add.gdemenu *.std
+
+
+# NCBI toolkit support for ASN.1 reader
+
+# this is path to NCBI toolkit, you must set for your system:
+NCBI=/bio/mb/ncbi
+#
+OTHERLIBS=-lm
+LIB1=-lncbi
+LIB2=-lncbiobj
+LIB3=-lncbicdr
+LIB4=-lvibrant
+INCPATH=$(NCBI)/include
+LIBPATH=$(NCBI)/lib
+NCFLAGS=$(CFLAGS) -DNCBI -I$(INCPATH)
+NLDFLAGS=-I$(INCPATH) -L$(LIBPATH)
+NLIBS=$(LIB1) $(LIB2) $(OTHERLIBS)
+
+
+all: build test
+
+#build: $(SOURCES)
+#	@echo "Compiling readseq..."
+#	$(CC) $(CFLAGS) -o readseq readseq.c ureadseq.c
+
+# if using NCBI, uncomment these lines in place of build: above
+build: $(SOURCES)
+	@echo "Compiling readseq with NCBI toolkit support...";
+	$(CC) -o readseq $(NLDFLAGS) $(NCFLAGS) readseq.c ureadseq.c ureadasn.c $(NLIBS)
+
+test: $(SOURCES) readseq
+	@echo ""
+	@echo "Test for general read/write of all chars:"
+	./readseq -p alphabet.std -otest.alpha
+	-diff test.alpha alphabet.std
+
+	@echo ""
+	@echo "Test for valid format conversions:"
+	./readseq -v -p -f=ig   nucleic.std -otest.ig
+	./readseq -v -p -f=gb   test.ig     -otest.gb
+	./readseq -v -p -f=nbrf test.gb     -otest.nbrf
+	./readseq -v -p -f=embl test.nbrf   -otest.embl
+	./readseq -v -p -f=gcg  test.embl   -otest.gcg
+	./readseq -v -p -f=strider test.gcg -otest.strider
+	./readseq -v -p -f=fitch test.strider -otest.fitch
+	./readseq -v -p -f=fasta test.fitch -otest.fasta
+	./readseq -v -p -f=pir  test.fasta  -otest.pir
+	./readseq -v -p -f=ig   test.pir    -otest.ig-b
+	-diff test.ig test.ig-b
+
+	@echo ""
+	@echo "Test for multiple-sequence format conversions:"
+	./readseq -p -f=ig    multi.std   -otest.m-ig
+	./readseq -p -f=gb    test.m-ig   -otest.m-gb
+	./readseq -p -f=nbrf  test.m-gb   -otest.m-nbrf
+	./readseq -p -f=embl  test.m-nbrf -otest.m-embl
+	./readseq -p -f=fasta test.m-embl -otest.m-fasta
+	./readseq -p -f=pir   test.m-fasta -otest.m-pir
+	./readseq -p -f=msf   test.m-pir  -otest.m-msf
+	./readseq -p -f=paup  test.m-msf  -otest.m-paup
+	./readseq -p -f=ig    test.m-paup -otest.m-ig-b
+	-diff test.m-ig test.m-ig-b
+#
+# if using NCBI, uncomment these lines
+	@echo ""
+	@echo "Test of NCBI ASN.1 conversions:"
+	./readseq -p -f=asn test.m-ig  -otest.m-asn
+	./readseq -p -f=ig  test.m-asn -otest.m-ig-c
+	-diff test.m-ig test.m-ig-c
+#
+	@echo ""
+	@echo "Expect differences in the header lines due to"
+	@echo "different format headers.  If any sequence lines"
+	@echo "differ, or if the checksums differ, there is a problem."
+	@echo "----------------------"
+	@echo ""
+	@echo "To clean up test files, command me:"
+	@echo "    make clean"
+
+
+clean:
+	rm -f *.o core test.*
+
+shar:
+	@echo "shell archiving files..."
+	-rm -f readseq*.shar
+	mkdir readseqd
+	cp $(SOURCES) readseqd
+	cp $(DOCS) readseqd
+	shar -v readseqd > readseq.shar
+	rm -rf readseqd
--- a/160
+++ b/160
@ -0,0 +1,160 @@
+
+ * ReadSeq  -- 1 Feb 93
+ *
+ * Reads and writes nucleic/protein sequences in various
+ * formats. Data files may have multiple sequences.
+ *
+ * Copyright 1990 by d.g.gilbert
+ * biology dept., indiana university, bloomington, in 47405
+ * e-mail: gilbertd@bio.indiana.edu
+ *
+ * This program may be freely copied and used by anyone.
+ * Developers are encouraged to incorporate parts in their
+ * programs, rather than devise their own private sequence
+ * format.
+ *
+ * This should compile and run with any ANSI C compiler.
+ * Please advise me of any bugs, additions or corrections.
+
+Readseq has been updated.   There have been a number of enhancements
+and a few bug corrections since the previous general release in Nov 91
+(see below).  If you are using earlier versions, I recommend you update to
+this release.
+
+Readseq is particularly useful as it automatically detects many
+sequence formats, and interconverts among them.
+Formats added to this release include
+  + MSF multi sequence format used by GCG software
+  + PAUP's multiple sequence (NEXUS) format
+  + PIR/CODATA format used by PIR
+  + ASN.1 format used by NCBI
+  + Pretty print with various options for nice looking output.
+
+As well, Phylip format can now be used as input.  Options to
+reverse-complement and to degap sequences have been added.  A menu
+addition for users of the GDE sequence editor is included.
+
+This program is available thru Internet gopher, as
+
+  gopher ftp.bio.indiana.edu
+  browse into the IUBio-Software+Data/molbio/readseq/ folder
+  select the readseq.shar document
+
+Or thru anonymous FTP in this manner:
+  my_computer> ftp  ftp.bio.indiana.edu  (or IP address 129.79.224.25)
+    username:  anonymous
+    password:  my_username@my_computer
+  ftp> cd molbio/readseq
+  ftp> get readseq.shar
+  ftp> bye
+
+readseq.shar is a Unix shell archive of the readseq files.
+This file can be edited by any text editor to reconstitute the
+original files, for those who do not have a Unix system or an
+Unshar program.  Read the top of this .shar file for further
+instructions.
+
+There are also pre-compiled executables for the following computers:
+Silicon Graphics Iris, Sparc (Sun Sparcstation & clones), VMS-Vax,
+Macintosh. Use binary ftp to transfer these, except Macintosh.  The
+Mac version is just the command-line program in a window, not very
+handy.
+
+C source files:
+  readseq.c ureadseq.c ureadasn.c ureadseq.h
+Document files:
+  Readme (this doc)
+  Readseq.help (longer than this doc)
+  Formats (description of sequence file formats)
+  add.gdemenu (GDE program users can add this to the .GDEmenu file)
+  Stdfiles -- test sequence files
+  Makefile -- Unix make file
+  Make.com -- VMS make file
+  *.std    -- files for testing validity of readseq
+
+
+Example usage:
+  readseq
+      -- for interactive use
+  readseq my.1st.seq  my.2nd.seq  -all  -format=genbank  -output=my.gb
+      -- convert all of two input files to one genbank format output file
+  readseq my.seq -all -form=pretty -nameleft=3 -numleft -numright -numtop -match
+      -- output to standard output a file in a pretty format
+  readseq my.seq -item=9,8,3,2 -degap -CASE -rev -f=msf -out=my.rev
+      -- select 4 items from input, degap, reverse, and uppercase them
+  cat *.seq | readseq -pipe -all -format=asn > bunch-of.asn
+      -- pipe a bunch of data thru readseq, converting all to asn
+
+
+The brief usage of readseq is as follows. The "[]" denote
+optional parts of the syntax:
+
+  readseq -help
+readSeq (27Dec92), multi-format molbio sequence reader.
+usage: readseq [-options] in.seq > out.seq
+ options
+    -a[ll]         select All sequences
+    -c[aselower]   change to lower case
+    -C[ASEUPPER]   change to UPPER CASE
+    -degap[=-]     remove gap symbols
+    -i[tem=2,3,4]  select Item number(s) from several
+    -l[ist]        List sequences only
+    -o[utput=]out.seq  redirect Output
+    -p[ipe]        Pipe (command line, <stdin, >stdout)
+    -r[everse]     change to Reverse-complement
+    -v[erbose]     Verbose progress
+    -f[ormat=]#    Format number for output,  or
+    -f[ormat=]Name Format name for output:
+         1. IG/Stanford           10. Olsen (in-only)
+         2. GenBank/GB            11. Phylip3.2
+         3. NBRF                  12. Phylip
+         4. EMBL                  13. Plain/Raw
+         5. GCG                   14. PIR/CODATA
+         6. DNAStrider            15. MSF
+         7. Fitch                 16. ASN.1
+         8. Pearson/Fasta         17. PAUP
+         9. Zuker                 18. Pretty (out-only)
+
+   Pretty format options:
+    -wid[th]=#            sequence line width
+    -tab=#                left indent
+    -col[space]=#         column space within sequence line on output
+    -gap[count]           count gap chars in sequence numbers
+    -nameleft, -nameright[=#]   name on left/right side [=max width]
+    -nametop              name at top/bottom
+    -numleft, -numright   seq index on left/right side
+    -numtop, -numbot      index on top/bottom
+    -match[=.]            use match base for 2..n species
+    -inter[line=#]        blank line(s) between sequence blocks
+
+
+
+Recent changes:
+
+4 May 92
+ added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
+Aug 92
+= fixed Olsen format input to handle files w/ more sequences,
+  not to mess up when more than one seq has same identifier,
+  and to convert number masks to symbols.
+= IG format fix to understand ^L
+30 Dec 92
+* revised command-line & interactive interface.  Suggested form is now
+    readseq infile -format=genbank -output=outfile -item=1,3,4 ...
+  but remains compatible with prior commandlines:
+    readseq infile -f2 -ooutfile -i3 ...
+ added GCG MSF multi sequence file format
+ added PIR/CODATA format
+ added NCBI ASN.1 sequence file format
+ added Pretty, multi sequence pretty output (only)
+ added PAUP multi seq format
+ added degap option
+ added Gary Williams (GWW, G.Williams@CRC.AC.UK) reverse-complement option.
+ added support for reading Phylip formats (interleave & sequential)
+* string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER, NEEDSTRCASECMP
+* changed 32bit checksum to default, -DSMALLCHECKSUM for GCG version
+
+1Feb93
+= reverted Genbank output format to fixed left margin 
+  (change in 30 Dec release), so GDE and others relying on fixed margin
+  can read this.
--- a/Readseq.help
+++ b/Readseq.help
@ -0,0 +1,229 @@
+
+ * ReadSeq.Help -- 30 Dec 92
+ *
+ * Reads and writes nucleic/protein sequences in various
+ * formats. Data files may have multiple sequences.
+ *
+ * Copyright 1990 by d.g.gilbert
+ * biology dept., indiana university, bloomington, in 47405
+ * e-mail: gilbertd@bio.indiana.edu
+ *
+ * This program may be freely copied and used by anyone.
+ * Developers are encouraged to incorporate parts in their
+ * programs, rather than devise their own private sequence
+ * format.
+ *
+ * This should compile and run with any ANSI C compiler.
+ * Please advise me of any bugs, additions or corrections.
+
+Readseq is particularly useful as it automatically detects many
+sequence formats, and interconverts among them.
+
+Formats which readseq currently understands:
+
+  * IG/Stanford, used by Intelligenetics and others
+  * GenBank/GB, genbank flatfile format
+  * NBRF format
+  * EMBL, EMBL flatfile format
+  * GCG, single sequence format of GCG software
+  * DNAStrider, for common Mac program
+  * Fitch format, limited use
+  * Pearson/Fasta, a common format used by Fasta programs and others
+  * Zuker format, limited use. Input only.
+  * Olsen, format printed by Olsen VMS sequence editor. Input only.
+  * Phylip3.2, sequential format for Phylip programs
+  * Phylip, interleaved format for Phylip programs (v3.3, v3.4)
+  * Plain/Raw, sequence data only (no name, document, numbering)
+  + MSF multi sequence format used by GCG software
+  + PAUP's multiple sequence (NEXUS) format
+  + PIR/CODATA format used by PIR
+  + ASN.1 format used by NCBI
+  + Pretty print with various options for nice looking output. Output only.
+
+See the included "Formats" file for detail on file formats.
+
+
+Example usage:
+  readseq
+      -- for interactive use
+
+  readseq my.1st.seq  my.2nd.seq  -all  -format=genbank  -output=my.gb
+      -- convert all of two input files to one genbank format output file
+
+  readseq my.seq -all -form=pretty -nameleft=3 -numleft -numright -numtop -match
+      -- output to standard output a file in a pretty format
+
+  readseq my.seq -item=9,8,3,2 -degap -CASE -rev -f=msf -out=my.rev
+      -- select 4 items from input, degap, reverse, and uppercase them
+
+  cat *.seq | readseq -pipe -all -format=asn > bunch-of.asn
+      -- pipe a bunch of data thru readseq, converting all to asn
+
+
+The brief usage of readseq is as follows. The "[]" denote
+optional parts of the syntax:
+
+readseq -help
+readSeq (27Dec92), multi-format molbio sequence reader.
+usage: readseq [-options] in.seq > out.seq
+ options
+    -a[ll]         select All sequences
+    -c[aselower]   change to lower case
+    -C[ASEUPPER]   change to UPPER CASE
+    -degap[=-]     remove gap symbols
+    -i[tem=2,3,4]  select Item number(s) from several
+    -l[ist]        List sequences only
+    -o[utput=]out.seq  redirect Output
+    -p[ipe]        Pipe (command line, <stdin, >stdout)
+    -r[everse]     change to Reverse-complement
+    -v[erbose]     Verbose progress
+    -f[ormat=]#    Format number for output,  or
+    -f[ormat=]Name Format name for output:
+         1. IG/Stanford           10. Olsen (in-only)
+         2. GenBank/GB            11. Phylip3.2
+         3. NBRF                  12. Phylip
+         4. EMBL                  13. Plain/Raw
+         5. GCG                   14. PIR/CODATA
+         6. DNAStrider            15. MSF
+         7. Fitch                 16. ASN.1
+         8. Pearson/Fasta         17. PAUP
+         9. Zuker                 18. Pretty (out-only)
+
+   Pretty format options:
+    -wid[th]=#            sequence line width
+    -tab=#                left indent
+    -col[space]=#         column space within sequence line on output
+    -gap[count]           count gap chars in sequence numbers
+    -nameleft, -nameright[=#]   name on left/right side [=max width]
+    -nametop              name at top/bottom
+    -numleft, -numright   seq index on left/right side
+    -numtop, -numbot      index on top/bottom
+    -match[=.]            use match base for 2..n species
+    -inter[line=#]        blank line(s) between sequence blocks
+
+
+Notes:
+
+In use, readseq will respond to command line arguments, or to
+interactive use.  Command line arguments cannot be combined
+but must each follow a switch character (-).  In this release,
+the command line options are now words, with an equals (=)
+to separate parameter(s) from the command.  You cannot put a
+space between a command and its parameter, as is usual for
+Unix programs (this is to preserve compatibility with VMS).
+The command line syntax of the earlier versions is still
+supported.
+
+See the file Formats for details of the sequence formats which
+are supported by readseq.  The auto-detection feature of
+readseq which distinguishes these formats looks for some of the
+unique keywords and symbols that are found in each format. It
+is not infallible at this, though it attempts to exclude unknown
+formats.  In general, if you feed to readseq a sequence file that
+you know is one of these common formats, you are okay.  If you feed
+it data that might be oddball formats, or non-sequence data,
+you might well get garbage results.  Also, different developers
+are always thinking up minor twists on these common formats
+(like PAUP requiring a blank line between blocks of Phylip format,
+or IG adding form feeds between sequences), which may cause hassles.
+
+In general, output supports only minimal subsets of each format
+needed for sequence data exchanges.  Features, descriptions
+and other format-unique information is discarded.
+
+The pretty format requires additional options to generate a
+nice output.  Try the various pretty options to see what you like.
+Pretty format is OUTPUT only, readseq cannot read a Pretty format
+file.
+
+Readseq is NOT optimized for LARGE files.  It generally makes several
+reads thru each input file (one per sequence output at present, future
+version may optimize this).  It should handle input and output files
+and sequences of any size, but will slow down quite a bit for very large
+(multi megabyte) sized files. It is NOT recommended for converting
+databanks or large subsets there-of.  It is primarily directed at the
+small files that researchers use to maintain their personal data, which
+they frequently need to interconvert for the various analysis programs
+which so frequently require a special format.
+
+Users of Olsen multi sequence editor (VMS).  The Olsen format
+here is produced with the print command:
+  print/out=some.file
+Use Genbank output from readseq to produce a format that this
+editor can read, and use the command
+  load/genbank some.file
+Dan Davison has a VMS program that will convert to/from the
+Olsen native binary data format.  E-mail davison@uh.edu
+
+Warning: Phylip format input is now supported (30Dec92), however the
+auto-detection of Phylip format is very probabilistic and messy,
+especially distinguishing sequential from interleaved versions. It
+is not recommended that one use readseq to convert files from Phylip
+format to others unless essential.
+
+
+This program is available thru Internet gopher, as
+
+  gopher ftp.bio.indiana.edu
+  browse into the IUBio-Software+Data/molbio/readseq/ folder
+  select the readseq.shar document
+
+Or thru anonymous FTP in this manner:
+  my_computer> ftp  ftp.bio.indiana.edu  (or IP address 129.79.224.25)
+    username:  anonymous
+    password:  my_username@my_computer
+  ftp> cd molbio/readseq
+  ftp> get readseq.shar
+  ftp> bye
+
+readseq.shar is a Unix shell archive of the readseq files.
+This file can be edited by any text editor to reconstitute the
+original files, for those who do not have a Unix system or an
+Unshar program.  Read the top of this .shar file for further
+instructions.
+
+There are also pre-compiled executables for the following computers:
+Silicon Graphics Iris, Sparc (Sun Sparcstation & clones), VMS-Vax,
+Macintosh. Use binary ftp to transfer these, except Macintosh.  The
+Mac version is just the command-line program in a window, not very
+handy.
+
+C source files:
+  readseq.c ureadseq.c ureadasn.c ureadseq.h
+
+Document files:
+  Readme (this doc)
+  Formats (description of sequence file formats)
+  add.gdemenu (GDE program users can add this to the .GDEmenu file)
+  Stdfiles -- test sequence files
+  Makefile -- Unix make file
+  Make.com -- VMS make file
+  *.std    -- files for testing validity of readseq
+
+
+Recent changes (see also readseq.c for all history of changes):
+
+4 May 92
+ added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
+Aug 92
+= fixed Olsen format input to handle files w/ more sequences,
+  not to mess up when more than one seq has same identifier,
+  and to convert number masks to symbols.
+= IG format fix to understand ^L
+30 Dec 92
+* revised command-line & interactive interface.  Suggested form is now
+    readseq infile -format=genbank -output=outfile -item=1,3,4 ...
+  but remains compatible with prior commandlines:
+    readseq infile -f2 -ooutfile -i3 ...
+ added GCG MSF multi sequence file format
+ added PIR/CODATA format
+ added NCBI ASN.1 sequence file format
+ added Pretty, multi sequence pretty output (only)
+ added PAUP multi seq format
+ added degap option
+ added Gary Williams (GWW, G.Williams@CRC.AC.UK) reverse-complement option.
+ added support for reading Phylip formats (interleave & sequential)
+* string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER, NEEDSTRCASECMP
+* changed 32bit checksum to default, -DSMALLCHECKSUM for GCG version
+
+
--- a/134
+++ b/134
@ -0,0 +1,134 @@
+/* Stdfiles 
+	generate standard files to test readseq
+*/
+
+C
+#include <stdio.h>
+/* no sequence formats use chars > #126, ignore these */
+main(void)
+{
+	int c;
+	puts("> alphabet['!'..'~']");
+	for (c = '!'; c <= '~'; c++) putc(c,stdout);
+	putc('\n', stdout);
+}
+
+link  -w  -t MPST -c 'MPS ' c.o  ∂
+		"{Libraries}"Interface.o  "{Libraries}"ToolLibs.o ∂
+		"{Libraries}"Runtime.o  "{CLibraries}"StdClib.o 
+link.out > alphabet.orig
+
+
+C
+#include <stdio.h>
+main(void)
+{
+/* note: symbols "*" and "/" removed as terminators for various formats */
+const char *aminos		= "ABCDEFGHIKLMNPQRSTVWXYZ";  
+const char *primenuc	= "ACGTU";
+const char *allsymbols 	= "_.-?<>{}[]()!@#$%^&=+;:'|`~\"\\";
+
+	char *c, all[256];
+	int	count;
+	
+	strcpy(all, aminos);
+	strcat(all, primenuc);
+	strcat(all, allsymbols);
+	puts("> nucleic/amino test");
+	for (count=0; count<4; count++) {
+		for (c = all; *c!=0; c++) putc(*c, stdout);
+		putc('\n', stdout);
+		}
+}
+
+link  -w  -t MPST -c 'MPS ' c.o  ∂
+		"{Libraries}"Interface.o  "{Libraries}"ToolLibs.o ∂
+		"{Libraries}"Runtime.o  "{CLibraries}"StdClib.o 
+link.out > nucleic.std
+
+#--------------------------
+
+#standards (ship w/ readseq)
+#note: not all alphabet.orig chars are expected to be passed by
+#     readseq.  Numbers are dropped.
+readseq -p alphabet.orig > alphabet.std
+readseq -p -C  alphabet.std > upper.std
+
+cat alphabet.orig
+	> alphabet['!'..'~']
+	!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+
+cat alphabet.std
+	>alphabet['!'..'~'], 83 bases, 9429 checksum.
+	!"#$%&'()*+-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]
+	^_`abcdefghijklmnopqrstuvwxyz{|}~
+
+cat upper.std
+	>alphabet['!'..'~'], 83 bases, 9429 checksum.
+	!"#$%&'()*+-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]
+	^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~
+
+cat nucleic.std
+	> nucleic/amino test
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+
+readseq -p nucleic.std
+	>nucleic/amino test, 228 bases, 5952 checksum.
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;
+	:'|`~"\ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#
+	$%^&=+;:'|`~"\ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}
+	[]()!@#$%^&=+;:'|`~"\ABCDEFGHIKLMNPQRSTVWXYZACGTU_
+	.-?<>{}[]()!@#$%^&=+;:'|`~"\
+
+
+#----------------------------------
+
+#test for general read/write of all chars:
+readseq -p alphabet.std -otest.alpha
+diff test.alpha alphabet.std
+
+#test for valid toupper, general read/write:
+readseq -p -C  alphabet.std -otest.upper
+diff test.upper upper.std
+#for vms, use "-C" to preserve case
+# readseq -p "-C"  alphabet.std -otest.upper
+
+#test for multiple sequence file conversions
+# leave out gcg, raw; 
+# test of long seq conversion ?
+# test of mail-header seq conversion ?
+
+#test for valid format conversions
+readseq -v -p -f1 nucleic.std -otest.f1
+readseq -v -p -f2 test.f1 -otest.f2
+readseq -v -p -f3 test.f2 -otest.f3
+readseq -v -p -f4 test.f3 -otest.f4
+readseq -v -p -f5 test.f4 -otest.f5
+readseq -v -p -f6 test.f5 -otest.f6
+readseq -v -p -f7 test.f6 -otest.f7
+readseq -v -p -f8 test.f7 -otest.f8
+readseq -v -p -f1 test.f8 -otest.f1b   
+diff test.f1 test.f1b
+compare test.f1 test.f1b
+
+readseq -v -p -f13 test.f8 -otest.f13   # raw, drops name
+readseq -v -p -f9 test.f8 -otest.f9   	# zuker, little used
+#readseq -v -p -f10 test.f9 -otest.f10  # olsen, input only (output=raw)
+readseq -v -p -f11 test.f8 -otest.f11	# phylip 3.2, output only
+readseq -v -p -f12 test.f8 -otest.f12	# phylip 3.3, output only
+readseq -v -p -f14 test.f8 -otest.f14	# pir/codata
+
+
+#clean up
+rm test.≈
+
+
+#-----------------------------
+# some general tests
+
+readseq -h 
+
+readseq
--- a/add.gdemenu
+++ b/add.gdemenu
@ -0,0 +1,123 @@
+#
+# dgg added new readseq formats, 29 dec 92
+#
+
+item:Export Foreign Format
+itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE
+itemhelp:readseq.help
+
+arg:FORMAT
+argtype:choice_menu
+argchoice:GenBank:genbank
+argchoice:IG/Stanford:ig
+argchoice:NBRF:nbrf
+argchoice:EMBL:embl
+argchoice:GCG:gcg
+argchoice:DNA Strider:strider
+argchoice:Fitch:fitch
+argchoice:Pearson/Fasta:pearson
+argchoice:Zuker:zuker
+argchoice:Olsen:olsen
+argchoice:Phylip:phylip
+#argchoice:Phylip v3.2:phylip3.2
+argchoice:Plain text:raw
+argchoice:ASN.1:asn
+argchoice:PIR:pir
+argchoice:MSF:msf
+argchoice:PAUP:paup
+argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop
+
+arg:OUTPUTFILE
+argtype:text
+arglabel:Save as?
+
+in:in1
+informat:genbank
+
+
+#
+#dgg addition for new readseq, 24 dec 92
+#
+
+item:Pretty Print
+itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)&
+itemhelp:readseq.help
+
+#nametop is bad !?
+
+in:in1
+informat:genbank
+
+arg:NAMETOP
+argtype:chooser
+arglabel:Names at top  ?
+argchoice:No:
+argchoice:Yes:-nametop
+
+arg:NAMELEFT
+argtype:chooser
+arglabel:Names at left ?
+argchoice:No:
+argchoice:Yes:-nameleft
+
+arg:NAMERIGHT
+argtype:chooser
+arglabel:Names at right?
+argchoice:Yes:-nameright
+argchoice:No:
+
+arg:NUMTOP
+argtype:chooser
+arglabel:Numbers at top  ?
+argchoice:Yes:-numtop
+argchoice:No:
+
+arg:NUMBOT
+argtype:chooser
+arglabel:Numbers at tail ?
+argchoice:No:
+argchoice:Yes:-numbot
+
+arg:NUMLEFT
+argtype:chooser
+arglabel:Numbers at left ?
+argchoice:Yes:-numleft
+argchoice:No:
+
+arg:NUMRIGHT
+argtype:chooser
+arglabel:Numbers at right?
+argchoice:Yes:-numright
+argchoice:No:
+
+arg:MATCH
+argtype:chooser
+arglabel:Use match '.' for 2..n species?
+argchoice:No:
+argchoice:Yes:-match
+
+arg:GAPC
+argtype:chooser
+arglabel:Count gap symbols?
+argchoice:No:
+argchoice:Yes:-gap
+
+arg:WIDTH
+argtype:slider
+arglabel:Sequence width?
+argmin:10
+argmax:200
+argvalue:50
+
+arg:COLS
+argtype:slider
+arglabel:Column spacers?
+argmin:0
+argmax:50
+argvalue:10
+
+
+### pretty print insert end
+#
+
+
--- a/macinit.r
+++ b/macinit.r
@ -0,0 +1,412 @@
+/*------------------------------------------------------------------------------
+#
+#
+#	MultiFinder-Aware Simple Input/Output Window resource
+#
+#	for ReadSeq
+#
+------------------------------------------------------------------------------*/
+
+#include "systypes.r"
+#include "types.r"
+
+
+resource 'MENU' (20000, preload) {
+	20000,
+	textMenuProc,
+	0x7FFFFFFD,
+	enabled,
+	apple,
+	{	/* array: 2 elements */
+		/* [1] */
+		"About ReadSeq…", noIcon, noKey, noMark, plain,
+		/* [2] */
+		"-", noIcon, noKey, noMark, plain
+	}
+};
+
+resource 'MENU' (20001, preload) {
+	20001,
+	textMenuProc,
+	0x0,
+	enabled,
+	"File",
+	{	/* array: 11 elements */
+		/* [1] */
+		"New", noIcon, "N", noMark, plain,
+		/* [2] */
+		"Open", noIcon, "O", noMark, plain,
+		/* [3] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [4] */
+		"Close", noIcon, "W", noMark, plain,
+		/* [5] */
+		"Save", noIcon, "S", noMark, plain,
+		/* [6] */
+		"Save As…", noIcon, noKey, noMark, plain,
+		/* [7] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [8] */
+		"Page Setup…", noIcon, noKey, noMark, plain,
+		/* [9] */
+		"Print…", noIcon, noKey, noMark, plain,
+		/* [10] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [11] */
+		"Quit", noIcon, "Q", noMark, plain
+	}
+};
+
+resource 'MENU' (20002, preload) {
+	20002,
+	textMenuProc,
+	0x0,
+	enabled,
+	"Edit",
+	{	/* array: 6 elements */
+		/* [1] */
+		"Undo", noIcon, "Z", noMark, plain,
+		/* [2] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [3] */
+		"Cut", noIcon, "X", noMark, plain,
+		/* [4] */
+		"Copy", noIcon, "C", noMark, plain,
+		/* [5] */
+		"Paste", noIcon, "V", noMark, plain,
+		/* [6] */
+		"Clear", noIcon, noKey, noMark, plain
+	}
+};
+
+resource 'MENU' (20003, preload) {
+	20003,
+	textMenuProc,
+	allEnabled,
+	enabled,
+	"Font",
+	{	/* array: 0 elements */
+	}
+};
+
+resource 'ALRT' (20000, purgeable) {
+	{98, 108, 314, 405},
+	20000,
+	{	/* array: 4 elements */
+		/* [1] */
+		OK, visible, silent,
+		/* [2] */
+		OK, visible, silent,
+		/* [3] */
+		OK, visible, silent,
+		/* [4] */
+		OK, visible, silent
+	}
+};
+
+resource 'ALRT' (20001, purgeable) {
+	{40, 20, 150, 260},
+	20001,
+	{	/* array: 4 elements */
+		/* [1] */
+		OK, visible, silent,
+		/* [2] */
+		OK, visible, silent,
+		/* [3] */
+		OK, visible, silent,
+		/* [4] */
+		OK, visible, silent
+	}
+};
+
+resource 'ALRT' (20002, preload) {
+	{72, 64, 212, 372},
+	20002,
+	{	/* array: 4 elements */
+		/* [1] */
+		OK, visible, silent,
+		/* [2] */
+		OK, visible, silent,
+		/* [3] */
+		OK, visible, silent,
+		/* [4] */
+		OK, visible, silent
+	}
+};
+
+resource 'DITL' (20000, purgeable) {
+	{	/* array DITLarray: 8 elements */
+		/* [1] */
+		{191, 98, 211, 178},
+		Button {
+			enabled,
+			"OK"
+		},
+		/* [2] */
+		{110, 24, 130, 256},
+		StaticText {
+			disabled,
+			" Copyright © 1990 by d.g.gilbert\n"
+		},
+		/* [3] */
+		{6, 93, 24, 281},
+		StaticText {
+			disabled,
+			"A tool for molecular biology."
+		},
+		/* [4] */
+		{31, 25, 86, 281},
+		StaticText {
+			disabled,
+			"Reads and writes nucleic or protein sequ"
+			"ences in various formats. Data files may"
+			" have multiple sequences."
+		},
+		/* [5] */
+		{6, 17, 22, 92},
+		StaticText {
+			disabled,
+			"ReadSeq"
+		},
+		/* [6] */
+		{150, 28, 186, 262},
+		StaticText {
+			disabled,
+			"land mail: biology dept., indiana univer"
+			"sity, bloomington, in 47405\n"
+		},
+		/* [7] */
+		{129, 25, 153, 258},
+		StaticText {
+			disabled,
+			" e-mail: gilbertd@bio.indiana.edu\n"
+		},
+		/* [8] */
+		{86, 12, 107, 281},
+		StaticText {
+			disabled,
+			"This program may be freely distributed."
+		}
+	}
+};
+
+resource 'DITL' (20001, purgeable) {
+	{	/* array DITLarray: 3 elements */
+		/* [1] */
+		{80, 150, 100, 230},
+		Button {
+			enabled,
+			"OK"
+		},
+		/* [2] */
+		{10, 60, 60, 230},
+		StaticText {
+			disabled,
+			"Error. ^0."
+		},
+		/* [3] */
+		{8, 8, 40, 40},
+		Icon {
+			disabled,
+			2
+		}
+	}
+};
+
+resource 'DITL' (20002, preload) {
+	{	/* array DITLarray: 4 elements */
+		/* [1] */
+		{58, 25, 76, 99},
+		Button {
+			enabled,
+			"Yes"
+		},
+		/* [2] */
+		{86, 25, 104, 99},
+		Button {
+			enabled,
+			"No"
+		},
+		/* [3] */
+		{12, 20, 45, 277},
+		StaticText {
+			disabled,
+			"Save changes before closing?"
+		},
+		/* [4] */
+		{86, 195, 104, 269},
+		Button {
+			enabled,
+			"Cancel"
+		}
+	}
+};
+
+resource 'CNTL' (20000, purgeable, preload) {
+	{-1, 465, 272, 481},
+	0,
+	invisible,
+	0,
+	0,
+	scrollBarProc,
+	0,
+	""
+};
+
+resource 'CNTL' (20001, purgeable, preload) {
+	{271, -1, 287, 466},
+	0,
+	invisible,
+	0,
+	0,
+	scrollBarProc,
+	0,
+	""
+};
+
+data 'pzza' (128, purgeable) {
+	$"4D50 5320"                                          /* MPS  */
+};
+
+resource 'MBAR' (20000, preload) {
+	{	/* array MenuArray: 4 elements */
+		/* [1] */
+		20000,
+		/* [2] */
+		20001,
+		/* [3] */
+		20002,
+		/* [4] */
+		20003
+	}
+};
+
+resource 'WIND' (20000, purgeable, preload) {
+	{0, 0, 286, 480},
+	zoomDocProc,
+	invisible,
+	noGoAway,
+	0x0,
+	"untitled"
+};
+
+resource 'STR#' (20000, purgeable) {
+	{	/* array StringArray: 11 elements */
+		/* [1] */
+		"You must run on 512Ke or later",
+		/* [2] */
+		"Application Memory Size is too small",
+		/* [3] */
+		"Not enough memory to run SIOW",
+		/* [4] */
+		"Not enough memory to do Cut",
+		/* [5] */
+		"Cannot do Cut",
+		/* [6] */
+		"Cannot do Copy",
+		/* [7] */
+		"Cannot exceed 32,000 characters with Pas"
+		"te",
+		/* [8] */
+		"Not enough memory to do Paste",
+		/* [9] */
+		"Cannot create window",
+		/* [10] */
+		"Cannot exceed 32,000 characters",
+		/* [11] */
+		"Cannot do PasteFont not found"
+	}
+};
+
+resource 'SIZE' (-1) {
+	reserved,
+	acceptSuspendResumeEvents,
+	reserved,
+	canBackground,
+	multiFinderAware,
+	backgroundAndForeground,
+	dontGetFrontClicks,
+	ignoreChildDiedEvents,
+	not32BitCompatible,
+	notHighLevelEventAware,
+	onlyLocalHLEvents,
+	notStationeryAware,
+	dontUseTextEditServices,
+	reserved,
+	reserved,
+	reserved,
+	124928,
+	38912
+};
+
+resource 'SIZE' (0) {
+	reserved,
+	acceptSuspendResumeEvents,
+	reserved,
+	canBackground,
+	multiFinderAware,
+	backgroundAndForeground,
+	dontGetFrontClicks,
+	ignoreChildDiedEvents,
+	not32BitCompatible,
+	notHighLevelEventAware,
+	onlyLocalHLEvents,
+	notStationeryAware,
+	dontUseTextEditServices,
+	reserved,
+	reserved,
+	reserved,
+	256000,
+	38912
+};
+
+data 'siow' (0) {
+	$"0F52 6561 6453 6571 2069 6E20 5349 4F57"            /* .ReadSeq in SIOW */
+};
+
+resource 'BNDL' (128) {
+	'siow',
+	0,
+	{	/* array TypeArray: 2 elements */
+		/* [1] */
+		'ICN#',
+		{	/* array IDArray: 1 elements */
+			/* [1] */
+			0, 128
+		},
+		/* [2] */
+		'FREF',
+		{	/* array IDArray: 1 elements */
+			/* [1] */
+			0, 128
+		}
+	}
+};
+
+resource 'FREF' (128) {
+	'APPL',
+	0,
+	""
+};
+
+resource 'ICN#' (128) {
+	{	/* array: 2 elements */
+		/* [1] */
+		$"0000 0000 0000 0000 0010 4100 0010 2200"
+		$"0020 2200 0020 2100 0020 4100 0010 4200"
+		$"0010 4200 0010 2200 0020 2100 0020 0100"
+		$"00FF FF00 03FF FFE0 0791 03F0 0ED1 0E7C"
+		$"1C31 321C 380D C10E 3FFF FFFE 3003 C106"
+		$"380D 300E 1E31 0E3C 1FC1 01F8 07FF FFE0"
+		$"00FF FE",
+		/* [2] */
+		$"0000 0000 0000 0000 0010 4100 0010 2200"
+		$"0020 2200 0020 2100 0020 4100 0010 4200"
+		$"0010 4200 0010 2200 0020 2100 0020 0100"
+		$"00FF FF00 03FF FFE0 07FF FFF0 0FFF FFFC"
+		$"1FFF FFFC 3FFF FFFE 3FFF FFFE 3FFF FFFE"
+		$"3FFF FFFE 1FFF FFFC 1FFF FFF8 07FF FFE0"
+		$"00FF FE"
+	}
+};
+
--- a/readseqSIOW.make
+++ b/readseqSIOW.make
@ -0,0 +1,42 @@
+#  Macintosh MPW-C Makefile
+#  using Simple Input/Output Window library
+#
+#   File:       ReadseqSIOW.make
+#   Target:     ReadseqSIOW
+#   Sources:    readseq.c ureadseq.c ureadasn.c macinit.c
+#   Created:    Wednesday, November 13, 1991 8:23:00 PM
+
+
+#OBJECTS = macinit.c.o readseq.c.o ureadseq.c.o
+#COptions =  -D SIOW  # -r
+
+#if NCBI is available, set path here to NCBI toolkit:
+NCBI = "{Boot}@molbio:ncbi:"
+OBJECTS = macinit.c.o readseq.c.o ureadseq.c.o ureadasn.c.o
+COptions =  -D SIOW -d NCBI -i "{NCBI}"include:  
+NCBILIBS = "{NCBI}"lib:libncbi.o "{NCBI}"lib:libncbiobj.o "{NCBI}"lib:libvibrant.o
+#endif NCBI
+
+ReadseqSIOW ƒƒ ReadseqSIOW.make {OBJECTS}
+	Link -d -c '????' -t APPL ∂
+		{OBJECTS} ∂
+		"{CLibraries}"StdClib.o ∂
+		"{MPW}"Libraries:Libraries:SIOW.o ∂
+		"{Libraries}"Runtime.o ∂
+		"{Libraries}"Interface.o ∂
+#if NCBI
+		{NCBILIBS} ∂
+		"{CLibraries}"CSANELib.o ∂
+		"{CLibraries}"Math.o ∂
+#endif NCBI
+		-o ReadseqSIOW
+		
+readseq.c.o ƒ ReadseqSIOW.make readseq.c
+ureadseq.c.o ƒ ReadseqSIOW.make ureadseq.c
+macinit.c.o ƒ ReadseqSIOW.make macinit.c
+#if NCBI
+ureadasn.c.o ƒ ReadseqSIOW.make ureadasn.c
+#endif NCBI
+
+ReadseqSIOW ƒƒ macinit.r
+	Rez -a macinit.r -o ReadseqSIOW