From 650d39cff944d4bbf3e0749350978dc45571d474 Mon Sep 17 00:00:00 2001
From: Kuoi <starsareintherose@outlook.com>
Date: Sun, 16 Apr 2023 13:07:57 +0800
Subject: [PATCH] init: extra

---
 Formats          | 980 +++++++++++++++++++++++++++++++++++++++++++++++
 Make.com         |  63 +++
 Make.ncbi        | 109 ++++++
 Readme           | 160 ++++++++
 Readseq.help     | 229 +++++++++++
 Stdfiles         | 134 +++++++
 add.gdemenu      | 123 ++++++
 macinit.r        | 412 ++++++++++++++++++++
 readseqSIOW.make |  42 ++
 9 files changed, 2252 insertions(+)
 create mode 100644 Formats
 create mode 100644 Make.com
 create mode 100644 Make.ncbi
 create mode 100644 Readme
 create mode 100644 Readseq.help
 create mode 100644 Stdfiles
 create mode 100644 add.gdemenu
 create mode 100644 macinit.r
 create mode 100644 readseqSIOW.make

diff --git a/Formats b/Formats
new file mode 100644
index 0000000..cb39ce8
--- /dev/null
+++ b/Formats
@@ -0,0 +1,980 @@
+||||||||||| ReadSeq supported formats   (revised 30Dec92)
+--------------------------------------------------------
+
+    -f[ormat=]Name Format name for output:
+         1. IG/Stanford           10. Olsen (in-only)
+         2. GenBank/GB            11. Phylip3.2
+         3. NBRF                  12. Phylip
+         4. EMBL                  13. Plain/Raw
+         5. GCG                   14. PIR/CODATA
+         6. DNAStrider            15. MSF
+         7. Fitch                 16. ASN.1
+         8. Pearson/Fasta         17. PAUP
+         9. Zuker (in-only)       18. Pretty (out-only)
+
+In general, output supports only minimal subsets of each format
+needed for sequence data exchanges.  Features, descriptions
+and other format-unique information are discarded.
+
+For users of the Olsen multi sequence editor (VMS):  The Olsen format
+here is produced with the print command:
+  print/out=some.file
+Use Genbank output from readseq to produce a format that this
+editor can read, and use the command
+  load/genbank some.file
+Dan Davison has a VMS program that will convert to/from the
+Olsen native binary data format.  E-mail davison@uh.edu
+
+Warning: Phylip format input is now supported (30Dec92), however the
+auto-detection of Phylip format is very probabilistic and messy,
+especially distinguishing sequential from interleaved versions. It
+is not recommended that one use readseq to convert files from Phylip
+format to others unless essential.
+
+
+
+||||||||||| ReadSeq usage             (revised 11Nov91)
+--------------------------------------------------------
+
+A. determine file format:
+
+        short skiplines;  /* result: number of header lines to skip (or 0) */
+        short error;      /* error result or 0 */
+        short format;     /* resulting format code, see ureadseq.h */
+        char  *filename   = "Mysequence.file";
+
+        format = seqFileFormat( filename, &skiplines, &error);
+        if (error!=0) fail;
+
+B. read number and list of sequences (optional)
+        short numseqs;    /* resulting number of sequences found in file */
+        char  *seqlist;   /* list of sequence names, newline separated, 0 terminated */
+
+        seqlist = listSeqs( filename, skiplines, format, &numseqs, &error);
+        if (error==0)  display (seqlist);
+        free( seqlist);
+
+C.  read individual sequences as desired
+        short seqIndex;   /* sequence index #, or == kListSeqs for listSeqs equivalent */
+        long  seqlen;     /* length of seq */
+        char  seqid[256]; /* sequence name */
+        char  *seq;       /* sequence, 0 terminated, free when done */
+
+        seq = readSeq( seqIndex, filename, skiplines, format,
+                      &seqlen, &numseqs, &error, seqid);
+        if (error==0) manipulate(seq);
+        free(seq);
+
+D. write sequences as desired
+        int nlines;     /* number of lines of sequence written */
+        FILE* fout;     /* open file pointer (stdout or other) */
+        short outform;  /* output format, see ureadseq.h */
+
+        nlines = writeSeq( fout, seq, seqlen, format, outform, seqid);
+
+
+Note (30Dec92): There is various processing done by the main program (in readseq.c),
+  rather than just in the subroutines (in ureadseq.c).  Especially for interleaved
+  output formats, the writeSeq subroutine does not handle interleaving, nor some of
+  the formatting at the top and end of output files.  While seqFileFormat, listSeqs,
+  and readSeq subroutines are fairly self-contained, the writeSeq depends a lot on
+  auxiliary processing.  At some point, this may be revised so writeSeq is self-
+  contained.
+
+Note 2: The NCBI toolkit (ftp from ncbi.nlm.nih.gov) is needed for the ASN.1 format
+  reading (see ureadasn.c).  A bastard (but workable I hope) ASN.1 format is written
+  by writeSeq alone.
+
+
+
+|||||||||||  sequence formats....
+---------------------------------------------------
+
+stanford/IG
+;comments
+;...
+seq1 info
+abcd...
+efgh1 (or 2 = terminator)
+;another seq
+;....
+seq2 info
+abcd...1
+--- for e.g. ----
+;     Dro5s-T.Seq  Length: 120  April 6, 1989  21:22  Check: 9487  ..
+dro5stseq
+GCCAACGACCAUACCACGCUGAAUACAUCGGUUCUCGUCCGAUCACCGAAAUUAAGCAGCGUCGCGGGCG
+GUUAGUACUUAGAUGGGGGACCGCUUGGGAACACCGCGUGUUGUUGGCCU1
+
+;  TOIG of: Dro5srna.Seq  check: 9487  from: 1  to: 120
+---------------------------------------------------
+
+Genbank:
+LOCUS    seq1 ID..
+...
+ORIGIN ...
+123456789abcdefg....(1st 9 columns are formatting)
+     hijkl...
+//         (end of sequence)
+LOCUS     seq2 ID ..
+...
+ORIGIN
+      abcd...
+//
+---------------------------------------------------
+
+NBRF format: (from uwgcg ToNBRF)
+>DL;DRO5SRNA
+Iubio$Dua0:[Gilbertd.Gcg]Dro5srna.Seq;2 => DRO5SRNA
+
+      51  AAUUAAGCAG CGUCGCGGGC GGUUAGUACU UAGAUGGGGG ACCGCUUGGG
+     101  AACACCGCGU GUUGUUGGCC U
+
+---------------------------------------------------
+
+EMBL format
+ID345 seq1 id   (the 345 are spaces)
+... other info
+SQ345Sequence   (the 3,4,5 are spaces)
+abcd...
+hijk...
+//              (! this is proper end string: 12Oct90)
+ID    seq2 id
+...
+SQ   Sequence
+abcd...
+...
+//
+---------------------------------------------------
+
+UW GCG Format:
+comments of any form, up to ".." signal
+signal line has seq id, and " Check: ####   .."
+only 1 seq/file
+
+-- e.g. --- (GCG from GenBank)
+LOCUS       DROEST6      1819 bp ss-mRNA            INV       31-AUG-1987
+    ... much more ...
+ORIGIN      1 bp upstream of EcoRI site; chromosome BK9 region 69A1.
+
+INVERTEBRATE:DROEST6  Length: 1819  January 9, 1989  16:48  Check: 8008  ..
+
+       1  GAATTCGCCG GAGTGAGGAG CAACATGAAC TACGTGGGAC TGGGACTTAT
+
+      51  CATTGTGCTG AGCTGCCTTT GGCTCGGTTC GAACGCGAGT GATACAGATG
+
+
+---------------------------------------------------
+
+DNAStrider (Mac) = modified Stanford:
+; ### from DNA Strider  Friday, April 7, 1989   11:04:24 PM
+; DNA sequence  pBR322   4363  b.p. complete sequence
+;
+abcd...
+efgh
+//  (end of sequence)
+---------------------------------------------------
+
+Fitch format:
+Dro5srna.Seq
+ GCC AAC GAC CAU ACC ACG CUG AAU ACA UCG GUU CUC GUC CGA UCA CCG AAA UUA AGC AGC
+ GUC GCG GGC GGU UAG UAC UUA GAU GGG GGA CCG CUU GGG AAC ACC GCG UGU UGU UGG CCU
+Droest6.Seq
+ GAA TTC GCC GGA GTG AGG AGC AAC ATG AAC TAC GTG GGA CTG GGA CTT ATC ATT GTG CTG
+ AGC TGC CTT TGG CTC GGT TCG AAC GCG AGT GAT ACA GAT GAC CCT CTG TTG GTG CAG CTG
+---------------------------------------------------
+
+W.Pearson/Fasta format:
+>BOVPRL GenBank entry BOVPRL from omam file.  907 nucleotides.
+TGCTTGGCTGAGGAGCCATAGGACGAGAGCTTCCTGGTGAAGTGTGTTTCTTGAAATCAT
+
+---------------------------------------------------
+Phylip version 3.2 format (e.g., DNAML):
+
+   5   13 YF                (# seqs, #bases, YF)
+Alpha     AACGTGGCCAAAT
+          aaaagggccc...  (continued sp. alpha)
+Beta      AAGGTCGCCAAAC
+          aaaagggccc...  (continued sp. beta)
+Gamma     CATTTCGTCACAA
+          aaaagggccc...  (continued sp. Gamma)
+1234567890^-- bases must start in col 11, and run 'til #bases 
+        (spaces & newlines are okay)
+---------------------------------------------------
+Phylip version 3.3 format (e.g., DNAML):
+
+  5    42  YF             (# seqs, #bases, YF)
+Turkey    AAGCTNGGGC ATTTCAGGGT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+Chimp     AAACCCTTGC CGTTACGCTT
+Gorilla   AAACCCTTGC CGGTACGCTT
+1234567890^-- bases must start in col 11
+  !! this version interleaves the species -- contrary to
+     all other output formats.
+
+GAGCCCGGGC AATACAGGGT AT
+GAGCCGTGGC CGGGCACGGT AT
+ACAGGTTGGC CGTTCAGGGT AA
+AAACCGAGGC CGGGACACTC AT
+AAACCATTGC CGGTACGCTT AA
+
+---------------------------------------------------
+Phylip version 3.4 format (e.g., DNAML)
+-- Both Interleaved and sequential are permitted
+
+   5   13                (# seqs, #bases)
+Alpha     AACGTGGCCAAAT
+          aaaagggccc...  (continued sp. alpha)
+Beta      AAGGTCGCCAAAC
+          aaaagggccc...  (continued sp. beta)
+Gamma     CATTTCGTCACAA
+          aaaagggccc...  (continued sp. Gamma)
+1234567890^-- bases must start in col 11, and run 'til #bases 
+        (spaces, newlines and numbers are ignored)
+
+---------------------------------------------------
+Gary Olsen (multiple) sequence editor /print format:
+
+!---------------------
+!17Oct91 -- error in original copy of olsen /print format, shifted right 1 space
+! here is correct copy:
+  301  40 Tb.thiop  CGCAGCGAAA----------GCUNUGCUAAUACCGCAUA-CGnCCUG-----------------------------------------------------  Tb.thiop
+123456789012345678901
+  301  42 Rhc.purp  CGUAGCGAAA----------GUUACGCUAAUACCGCAUA-UUCUGUG-----------------------------------------------------  Rhc.purp
+
+  301  44 Rhc.gela  nnngnCGAAA----------GCCGGAUUAAUACCGCAUA-CGACCUA-----------------------------------------------------  Rhc.gela
+!---------------------
+
+ RNase P RNA components.  on 20-FEB-90 17:23:58
+
+    1 (E.c. pr ):  Base pairing in Escherichia coli RNase P RNA.
+    2 (chrom   ):  Chromatium
+      :
+   12 (B.brevis):  Bacillus brevis RNase P RNA, B. James.
+   13 ( 90% con):   90% conserved
+   14 (100% con):  100% conserved
+   15 (gram+ pr):  pairing
+
+1
+ RNase P RNA components.  on 20-FEB-90 17:23:58
+
+ Posi-   Sequence
+ tion:   identity:   Data:
+
+     1   1 E.c. pr      <<<<<<<<<< {{{{{{{{<<:<<<<<<<<<<^<<<<<<====>>>>  E.c. pr
+     1   2 chrom        GGAGUCGGCCAGACAGUCGCUUCCGUCCU------------------  chrom
+            :
+     1  12 B.brevis  AUGCAGGAAAUGCGGGUAGCCGCUGCCGCAAUCGUCU-------------  B.brevis
+1234567890123456789012 <! this should be 21 not 22,
+! this example must be inset on left by 1 space from olsen /print files !
+     1  13  90% con           G  C G  A  CGC GC               -    -      90% con
+     1  14 100% con                G  A  CGC                             100% con
+     1  15 gram+ pr     <<<<<<<<<< {{{{{{{{<<<<<<<<<<<<<===============  gram+ pr
+
+    60   1 E.c. pr   >>>>>>^>>^>>>>:>>    <<<^<<<< {{{{{                 E.c. pr
+    60   2 chrom     -----GGUG-ACGGGGGAGGAAAGUCCGG-GCUCCAU-------------  chrom
+    :       :
+    60  10 B.stearo  ----UU-CG-GCCGUAGAGGAAAGUCCAUGCUCGCACGGUGCUGAGAUGC  B.stearo
+
+
+---------------------------------------------------
+  GCG MSF format
+Title line
+
+picorna.msf  MSF: 100  Type: P  January 17, 1991  17:53  Check: 541
+..
+Name: Cb3              Len:   100  Check: 7009  Weight:  1.00
+Name: E                Len:   100  Check:   60  Weight:  1.00
+
+//
+
+   1                                                   50
+Cb3  ...gpvedai .......t.. aaigr..vad tvgtgptnse aipaltaaet
+  E  gvenae.kgv tentna.tad fvaqpvylpe .nqt...... kv.affynrs
+
+   51                                                 100
+
+Cb3  ghtsqvvpgd tmqtrhvkny hsrsestien flcrsacvyf teykn.....
+  E  ...spi.gaf tvks...... gs.lesgfap .fsngtc.pn sviltpgpqf
+
+---------------------------------------------------
+     PIR format
+This is NBRF-PIR MAILSERVER version 1.45
+Command-> get PIR3:A31391
+\\\
+ENTRY           A31391       #Type Protein
+TITLE           *Esterase-6 - Fruit fly (Drosophila melanogaster)
+
+DATE            03-Aug-1992 #Sequence 03-Aug-1992 #Text 03-Aug-1992
+PLACEMENT          0.0    0.0    0.0    0.0    0.0
+COMMENT         *This entry is not verified.
+SOURCE          Drosophila melanogaster
+
+REFERENCE
+   #Authors     Cooke P.H., Oakeshott J.G.
+   #Citation    submitted to GenBank, April 1989
+   #Reference-number A31391
+   #Accession   A31391
+   #Cross-reference GB:J04167
+
+SUMMARY       #Molecular-weight 61125  #Length 544  #Checksum  1679
+SEQUENCE
+                5        10        15        20        25        30
+      1 M N Y V G L G L I I V L S C L W L G S N A S D T D D P L L V
+     31 Q L P Q G K L R G R D N G S Y Y S Y E S I P Y A E P P T G D
+     61 L R F E A P E P Y K Q K W S D I F D A T K T P V A C L Q W D
+     91 Q F T P G A N K L V G E E D C L T V S V Y K P K N S K R N S
+    121 F P V V A H I H G G A F M F G A A W Q N G H E N V M R E G K
+    151 F I L V K I S Y R L G P L G F V S T G D R D L P G N Y G L K
+    181 D Q R L A L K W I K Q N I A S F G G E P Q N V L L V G H S A
+    211 G G A S V H L Q M L R E D F G Q L A R A A F S F S G N A L D
+    241 P W V I Q K G A R G R A F E L G R N V G C E S A E D S T S L
+    271 K K C L K S K P A S E L V T A V R K F L I F S Y V P F A P F
+    301 S P V L E P S D A P D A I I T Q D P R D V I K S G K F G Q V
+    331 P W A V S Y V T E D G G Y N A A L L L K E R K S G I V I D D
+    361 L N E R W L E L A P Y L L F Y R D T K T K K D M D D Y S R K
+    391 I K Q E Y I G N Q R F D I E S Y S E L Q R L F T D I L F K N
+    421 S T Q E S L D L H R K Y G K S P A Y A Y V Y D N P A E K G I
+    451 A Q V L A N R T D Y D F G T V H G D D Y F L I F E N F V R D
+    481 V E M R P D E Q I I S R N F I N M L A D F A S S D N G S L K
+    511 Y G E C D F K D N V G S E K F Q L L A I Y I D G C Q N R Q H
+    541 V E F P
+///
+\\\
+---------------------------------------------------
+PAUP format:
+The NEXUS Format
+
+Every block starts with "BEGIN blockname;" and ends with "END;".
+Each block is composed of one or more statements, each
+terminated by a semicolon (;).
+
+Comments may be included in NEXUS files by enclosing them within
+square brackets, as in "[This is a comment]."
+
+NEXUS-conforming files are identified by a "#NEXUS" directive at
+the very beginning of the file (line 1, column 1).  If the
+#NEXUS is omitted PAUP issues a warning but continues
+processing.
+
+NEXUS files are entirely free-format.  Blanks, tabs, and
+newlines may be placed anywhere in the file.  Unless RESPECTCASE
+is requested, commands and data may be entered in upper case,
+lower case, or a mixture of upper and lower case.
+
+The following conventions are used in the syntax descriptions of
+the various blocks.  Upper-case items are entered exactly as
+shown.  Lower-case items inside of angle brackets -- e.g., <x>
+-- represent items to be substituted by the user.  Items inside
+of square brackets -- e.g., [X] -- are optional.  Items inside
+of curly braces and separated by vertical bars -- e.g.,  { X | Y
+| Z } -- are mutually exclusive options.
+
+
+The DATA Block
+
+The DATA block contains the data matrix and other associated
+information.  Its syntax is:
+
+BEGIN DATA;
+DIMENSIONS NTAX=<number of taxa> NCHAR=<number of characters>;
+  [ FORMAT  [ MISSING=<missing-symbol> ]
+        [ LABELPOS={ LEFT | RIGHT } ]
+        [ SYMBOLS="<symbols-list>" ]
+        [ INTERLEAVE ]
+        [ MATCHCHAR=<match-symbol> ]
+        [ EQUATE="<symbol>=<expansion> [<symbol>=<expansion>...]" ]
+        [ TRANSPOSE ]
+        [ RESPECTCASE ]
+        [ DATATYPE = { STANDARD | DNA | RNA | PROTEIN } ]; ]
+        [ OPTIONS [ IGNORE={ INVAR | UNINFORM } ]
+        [ MSTAXA = { UNCERTAIN | POLYMORPH | VARIABLE } ]
+        [ ZAP = "<list of zapped characters>" ] ; ]
+  [ CHARLABELS <label_1> <label_2> ... <label_NCHAR> ; ]
+  [ TAXLABELS <label1_1> <label1_2> <label1_NTAX> ; ]
+  [ STATELABELS <currently ignored by PAUP> ; ]
+  MATRIX <data-matrix> ;
+  END;
+
+--- example PAUP file
+
+#NEXUS
+
+[!Brown et al. (1982) primate mitochondrial DNA]
+
+begin data;
+  dimensions ntax=5 nchar=896;
+  format datatype=dna matchchar=. interleave missing='-';
+  matrix
+[                              2                    4                    6            8                    ]
+[         1                    1                    1                    1            1                    ]
+human     aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcctcattactatt ctgcctagcaaactcaaact acgaacgcactcacagtcgc
+chimp     ................a.t. .c.................a ...............t.... ..................t. .t........c.........
+gorilla   ..................tg ....t.....t........a ........a......t.... .................... .......a..c.....c...
+orang     ................ac.. cc.....g..t.....t..a ..c........cc....g.. .................... .......a..c.....c...
+gibbon    ......t..a..t...ac.g .c.................a ..a..c..t..cc.g..... ......t............. .......a........c...
+
+[         8                    8                    8                    8            8              8     ]
+[         0                    2                    4                    6            8              9     ]
+[         1                    1                    1                    1            1              6     ]
+human     cttccccacaacaatattca tgtgcctagaccaagaagtt attatctcgaactgacactg agccacaacccaaacaaccc agctctccctaagctt
+chimp     t................... .a................c. ........a.....g..... ...a................ ................
+gorilla   ..................tc .a................c. ........a.g......... ...a.............tt. .a..............
+orang     ta....a...........t. .c.......ga......acc ..cg..a.a......tg... .a.a..c.....g...cta. .a.....a........
+gibbon    a..t.......t........ ....ac...........acc .....t..a........... .a.tg..........gctag .a..............
+  ;
+end;
+---------------------------------------------------
+
+
+
+
+
+
+|||||||||||  Sample SMTP mail header
+---------------------------------------------------
+
+- - - - - - - - -
+From GenBank-Retrieval-System@genbank.bio.net Sun Nov 10 17:28:56 1991
+Received: from genbank.bio.net by sunflower.bio.indiana.edu
+        (4.1/9.5jsm) id AA19328; Sun, 10 Nov 91 17:28:55 EST
+Received: by genbank.bio.net (5.65/IG-2.0)
+        id AA14458; Sun, 10 Nov 91 14:30:03 -0800
+Date: Sun, 10 Nov 91 14:30:03 -0800
+Message-Id: <9111102230.AA14458@genbank.bio.net>
+From: Database Server <GenBank-Retrieval-System@genbank.bio.net>
+To: gilbertd@sunflower.bio.indiana.edu
+Subject: Results of Query for drorna
+Status: R
+
+No matches on drorna.
+- - - - - -
+From GenBank-Retrieval-System@genbank.bio.net Sun Nov 10 17:28:49 1991
+Received: from genbank.bio.net by sunflower.bio.indiana.edu
+        (4.1/9.5jsm) id AA19323; Sun, 10 Nov 91 17:28:47 EST
+Received: by genbank.bio.net (5.65/IG-2.0)
+        id AA14461; Sun, 10 Nov 91 14:30:03 -0800
+Date: Sun, 10 Nov 91 14:30:03 -0800
+Message-Id: <9111102230.AA14461@genbank.bio.net>
+From: Database Server <GenBank-Retrieval-System@genbank.bio.net>
+To: gilbertd@sunflower.bio.indiana.edu
+Subject: Results of Query for droest6
+Status: R
+
+LOCUS       DROEST6      1819 bp ss-mRNA            INV       31-AUG-1987
+DEFINITION  D.melanogaster esterase-6 mRNA, complete cds.
+ACCESSION   M15961
+
+
+
+
+
+
+
+
+
+
+
+
+|||||||||||  GCG manual discussion of sequence symbols:
+---------------------------------------------------
+
+III_SEQUENCE_SYMBOLS
+
+
+     GCG programs allow all upper and lower  case  letters,  periods  (.),
+asterisks  (*),  pluses  (+),  ampersands  (&),  and ats (@) as symbols in
+biological sequences.  Nucleotide  symbols,  their  complements,  and  the
+standard  one-letter amino acid symbols are shown below in separate lists.
+The meanings of the symbols +, &, and @ have not  been  assigned  at  this
+writing (March, 1989).
+
+     GCG uses the  letter  codes  for  amino  acid  codes  and  nucleotide
+ambiguity    proposed    by    IUB    (Nomenclature    Committee,    1985,
+Eur. J. Biochem. 150; 1-5).  These codes are  compatible  with  the  codes
+used by the EMBL, GenBank, and NBRF data libraries.
+
+
+                               NUCLEOTIDES
+
+     The meaning of each symbol, its complement,  and  the  Cambridge  and
+Stanford  equivalents  are  shown below.  Cambridge files can be converted
+into GCG files and vice versa with the programs FROMSTADEN  and  TOSTADEN.
+IntelliGenetics  sequence  files  can  be interconverted with the programs
+FROMIG and TOIG.
+
+IUB/GCG      Meaning     Complement   Staden/Sanger  Stanford
+
+   A             A             T             A            A
+   C             C             G             C            C
+   G             G             C             G            G
+  T/U            T             A             T           T/U
+   M           A or C          K             5            J
+   R           A or G          Y             R            R
+   W           A or T          W             7            L
+   S           C or G          S             8            M
+   Y           C or T          R             Y            Y
+   K           G or T          M             6            K
+   V        A or C or G        B       not supported      N
+   H        A or C or T        D       not supported      N
+   D        A or G or T        H       not supported      N
+   B        C or G or T        V       not supported      N
+  X/N     G or A or T or C     X            -/X           N
+   .    not G or A or T or C   .       not supported      ?
+
+
+  The frame ambiguity codes used by Staden are not  supported  by  GCG
+and   are  translated  by  FROMSTADEN  as  the  lower  case  single  base
+equivalent.
+
+     Staden Code          Meaning              GCG
+
+         D                C or CC                c
+         V                T or TT                t
+         B                A or AA                a
+         H                G or GG                g
+         K                C or CX                c
+         L                T or TX                t
+         M                A or AX                a
+         N                G or GX                g
+
+
+                        AMINO ACIDS
+
+  Here is a list of the standard one-letter amino acid codes and their
+three-letter  equivalents.   The synonymous codons and their depiction in
+the IUB codes are shown.  You should recognize that the codons  following
+semicolons  (;)  are  not  sufficiently specific to define a single amino
+acid even though they represent the best possible back  translation  into
+the IUB codes!  All of the relationships in this list can be redefined by
+the user in a local data file described below.
+
+                                                      IUB
+Symbol 3-letter  Meaning      Codons                Depiction
+ A    Ala       Alanine      GCT,GCC,GCA,GCG         !GCX
+ B    Asp,Asn   Aspartic,
+                Asparagine   GAT,GAC,AAT,AAC         !RAY
+ C    Cys       Cysteine     TGT,TGC                 !TGY
+ D    Asp       Aspartic     GAT,GAC                 !GAY
+ E    Glu       Glutamic     GAA,GAG                 !GAR
+ F    Phe     Phenylalanine  TTT,TTC                 !TTY
+ G    Gly       Glycine      GGT,GGC,GGA,GGG         !GGX
+ H    His       Histidine    CAT,CAC                 !CAY
+ I    Ile       Isoleucine   ATT,ATC,ATA             !ATH
+ K    Lys       Lysine       AAA,AAG                 !AAR
+ L    Leu       Leucine      TTG,TTA,CTT,CTC,CTA,CTG
+!TTR,CTX,YTR;YTX
+ M    Met       Methionine   ATG                     !ATG
+ N    Asn       Asparagine   AAT,AAC                 !AAY
+ P    Pro       Proline      CCT,CCC,CCA,CCG         !CCX
+ Q    Gln       Glutamine    CAA,CAG                 !CAR
+ R    Arg       Arginine     CGT,CGC,CGA,CGG,AGA,AGG
+!CGX,AGR,MGR;MGX
+ S    Ser       Serine       TCT,TCC,TCA,TCG,AGT,AGC !TCX,AGY;WSX
+ T    Thr       Threonine    ACT,ACC,ACA,ACG         !ACX
+ V    Val       Valine       GTT,GTC,GTA,GTG         !GTX
+ W    Trp       Tryptophan   TGG                     !TGG
+ X    Xxx       Unknown                              !XXX
+ Y    Tyr       Tyrosine     TAT, TAC                !TAY
+ Z    Glu,Gln   Glutamic,
+                Glutamine    GAA,GAG,CAA,CAG         !SAR
+ *    End       Terminator   TAA, TAG, TGA           !TAR,TRA;TRR
+
+
+
+
+
+
+
+
+|||||||||||  docs from PSC on sequence formats:
+---------------------------------------------------
+
+
+          Nucleic Acid and Protein Sequence File Formats
+
+
+It will probably save you some time if you have your data in a usable
+format before you send it to us.  However, we do have the University of
+Wisconsin Genetics Computing Group programs running on our VAXen and
+this package includes several reformatting utilities.  Our programs
+usually recognize any of several standard formats, including GenBank,
+EMBL, NBRF, and MolGen/Stanford.  For the purposes of annotating an
+analysis we find the GenBank and EMBL formats most useful, particularly
+if you have already received an accession number from one of these
+organizations for your sequence.
+
+Our programs do not require that all of the line types available in
+GenBank, EMBL, or NBRF file formats be present for the file format to
+be recognized and processed.  The following pages outline the essential
+details required for correct processing of files by our programs.
+Additional information may be present but will generally be ignored.
+
+
+                      GenBank File Format
+
+File Header
+
+1.  The first line in the file must have "GENETIC SEQUENCE DATA BANK"
+    in spaces 20 through 46 (see LINE  1, below).
+2.  The next 8 lines may contain arbitrary text.  They are ignored but
+    are required to maintain the GenBank format (see LINE 2 - LINE 9).
+
+Sequence Data Entries
+
+3.  Each sequence entry in the file should have the following format.
+    a) first line:   Must have LOCUS in the first 5 spaces.  The
+                     genetic locus name or identifier must be in spaces
+                     13 - 22.  The length of the sequences is right
+                     justified in spaces 23 through 29 (see LINE  10).
+    b) second line:  Must have DEFINITION in the first 10 spaces.
+                     Spaces 13 - 80 are free form text to identify the
+                     sequence (see LINE  11).
+    c) third line:   Must have ACCESSION in the first 9 spaces.  Spaces
+                     13 - 18 must hold the primary accession number
+                     (see LINE  12).
+    d) fourth line:  Must have ORIGIN in the first 6 spaces.  Nothing
+                     else is required on this line, it indicates that
+                     the nucleic acid sequence begins on the next line
+                     (see LINE  13).
+    e) fifth line:   Begins the nucleotide sequence.  The first 9
+                     spaces of each sequence line may either be blank
+                     or may contain the position in the sequence of the
+                     first nucleotide on the line.  The next 66 spaces
+                     hold the nucleotide sequence in six blocks of ten
+                     nucleotides.  Each of the six blocks begins with a
+                     blank space followed by ten nucleotides.  Thus the
+                     first nucleotide is in space eleven of the line while
+                     the last is in space 75 (see LINE  14, LINE  15).
+    f) last line:    Must have // in the first 2 spaces to indicate
+                     termination of the sequence (see LINE  16).
+
+NOTE:  Multiple sequences may appear in each file.  To begin another
+       sequence go back to a) and start again.
+
+
+                         Example GenBank file
+
+
+LINE  1  :                   GENETIC SEQUENCE DATA BANK
+LINE  2  :
+LINE  3  :
+LINE  4  :
+LINE  5  :
+LINE  6  :
+LINE  7  :
+LINE  8  :
+LINE  9  :
+LINE 10  :LOCUS       L_Name     Length BP
+LINE 11  :DEFINITION  Describe the sequence any way you want
+LINE 12  :ACCESSION   Accession Number
+LINE 13  :ORIGIN
+LINE 14  :        1 acgtacgtac gtacgtacgt acgtacgtac gtacgtacgt a...
+LINE 15  :       61 acgt...
+LINE 16  ://
+
+
+
+                         EMBL File Format
+
+Unlike the GenBank file format the EMBL file format does not require
+a series of header lines.  Thus the first line in the file begins
+the first sequence entry of the file.
+
+1.  The first line of each sequence entry contains the two letters ID
+    in the first two spaces.  This is followed by the EMBL identifier
+    in spaces 6 through 14.  (See LINE  1).
+
+2.  The second line of each sequence entry has the two letters AC in
+    the first two spaces.  This is followed by the accession number in
+    spaces 6 through 11.  (See LINE  2).
+
+3.  The third line of each sequence entry has the two letters DE in the
+    first two spaces.  This is followed by a free form text definition
+    in spaces 6 through 72.  (See LINE  3).
+
+4.  The fourth line in each sequence entry has the two letters SQ in
+    the first two spaces.  This is followed by the length of the
+    sequence beginning at or after space 13.  After the sequence length
+    there is a blank space and the two letters BP.  (See LINE  4).
+
+5.  The nucleotide sequence begins on the fifth line of the sequence
+    entry.  Each line of sequence begins with four blank spaces. The
+    next 66 spaces hold the nucleotide sequence in six blocks of ten
+    nucleotides.  Each of the six blocks begins with a blank space
+    followed by ten nucleotides.  Thus the first nucleotide is in space
+    6 of the line while the last is in space 70.  (See LINE  5 -
+    LINE  6).
+
+6.  The last line of each sequence entry in the file is a terminator
+    line which has the two characters // in the first two spaces.
+    (See LINE  7).
+
+7.  Multiple sequences may appear in each file.  To begin another
+    sequence go back to item 1 and start again.
+
+
+                          Example EMBL file
+
+LINE  1  :ID   ID_name
+LINE  2  :AC   Accession number
+LINE  3  :DE   Describe the sequence any way you want
+LINE  4  :SQ          Length BP
+LINE  5  :     ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTA...
+LINE  6  :     ACGT...
+LINE  7  ://
+
+
+
+            NBRF (protein or nucleic acid) File Format
+
+1.  The first line of each sequence entry begins with a greater than
+  symbol, >.  This is immediately followed by the two character
+  sequence type specifier.  Space four must contain a semi-colon.
+  Beginning in space five is the sequence name or identification code
+  for the NBRF database.  The code is from four to six letters and
+  numbers.  (See LINE  1).
+
+!!!! >> add these to readseq
+          Specifier             Sequence type
+
+              P1                protein, complete
+              F1                protein, fragment
+              DL                DNA, linear
+              DC                DNA, circular
+              RL                RNA, linear
+              RC                RNA, circular
+              N1                functional RNA, other than tRNA
+              N3                tRNA
+
+2.  The second line of each sequence entry contains two kinds of
+  information.  First is the sequence name which is separated from
+  the organism or organelle name by the three character sequence
+  blank space, dash, blank space, " - ".  There is no special
+  character marking the beginning of this line.  (See LINE  2).
+
+3.  Either the amino acid or nucleic acid sequence begins on line three
+  and can begin in any space, including the first.  The sequence is
+  free format and may be interrupted by blanks for ease of reading.
+  Protein sequences may contain special punctuation to indicate
+  various indeterminacies in the sequence.  In the NBRF data files
+  all lines may be up to 500 characters long.  However some PSC
+  programs currently have a limit of 130 characters per line
+  (including blanks), and BitNet will not accept lines of over eighty
+  characters.  (See LINE  3, LINE  4, and LINE  5).
+
+  The last character in the sequence must be an asterisk, *.
+
+                       Example NBRF file
+
+ LINE  1  :>P1;CBRT
+ LINE  2  :Cytochrome b - Rat mitochondrion (SGC1)
+ LINE  3  :M T N I R K S H P L F K I I N H S F I D L P A P S
+ LINE  4  : VTHICRDVN Y GWL IRY
+ LINE  5  :TWIGGQPVEHPFIIIGQLASISYFSIILILMPISGIVEDKMLKWN*
+
+
+
+                MolGen/Stanford File Format
+
+1.  The first line in a sequence file is a comment line.  This line
+  begins with a semi-colon in the first space.  This line need
+  not be present.  If it is present it holds descriptive text.
+  There may be as many comment lines as desired at the start of
+  the sequence file.  (See LINE  1).
+
+2.  The second line must be present and contains an identifier or
+  name for the sequence in the first ten spaces.  (See LINE  2).
+
+3.  The sequence begins on the third line and occupies up to eighty
+  spaces.  Spaces may be included in the sequence for ease of
+  reading.  The sequence continues for as many lines as needed
+  and is terminated with a 1 or 2.  1 indicates a linear sequence
+  while 2 marks a circular sequence.  (See LINE  3 and LINE  4).
+
+                          Example MolGen/Stanford file
+
+LINE  1  :;  Describe the sequence any way you want
+LINE  2  :ECTRNAGLY2
+LINE  3  :ACGCACGTAC ACGTACGTAC   A C G T C C G T ACG TAC GTA CGT
+LINE  4  :  GCTTA   GG G C T A1
+
+
+
+
+|||||||||||  Phylip file format
+---------------------------------------------------
+
+        Phylip 3.3 File Format (DNA sequences)
+
+
+     The input and output formats for PROTPARS and for RESTML are described  in
+their  document  files.   In  general  their input formats are similar to those
+described here, except that the one-letter codes for data are specific to those
+programs  and  are  described in those document files.  Since the input formats
+for the eight DNA sequence programs apply to  all  eight,  they  are  described
+here.   Their  input  formats are standard: the data have A's, G's, C's and T's
+(or U's).  The first line of the input file contains the number of species  and
+the  number  of  sites.   As  with  the other programs, options information may
+follow this.  In the case of DNAML, DNAMLK,  and  DNADIST  an  additional  line
+(described  in  the  document file for these programs) may follow the first one.
+Following this, each species starts on a new line.  The first 10 characters  of
+that  line  are the species name.  There then follows the base sequence of that
+species, each character being one of the letters A, B, C, D, G, H, K, M, N,  O,
+R, S, T, U, V, W, X, Y, ?, or - (a period was also previously allowed but it is
+no longer allowed, because it sometimes is used in aligned sequences  to  mean
+"the  same  as  the  sequence  above").   Blanks  will  be ignored, and so will
+numerical digits.  This allows GENBANK and EMBL sequence  entries  to  be  read
+with minimum editing.
+
+     These characters can be  either  upper  or  lower  case.   The  algorithms
+convert  all  input  characters  to upper case (which is how they are treated).
+The characters constitute the IUPAC (IUB) nucleic acid code  plus  some  slight
+extensions.  They enable input of nucleic acid sequences taking full account of
+any ambiguities in the sequence.
+
+The sequences can continue over multiple lines; when this is done the sequences
+must  be  either  in  "interleaved"  format, similar to the output of alignment
+programs, or "sequential" format.  These are described  in  the  main  document
+file.   In sequential format all of one sequence is given, possibly on multiple
+lines, before the next starts.  In interleaved format the  first  part  of  the
+file  should  contain  the first part of each of the sequences, then possibly a
+line containing nothing but a carriage-return character, then the  second  part
+of  each  sequence, and so on.  Only the first parts of the sequences should be
+preceded by names.  Here is a hypothetical example of interleaved format:
+
+  5    42
+Turkey    AAGCTNGGGC ATTTCAGGGT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+Chimp     AAACCCTTGC CGTTACGCTT
+Gorilla   AAACCCTTGC CGGTACGCTT
+
+GAGCCCGGGC AATACAGGGT AT
+GAGCCGTGGC CGGGCACGGT AT
+ACAGGTTGGC CGTTCAGGGT AA
+AAACCGAGGC CGGGACACTC AT
+AAACCATTGC CGGTACGCTT AA
+
+while in sequential format the same sequences would be:
+
+  5    42
+Turkey    AAGCTNGGGC ATTTCAGGGT
+GAGCCCGGGC AATACAGGGT AT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+GAGCCGTGGC CGGGCACGGT AT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+ACAGGTTGGC CGTTCAGGGT AA
+Chimp     AAACCCTTGC CGTTACGCTT
+AAACCGAGGC CGGGACACTC AT
+Gorilla   AAACCCTTGC CGGTACGCTT
+AAACCATTGC CGGTACGCTT AA
+
+
+Note, of course, that a portion of a sequence like this:
+
+   300   AAGCGTGAAC GTTGTACTAA TRCAG
+
+is perfectly legal, assuming that the species name  has  gone  before,  and  is
+filled  out  to  full  length  by  blanks.  The above digits and blanks will be
+ignored, the sequence being taken as starting at the first base symbol (in this
+case an A).
+
+     The present versions of the programs may sometimes have difficulties  with
+the  blank  lines  between  groups of lines, and if so you might want to retype
+those lines, making sure that they have only a  carriage-return  and  no  blank
+characters on them, or you may perhaps have to eliminate them.  The symptoms of
+this problem are that the programs complain that the sequences are not properly
+aligned, and you can find no other cause for this complaint.
+
+------------------------------------------------
+
+
+|||||||||||  ASN.1 file format
+---------------------------------------------------
+
+
+ASN.1 -- see NCBI toolkit docs, source and examples (ncbi.nlm.nih.gov)
+
+Example asn.1 sequence file----
+
+Bioseq-set ::= {
+seq-set {
+  seq {
+    id { local id 1 } ,                 -- id essential
+    descr {  title "Dummy sequence data from nowhere"  } ,  -- optional
+    inst {                              -- inst essential
+      repr raw ,
+      mol dna ,
+      length 156 ,
+      topology linear ,
+      seq-data
+        iupacna "GAATTCATTTTTGAAACAAATCGACCTGACGACGGAATGGTACTCGAATTA
+TGGGCCAAAGGGTTTTATGGGACAAATTAATAGGTGTTCATTATATGCCACTTTCGGAGATTAGATACAGCAATGCAG
+TGGATTCAAAGCAATAGAGTTGTTCTT" 
+      } } ,
+
+        seq {
+          id { local id 2 } ,
+          descr {  title "Dummy sequence 2 data from somewhere else"  } ,
+          inst {
+                repr raw ,
+                mol dna ,
+                length 150 ,
+                topology linear ,
+                seq-data
+                  iupacna "TTTTTTTTTTTTGAAACAAATCGACCTGACGACGGAATGGTACTCGAATTA
+TGGGCCAAAGGGTTTTATGGGACAAATTAATAGGTGTTCATTATATGCCACTTTCGGAGATTAGATACAGCAATGCAG
+TGGATTCAAAGCAATAGAGTT" 
+            }
+          }
+        }
+      }
+
+
+partial ASN.1 description from toolkit
+
+Bioseq ::= SEQUENCE {
+    id SET OF Seq-id ,            -- equivalent identifiers
+    descr Seq-descr OPTIONAL , -- descriptors
+    inst Seq-inst ,            -- the sequence data
+    annot SET OF Seq-annot OPTIONAL }
+
+Seq-inst ::= SEQUENCE {            -- the sequence data itself
+    repr ENUMERATED {              -- representation class
+        not-set (0) ,              -- empty
+        virtual (1) ,              -- no seq data
+        raw (2) ,                  -- continuous sequence
+        seg (3) ,                  -- segmented sequence
+        const (4) ,                -- constructed sequence
+        ref (5) ,                  -- reference to another sequence
+        consen (6) ,               -- consensus sequence or pattern
+        map (7) ,                  -- ordered map (genetic, restriction)
+        other (255) } ,
+    mol ENUMERATED {               -- molecule class in living organism
+        not-set (0) ,              --   > cdna = rna
+        dna (1) ,
+        rna (2) ,
+        aa (3) ,
+        na (4) ,                   -- just a nucleic acid
+        other (255) } ,
+    length INTEGER OPTIONAL ,      -- length of sequence in residues
+    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
+    topology ENUMERATED {          -- topology of molecule
+        not-set (0) ,
+        linear (1) ,
+        circular (2) ,
+        tandem (3) ,               -- some part of tandem repeat
+        other (255) } DEFAULT linear ,
+    strand ENUMERATED {            -- strandedness in living organism
+        not-set (0) ,
+        ss (1) ,                   -- single strand
+        ds (2) ,                   -- double strand
+        mixed (3) ,
+        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
+    seq-data Seq-data OPTIONAL ,   -- the sequence
+    ext Seq-ext OPTIONAL ,         -- extensions for special types
+  hist Seq-hist OPTIONAL }       -- sequence history
+
+------------------------------------------------
diff --git a/Make.com b/Make.com
new file mode 100644
index 0000000..82da18a
--- /dev/null
+++ b/Make.com
@@ -0,0 +1,63 @@
+$!
+$!VAX-VMS cc make file for readseq
+$!
+$ echo := write sys$output
+$ if p1.eqs."TEST" then goto tests
+$
+$ echo "compiling readseq..."
+$ cc readseq, ureadseq
+$!
+$ echo "linking readseq..."
+$ link readseq, ureadseq, sys$library:vaxcrtl/lib
+$!
+$tests:
+$!
+$ echo "defining readseq symbol:"
+$ dd = f$environment("default")
+$ readseq :== $ 'dd'readseq.exe
+$ show symbol readseq
+$!
+$ echo ""
+$ echo "test for general read/write of all chars:"
+$ readseq -p alphabet.std -otest.alpha
+$ diff test.alpha alphabet.std
+$!
+$ echo ""
+$ echo "test for valid format conversions"
+$!
+$ readseq -v -p -f=ig   nucleic.std -otest.ig
+$ readseq -v -p -f=gb   test.ig     -otest.gb
+$ readseq -v -p -f=nbrf test.gb     -otest.nbrf
+$ readseq -v -p -f=embl test.nbrf   -otest.embl
+$ readseq -v -p -f=gcg  test.embl   -otest.gcg
+$ readseq -v -p -f=strider test.gcg -otest.strider
+$ readseq -v -p -f=fitch test.strider -otest.fitch
+$ readseq -v -p -f=fasta test.fitch -otest.fasta
+$ readseq -v -p -f=pir  test.fasta  -otest.pir
+$ readseq -v -p -f=ig   test.pir    -otest.ig-b
+$ diff test.ig test.ig-b
+$!
+$ echo ""
+$ echo "Test for multiple-sequence format conversions:"
+$ readseq -p -f=ig    multi.std   -otest.m-ig
+$ readseq -p -f=gb    test.m-ig   -otest.m-gb
+$ readseq -p -f=nbrf  test.m-gb   -otest.m-nbrf
+$ readseq -p -f=embl  test.m-nbrf -otest.m-embl
+$ readseq -p -f=fasta test.m-embl -otest.m-fasta
+$ readseq -p -f=pir   test.m-fasta -otest.m-pir
+$ readseq -p -f=msf   test.m-pir  -otest.m-msf
+$ readseq -p -f=paup  test.m-msf  -otest.m-paup
+$ readseq -p -f=ig    test.m-paup -otest.m-ig-b
+$ diff test.m-ig test.m-ig-b
+$ echo ""
+$ echo "Expect differences in the header lines due to"
+$ echo "different format headers.  If any sequence lines"
+$ echo "differ, or if checksums differ, there is a problem."
+$!
+$! #cleanup
+$! delete test.*;
+$ echo "-----------"
+$ echo ""
+$ echo "To clean up test files, command me:"
+$ echo "  DELETE test.*;"
+$!
diff --git a/Make.ncbi b/Make.ncbi
new file mode 100644
index 0000000..c502210
--- /dev/null
+++ b/Make.ncbi
@@ -0,0 +1,109 @@
+#
+# Unix Makefile for readseq
+# to use, command me:
+#  %  make       -- or --
+#  %  make CC=your-c-compiler-name
+#
+
+# pick an ANSI C compiler (the default Sun CC is not ANSI)
+CC=gcc  # Gnu C Compiler
+#CC=cc  # SGI Irix
+#CC=vcc # some DEC Ultrix
+
+CFLAGS=
+#CFLAGS= -DSMALLCHECKSUM  # if you prefer to use a GCG-standard 13 bit checksum
+#    instead of a full 32 bit checksum. This may enhance compatibility w/ GCG software
+
+SOURCES= readseq.c ureadseq.c ureadseq.h ureadasn.c
+DOCS= Readme readseq.help Formats Stdfiles Makefile Make.com add.gdemenu *.std
+
+
+# NCBI toolkit support for ASN.1 reader
+
+# this is path to NCBI toolkit, you must set for your system:
+NCBI=/bio/mb/ncbi
+#
+OTHERLIBS=-lm
+LIB1=-lncbi
+LIB2=-lncbiobj
+LIB3=-lncbicdr
+LIB4=-lvibrant
+INCPATH=$(NCBI)/include
+LIBPATH=$(NCBI)/lib
+NCFLAGS=$(CFLAGS) -DNCBI -I$(INCPATH)
+NLDFLAGS=-I$(INCPATH) -L$(LIBPATH)
+NLIBS=$(LIB1) $(LIB2) $(OTHERLIBS)
+
+
+all: build test
+
+#build: $(SOURCES)
+#	@echo "Compiling readseq..."
+#	$(CC) $(CFLAGS) -o readseq readseq.c ureadseq.c
+
+# NCBI toolkit build, enabled here in place of the plain build: above
+build: $(SOURCES)
+	@echo "Compiling readseq with NCBI toolkit support...";
+	$(CC) -o readseq $(NLDFLAGS) $(NCFLAGS) readseq.c ureadseq.c ureadasn.c $(NLIBS)
+
+test: $(SOURCES) readseq
+	@echo ""
+	@echo "Test for general read/write of all chars:"
+	./readseq -p alphabet.std -otest.alpha
+	-diff test.alpha alphabet.std
+
+	@echo ""
+	@echo "Test for valid format conversions:"
+	./readseq -v -p -f=ig   nucleic.std -otest.ig
+	./readseq -v -p -f=gb   test.ig     -otest.gb
+	./readseq -v -p -f=nbrf test.gb     -otest.nbrf
+	./readseq -v -p -f=embl test.nbrf   -otest.embl
+	./readseq -v -p -f=gcg  test.embl   -otest.gcg
+	./readseq -v -p -f=strider test.gcg -otest.strider
+	./readseq -v -p -f=fitch test.strider -otest.fitch
+	./readseq -v -p -f=fasta test.fitch -otest.fasta
+	./readseq -v -p -f=pir  test.fasta  -otest.pir
+	./readseq -v -p -f=ig   test.pir    -otest.ig-b
+	-diff test.ig test.ig-b
+
+	@echo ""
+	@echo "Test for multiple-sequence format conversions:"
+	./readseq -p -f=ig    multi.std   -otest.m-ig
+	./readseq -p -f=gb    test.m-ig   -otest.m-gb
+	./readseq -p -f=nbrf  test.m-gb   -otest.m-nbrf
+	./readseq -p -f=embl  test.m-nbrf -otest.m-embl
+	./readseq -p -f=fasta test.m-embl -otest.m-fasta
+	./readseq -p -f=pir   test.m-fasta -otest.m-pir
+	./readseq -p -f=msf   test.m-pir  -otest.m-msf
+	./readseq -p -f=paup  test.m-msf  -otest.m-paup
+	./readseq -p -f=ig    test.m-paup -otest.m-ig-b
+	-diff test.m-ig test.m-ig-b
+#
+# NCBI ASN.1 tests (comment these lines out if not using NCBI)
+	@echo ""
+	@echo "Test of NCBI ASN.1 conversions:"
+	./readseq -p -f=asn test.m-ig  -otest.m-asn
+	./readseq -p -f=ig  test.m-asn -otest.m-ig-c
+	-diff test.m-ig test.m-ig-c
+#
+	@echo ""
+	@echo "Expect differences in the header lines due to"
+	@echo "different format headers.  If any sequence lines"
+	@echo "differ, or if the checksums differ, there is a problem."
+	@echo "----------------------"
+	@echo ""
+	@echo "To clean up test files, command me:"
+	@echo "    make clean"
+
+
+clean:
+	rm -f *.o core test.*
+
+shar:
+	@echo "shell archiving files..."
+	-rm -f readseq*.shar
+	mkdir readseqd
+	cp $(SOURCES) readseqd
+	cp $(DOCS) readseqd
+	shar -v readseqd > readseq.shar
+	rm -rf readseqd
diff --git a/Readme b/Readme
new file mode 100644
index 0000000..6efd1f4
--- /dev/null
+++ b/Readme
@@ -0,0 +1,160 @@
+
+ * ReadSeq  -- 1 Feb 93
+ *
+ * Reads and writes nucleic/protein sequences in various
+ * formats. Data files may have multiple sequences.
+ *
+ * Copyright 1990 by d.g.gilbert
+ * biology dept., indiana university, bloomington, in 47405
+ * e-mail: gilbertd@bio.indiana.edu
+ *
+ * This program may be freely copied and used by anyone.
+ * Developers are encouraged to incorporate parts in their
+ * programs, rather than devise their own private sequence
+ * format.
+ *
+ * This should compile and run with any ANSI C compiler.
+ * Please advise me of any bugs, additions or corrections.
+
+Readseq has been updated.   There have been a number of enhancements
+and a few bug corrections since the previous general release in Nov 91
+(see below).  If you are using earlier versions, I recommend you update to
+this release.
+
+Readseq is particularly useful as it automatically detects many
+sequence formats, and interconverts among them.
+Formats added to this release include
+  + MSF multi sequence format used by GCG software
+  + PAUP's multiple sequence (NEXUS) format
+  + PIR/CODATA format used by PIR
+  + ASN.1 format used by NCBI
+  + Pretty print with various options for nice looking output.
+
+As well, Phylip format can now be used as input.  Options to
+reverse-complement and to degap sequences have been added.  A menu
+addition for users of the GDE sequence editor is included.
+
+This program is available thru Internet gopher, as
+
+  gopher ftp.bio.indiana.edu
+  browse into the IUBio-Software+Data/molbio/readseq/ folder
+  select the readseq.shar document
+
+Or thru anonymous FTP in this manner:
+  my_computer> ftp  ftp.bio.indiana.edu  (or IP address 129.79.224.25)
+    username:  anonymous
+    password:  my_username@my_computer
+  ftp> cd molbio/readseq
+  ftp> get readseq.shar
+  ftp> bye
+
+readseq.shar is a Unix shell archive of the readseq files.
+This file can be edited by any text editor to reconstitute the
+original files, for those who do not have a Unix system or an
+Unshar program.  Read the top of this .shar file for further
+instructions.
+
+There are also pre-compiled executables for the following computers:
+Silicon Graphics Iris, Sparc (Sun Sparcstation & clones), VMS-Vax,
+Macintosh. Use binary ftp to transfer these, except Macintosh.  The
+Mac version is just the command-line program in a window, not very
+handy.
+
+C source files:
+  readseq.c ureadseq.c ureadasn.c ureadseq.h
+Document files:
+  Readme (this doc)
+  Readseq.help (longer than this doc)
+  Formats (description of sequence file formats)
+  add.gdemenu (GDE program users can add this to the .GDEmenu file)
+  Stdfiles -- test sequence files
+  Makefile -- Unix make file
+  Make.com -- VMS make file
+  *.std    -- files for testing validity of readseq
+
+
+Example usage:
+  readseq
+      -- for interactive use
+  readseq my.1st.seq  my.2nd.seq  -all  -format=genbank  -output=my.gb
+      -- convert all of two input files to one genbank format output file
+  readseq my.seq -all -form=pretty -nameleft=3 -numleft -numright -numtop -match
+      -- output to standard output a file in a pretty format
+  readseq my.seq -item=9,8,3,2 -degap -CASE -rev -f=msf -out=my.rev
+      -- select 4 items from input, degap, reverse, and uppercase them
+  cat *.seq | readseq -pipe -all -format=asn > bunch-of.asn
+      -- pipe a bunch of data thru readseq, converting all to asn
+
+
+The brief usage of readseq is as follows. The "[]" denote
+optional parts of the syntax:
+
+  readseq -help
+readSeq (27Dec92), multi-format molbio sequence reader.
+usage: readseq [-options] in.seq > out.seq
+ options
+    -a[ll]         select All sequences
+    -c[aselower]   change to lower case
+    -C[ASEUPPER]   change to UPPER CASE
+    -degap[=-]     remove gap symbols
+    -i[tem=2,3,4]  select Item number(s) from several
+    -l[ist]        List sequences only
+    -o[utput=]out.seq  redirect Output
+    -p[ipe]        Pipe (command line, <stdin, >stdout)
+    -r[everse]     change to Reverse-complement
+    -v[erbose]     Verbose progress
+    -f[ormat=]#    Format number for output,  or
+    -f[ormat=]Name Format name for output:
+         1. IG/Stanford           10. Olsen (in-only)
+         2. GenBank/GB            11. Phylip3.2
+         3. NBRF                  12. Phylip
+         4. EMBL                  13. Plain/Raw
+         5. GCG                   14. PIR/CODATA
+         6. DNAStrider            15. MSF
+         7. Fitch                 16. ASN.1
+         8. Pearson/Fasta         17. PAUP
+         9. Zuker                 18. Pretty (out-only)
+
+   Pretty format options:
+    -wid[th]=#            sequence line width
+    -tab=#                left indent
+    -col[space]=#         column space within sequence line on output
+    -gap[count]           count gap chars in sequence numbers
+    -nameleft, -nameright[=#]   name on left/right side [=max width]
+    -nametop              name at top/bottom
+    -numleft, -numright   seq index on left/right side
+    -numtop, -numbot      index on top/bottom
+    -match[=.]            use match base for 2..n species
+    -inter[line=#]        blank line(s) between sequence blocks
+
+
+
+Recent changes:
+
+4 May 92
++ added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
+Aug 92
+= fixed Olsen format input to handle files w/ more sequences,
+  not to mess up when more than one seq has same identifier,
+  and to convert number masks to symbols.
+= IG format fix to understand ^L
+30 Dec 92
+* revised command-line & interactive interface.  Suggested form is now
+    readseq infile -format=genbank -output=outfile -item=1,3,4 ...
+  but remains compatible with prior commandlines:
+    readseq infile -f2 -ooutfile -i3 ...
++ added GCG MSF multi sequence file format
++ added PIR/CODATA format
++ added NCBI ASN.1 sequence file format
++ added Pretty, multi sequence pretty output (only)
++ added PAUP multi seq format
++ added degap option
++ added Gary Williams (GWW, G.Williams@CRC.AC.UK) reverse-complement option.
++ added support for reading Phylip formats (interleave & sequential)
+* string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER, NEEDSTRCASECMP
+* changed 32bit checksum to default, -DSMALLCHECKSUM for GCG version
+
+1Feb93
+= reverted Genbank output format to fixed left margin 
+  (change in 30 Dec release), so GDE and others relying on fixed margin
+  can read this.
diff --git a/Readseq.help b/Readseq.help
new file mode 100644
index 0000000..08fdc08
--- /dev/null
+++ b/Readseq.help
@@ -0,0 +1,229 @@
+
+ * ReadSeq.Help -- 30 Dec 92
+ *
+ * Reads and writes nucleic/protein sequences in various
+ * formats. Data files may have multiple sequences.
+ *
+ * Copyright 1990 by d.g.gilbert
+ * biology dept., indiana university, bloomington, in 47405
+ * e-mail: gilbertd@bio.indiana.edu
+ *
+ * This program may be freely copied and used by anyone.
+ * Developers are encouraged to incorporate parts in their
+ * programs, rather than devise their own private sequence
+ * format.
+ *
+ * This should compile and run with any ANSI C compiler.
+ * Please advise me of any bugs, additions or corrections.
+
+Readseq is particularly useful as it automatically detects many
+sequence formats, and interconverts among them.
+
+Formats which readseq currently understands:
+
+  * IG/Stanford, used by Intelligenetics and others
+  * GenBank/GB, genbank flatfile format
+  * NBRF format
+  * EMBL, EMBL flatfile format
+  * GCG, single sequence format of GCG software
+  * DNAStrider, for common Mac program
+  * Fitch format, limited use
+  * Pearson/Fasta, a common format used by Fasta programs and others
+  * Zuker format, limited use. Input only.
+  * Olsen, format printed by Olsen VMS sequence editor. Input only.
+  * Phylip3.2, sequential format for Phylip programs
+  * Phylip, interleaved format for Phylip programs (v3.3, v3.4)
+  * Plain/Raw, sequence data only (no name, document, numbering)
+  + MSF multi sequence format used by GCG software
+  + PAUP's multiple sequence (NEXUS) format
+  + PIR/CODATA format used by PIR
+  + ASN.1 format used by NCBI
+  + Pretty print with various options for nice looking output. Output only.
+
+See the included "Formats" file for detail on file formats.
+
+
+Example usage:
+  readseq
+      -- for interactive use
+
+  readseq my.1st.seq  my.2nd.seq  -all  -format=genbank  -output=my.gb
+      -- convert all of two input files to one genbank format output file
+
+  readseq my.seq -all -form=pretty -nameleft=3 -numleft -numright -numtop -match
+      -- output to standard output a file in a pretty format
+
+  readseq my.seq -item=9,8,3,2 -degap -CASE -rev -f=msf -out=my.rev
+      -- select 4 items from input, degap, reverse, and uppercase them
+
+  cat *.seq | readseq -pipe -all -format=asn > bunch-of.asn
+      -- pipe a bunch of data thru readseq, converting all to asn
+
+
+The brief usage of readseq is as follows. The "[]" denote
+optional parts of the syntax:
+
+readseq -help
+readSeq (27Dec92), multi-format molbio sequence reader.
+usage: readseq [-options] in.seq > out.seq
+ options
+    -a[ll]         select All sequences
+    -c[aselower]   change to lower case
+    -C[ASEUPPER]   change to UPPER CASE
+    -degap[=-]     remove gap symbols
+    -i[tem=2,3,4]  select Item number(s) from several
+    -l[ist]        List sequences only
+    -o[utput=]out.seq  redirect Output
+    -p[ipe]        Pipe (command line, <stdin, >stdout)
+    -r[everse]     change to Reverse-complement
+    -v[erbose]     Verbose progress
+    -f[ormat=]#    Format number for output,  or
+    -f[ormat=]Name Format name for output:
+         1. IG/Stanford           10. Olsen (in-only)
+         2. GenBank/GB            11. Phylip3.2
+         3. NBRF                  12. Phylip
+         4. EMBL                  13. Plain/Raw
+         5. GCG                   14. PIR/CODATA
+         6. DNAStrider            15. MSF
+         7. Fitch                 16. ASN.1
+         8. Pearson/Fasta         17. PAUP
+         9. Zuker                 18. Pretty (out-only)
+
+   Pretty format options:
+    -wid[th]=#            sequence line width
+    -tab=#                left indent
+    -col[space]=#         column space within sequence line on output
+    -gap[count]           count gap chars in sequence numbers
+    -nameleft, -nameright[=#]   name on left/right side [=max width]
+    -nametop              name at top/bottom
+    -numleft, -numright   seq index on left/right side
+    -numtop, -numbot      index on top/bottom
+    -match[=.]            use match base for 2..n species
+    -inter[line=#]        blank line(s) between sequence blocks
+
+
+Notes:
+
+In use, readseq will respond to command line arguments, or to
+interactive use.  Command line arguments cannot be combined
+but must each follow a switch character (-).  In this release,
+the command line options are now words, with an equals (=)
+to separate parameter(s) from the command.  You cannot put a
+space between a command and its parameter, as is usual for
+Unix programs (this is to preserve compatibility with VMS).
+The command line syntax of the earlier versions is still
+supported.
+
+See the file Formats for details of the sequence formats which
+are supported by readseq.  The auto-detection feature of
+readseq which distinguishes these formats looks for some of the
+unique keywords and symbols that are found in each format. It
+is not infallible at this, though it attempts to exclude unknown
+formats.  In general, if you feed to readseq a sequence file that
+you know is one of these common formats, you are okay.  If you feed
+it data that might be oddball formats, or non-sequence data,
+you might well get garbage results.  Also, different developers
+are always thinking up minor twists on these common formats
+(like PAUP requiring a blank line between blocks of Phylip format,
+or IG adding form feeds between sequences), which may cause hassles.
+
+In general, output supports only minimal subsets of each format
+needed for sequence data exchanges.  Features, descriptions
+and other format-unique information are discarded.
+
+The pretty format requires additional options to generate a
+nice output.  Try the various pretty options to see what you like.
+Pretty format is OUTPUT only; readseq cannot read a Pretty format
+file.
+
+Readseq is NOT optimized for LARGE files.  It generally makes several
+reads thru each input file (one per sequence output at present, future
+version may optimize this).  It should handle input and output files
+and sequences of any size, but will slow down quite a bit for very large
+(multi megabyte) sized files. It is NOT recommended for converting
+databanks or large subsets thereof.  It is primarily directed at the
+small files that researchers use to maintain their personal data, which
+they frequently need to interconvert for the various analysis programs
+which so frequently require a special format.
+
+Note for users of the Olsen multi sequence editor (VMS):  The Olsen format
+here is produced with the print command:
+  print/out=some.file
+Use Genbank output from readseq to produce a format that this
+editor can read, and use the command
+  load/genbank some.file
+Dan Davison has a VMS program that will convert to/from the
+Olsen native binary data format.  E-mail davison@uh.edu
+
+Warning: Phylip format input is now supported (30Dec92), however the
+auto-detection of Phylip format is very probabilistic and messy,
+especially distinguishing sequential from interleaved versions. It
+is not recommended that one use readseq to convert files from Phylip
+format to others unless essential.
+
+
+This program is available thru Internet gopher, as
+
+  gopher ftp.bio.indiana.edu
+  browse into the IUBio-Software+Data/molbio/readseq/ folder
+  select the readseq.shar document
+
+Or thru anonymous FTP in this manner:
+  my_computer> ftp  ftp.bio.indiana.edu  (or IP address 129.79.224.25)
+    username:  anonymous
+    password:  my_username@my_computer
+  ftp> cd molbio/readseq
+  ftp> get readseq.shar
+  ftp> bye
+
+readseq.shar is a Unix shell archive of the readseq files.
+This file can be edited by any text editor to reconstitute the
+original files, for those who do not have a Unix system or an
+Unshar program.  Read the top of this .shar file for further
+instructions.
+
+There are also pre-compiled executables for the following computers:
+Silicon Graphics Iris, Sparc (Sun Sparcstation & clones), VMS-Vax,
+Macintosh. Use binary ftp to transfer these, except Macintosh.  The
+Mac version is just the command-line program in a window, not very
+handy.
+
+C source files:
+  readseq.c ureadseq.c ureadasn.c ureadseq.h
+
+Document files:
+  Readme (short introduction), Readseq.help (this doc)
+  Formats (description of sequence file formats)
+  add.gdemenu (GDE program users can add this to the .GDEmenu file)
+  Stdfiles -- test sequence files
+  Makefile -- Unix make file
+  Make.com -- VMS make file
+  *.std    -- files for testing validity of readseq
+
+
+Recent changes (see also readseq.c for all history of changes):
+
+4 May 92
++ added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
+Aug 92
+= fixed Olsen format input to handle files w/ more sequences,
+  not to mess up when more than one seq has same identifier,
+  and to convert number masks to symbols.
+= IG format fix to understand ^L
+30 Dec 92
+* revised command-line & interactive interface.  Suggested form is now
+    readseq infile -format=genbank -output=outfile -item=1,3,4 ...
+  but remains compatible with prior commandlines:
+    readseq infile -f2 -ooutfile -i3 ...
++ added GCG MSF multi sequence file format
++ added PIR/CODATA format
++ added NCBI ASN.1 sequence file format
++ added Pretty, multi sequence pretty output (only)
++ added PAUP multi seq format
++ added degap option
++ added Gary Williams (GWW, G.Williams@CRC.AC.UK) reverse-complement option.
++ added support for reading Phylip formats (interleave & sequential)
+* string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER, NEEDSTRCASECMP
+* changed 32bit checksum to default, -DSMALLCHECKSUM for GCG version
+
+
diff --git a/Stdfiles b/Stdfiles
new file mode 100644
index 0000000..bd7efc5
--- /dev/null
+++ b/Stdfiles
@@ -0,0 +1,134 @@
+/* Stdfiles 
+	generate standard files to test readseq
+*/
+
+C
+#include <stdio.h>
+/* writes a '>' header line, then every char '!'..'~'; no sequence formats use chars > #126, ignore these */
+int main(void)
+{
+	int c;
+	puts("> alphabet['!'..'~']");
+	for (c = '!'; c <= '~'; c++) putc(c, stdout);	/* all on one output line */
+	return putc('\n', stdout) == EOF;	/* exit status 0 unless the final write failed */
+}
+
+link  -w  -t MPST -c 'MPS ' c.o  ∂
+		"{Libraries}"Interface.o  "{Libraries}"ToolLibs.o ∂
+		"{Libraries}"Runtime.o  "{CLibraries}"StdClib.o 
+link.out > alphabet.orig
+
+
+C
+#include <stdio.h>
+#include <string.h>	/* strcpy, strcat */
+int main(void)	/* prints a '>' header, then the combined symbol set on four identical lines */
+{
+/* note: symbols "*" and "/" removed as terminators for various formats */
+const char *aminos		= "ABCDEFGHIKLMNPQRSTVWXYZ";  
+const char *primenuc	= "ACGTU";
+const char *allsymbols 	= "_.-?<>{}[]()!@#$%^&=+;:'|`~\"\\";
+	char *c, all[256];	/* the three strings total 57 chars, well inside the buffer */
+	int	count;
+	strcpy(all, aminos);
+	strcat(all, primenuc);
+	strcat(all, allsymbols);
+	puts("> nucleic/amino test");
+	for (count=0; count<4; count++) {
+		for (c = all; *c!=0; c++) putc(*c, stdout);
+		putc('\n', stdout);
+		}
+	return 0;
+}
+
+link  -w  -t MPST -c 'MPS ' c.o  ∂
+		"{Libraries}"Interface.o  "{Libraries}"ToolLibs.o ∂
+		"{Libraries}"Runtime.o  "{CLibraries}"StdClib.o 
+link.out > nucleic.std
+
+#--------------------------
+
+#standards (ship w/ readseq)
+#note: not all alphabet.orig chars are expected to be passed by
+#     readseq.  Numbers are dropped.
+readseq -p alphabet.orig > alphabet.std
+readseq -p -C  alphabet.std > upper.std
+
+cat alphabet.orig
+	> alphabet['!'..'~']
+	!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+
+cat alphabet.std
+	>alphabet['!'..'~'], 83 bases, 9429 checksum.
+	!"#$%&'()*+-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]
+	^_`abcdefghijklmnopqrstuvwxyz{|}~
+
+cat upper.std
+	>alphabet['!'..'~'], 83 bases, 9429 checksum.
+	!"#$%&'()*+-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]
+	^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~
+
+cat nucleic.std
+	> nucleic/amino test
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;:'|`~"\
+
+readseq -p nucleic.std
+	>nucleic/amino test, 228 bases, 5952 checksum.
+	ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#$%^&=+;
+	:'|`~"\ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}[]()!@#
+	$%^&=+;:'|`~"\ABCDEFGHIKLMNPQRSTVWXYZACGTU_.-?<>{}
+	[]()!@#$%^&=+;:'|`~"\ABCDEFGHIKLMNPQRSTVWXYZACGTU_
+	.-?<>{}[]()!@#$%^&=+;:'|`~"\
+
+
+#----------------------------------
+
+#test for general read/write of all chars:
+readseq -p alphabet.std -otest.alpha
+diff test.alpha alphabet.std
+
+#test for valid toupper, general read/write:
+readseq -p -C  alphabet.std -otest.upper
+diff test.upper upper.std
+#for vms, use "-C" to preserve case
+# readseq -p "-C"  alphabet.std -otest.upper
+
+#test for multiple sequence file conversions
+# leave out gcg, raw; 
+# test of long seq conversion ?
+# test of mail-header seq conversion ?
+
+#test for valid format conversions
+readseq -v -p -f1 nucleic.std -otest.f1
+readseq -v -p -f2 test.f1 -otest.f2
+readseq -v -p -f3 test.f2 -otest.f3
+readseq -v -p -f4 test.f3 -otest.f4
+readseq -v -p -f5 test.f4 -otest.f5
+readseq -v -p -f6 test.f5 -otest.f6
+readseq -v -p -f7 test.f6 -otest.f7
+readseq -v -p -f8 test.f7 -otest.f8
+readseq -v -p -f1 test.f8 -otest.f1b   
+diff test.f1 test.f1b
+compare test.f1 test.f1b
+
+readseq -v -p -f13 test.f8 -otest.f13   # raw, drops name
+readseq -v -p -f9 test.f8 -otest.f9   	# zuker, little used
+#readseq -v -p -f10 test.f9 -otest.f10  # olsen, input only (output=raw)
+readseq -v -p -f11 test.f8 -otest.f11	# phylip 3.2, output only
+readseq -v -p -f12 test.f8 -otest.f12	# phylip 3.3, output only
+readseq -v -p -f14 test.f8 -otest.f14	# PIR/CODATA (format 14 in the Formats list)
+
+
+#clean up
+rm test.≈
+
+
+#-----------------------------
+# some general tests
+
+readseq -h 
+
+readseq
diff --git a/add.gdemenu b/add.gdemenu
new file mode 100644
index 0000000..12818f6
--- /dev/null
+++ b/add.gdemenu
@@ -0,0 +1,123 @@
+#
+# dgg added new readseq formats, 29 dec 92
+#
+
+item:Export Foreign Format
+itemmethod:readseq in1 -pipe -all -form=$FORMAT > $OUTPUTFILE
+itemhelp:readseq.help
+
+arg:FORMAT
+argtype:choice_menu
+argchoice:GenBank:genbank
+argchoice:IG/Stanford:ig
+argchoice:NBRF:nbrf
+argchoice:EMBL:embl
+argchoice:GCG:gcg
+argchoice:DNA Strider:strider
+argchoice:Fitch:fitch
+argchoice:Pearson/Fasta:pearson
+argchoice:Zuker:zuker
+argchoice:Olsen:olsen
+argchoice:Phylip:phylip
+#argchoice:Phylip v3.2:phylip3.2
+argchoice:Plain text:raw
+argchoice:ASN.1:asn
+argchoice:PIR:pir
+argchoice:MSF:msf
+argchoice:PAUP:paup
+argchoice:Pretty:pretty -nametop -nameleft=3 -numright -nameright -numtop
+
+arg:OUTPUTFILE
+argtype:text
+arglabel:Save as?
+
+in:in1
+informat:genbank
+
+
+#
+#dgg addition for new readseq, 24 dec 92
+#
+
+item:Pretty Print
+itemmethod:readseq in1 -p -a -f=pretty $NAMELEFT $NAMERIGHT $NUMTOP $NUMBOT $NUMLEFT $NUMRIGHT -col=$COLS -width=$WIDTH $MATCH $GAPC > in1.pretty; (textedit in1.pretty; /bin/rm -f in1 in1.pretty)&
+itemhelp:readseq.help
+
+# NAMETOP is defined below but is not passed in itemmethod above: nametop is bad !?
+
+in:in1
+informat:genbank
+
+arg:NAMETOP
+argtype:chooser
+arglabel:Names at top  ?
+argchoice:No:
+argchoice:Yes:-nametop
+
+arg:NAMELEFT
+argtype:chooser
+arglabel:Names at left ?
+argchoice:No:
+argchoice:Yes:-nameleft
+
+arg:NAMERIGHT
+argtype:chooser
+arglabel:Names at right?
+argchoice:Yes:-nameright
+argchoice:No:
+
+arg:NUMTOP
+argtype:chooser
+arglabel:Numbers at top  ?
+argchoice:Yes:-numtop
+argchoice:No:
+
+arg:NUMBOT
+argtype:chooser
+arglabel:Numbers at tail ?
+argchoice:No:
+argchoice:Yes:-numbot
+
+arg:NUMLEFT
+argtype:chooser
+arglabel:Numbers at left ?
+argchoice:Yes:-numleft
+argchoice:No:
+
+arg:NUMRIGHT
+argtype:chooser
+arglabel:Numbers at right?
+argchoice:Yes:-numright
+argchoice:No:
+
+arg:MATCH
+argtype:chooser
+arglabel:Use match '.' for 2..n species?
+argchoice:No:
+argchoice:Yes:-match
+
+arg:GAPC
+argtype:chooser
+arglabel:Count gap symbols?
+argchoice:No:
+argchoice:Yes:-gap
+
+arg:WIDTH
+argtype:slider
+arglabel:Sequence width?
+argmin:10
+argmax:200
+argvalue:50
+
+arg:COLS
+argtype:slider
+arglabel:Column spacers?
+argmin:0
+argmax:50
+argvalue:10
+
+
+### pretty print insert end
+#
+
+
diff --git a/macinit.r b/macinit.r
new file mode 100644
index 0000000..3dd9c4b
--- /dev/null
+++ b/macinit.r
@@ -0,0 +1,412 @@
+/*------------------------------------------------------------------------------
+#
+#
+#	MultiFinder-Aware Simple Input/Output Window resource
+#
+#	for ReadSeq
+#
+------------------------------------------------------------------------------*/
+
+#include "systypes.r"
+#include "types.r"
+
+
+resource 'MENU' (20000, preload) {	/* apple-titled menu: an About item followed by a separator */
+	20000,
+	textMenuProc,
+	0x7FFFFFFD,
+	enabled,
+	apple,
+	{	/* array: 2 elements */
+		/* [1] */
+		"About ReadSeq\0xC9", noIcon, noKey, noMark, plain,	/* \0xC9 = ellipsis in Mac OS Roman, written as an escape so it survives re-encoding */
+		/* [2] */
+		"-", noIcon, noKey, noMark, plain
+	}
+};
+
+resource 'MENU' (20001, preload) {	/* File menu; New/Open/Close/Save/Quit carry key equivalents N/O/W/S/Q */
+	20001,
+	textMenuProc,
+	0x0,
+	enabled,
+	"File",
+	{	/* array: 11 elements */
+		/* [1] */
+		"New", noIcon, "N", noMark, plain,
+		/* [2] */
+		"Open", noIcon, "O", noMark, plain,
+		/* [3] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [4] */
+		"Close", noIcon, "W", noMark, plain,
+		/* [5] */
+		"Save", noIcon, "S", noMark, plain,
+		/* [6] */
+		"Save As\0xC9", noIcon, noKey, noMark, plain,	/* \0xC9 = ellipsis in Mac OS Roman */
+		/* [7] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [8] */
+		"Page Setup\0xC9", noIcon, noKey, noMark, plain,
+		/* [9] */
+		"Print\0xC9", noIcon, noKey, noMark, plain,
+		/* [10] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [11] */
+		"Quit", noIcon, "Q", noMark, plain
+	}
+};
+
+resource 'MENU' (20002, preload) {	/* Edit menu; Undo/Cut/Copy/Paste carry key equivalents Z/X/C/V, Clear has none */
+	20002,
+	textMenuProc,
+	0x0,
+	enabled,
+	"Edit",
+	{	/* array: 6 elements */
+		/* [1] */
+		"Undo", noIcon, "Z", noMark, plain,
+		/* [2] */
+		"-", noIcon, noKey, noMark, plain,
+		/* [3] */
+		"Cut", noIcon, "X", noMark, plain,
+		/* [4] */
+		"Copy", noIcon, "C", noMark, plain,
+		/* [5] */
+		"Paste", noIcon, "V", noMark, plain,
+		/* [6] */
+		"Clear", noIcon, noKey, noMark, plain
+	}
+};
+
+resource 'MENU' (20003, preload) {	/* Font menu, declared with no items (presumably filled in at run time - TODO confirm) */
+	20003,
+	textMenuProc,
+	allEnabled,
+	enabled,
+	"Font",
+	{	/* array: 0 elements */
+	}
+};
+
+resource 'ALRT' (20000, purgeable) {	/* alert 20000; the 20000 after the rectangle matches DITL 20000 (the About text), presumably its item list */
+	{98, 108, 314, 405},
+	20000,
+	{	/* array: 4 elements */
+		/* [1] */
+		OK, visible, silent,
+		/* [2] */
+		OK, visible, silent,
+		/* [3] */
+		OK, visible, silent,
+		/* [4] */
+		OK, visible, silent
+	}
+};
+
+resource 'ALRT' (20001, purgeable) {	/* alert 20001; the 20001 after the rectangle matches DITL 20001 ("Error. ^0."), presumably its item list */
+	{40, 20, 150, 260},
+	20001,
+	{	/* array: 4 elements */
+		/* [1] */
+		OK, visible, silent,
+		/* [2] */
+		OK, visible, silent,
+		/* [3] */
+		OK, visible, silent,
+		/* [4] */
+		OK, visible, silent
+	}
+};
+
+resource 'ALRT' (20002, preload) {	/* alert 20002; the 20002 after the rectangle matches DITL 20002 (save-changes prompt), presumably its item list */
+	{72, 64, 212, 372},
+	20002,
+	{	/* array: 4 elements */
+		/* [1] */
+		OK, visible, silent,
+		/* [2] */
+		OK, visible, silent,
+		/* [3] */
+		OK, visible, silent,
+		/* [4] */
+		OK, visible, silent
+	}
+};
+
+resource 'DITL' (20000, purgeable) {	/* About box items: an OK button and seven static text lines */
+	{	/* array DITLarray: 8 elements */
+		/* [1] */
+		{191, 98, 211, 178},
+		Button {
+			enabled,
+			"OK"
+		},
+		/* [2] */
+		{110, 24, 130, 256},
+		StaticText {
+			disabled,
+			" Copyright \0xA9 1990 by d.g.gilbert\n"	/* \0xA9 = copyright sign in Mac OS Roman, written as an escape so it survives re-encoding */
+		},
+		/* [3] */
+		{6, 93, 24, 281},
+		StaticText {
+			disabled,
+			"A tool for molecular biology."
+		},
+		/* [4] */
+		{31, 25, 86, 281},
+		StaticText {
+			disabled,
+			"Reads and writes nucleic or protein sequ"
+			"ences in various formats. Data files may"
+			" have multiple sequences."
+		},
+		/* [5] */
+		{6, 17, 22, 92},
+		StaticText {
+			disabled,
+			"ReadSeq"
+		},
+		/* [6] */
+		{150, 28, 186, 262},
+		StaticText {
+			disabled,
+			"land mail: biology dept., indiana univer"
+			"sity, bloomington, in 47405\n"
+		},
+		/* [7] */
+		{129, 25, 153, 258},
+		StaticText {
+			disabled,
+			" e-mail: gilbertd@bio.indiana.edu\n"
+		},
+		/* [8] */
+		{86, 12, 107, 281},
+		StaticText {
+			disabled,
+			"This program may be freely distributed."
+		}
+	}
+};
+
+resource 'DITL' (20001, purgeable) {	/* error dialog items: OK button, message text, icon */
+	{	/* array DITLarray: 3 elements */
+		/* [1] */
+		{80, 150, 100, 230},
+		Button {
+			enabled,
+			"OK"
+		},
+		/* [2] */
+		{10, 60, 60, 230},
+		StaticText {
+			disabled,
+			"Error. ^0."	/* ^0 is presumably a placeholder replaced with the message at run time - TODO confirm */
+		},
+		/* [3] */
+		{8, 8, 40, 40},
+		Icon {
+			disabled,
+			2
+		}
+	}
+};
+
+resource 'DITL' (20002, preload) {	/* save-changes prompt items: Yes, No and Cancel buttons plus the question text */
+	{	/* array DITLarray: 4 elements */
+		/* [1] */
+		{58, 25, 76, 99},
+		Button {
+			enabled,
+			"Yes"
+		},
+		/* [2] */
+		{86, 25, 104, 99},
+		Button {
+			enabled,
+			"No"
+		},
+		/* [3] */
+		{12, 20, 45, 277},
+		StaticText {
+			disabled,
+			"Save changes before closing?"
+		},
+		/* [4] */
+		{86, 195, 104, 269},
+		Button {
+			enabled,
+			"Cancel"
+		}
+	}
+};
+
+resource 'CNTL' (20000, purgeable, preload) {	/* scroll bar control, initially invisible, untitled */
+	{-1, 465, 272, 481},	/* 16 wide and 273 tall if the order is top, left, bottom, right - presumably the vertical bar */
+	0,
+	invisible,
+	0,
+	0,
+	scrollBarProc,
+	0,
+	""
+};
+
+resource 'CNTL' (20001, purgeable, preload) {	/* scroll bar control, initially invisible, untitled */
+	{271, -1, 287, 466},	/* 467 wide and 16 tall if the order is top, left, bottom, right - presumably the horizontal bar */
+	0,
+	invisible,
+	0,
+	0,
+	scrollBarProc,
+	0,
+	""
+};
+
+data 'pzza' (128, purgeable) {	/* four raw bytes spelling "MPS " (the same code given to link -c in Stdfiles) */
+	$"4D50 5320"                                          /* MPS  */
+};
+
+resource 'MBAR' (20000, preload) {	/* menu bar: lists the four MENU IDs defined in this file (apple, File, Edit, Font) */
+	{	/* array MenuArray: 4 elements */
+		/* [1] */
+		20000,
+		/* [2] */
+		20001,
+		/* [3] */
+		20002,
+		/* [4] */
+		20003
+	}
+};
+
+resource 'WIND' (20000, purgeable, preload) {	/* window template: zoomable document window, created invisible, no close box, titled "untitled" */
+	{0, 0, 286, 480},
+	zoomDocProc,
+	invisible,
+	noGoAway,
+	0x0,
+	"untitled"
+};
+
+resource 'STR#' (20000, purgeable) {	/* error message strings. NOTE(review): confirm that SIOW looks up "Font not found" at index 12 */
+	{	/* array StringArray: 12 elements */
+		/* [1] */
+		"You must run on 512Ke or later",
+		/* [2] */
+		"Application Memory Size is too small",
+		/* [3] */
+		"Not enough memory to run SIOW",
+		/* [4] */
+		"Not enough memory to do Cut",
+		/* [5] */
+		"Cannot do Cut",
+		/* [6] */
+		"Cannot do Copy",
+		/* [7] */
+		"Cannot exceed 32,000 characters with Pas"
+		"te",
+		/* [8] */
+		"Not enough memory to do Paste",
+		/* [9] */
+		"Cannot create window",
+		/* [10] */
+		"Cannot exceed 32,000 characters",
+		"Cannot do Paste",	/* [11] */
+		"Font not found"	/* [12] previously fused onto [11] as one string, apparently a missing separator */
+	}
+};
+
+resource 'SIZE' (-1) {	/* application flags; same as SIZE 0 in this file except 124928 here where that one has 256000 */
+	reserved,
+	acceptSuspendResumeEvents,
+	reserved,
+	canBackground,
+	multiFinderAware,
+	backgroundAndForeground,
+	dontGetFrontClicks,
+	ignoreChildDiedEvents,
+	not32BitCompatible,
+	notHighLevelEventAware,
+	onlyLocalHLEvents,
+	notStationeryAware,
+	dontUseTextEditServices,
+	reserved,
+	reserved,
+	reserved,
+	124928,	/* presumably the preferred memory size in bytes - confirm against the SIZE template */
+	38912	/* presumably the minimum memory size in bytes - confirm against the SIZE template */
+};
+
+resource 'SIZE' (0) {	/* application flags; same as SIZE -1 in this file except 256000 here where that one has 124928 */
+	reserved,
+	acceptSuspendResumeEvents,
+	reserved,
+	canBackground,
+	multiFinderAware,
+	backgroundAndForeground,
+	dontGetFrontClicks,
+	ignoreChildDiedEvents,
+	not32BitCompatible,
+	notHighLevelEventAware,
+	onlyLocalHLEvents,
+	notStationeryAware,
+	dontUseTextEditServices,
+	reserved,
+	reserved,
+	reserved,
+	256000,	/* presumably the preferred memory size in bytes - confirm against the SIZE template */
+	38912	/* presumably the minimum memory size in bytes - confirm against the SIZE template */
+};
+
+data 'siow' (0) {	/* length-prefixed string: 0x0F (15) followed by the 15 characters "ReadSeq in SIOW" */
+	$"0F52 6561 6453 6571 2069 6E20 5349 4F57"            /* .ReadSeq in SIOW */
+};
+
+resource 'BNDL' (128) {	/* bundle for 'siow': pairs 0 with resource ID 128 for both ICN# and FREF (both defined in this file) */
+	'siow',
+	0,
+	{	/* array TypeArray: 2 elements */
+		/* [1] */
+		'ICN#',
+		{	/* array IDArray: 1 elements */
+			/* [1] */
+			0, 128
+		},
+		/* [2] */
+		'FREF',
+		{	/* array IDArray: 1 elements */
+			/* [1] */
+			0, 128
+		}
+	}
+};
+
+resource 'FREF' (128) {	/* file reference for type 'APPL'; the 0 presumably matches the 0 paired with ICN# 128 in BNDL 128 */
+	'APPL',
+	0,
+	""
+};
+
+resource 'ICN#' (128) {	/* two hex bitmaps; [2] repeats the top of [1] and fills in its lower shape (presumably icon and mask) */
+	{	/* array: 2 elements */
+		/* [1] */
+		$"0000 0000 0000 0000 0010 4100 0010 2200"
+		$"0020 2200 0020 2100 0020 4100 0010 4200"
+		$"0010 4200 0010 2200 0020 2100 0020 0100"
+		$"00FF FF00 03FF FFE0 0791 03F0 0ED1 0E7C"
+		$"1C31 321C 380D C10E 3FFF FFFE 3003 C106"
+		$"380D 300E 1E31 0E3C 1FC1 01F8 07FF FFE0"
+		$"00FF FE",
+		/* [2] */
+		$"0000 0000 0000 0000 0010 4100 0010 2200"
+		$"0020 2200 0020 2100 0020 4100 0010 4200"
+		$"0010 4200 0010 2200 0020 2100 0020 0100"
+		$"00FF FF00 03FF FFE0 07FF FFF0 0FFF FFFC"
+		$"1FFF FFFC 3FFF FFFE 3FFF FFFE 3FFF FFFE"
+		$"3FFF FFFE 1FFF FFFC 1FFF FFF8 07FF FFE0"
+		$"00FF FE"
+	}
+};
+
diff --git a/readseqSIOW.make b/readseqSIOW.make
new file mode 100644
index 0000000..480a146
--- /dev/null
+++ b/readseqSIOW.make
@@ -0,0 +1,42 @@
+#  Macintosh MPW-C Makefile
+#  using Simple Input/Output Window library
+#  NOTE: the dependency (Option-F) and continuation (Option-D) characters below must be Mac OS Roman for MPW
+#   File:       ReadseqSIOW.make
+#   Target:     ReadseqSIOW
+#   Sources:    readseq.c ureadseq.c ureadasn.c macinit.c
+#   Created:    Wednesday, November 13, 1991 8:23:00 PM
+
+
+#OBJECTS = macinit.c.o readseq.c.o ureadseq.c.o
+#COptions =  -D SIOW  # -r
+
+#if NCBI is available, set path here to NCBI toolkit:
+NCBI = "{Boot}@molbio:ncbi:"
+OBJECTS = macinit.c.o readseq.c.o ureadseq.c.o ureadasn.c.o
+COptions =  -D SIOW -d NCBI -i "{NCBI}"include:  
+NCBILIBS = "{NCBI}"lib:libncbi.o "{NCBI}"lib:libncbiobj.o "{NCBI}"lib:libvibrant.o
+#endif NCBI
+
+ReadseqSIOW ƒƒ ReadseqSIOW.make {OBJECTS}
+	Link -d -c '????' -t APPL ∂
+		{OBJECTS} ∂
+		"{CLibraries}"StdClib.o ∂
+		"{MPW}"Libraries:Libraries:SIOW.o ∂
+		"{Libraries}"Runtime.o ∂
+		"{Libraries}"Interface.o ∂
+#if NCBI
+		{NCBILIBS} ∂
+		"{CLibraries}"CSANELib.o ∂
+		"{CLibraries}"Math.o ∂
+#endif NCBI
+		-o ReadseqSIOW
+		
+readseq.c.o ƒ ReadseqSIOW.make readseq.c
+ureadseq.c.o ƒ ReadseqSIOW.make ureadseq.c
+macinit.c.o ƒ ReadseqSIOW.make macinit.c
+#if NCBI
+ureadasn.c.o ƒ ReadseqSIOW.make ureadasn.c
+#endif NCBI
+
+ReadseqSIOW ƒƒ macinit.r
+	Rez -a macinit.r -o ReadseqSIOW