From c9b94ce735ea41a048be4cf7c22e0af3b253dda7 Mon Sep 17 00:00:00 2001
From: Kuoi <starsareintherose@outlook.com>
Date: Sun, 16 Apr 2023 07:33:28 +0800
Subject: [PATCH] init

---
 Makefile   |  111 +++
 macinit.c  |  292 ++++++++
 readseq.c  | 1411 ++++++++++++++++++++++++++++++++++
 ureadasn.c |  324 ++++++++
 ureadseq.c | 2121 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 ureadseq.h |  172 +++++
 6 files changed, 4431 insertions(+)
 create mode 100644 Makefile
 create mode 100644 macinit.c
 create mode 100644 readseq.c
 create mode 100644 ureadasn.c
 create mode 100644 ureadseq.c
 create mode 100644 ureadseq.h

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..40a5a92
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,111 @@
+#
+# Unix Makefile for readseq
+# to use, command me:
+#  %  make       -- or --
+#  %  make CC=your-c-compiler-name
+#
+
+# pick an ANSI C compiler (the default Sun CC is not ANSI): GNU gcc, SGI Irix cc,
+# or (some) DEC Ultrix vcc.  No comment after the value: make keeps the blanks.
+CC=gcc
+#CC=cc
+#CC=vcc
+
+CFLAGS=
+#CFLAGS= -DSMALLCHECKSUM  # if you prefer to use a GCG-standard 13 bit checksum
+#    instead of a full 32 bit checksum. This may enhance compatibility w/ GCG software
+SOURCES= readseq.c ureadseq.c ureadseq.h ureadasn.c
+DOCS= Readme Readseq.help Formats Stdfiles Makefile Make.com add.gdemenu *.std
+# (SOURCES are the build prerequisites; SOURCES and DOCS are what "make shar" copies)
+
+# NCBI toolkit support for ASN.1 reader: these settings are only referenced by
+# the commented-out NCBI build rule further down
+# this is path to NCBI toolkit, you must set for your system:
+NCBI=
+#NCBI=/bio/mb/ncbi
+# libraries and paths derived from NCBI (LIB3 and LIB4 are not part of NLIBS)
+OTHERLIBS=-lm
+LIB1=-lncbi
+LIB2=-lncbiobj
+LIB3=-lncbicdr
+LIB4=-lvibrant
+INCPATH=$(NCBI)/include
+LIBPATH=$(NCBI)/lib
+NCFLAGS=$(CFLAGS) -DNCBI -I$(INCPATH)
+NLDFLAGS=-I$(INCPATH) -L$(LIBPATH)
+NLIBS=$(LIB1) $(LIB2) $(OTHERLIBS)
+
+
+.PHONY: all build test clean shar
+all: build test
+build: readseq   # readseq is a real file target, so "make test" can build it
+readseq: $(SOURCES)
+	@echo "Compiling readseq..."
+	$(CC) $(CFLAGS) -o readseq readseq.c ureadseq.c
+# if using NCBI, uncomment these lines in place of the readseq: rule above
+#readseq: $(SOURCES)
+# @echo "Compiling readseq with NCBI toolkit support...";
+# $(CC) -o readseq $(NLDFLAGS) $(NCFLAGS) readseq.c ureadseq.c ureadasn.c $(NLIBS)
+# test: convert the *.std sample files through a chain of formats and diff the ends
+test: $(SOURCES) readseq
+	@echo ""
+	@echo "Test for general read/write of all chars:"
+	./readseq -p alphabet.std -otest.alpha
+	-diff test.alpha alphabet.std
+
+	@echo ""
+	@echo "Test for valid format conversions:"
+	./readseq -v -p -f=ig   nucleic.std -otest.ig
+	./readseq -v -p -f=gb   test.ig     -otest.gb
+	./readseq -v -p -f=nbrf test.gb     -otest.nbrf
+	./readseq -v -p -f=embl test.nbrf   -otest.embl
+	./readseq -v -p -f=gcg  test.embl   -otest.gcg
+	./readseq -v -p -f=strider test.gcg -otest.strider
+	./readseq -v -p -f=fitch test.strider -otest.fitch
+	./readseq -v -p -f=fasta test.fitch -otest.fasta
+	./readseq -v -p -f=pir  test.fasta  -otest.pir
+	./readseq -v -p -f=ig   test.pir    -otest.ig-b
+	-diff test.ig test.ig-b
+
+	@echo ""
+	@echo "Test for multiple-sequence format conversions:"
+	./readseq -p -f=ig    multi.std   -otest.m-ig
+	./readseq -p -f=gb    test.m-ig   -otest.m-gb
+	./readseq -p -f=nbrf  test.m-gb   -otest.m-nbrf
+	./readseq -p -f=embl  test.m-nbrf -otest.m-embl
+	./readseq -p -f=fasta test.m-embl -otest.m-fasta
+	./readseq -p -f=pir   test.m-fasta -otest.m-pir
+	./readseq -p -f=msf   test.m-pir  -otest.m-msf
+	./readseq -p -f=paup  test.m-msf  -otest.m-paup
+	./readseq -p -f=ig    test.m-paup -otest.m-ig-b
+	-diff test.m-ig test.m-ig-b
+# (the leading "-" on the diff lines lets make continue when the files differ)
+# if using NCBI, uncomment these lines
+# @echo ""
+# @echo "Test of NCBI ASN.1 conversions:"
+# ./readseq -p -f=asn test.m-ig  -otest.m-asn
+# ./readseq -p -f=ig  test.m-asn -otest.m-ig-c
+# -diff test.m-ig test.m-ig-c
+# closing notes for whoever reads the diff output:
+	@echo ""
+	@echo "Expect differences in the header lines due to"
+	@echo "different format headers.  If any sequence lines"
+	@echo "differ, or if the checksums differ, there is a problem."
+	@echo "----------------------"
+	@echo ""
+	@echo "To clean up test files, command me:"
+	@echo "    make clean"
+
+# clean: remove object files, core dumps and the test.* files that "make test" writes
+clean:
+	rm -f *.o core test.*
+# shar: bundle sources and docs into readseq.shar via the scratch dir readseqd
+shar:
+	@echo "shell archiving files..."
+	-rm -rf readseq*.shar readseqd
+	mkdir readseqd
+	cp $(SOURCES) readseqd
+	cp $(DOCS) readseqd
+	shar -v readseqd > readseq.shar
+	rm -rf readseqd
+
diff --git a/macinit.c b/macinit.c
new file mode 100644
index 0000000..aeeabd3
--- /dev/null
+++ b/macinit.c
@@ -0,0 +1,292 @@
+/*
+  macinit.c
+  -- Macintosh initializations, then call real main
+
+Note: compile this segment as Main for generic 68000 processor, so it won't
+ fail on generic mac
+
+*/
+
+#pragma segment Main
+
+#include <Events.h>
+#include <Memory.h>
+#include <OSUtils.h>
+#include <Palettes.h>
+#include <Quickdraw.h>
+#include <StandardFile.h>
+#include <StdLib.h>
+#include <ToolUtils.h>
+#include <Types.h>
+#include <Windows.h>
+#include <ctype.h>
+#include <dialogs.h>
+#include <stdio.h>
+#include <string.h>
+// #include <Menus.h>
+// #include <Fonts.h>
+
+Boolean StopKey()
+/* True when a waiting key-down/auto-key event is command-period; in that case
+ * it beeps and fetches the event with GetNextEvent before returning. */
+{
+	EventRecord pending;
+
+	if (!EventAvail(keyDownMask + autoKeyMask, &pending)) return false;
+	if (!(pending.modifiers & cmdKey)) return false;
+	if ((char)(pending.message & charCodeMask) != '.') return false;
+
+	SysBeep(1);
+	(void)GetNextEvent(keyDownMask + autoKeyMask, &pending);
+	return true;
+}
+
+Boolean cmdKeyIsDown() /* BitTst of bit (8 * sizeof(KeyMap)) - 55 in the GetKeys map */
+{
+	KeyMap keys;
+	GetKeys(&keys);
+	return BitTst(keys, (8 * sizeof(KeyMap)) - 55);
+}
+
+Boolean shiftKeyIsDown() /* BitTst of bit (8 * sizeof(KeyMap)) - 56 in the GetKeys map */
+{
+	KeyMap keys;
+	GetKeys(&keys);
+	return BitTst(keys, (8 * sizeof(KeyMap)) - 56);
+}
+
+Boolean capsLockIsDown() /* BitTst of bit (8 * sizeof(KeyMap)) - 57 in the GetKeys map */
+{
+	KeyMap keys;
+	GetKeys(&keys);
+	return BitTst(keys, (8 * sizeof(KeyMap)) - 57);
+}
+
+Boolean optionKeyIsDown() /* BitTst of bit (8 * sizeof(KeyMap)) - 58 in the GetKeys map */
+{
+	KeyMap keys;
+	GetKeys(&keys);
+	return BitTst(keys, (8 * sizeof(KeyMap)) - 58);
+}
+
+Boolean MouseButton() { Boolean isDown = Button(); return isDown; } /* wraps Button() */
+
+Boolean Keypress() /* is a key-down, key-up or auto-key event waiting? */
+{
+	EventRecord scratch; /* EventAvail is handed a record to fill in */
+	return EventAvail(autoKeyMask + keyUpMask + keyDownMask, &scratch);
+}
+
+char *StdGetFile(char *prompt, OSType fileTypes[], int nFileTypes)
+/* Ask the user for a file with SFGetFile.  Returns the chosen name in a static
+ * buffer (the same buffer on every call), or NULL when reply.good is not set.
+ * SetVol is called with the reply's vRefNum before the name is copied. */
+{
+	static char filename[80] = "\0";
+	SFReply reply;
+	Point corner; /* where to display the dialog */
+	short n;
+
+	corner.v = 90;
+	corner.h = 80;
+	if (optionKeyIsDown()) nFileTypes = 0; /* option key: pass no file types */
+
+	SFGetFile(corner, prompt, nil, nFileTypes, fileTypes, nil, &reply);
+	if (!reply.good) return NULL;
+
+	n = SetVol(nil, reply.vRefNum); /* result unused: replaced just below */
+	n = reply.fName[0];             /* fName[0] is used as the name length */
+	strncpy(filename, (char *)(&reply.fName[1]), n);
+	filename[n] = '\0';
+	return filename;
+}
+
+int readCmdOptions(FILE *cl, char *progname, char ***argv)
+/* Command line reader for Mac/MPW -- dgg.  Reads one line from cl, splits it
+ * on blanks, and stores a malloc'd NULL-terminated vector {progname, words...}
+ * in *argv (the caller owns it).  Returns the number of entries; when memory
+ * runs out the vector simply ends at the last entry stored (*argv may be NULL). */
+{
+#define MAXS 255
+	char st[MAXS], *pword, *tokarg = st, **targv, **grown;
+	int argc = 0;
+
+	targv = (char **)malloc(sizeof(char *)); /* room for the NULL end mark */
+	if (targv != NULL) targv[0] = NULL;
+	if (progname == NULL) progname = "program";
+	/* Test fgets() itself: feof() is also set after a final line without a
+	 * newline (which used to be dropped), and st is garbage if nothing read. */
+	if (fgets(st, MAXS, cl) == NULL) st[0] = '\0';
+	for (pword = progname; targv != NULL && pword != NULL;
+	     pword = strtok(tokarg, " \n"), tokarg = NULL) {
+		if (*pword == '\0') continue; /* only an empty progname gets here */
+		grown = (char **)realloc(targv, (argc + 2) * sizeof(char *));
+		if (grown == NULL) break;
+		targv = grown;
+		targv[argc] = (char *)malloc(strlen(pword) + 1);
+		if (targv[argc] == NULL) break; /* slot already holds the NULL */
+		strcpy(targv[argc], pword);
+		targv[++argc] = NULL;
+	}
+
+	*argv = targv;
+	return argc;
+}
+
+int ccommand(char ***argv)
+/* Replace *argv by a vector read from one line of stdin; returns its count. */
+{
+	char **fresh;
+	char *progname = (*argv)[0]; /* keep the current program name as entry 0 */
+	int count = readCmdOptions(stdin, progname, &fresh);
+	*argv = fresh;
+	return count;
+}
+
+extern _DataInit();
+/* dialog IDs: only the commented-out machine checks inside MacInit() mention them */
+// #define VERSION     curSysEnvVers
+#define nocolorID 130
+#define no68020 133
+#define no68881 132
+#define no256 134
+#define nosys6 135
+/* MacInit: Toolbox start-up calls; main() in this file has its call commented out */
+void MacInit()
+{
+	SysEnvRec theWorld;
+	OSErr OSys;
+	DialogPtr crashDia;
+	long tick;
+
+	UnloadSeg(_DataInit);
+
+	InitGraf((Ptr)&qd.thePort);
+	// InitFonts();
+	InitWindows();
+	// InitMenus();
+	// TEInit();
+	InitDialogs(nil);
+	InitCursor();
+
+	/*______________________________________________________*/
+	/*   Query the machine; the "stop" checks are disabled  */
+	/*______________________________________________________*/
+	OSys = SysEnvirons(curSysEnvVers, &theWorld);
+
+	/*if(!theWorld.hasColorQD) {
+	  crashDia = GetNewDialog (nocolorID, nil, (WindowPtr) -1);
+	  DrawDialog (crashDia);
+	  Delay (300, &tick);
+	  ExitToShell();
+	  }*/
+	/*if(theWorld.processor < env68020) {
+	  crashDia = GetNewDialog (no68020, nil, (WindowPtr) -1);
+	  DrawDialog (crashDia);
+	  Delay (300, &tick);
+	  ExitToShell();
+	  }*/
+	/*if(!theWorld.hasFPU) {
+	  crashDia = GetNewDialog (no68881, nil, (WindowPtr) -1);
+	  DrawDialog (crashDia);
+	  Delay (300, &tick);
+	  ExitToShell();
+	  }
+	if(theWorld.systemVersion < 0x0600) {
+	  crashDia = GetNewDialog (nosys6, nil, (WindowPtr) -1);
+	  DrawDialog (crashDia);
+	  Delay (300, &tick);
+	  ExitToShell();
+	  }*/
+
+#ifdef UnDeFineD /* NOTE(review): presumably never defined, so the rest is compiled out */
+	/*______________________________________________________*/
+	/*                     Set Rects                        */
+	/*______________________________________________________*/
+	screenRect = qd.screenBits.bounds;
+	offLeft = 0;
+	offTop = 0;
+	offRight = screenRect.right;
+	offBottom = screenRect.bottom;
+	SetRect(&BaseRect, 40, 60, 472, 282);
+	tempRgn = GetGrayRgn();
+	HLock((Handle)tempRgn);
+	TotalRect = (**tempRgn).rgnBBox;
+	SetRect(&minRect, 80, 80, (**tempRgn).rgnBBox.right - 40,
+		(**tempRgn).rgnBBox.bottom - 40);
+	HUnlock((Handle)tempRgn);
+
+	/*______________________________________________________*/
+	/*        Open Window & set Palette & Picture           */
+	/*______________________________________________________*/
+	theGDevice = GetMainDevice();
+	HLock((Handle)theGDevice);
+	mycolors = (**(**theGDevice).gdPMap).pmTable;
+	numcolor = (**(**theGDevice).gdPMap).pixelSize;
+	HUnlock((Handle)theGDevice);
+	switch (numcolor) { /* pixelSize 1, 2, 4, 8 becomes 2, 4, 16, 256 */
+		case 1:
+			numcolor = 2;
+			break;
+		case 2:
+			numcolor = 4;
+			break;
+		case 4:
+			numcolor = 16;
+			break;
+		case 8:
+			numcolor = 256;
+			break;
+	}
+
+	myWindow = NewCWindow(nil, &BaseRect, "", true, zoomDocProc,
+			      (WindowPtr)-1, true, 150);
+	SetPort((WindowPtr)myWindow);
+	DrawGrowIcon(myWindow);
+
+	srcPalette = NewPalette(numcolor, mycolors, pmCourteous, 0);
+	SetPalette((WindowPtr)myWindow, srcPalette, true);
+
+	/*______________________________________________________*/
+	/*                    Set menus                         */
+	/*______________________________________________________*/
+	mymenu0 = GetMenu(appleID);
+	AddResMenu(mymenu0, 'DRVR');
+	InsertMenu(mymenu0, 0);
+	mymenu1 = newmenu(129, "File");
+	appendmenu(mymenu1, "Start;Quit");
+	InsertMenu(mymenu1, 0);
+	mymenu2 = newmenu(130, "Edit");
+	InsertMenu(mymenu2, 0);
+	DrawMenuBar();
+
+	/*______________________________________________________*/
+	/*                  Init variables                      */
+	/*______________________________________________________*/
+	DoneFlag = false;
+	yieldTime = 0;
+	return;
+#endif
+}
+
+int main(int argc, char *argv[])
+/* SIOW front end: prompt for a command line, run siow_main() on it, repeat.
+ * The loop never ends by itself; the prompt tells the user to quit with cmd-Q. */
+{
+	extern int siow_main(int argc, char *argv[]); /* readseq.c built with SIOW */
+	char **myargv;
+	int myargc;
+
+	/* MacInit();  -- SIOW library handles this */
+	for (;;) {
+		fprintf(stderr, "\nEnter command line for %s [cmd-Q to quit]\n",
+			argv[0]);
+		fprintf(stderr, "-> %s ", argv[0]);
+		myargv = argv; /* ccommand() reads the program name from here */
+		myargc = ccommand(&myargv);
+
+		siow_main(myargc, myargv);
+		fflush(stdout);
+	}
+}
+
diff --git a/readseq.c b/readseq.c
new file mode 100644
index 0000000..44e2472
--- /dev/null
+++ b/readseq.c
@@ -0,0 +1,1411 @@
+/* File: readseq.c
+ * main() program for ureadseq.c, ureadseq.h
+ *
+ * Reads and writes nucleic/protein sequence in various
+ * formats. Data files may have multiple sequences.
+ *
+ * Copyright 1990 by d.g.gilbert
+ * biology dept., indiana university, bloomington, in 47405
+ * e-mail: gilbertd@bio.indiana.edu
+ *
+ * This program may be freely copied and used by anyone.
+ * Developers are encouraged to incorporate parts in their
+ * programs, rather than devise their own private sequence
+ * format.
+ *
+ * This should compile and run with any ANSI C compiler.
+ * Please advise me of any bugs, additions or corrections.
+ *
+ */
+
+const char *title = "readSeq (1Feb93), multi-format molbio sequence reader.\n";
+/* banner text: usage() prints title ahead of the option list */
+/*  History
+ 27 Feb 90.  1st release to public.
+  4 Mar 90.  + Gary Olsen format
+	     + case change
+	     * minor corrections to NBRF,EMBL,others
+	     * output 1 file per sequence for gcg, unknown
+	     * define -DNOSTR for c-libraries w/o strstr
+	     - readseq.p, pascal version, becomes out-of-date
+ 24 May 90.  + Phylip 3.2 output format (no input)
+ 20 Jul 90.  + Phylip 3.3 output (no input yet)
+	     + interactive output re-direction
+	     + verbose progress info
+	     * interactive help output
+	     * dropped line no.s on NBRF output
+	     * patched in HyperGCG XCMD corrections,
+	       - except for seq. documentation handling
+	     * dropped the IG special nuc codes, as IG has
+	       adopted the standard IUB codes (now if only
+	       everyone would adopt a standard format !)
+ 11 Oct 90.  * corrected bug in reading/writing of EMBL format
+
+ 17 Oct 91.  * corrected bug in reading Olsen format
+	       (serious-deletion)
+ 10 Nov 91.  * corrected bug in reading some GCG format files
+	       (serious-last line duplicated)
+	     + add format name parsing (-fgb, -ffasta, ...)
+	     + Phylip v3.4 output format (== v3.2, sequential)
+	     + add checksum output to all forms that have document
+	     + skip mail headers in seq file
+	     + add pipe for standard input == seq file (with -p)
+	     * fold in parts of MacApp Seq object
+	     * strengthen format detection
+	     * clarify program structure
+	     * remove fixed sequence size limit (now dynamic, sizeof memory)
+	     * check and fold in accumulated bug reports:
+	     *   Now ANSI-C fopen(..,"w") & check open failure
+	     *   Define -DFIXTOUPPER for nonANSI C libraries that mess
+		 up toupper/tolower
+	     = No command-line changes; callers of readseq main() should be okay
+	     - ureadseq.h functions have changed; client programs need to note.
+	     + added Unix and VMS Make scripts, including validation tests
+
+  4 May 92.  + added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
+	       (-DBIGCHECKSUM)
+   Aug 92    = fixed Olsen format input to handle files w/ more sequences,
+	       not to mess up when more than one seq has same identifier,
+	       and to convert number masks to symbols.
+	     = IG format fix to understand ^L
+
+ 25-30 Dec 92
+	     * revised command-line & interactive interface.  Suggested form is
+ now readseq infile -format=genbank -output=outfile -item=1,3,4 ... but remains
+ compatible with prior commandlines: readseq infile -f2 -ooutfile -i3 ...
+	     + added GCG MSF multi sequence file format
+	     + added PIR/CODATA format
+	     + added NCBI ASN.1 sequence file format
+	     + added Pretty, multi sequence pretty output (only)
+	     + added PAUP multi seq format
+	     + added degap option
+	     + added Gary Williams (GWW, G.Williams@CRC.AC.UK)
+ reverse-complement option.
+	     + added support for reading Phylip formats (interleave &
+ sequential)
+	     * string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER,
+ NEEDSTRCASECMP
+	     * changed 32bit checksum to default, -DSMALLCHECKSUM for GCG
+ version
+
+  1Feb93
+	     = revert GenBank output to a fixed left number width which
+	      other software depends on.
+	     = fix for MSF input to handle symbols in names
+	     = fix bug for possible memory overrun when truncating seqs for
+	       Phylip or Paup formats (thanks Anthony Persechini)
+
+*/
+
+/*
+   Readseq has been tested with:
+      Macintosh MPW C
+      GNU gcc
+      SGI cc
+      VAX-VMS cc
+   Any ANSI C compiler should be able to handle this.
+   Old-style C compilers barf all over the source.
+
+
+How do I build the readseq program if I have an Ansi C compiler?
+#--------------------
+# Unix ANSI C
+# Use the supplied Makefile this way:
+%  make CC=name-of-c-compiler
+# OR do this...
+% gcc readseq.c ureadseq.c -o readseq
+
+#--------------------
+$!VAX-VMS cc
+$! Use the supplied Make.Com this way:
+$  @make
+$! OR, do this:
+$ cc readseq, ureadseq
+$ link readseq, ureadseq, sys$library:vaxcrtl/lib
+$ readseq :== $ MyDisk:[myacct]readseq
+
+#--------------------
+# Macintosh Simple Input/Output Window application
+# requires MPW-C and SIOW library (from APDA)
+# also uses files macinit.c, macinit.r, readseqSIOW.make
+#
+Buildprogram readseqSIOW
+
+#--------------------
+#MPW-C v3 tool
+C  ureadseq.c
+C  readseq.c
+link -w -o readseq -t MPST -c 'MPS ' ∂
+   readseq.c.o Ureadseq.c.o ∂
+    "{Libraries}"Interface.o ∂
+    "{Libraries}"ToolLibs.o ∂
+    "{Libraries}"Runtime.o ∂
+    "{CLibraries}"StdClib.o
+readseq -i1 ig.seq
+
+# MPW-C with NCBI tools
+
+set NCBI "{Boot}@molbio:ncbi:"; EXPORT NCBI
+set NCBILIB1  "{NCBI}"lib:libncbi.o; export NCBILIB1
+set NCBILIB2  "{NCBI}"lib:libncbiobj.o; export NCBILIB2
+set NCBILIB3  "{NCBI}"lib:libncbicdr.o; export NCBILIB3
+set NCBILIB4  "{NCBI}"lib:libvibrant.o; export NCBILIB4
+
+C  ureadseq.c
+C  -d NCBI -i "{NCBI}"include: ureadasn.c
+C  -d NCBI -i "{NCBI}"include: readseq.c
+link -w -o readseq -t MPST -c 'MPS ' ∂
+   ureadseq.c.o ureadasn.c.o readseq.c.o  ∂
+    {NCBILIB4} {NCBILIB2} {NCBILIB1} ∂
+    "{Libraries}"Interface.o ∂
+    "{Libraries}"ToolLibs.o ∂
+    "{Libraries}"Runtime.o ∂
+    "{CLibraries}"CSANELib.o ∂
+    "{CLibraries}"Math.o ∂
+    "{CLibraries}"StdClib.o
+
+===========================================================*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ureadseq.h"
+
+#pragma segment readseq
+
+static char inputfilestore[256], *inputfile = inputfilestore;
+/* menu text per format: formatstr() reads entry (format - 1); "" ends the list */
+const char *formats[kMaxFormat + 1] = {" 1. IG/Stanford",
+				       " 2. GenBank/GB",
+				       " 3. NBRF",
+				       " 4. EMBL",
+				       " 5. GCG",
+				       " 6. DNAStrider",
+				       " 7. Fitch",
+				       " 8. Pearson/Fasta",
+				       " 9. Zuker (in-only)",
+				       "10. Olsen (in-only)",
+				       "11. Phylip3.2",
+				       "12. Phylip",
+				       "13. Plain/Raw",
+				       "14. PIR/CODATA",
+				       "15. MSF",
+				       "16. ASN.1",
+				       "17. PAUP/NEXUS",
+				       "18. Pretty (out-only)",
+				       ""};
+
+/* number of formname[] entries, derived so it cannot drift from the table */
+#define kFormCount ((int)(sizeof(formname) / sizeof(formname[0])))
+#define kMaxFormName 15 /* most leading characters parseformat() compares */
+const struct formatTable {
+	const char *name; /* lower-case name accepted on the command line */
+	short num;        /* format code that the name selects */
+} formname[] = {
+    {"ig", kIG},
+    {"stanford", kIG},
+    {"genbank", kGenBank},
+    {"gb", kGenBank},
+    {"nbrf", kNBRF},
+    {"embl", kEMBL},
+    {"gcg", kGCG},
+    {"uwgcg", kGCG},
+    {"dnastrider", kStrider},
+    {"strider", kStrider},
+    {"fitch", kFitch},
+    {"pearson", kPearson},
+    {"fasta", kPearson},
+    {"zuker", kZuker},
+    {"olsen", kOlsen},
+    {"phylip", kPhylip},
+    {"phylip3.2", kPhylip2},
+    {"phylip3.3", kPhylip3},
+    {"phylip3.4", kPhylip4},
+    {"phylip-interleaved", kPhylip4},
+    {"phylip-sequential", kPhylip2},
+    {"plain", kPlain},
+    {"raw", kPlain},
+    {"pir", kPIR},
+    {"codata", kPIR},
+    {"asn.1", kASN1},
+    {"msf", kMSF},
+    {"paup", kPAUP},
+    {"nexus", kPAUP},
+    {"pretty", kPretty},
+};
+
+const char *kASN1headline = "Bioseq-set ::= {\nseq-set {\n";
+
+/* GWW table for getting the complement of a nucleotide (IUB codes).
+ * The entries line up with the printable ASCII characters from ' ' to '~':
+ * entry k belongs to the character with code ' ' + k.  Digits and punctuation
+ * stand for themselves; the letter rows hold the complement codes. */
+const char compl [] =
+    " !\"#$%&'()*+,-./"
+    "0123456789:;<=>?@TVGHNNCDNNMNKNNYRYSAABWNRN[\\]^_`"
+    "tvghnncdnnmnknnyrysaabwnrn{|}~";
+
+char *formatstr(short format)
+/* Menu text for a format code.  kASNseqentry/kASNseqset are shown as the kASN1
+ * entry, kPhylipInterleave/kPhylipSequential as the kPhylip entry. */
+{
+	short shown; /* format code whose formats[] entry is returned */
+
+	if (format >= 1 && format <= kMaxFormat)
+		shown = format;
+	else if (format == kASNseqentry || format == kASNseqset)
+		shown = kASN1;
+	else if (format == kPhylipInterleave || format == kPhylipSequential)
+		shown = kPhylip;
+	else
+		return "(unknown)";
+
+	return formats[shown - 1];
+}
+
+int parseformat(char *name)
+/* Turn a format number ("8"), name ("fasta") or unambiguous start of a name
+ * into its format code; kNoformat if empty, out of range, unknown or ambiguous. */
+{
+#define kDupmatch -2
+	int namelen, maxlen, i, match, matchat;
+	char lname[kMaxFormName + 1];
+
+	skipwhitespace(name);
+	namelen = strlen(name);
+	if (namelen == 0) return kNoformat;
+	if (isdigit((unsigned char)*name)) { /* ctype needs a non-negative value */
+		i = atol(name);
+		if (i < kMinFormat || i > kMaxFormat) /* logical or, not bitwise | */
+			return kNoformat;
+		else
+			return i;
+	}
+
+	/* else match character name */
+	maxlen = min(kMaxFormName, namelen);
+	for (i = 0; i < maxlen; i++) lname[i] = to_lower(name[i]);
+	lname[maxlen] = 0;
+	matchat = kNoformat;
+
+	for (i = 0; i < kFormCount; i++) {
+		match = strncmp(lname, formname[i].name, maxlen);
+		if (match == 0) {
+			if (strlen(formname[i].name) == namelen)
+				return (formname[i].num); /* full name: done */
+			else if (matchat == kNoformat)
+				matchat = i; /* first partial match so far */
+			else
+				matchat = kDupmatch; /* 2 or more partial matches */
+		}
+	}
+	if (matchat == kNoformat || matchat == kDupmatch)
+		return kNoformat;
+	else
+		return formname[matchat].num;
+}
+
+static void dumpSeqList(char *list, short format)
+/* Print a heading, each NEWLINE-terminated entry of list on its own line (cut
+ * to 255 chars), then an empty line; text after the last NEWLINE is not shown. */
+{
+	char line[256];
+	long used = 0, left = strlen(list);
+
+	printf("Sequences in %s  (format is %s)\n", inputfile, formatstr(format));
+	for (; left > 0; left--, list++) {
+		if (*list != (char)NEWLINE) {
+			if (used < 255) line[used++] = *list;
+			continue;
+		}
+		line[used] = '\0';
+		used = 0;
+		puts(line);
+	}
+	putchar('\n');
+}
+
+void usage() /* print the title, the option summary and the format list on stderr */
+{
+	short i, midi; /* midi: number of rows in the two-column format list */
+
+	fputs(title, stderr); /* title is data, so it is not used as a format */
+	fprintf(stderr, "usage: readseq [-options] in.seq > out.seq\n");
+	fprintf(stderr, " options\n");
+	/* ? add -d[igits] to allow digits in sequence data, &/or option to
+	 * specify seq charset !? */
+	fprintf(stderr, "    -a[ll]         select All sequences\n");
+	fprintf(stderr, "    -c[aselower]   change to lower case\n");
+	fprintf(stderr, "    -C[ASEUPPER]   change to UPPER CASE\n");
+	fprintf(stderr, "    -degap[=-]     remove gap symbols\n");
+	fprintf(stderr,
+		"    -i[tem=2,3,4]  select Item number(s) from several\n");
+	fprintf(stderr, "    -l[ist]        List sequences only\n");
+	fprintf(stderr, "    -o[utput=]out.seq  redirect Output\n");
+	fprintf(stderr,
+		"    -p[ipe]        Pipe (command line, <stdin, >stdout)\n");
+	fprintf(stderr, "    -r[everse]     change to Reverse-complement\n");
+	fprintf(stderr, "    -v[erbose]     Verbose progress\n");
+	fprintf(stderr, "    -f[ormat=]#    Format number for output,  or\n");
+	fprintf(stderr, "    -f[ormat=]Name Format name for output:\n");
+	midi = (kMaxFormat + 1) / 2;
+	for (i = kMinFormat - 1; i < midi; i++)
+		fprintf(stderr, "        %-20s      %-20s\n", formats[i],
+			formats[midi + i]);
+
+	/* new output format options, esp. for pretty format: */
+	fprintf(stderr, "     \n");
+	fprintf(stderr, "   Pretty format options: \n");
+	fprintf(stderr, "    -wid[th]=#            sequence line width\n");
+	fprintf(stderr, "    -tab=#                left indent\n");
+	fprintf(stderr,
+		"    -col[space]=#         column space within sequence line "
+		"on output\n");
+	fprintf(
+	    stderr,
+	    "    -gap[count]           count gap chars in sequence numbers\n");
+	fprintf(stderr,
+		"    -nameleft, -nameright[=#]   name on left/right side [=max "
+		"width]\n");
+	fprintf(stderr, "    -nametop              name at top/bottom\n");
+	fprintf(stderr,
+		"    -numleft, -numright   seq index on left/right side\n");
+	fprintf(stderr, "    -numtop, -numbot      index on top/bottom\n");
+	fprintf(stderr,
+		"    -match[=.]            use match base for 2..n species\n");
+	fprintf(stderr,
+		"    -inter[line=#]        blank line(s) between sequence "
+		"blocks\n");
+
+	/******  not ready yet
+	fprintf(stderr, "    -code=none,rtf,postscript,ps   code syntax\n");
+	fprintf(stderr, "    -namefont=, -numfont=, -seqfont=font   font
+	choice\n"); fprintf(stderr, "       font suggestions include
+	times,courier,helvetica\n"); fprintf(stderr, "    -namefontsize=,
+	-numfontsize=, -seqfontsize=#\n"); fprintf(stderr, "       fontsize
+	suggestions include 9,10,12,14\n"); fprintf(stderr, " -namefontstyle=,
+	-numfontstyle=, -seqfontstyle= style  fontstyle for names\n");
+	fprintf(stderr, "       fontstyle options are
+	plain,italic,bold,bold-italic\n");
+	******/
+}
+
+void erralert(short err)
+/* Report error code err on stderr in words; err == 0 prints nothing. */
+{
+	const char *text; /* fixed message, or NULL if err needs formatting */
+	if (err == 0) return;
+	switch (err) {
+		case eFileCreate:
+			text = "Can't open output file.\n";
+			break;
+		case eASNerr:
+			text = "Error in ASN.1 sequence routines.\n";
+			break;
+		case eNoData:
+			text = "No data in file.\n";
+			break;
+		case eItemNotFound:
+			text = "Specified item not in file.\n";
+			break;
+		case eUnequalSize:
+			text = "This format requires equal length "
+			       "sequences.\nSequence truncated or padded to "
+			       "fit.\n";
+			break;
+		case eUnknownFormat:
+			text = "Error: this format is unknown to me.\n";
+			break;
+		case eOneFormat:
+			text = "Warning: This format permits only 1 sequence "
+			       "per file.\n";
+			break;
+		case eMemFull:
+			text = "Out of storage memory. Sequence truncated.\n";
+			break;
+		default: /* includes eFileNotFound, which needs the file name */
+			text = NULL;
+			break;
+	}
+	if (text != NULL)
+		fputs(text, stderr);
+	else if (err == eFileNotFound)
+		fprintf(stderr, "File not found: %s\n", inputfile);
+	else
+		fprintf(stderr, "readSeq error = %d\n", err);
+} /* erralert */
+
+int chooseFormat(boolean quietly)
+/* Output format to use: kPearson if quietly, else the format list is shown on
+ * stderr and the answer read from stdin (kPearson if missing or not understood). */
+{
+	char sform[128], *eol;
+	int midi, i, outform;
+
+	if (quietly) return kPearson; /* default */
+	midi = (kMaxFormat + 1) / 2;
+	for (i = kMinFormat - 1; i < midi; i++)
+		fprintf(stderr, "        %-20s      %-20s\n", formats[i],
+			formats[midi + i]);
+	fprintf(stderr, "\nChoose an output format (name or #): \n");
+	/* bounded read: gets() would overrun sform on a long input line */
+	if (fgets(sform, sizeof(sform), stdin) == NULL) return kPearson;
+	if ((eol = strchr(sform, '\n')) != NULL) *eol = '\0'; /* as gets() did */
+	outform = parseformat(sform);
+	return (outform == kNoformat) ? kPearson : outform;
+}
+
+/* read parameter(s) */
+
+boolean checkopt(boolean casesense, char *sopt, const char *smatch,
+		 short minword)
+/* Does the option word sopt match the option name smatch?  The two strings
+ * are compared over their first ncmp characters, with strncmp when casesense
+ * is set and with Strncasecmp otherwise.  ncmp is computed from minword and
+ * the two string lengths by the max()/min() expression below. */
+{
+	long optlen, matchlen;
+	short ncmp;
+
+	optlen = strlen(sopt);
+	matchlen = strlen(smatch);
+	/* NOTE(review): max/min are defined outside this file; if they are
+	 * unparenthesised macros the nesting matters, so the expression is
+	 * kept exactly in its original shape -- confirm before touching it */
+	ncmp = max(minword, min(optlen, matchlen));
+
+	if (casesense)
+		return !strncmp(sopt, smatch, ncmp);
+	return !Strncasecmp(sopt, smatch, ncmp);
+}
+
+#define kMaxwhichlist 50 /* most item numbers the -item option stores */
+
+/* global for readopt(), main() */
+boolean chooseall = false, quietly = false, gotinputfile = false,
+	listonly = false, closeout = false, verbose = false, manyout = false,
+	dolower = false, doupper = false, doreverse = false, askout = true,
+	dopipe = false, interleaved = false;
+short nfile = 0, iwhichlist = 0, nwhichlist = 0;
+short whichlist[kMaxwhichlist + 1]; /* -item numbers, ended by a 0 entry */
+long whichSeq = 0, outform = kNoformat;
+char onamestore[128], *oname = onamestore; /* output file name (-output) */
+FILE *foo = NULL; /* output file opened by -output, else NULL */
+
+void resetGlobals()
+/* Put the option globals back to their start-up values.  Needed under SIOW,
+ * where the local main() is called again and again in one process and the
+ * globals are not reinitialised in between. */
+{
+	/* option flags: all off ... */
+	chooseall = quietly = false;
+	gotinputfile = listonly = false;
+	closeout = verbose = manyout = false;
+	dolower = doupper = doreverse = false;
+	dopipe = interleaved = false;
+	/* ... except askout, the one flag that starts out true */
+	askout = true;
+
+	/* file counter and item selection */
+	nfile = 0;
+	iwhichlist = nwhichlist = 0;
+	whichSeq = 0;
+
+	/* output format and destination */
+	outform = kNoformat;
+	oname = onamestore;
+	foo = NULL;
+
+	/* pretty-print settings */
+	gPrettyInit(gPretty);
+}
+
+#define kOptOkay 1
+#define kOptNone 0
+
+int readopt(char *sopt)
+/* Apply one command-line word: "?" or -help print usage (returns kOptNone), a
+ * "-option[=value]" word sets option globals (sopt is cut at its '='), any
+ * other word is the input file.  Returns kOptOkay or an e... error code. */
+{
+	char sparamstore[256], *sparam = sparamstore;
+	short n; /* numeric value of the current parameter */
+	if (*sopt == '?') {
+		usage();
+		return kOptNone; /*? eOptionBad or kOptNone */
+	}
+
+	else if (*sopt == '-') {
+		char *cp = strchr(sopt, '=');
+		*sparam = '\0';
+		if (cp) {
+			strncat(sparam, cp + 1, sizeof(sparamstore) - 1);
+			*cp = 0;
+		}
+
+		if (checkopt(false, sopt, "-help", 2)) {
+			usage();
+			return kOptNone;
+		}
+
+		if (checkopt(false, sopt, "-all", 2)) {
+			whichSeq = 1;
+			chooseall = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-colspace",
+			     4)) { /* test before -c[ase] */
+			n = atoi(sparam);
+			gPretty.spacer = n;
+			return kOptOkay;
+		}
+
+		if (checkopt(true, sopt, "-caselower", 2)) {
+			dolower = true;
+			return kOptOkay;
+		}
+		if (checkopt(true, sopt, "-CASEUPPER", 2)) {
+			doupper = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-pipe", 2)) {
+			dopipe = true;
+			askout = false;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-list", 2)) {
+			listonly = true;
+			askout = false;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-reverse", 2)) {
+			doreverse = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-verbose", 2)) {
+			verbose = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-match", 5)) {
+			gPretty.domatch = true;
+			if (*sparam >= ' ') gPretty.matchchar = *sparam;
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-degap", 4)) {
+			gPretty.degap = true;
+			if (*sparam >= ' ') gPretty.gapchar = *sparam;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-interline", 4)) {
+			gPretty.interline = atoi(sparam);
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-item", 2)) {
+			char *cp = sparam;
+			nwhichlist = 0;
+			whichlist[0] = 0;
+			if (*cp == 0) cp = sopt + 2; /* compatible w/ old way */
+			do {
+				while (*cp != 0 && !isdigit((unsigned char)*cp)) cp++;
+				if (*cp != 0) {
+					n = atoi(cp);
+					whichlist[nwhichlist++] = n;
+					while (*cp != 0 && isdigit((unsigned char)*cp)) cp++;
+				}
+			} while (*cp != 0 && n > 0 &&
+				 nwhichlist < kMaxwhichlist);
+			whichlist[nwhichlist++] =
+			    0; /* 0 == stopsign for loop */
+			whichSeq = max(1, whichlist[0]);
+			iwhichlist = 1;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-format",
+			     5)) { /* -format=phylip, -f2, -form=phylip */
+			if (*sparam == 0) {
+				for (sparam = sopt + 2; isalpha((unsigned char)*sparam);
+				     sparam++)
+					;
+			}
+			outform = parseformat(sparam);
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-f",
+			     2)) { /* compatible w/ -fphylip prior version */
+			if (*sparam == 0) sparam = sopt + 2;
+			outform = parseformat(sparam);
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-output", 3)) { /* -output=myseq */
+			if (*sparam == 0) {
+				for (sparam = sopt + 3; isalpha((unsigned char)*sparam);
+				     sparam++)
+					;
+			}
+			foo = (strlen(sparam) < sizeof(onamestore)) /* no overrun */
+				  ? fopen(strcpy(oname, sparam), "w") : NULL;
+			if (!foo) {
+				erralert(eFileCreate);
+				return eFileCreate;
+			}
+			closeout = true;
+			askout = false;
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-o",
+			     2)) { /* compatible w/ -omyseq prior version */
+			if (*sparam == 0) sparam = sopt + 2;
+			foo = (strlen(sparam) < sizeof(onamestore)) /* no overrun */
+				  ? fopen(strcpy(oname, sparam), "w") : NULL;
+			if (!foo) {
+				erralert(eFileCreate);
+				return eFileCreate;
+			}
+			closeout = true;
+			askout = false;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-width", 2)) {
+			if (*sparam == 0) {
+				for (sparam = sopt + 2;
+				     !isdigit((unsigned char)*sparam) && *sparam != 0;
+				     sparam++)
+					;
+			}
+			n = atoi(sparam);
+			if (n > 0) gPretty.seqwidth = n;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-tab", 4)) {
+			if (*sparam == 0) {
+				for (sparam = sopt + 2;
+				     !isdigit((unsigned char)*sparam) && *sparam != 0;
+				     sparam++)
+					;
+			}
+			n = atoi(sparam);
+			gPretty.tab = n;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-gapcount", 4)) {
+			gPretty.baseonlynum = false;
+			/* if (*sparam >= ' ') gPretty.gapchar= *sparam; */
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-nointerleave", 8)) {
+			gPretty.noleaves = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-nameleft", 7)) {
+			if (*sparam == 0) {
+				for (sparam = sopt + 2;
+				     !isdigit((unsigned char)*sparam) && *sparam != 0;
+				     sparam++)
+					;
+			}
+			n = atoi(sparam);
+			if (n > 0 && n < 50) gPretty.namewidth = n;
+			gPretty.nameleft = true;
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-nameright", 7)) {
+			if (*sparam == 0) {
+				for (sparam = sopt + 2;
+				     !isdigit((unsigned char)*sparam) && *sparam != 0;
+				     sparam++)
+					;
+			}
+			n = atoi(sparam);
+			if (n > 0 && n < 50) gPretty.namewidth = n;
+			gPretty.nameright = true;
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-nametop", 6)) {
+			gPretty.nametop = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-numleft", 6)) {
+			if (*sparam == 0) {
+				for (sparam = sopt + 2;
+				     !isdigit((unsigned char)*sparam) && *sparam != 0;
+				     sparam++)
+					;
+			}
+			n = atoi(sparam);
+			if (n > 0 && n < 50) gPretty.numwidth = n;
+			gPretty.numleft = true;
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-numright", 6)) {
+			if (*sparam == 0) {
+				for (sparam = sopt + 2;
+				     !isdigit((unsigned char)*sparam) && *sparam != 0;
+				     sparam++)
+					;
+			}
+			n = atoi(sparam);
+			if (n > 0 && n < 50) gPretty.numwidth = n;
+			gPretty.numright = true;
+			return kOptOkay;
+		}
+
+		if (checkopt(false, sopt, "-numtop", 6)) {
+			gPretty.numtop = true;
+			return kOptOkay;
+		}
+		if (checkopt(false, sopt, "-numbottom", 6)) {
+			gPretty.numbot = true;
+			return kOptOkay;
+		}
+
+		else {
+			usage();
+			return eOptionBad;
+		}
+	}
+
+	else if (strlen(sopt) >= sizeof(inputfilestore)) {
+		fprintf(stderr, "File name too long: %s\n", sopt);
+		return eOptionBad; /* would overrun inputfilestore */
+	}
+	strcpy(inputfile, sopt); /* plain word: the input file name */
+	gotinputfile = (*inputfile != 0);
+	nfile++;
+	return kOptOkay;
+}
+
+/* this program suffers some as it tries to be a quiet translator pipe
+   _and_ a noisy user interactor
+*/
+
+/*
+ * rs_readline -- bounded stand-in for gets(): read one line from stdin
+ * into buf (at most size-1 characters) and drop the trailing newline.
+ * The unread remainder of an over-long line is discarded.  On end of
+ * file or read error buf becomes the empty string.
+ */
+static void rs_readline(char *buf, size_t size)
+{
+	size_t len;
+	int c;
+
+	if (size == 0) return;
+	if (fgets(buf, (int)size, stdin) == NULL) {
+		*buf = 0;
+		return;
+	}
+	len = strlen(buf);
+	if (len > 0 && buf[len - 1] == '\n')
+		buf[len - 1] = 0;
+	else /* truncated line (or EOF without newline): eat the rest */
+		while ((c = getc(stdin)) != EOF && c != '\n')
+			;
+}
+
+/*
+ * rs_fitseq -- make *seq exactly `want' residues long: a shorter
+ * sequence is padded with gapchar, a longer one is cut.  The result is
+ * NUL terminated.  Returns 0, or eMemFull (with *seq and *seqlen left
+ * untouched) when the buffer cannot be grown.
+ */
+static short rs_fitseq(char **seq, long *seqlen, long want, char gapchar)
+{
+	long k;
+
+	if (*seqlen < want) {
+		/* +1 leaves room for the terminator stored below */
+		char *grown = (char *)realloc(*seq, want + 1);
+		if (grown == NULL) return eMemFull;
+		*seq = grown;
+		for (k = *seqlen; k < want; k++) grown[k] = gapchar;
+	}
+	*seqlen = want;
+	(*seq)[want] = 0;
+	return 0;
+}
+
+/*
+ * Program entry point.
+ *   1. parse argv with readopt(); optionally spool stdin to a temp file
+ *   2. pick the output stream and output format (prompting unless quiet)
+ *   3. for every input file: list its sequences, select one/all, read
+ *      each selected sequence, apply case/reverse/match/degap edits and
+ *      write it with writeSeq()
+ *   4. emit format trailers/headers; for interleaved formats the
+ *      sequences were written to a tmpfile() and are re-read here line
+ *      by line using the offsets recorded in outindex[]
+ * Leaves through exit_main(), which closes files and removes the spool.
+ */
+/* return is best for SIOW, okay for others */
+#ifdef SIOW
+#define Exit(a) return (a)
+int siow_main(int argc, char *argv[])
+
+#else
+#define Exit(a) exit(a)
+
+int main(int argc, char *argv[])
+#endif
+{
+	boolean closein = false;
+	short ifile, nseq, atseq, format, err = 0, seqtype = kDNA, nlines = 0,
+	      seqout = 0, phylvers = 2;
+	long i, skiplines, seqlen = 0, seqlen0 = 0;
+	unsigned long checksum = 0, checkall = 0;
+	char *seq = NULL, *cp, *firstseq = NULL, *seqlist, *progname,
+	     tempname[256];
+	char seqid[256], *seqidptr = seqid;
+	char stempstore[256], *stemp = stempstore;
+	/* all streams start NULL so exit_main never closes garbage */
+	FILE *ftmp = NULL, *fin = NULL, *fout = NULL;
+	long outindexmax = 0, noutindex = 0, *outindex = NULL;
+
+#define exit_main(err)                                      \
+	do {                                                \
+		if (closeout && fout != NULL) fclose(fout); \
+		if (closein && fin != NULL) fclose(fin);    \
+		if (*tempname != 0) remove(tempname);       \
+		Exit(err);                                  \
+	} while (0)
+
+/* remember where the next writeSeq() output starts in the temp file */
+#define indexout()                                                         \
+	do {                                                               \
+		if (interleaved) {                                         \
+			if (noutindex >= outindexmax) {                    \
+				long *grown;                               \
+				outindexmax = noutindex + 20;              \
+				grown = (long *)realloc(                   \
+				    outindex, sizeof(long) * outindexmax); \
+				if (grown == NULL) {                       \
+					err = eMemFull;                    \
+					erralert(err);                     \
+					exit_main(err);                    \
+				}                                          \
+				outindex = grown;                          \
+			}                                                  \
+			outindex[noutindex++] = ftell(fout);               \
+		}                                                          \
+	} while (0)
+
+	resetGlobals();
+	foo = stdout;
+	progname = argv[0];
+	*oname = 0;
+	*tempname = 0;
+	*seqid = 0;
+	/* initialize gPretty ?? -- done in header */
+
+	for (i = 1; i < argc; i++) {
+		err = readopt(argv[i]);
+		if (err <= 0) exit_main(err);
+	}
+
+	/* pipe input from stdin !? */
+	if (dopipe && !gotinputfile) {
+		int c;
+		tmpnam(tempname);
+		inputfile = tempname;
+		ftmp = fopen(inputfile, "w");
+		if (!ftmp) {
+			erralert(eFileCreate);
+			exit_main(eFileCreate);
+		}
+		while ((c = getc(stdin)) != EOF) fputc(c, ftmp);
+		fclose(ftmp);
+		ftmp = NULL;
+		gotinputfile = true;
+	}
+
+	quietly = (dopipe || (gotinputfile && (listonly || whichSeq != 0)));
+
+	if (verbose || (!quietly && !gotinputfile)) fprintf(stderr, title);
+	ifile = 1;
+
+	/* UI: Choose output */
+	if (askout && !closeout && !quietly) {
+		askout = false;
+		fprintf(
+		    stderr,
+		    "\nName of output file (?=help, defaults to display): \n");
+		oname = onamestore;
+		/* NOTE(review): sizeof assumes onamestore is a char array
+		   (as stempstore is) -- confirm against its declaration */
+		rs_readline(oname, sizeof(onamestore));
+		skipwhitespace(oname);
+		if (*oname == '?') {
+			usage();
+			exit_main(0);
+		}
+		else if (*oname != 0) {
+			closeout = true;
+			foo = fopen(oname, "w");
+			if (!foo) {
+				erralert(eFileCreate);
+				exit_main(eFileCreate);
+			}
+		}
+	}
+
+	fout = foo;
+	if (outform == kNoformat) outform = chooseFormat(quietly);
+
+	/* set up formats ... */
+	switch (outform) {
+		case kPhylip2:
+			interleaved = false;
+			phylvers = 2;
+			outform = kPhylip;
+			break;
+
+		case kPhylip4:
+			interleaved = true;
+			phylvers = 4;
+			outform = kPhylip;
+			break;
+
+		case kMSF:
+		case kPAUP:
+			interleaved = true;
+			break;
+
+		case kPretty:
+			gPretty.isactive = true;
+			interleaved = true;
+			break;
+
+		default:
+			break;
+	}
+
+	if (gPretty.isactive && gPretty.noleaves) interleaved = false;
+	if (interleaved) {
+		/* sequences go to a scratch file first, see step 4 above */
+		ftmp = tmpfile();
+		if (ftmp == NULL) {
+			erralert(eFileCreate);
+			exit_main(eFileCreate);
+		}
+		fout = ftmp;
+		outindexmax = 30;
+		noutindex = 0;
+		outindex = (long *)malloc(outindexmax * sizeof(long));
+		if (outindex == NULL) {
+			err = eMemFull;
+			erralert(err);
+			exit_main(err);
+		}
+	}
+
+	/* big loop over all input files */
+	do {
+		/* select next input file */
+		gotinputfile = (*tempname != 0);
+		while ((ifile < argc) && (!gotinputfile)) {
+			if (*argv[ifile] != '-') {
+				strcpy(inputfile, argv[ifile]);
+				gotinputfile = (*inputfile != 0);
+				--nfile;
+			}
+			ifile++;
+		}
+
+		while (!gotinputfile) {
+			fprintf(stderr,
+				"\nName an input sequence or -option: \n");
+			inputfile = inputfilestore;
+
+			stemp = stempstore;
+			rs_readline(stemp, sizeof(stempstore));
+			if (*stemp == 0)
+				goto fini; /* !! need this to finish work during
+					      interactive use */
+			stemp = strtok(stempstore, " \n\r\t");
+			while (stemp) {
+				err = readopt(stemp); /* will read inputfile if
+							 it exists */
+				if (err < 0) exit_main(err);
+				stemp = strtok(NULL, " \n\r\t");
+			}
+		}
+		/* thanks to AJB@UK.AC.DARESBURY.DLVH for this PHYLIP3 fix: */
+		/* head for end (interleave if needed) */
+		if (*inputfile == 0) break;
+
+		format = seqFileFormat(inputfile, &skiplines, &err);
+
+		if (err == 0) {
+#ifdef NCBI
+			if (format == kASNseqentry || format == kASNseqset)
+				seqlist = listASNSeqs(inputfile, skiplines,
+						      format, &nseq, &err);
+			else
+#endif
+				seqlist = listSeqs(inputfile, skiplines, format,
+						   &nseq, &err);
+		}
+
+		if (err != 0)
+			erralert(err);
+
+		else if (listonly) {
+			dumpSeqList(seqlist, format);
+			free(seqlist);
+		}
+
+		else {
+			/* choose whichSeq if needed */
+			if (nseq == 1 || chooseall ||
+			    (quietly && whichSeq == 0)) {
+				chooseall = true;
+				whichSeq = 1;
+				quietly = true; /* no loop */
+			}
+			else if (whichSeq > nseq && quietly) {
+				erralert(eItemNotFound);
+				err = eItemNotFound;
+			}
+			else if (whichSeq > nseq || !quietly) {
+				dumpSeqList(seqlist, format);
+				fprintf(stderr,
+					"\nChoose a sequence (# or All): \n");
+				stemp = stempstore;
+				rs_readline(stemp, sizeof(stempstore));
+				skipwhitespace(stemp);
+				if (to_lower(*stemp) == 'a') {
+					chooseall = true;
+					whichSeq = 1;
+					/* !? this means we don't ask for
+					   another file as well as no more
+					   whichSeqs... */
+					quietly = true;
+				}
+				else if (isdigit(*stemp))
+					whichSeq = atol(stemp);
+				else
+					whichSeq = 1; /* default */
+			}
+			free(seqlist);
+
+			if (false /*chooseall*/) { /* this isn't debugged
+						      yet...*/
+				fin = fopen(inputfile, "r");
+				closein = true;
+			}
+
+			while (whichSeq > 0 && whichSeq <= nseq) {
+				/* need to open multiple output files ? */
+				manyout =
+				    ((chooseall || nwhichlist > 1) &&
+				     nseq > 1 &&
+				     (outform == kPlain || outform == kGCG));
+				if (manyout) {
+					if (whichSeq == 1)
+						erralert(eOneFormat);
+					else if (closeout) {
+						/* stemp is NULL after the
+						   strtok loop above */
+						stemp = stempstore;
+						sprintf(stemp, "%s_%ld", oname,
+							(long)whichSeq);
+						if (freopen(stemp, "w", fout) ==
+						    NULL) {
+							/* stream is already
+							   closed: don't close
+							   it again on exit */
+							closeout = false;
+							erralert(eFileCreate);
+							exit_main(eFileCreate);
+						}
+						fprintf(stderr,
+							"Writing sequence %ld "
+							"to file %s\n",
+							(long)whichSeq, stemp);
+					}
+				}
+
+				if (closein) {
+					/* !! this fails... skips most seqs...
+					 */
+					/* !! in sequential read, must count
+					 * seqs already read from whichSeq ...
+					 */
+					/* need major revision of ureadseq
+					 * before we can do this */
+					atseq = whichSeq - 1;
+					seqidptr = seqid;
+					seq = readSeqFp(
+					    whichSeq, fin, skiplines, format,
+					    &seqlen, &atseq, &err, seqidptr);
+					skiplines = 0;
+				}
+				else {
+					atseq = 0;
+					seqidptr = seqid;
+#ifdef NCBI
+					if (format == kASNseqentry ||
+					    format == kASNseqset) {
+						seqidptr = NULL;
+						seq = readASNSeq(
+						    whichSeq, inputfile,
+						    skiplines, format, &seqlen,
+						    &atseq, &err, &seqidptr);
+					}
+					else
+#endif
+						seq = readSeq(
+						    whichSeq, inputfile,
+						    skiplines, format, &seqlen,
+						    &atseq, &err, seqidptr);
+				}
+
+				if (gPretty.degap) {
+					char *newseq;
+					long newlen;
+					newseq =
+					    compressSeq(gPretty.gapchar, seq,
+							seqlen, &newlen);
+					if (newseq) {
+						free(seq);
+						seq = newseq;
+						seqlen = newlen;
+					}
+				}
+
+				if (outform == kMSF)
+					checksum =
+					    GCGchecksum(seq, seqlen, &checkall);
+				else if (verbose)
+					checksum =
+					    seqchecksum(seq, seqlen, &checkall);
+				if (verbose)
+					fprintf(stderr,
+						"Sequence %ld, length= %ld, "
+						"checksum= %lX, format= %s, id= "
+						"%s\n",
+						(long)whichSeq, seqlen,
+						checksum, formatstr(format),
+						seqidptr ? seqidptr : "");
+
+				if (err != 0)
+					erralert(err);
+				else {
+					/* format fixes that writeseq doesn't do
+					 */
+					switch (outform) {
+						case kPIR:
+							if (seqout == 0)
+								fprintf(
+								    foo,
+								    "\\\\\\\n");
+							break;
+						case kASN1:
+							if (seqout == 0)
+								fprintf(
+								    foo,
+								    kASN1headline);
+							break;
+
+						case kPhylip:
+							if (seqout == 0) {
+								if (!interleaved) { /*  bug, nseq is for 1st infile only */
+									i = chooseall ? nseq : 1;
+									if (phylvers >= 4)
+										fprintf(foo, " %ld %ld\n", i, seqlen);
+									else
+										fprintf(foo, " %ld %ld YF\n", i, seqlen);
+								}
+								seqlen0 = seqlen;
+							}
+							else if (seqlen != seqlen0) {
+								erralert(eUnequalSize);
+								err = rs_fitseq(&seq, &seqlen, seqlen0, gPretty.gapchar);
+								if (err != 0) {
+									erralert(err);
+									exit_main(err);
+								}
+							}
+							break;
+
+						case kPAUP:
+							if (seqout == 0) {
+								seqtype = getseqtype(seq, seqlen);
+								seqlen0 = seqlen;
+							}
+							else if (seqlen != seqlen0) {
+								erralert(eUnequalSize);
+								err = rs_fitseq(&seq, &seqlen, seqlen0, gPretty.gapchar);
+								if (err != 0) {
+									erralert(err);
+									exit_main(err);
+								}
+							}
+							break;
+
+						default:
+							break;
+					}
+
+					if (doupper)
+						for (i = 0; i < seqlen; i++)
+							seq[i] =
+							    to_upper(seq[i]);
+					else if (dolower)
+						for (i = 0; i < seqlen; i++)
+							seq[i] =
+							    to_lower(seq[i]);
+
+					if (doreverse) {
+						/* reverse-complement in place,
+						   working inward from both
+						   ends */
+						long j, k;
+						char ctemp;
+						for (j = 0, k = seqlen - 1;
+						     j <= k; j++, k--) {
+							ctemp = compl[seq[j] - ' '];
+							seq[j] = compl[seq[k] - ' '];
+							seq[k] = ctemp;
+						}
+					}
+
+					if ((gPretty.isactive ||
+					     outform == kPAUP) &&
+					    gPretty.domatch &&
+					    firstseq != NULL) {
+						for (i = 0; i < seqlen; i++)
+							if (seq[i] ==
+							    firstseq[i])
+								seq[i] =
+								    gPretty
+									.matchchar;
+					}
+
+					if (gPretty.isactive &&
+					    gPretty.numtop && seqout == 0) {
+						gPretty.numline = 1;
+						indexout();
+						(void)writeSeq(fout, seq,
+							       seqlen, outform,
+							       seqidptr);
+						gPretty.numline = 2;
+						indexout();
+						(void)writeSeq(fout, seq,
+							       seqlen, outform,
+							       seqidptr);
+						gPretty.numline = 0;
+					}
+
+					indexout();
+					nlines = writeSeq(fout, seq, seqlen,
+							  outform, seqidptr);
+					seqout++;
+				}
+
+				/* the first sequence is kept as the reference
+				   for match-character output */
+				if ((gPretty.isactive || outform == kPAUP) &&
+				    gPretty.domatch && firstseq == NULL) {
+					firstseq = seq;
+					seq = NULL;
+				}
+				else if (seq != NULL) {
+					free(seq);
+					seq = NULL;
+				}
+
+#ifdef NCBI
+				if ((format == kASNseqentry ||
+				     format == kASNseqset) &&
+				    seqidptr && seqidptr != seqid) {
+					free(seqidptr);
+					/* seqidptr is used again after fini */
+					seqidptr = seqid;
+				}
+#endif
+				if (chooseall)
+					whichSeq++;
+				else if (iwhichlist < nwhichlist)
+					whichSeq = whichlist[iwhichlist++];
+				else
+					whichSeq = 0;
+			}
+			if (closein) {
+				fclose(fin);
+				fin = NULL;
+				closein = false;
+			}
+		}
+		whichSeq = 0;
+	} while (nfile > 0 || !quietly);
+
+fini:
+	if (firstseq) {
+		free(firstseq);
+		firstseq = NULL;
+	}
+	if (err || listonly) exit_main(err);
+
+	if (gPretty.isactive && gPretty.numbot) {
+		gPretty.numline = 2;
+		indexout();
+		(void)writeSeq(fout, seq, seqlen, outform, seqidptr);
+		gPretty.numline = 1;
+		indexout();
+		(void)writeSeq(fout, seq, seqlen, outform, seqidptr);
+		gPretty.numline = 0;
+	}
+
+	if (outform == kMSF) {
+		if (*oname)
+			cp = oname;
+		else
+			cp = inputfile;
+		fprintf(foo,
+			"\n %s  MSF: %ld  Type: N  January 01, 1776  12:00  "
+			"Check: %lu ..\n\n",
+			cp, seqlen, checkall);
+	}
+
+	if (outform == kPAUP) {
+		fprintf(foo, "#NEXUS\n");
+		if (*oname)
+			cp = oname;
+		else
+			cp = inputfile;
+		fprintf(foo, "[%s -- data title]\n\n", cp);
+		/* ! now have header lines for each sequence... put them before
+		 * "begin data;... */
+	}
+
+	if (outform == kPhylip && interleaved) {
+		if (phylvers >= 4)
+			fprintf(foo, " %d %ld\n", seqout, seqlen);
+		else
+			fprintf(foo, " %d %ld YF\n", seqout, seqlen);
+	}
+
+	if (interleaved) {
+		/* interleave species lines in true output */
+		/* nlines is # lines / sequence */
+		short iline, j, leaf;
+		long iseq;
+		char *s = stempstore;
+
+		indexout();
+		noutindex--; /* mark eof */
+
+		for (leaf = 0; leaf < nlines; leaf++) {
+			if (outform == kMSF && leaf == 1) {
+				fputs("//\n\n", foo);
+			}
+			if (outform == kPAUP && leaf == 1) {
+				switch (seqtype) {
+					case kRNA:
+						cp = "rna";
+						break;
+					case kAmino:
+						cp = "protein";
+						break;
+					case kDNA:
+					case kNucleic:
+					case kOtherSeq:
+					default: /* never leave cp unset */
+						cp = "dna";
+						break;
+				}
+				fprintf(foo, "\nbegin data;\n");
+				fprintf(foo, " dimensions ntax=%d nchar=%ld;\n",
+					seqout, seqlen);
+				fprintf(
+				    foo,
+				    " format datatype=%s interleave missing=%c",
+				    cp, gPretty.gapchar);
+				if (gPretty.domatch)
+					fprintf(foo, " matchchar=%c",
+						gPretty.matchchar);
+				fprintf(foo, ";\n  matrix\n");
+			}
+
+			/* line `leaf' of every stored sequence, in order */
+			for (iseq = 0; iseq < noutindex; iseq++) {
+				fseek(ftmp, outindex[iseq], SEEK_SET);
+				for (iline = 0; iline <= leaf; iline++)
+					if (!fgets(s, 256, ftmp)) *s = 0;
+				/* skip if we ran into the next sequence */
+				if (ftell(ftmp) <= outindex[iseq + 1])
+					fputs(s, foo);
+			}
+
+			for (j = 0; j < gPretty.interline; j++)
+				fputs("\n", foo); /* some want spacer line */
+		}
+		fclose(ftmp); /* tmp disappears */
+		ftmp = NULL;
+		fout = foo;
+	}
+
+	if (outform == kASN1) fprintf(foo, "} }\n");
+	if (outform == kPAUP) fprintf(foo, ";\n  end;\n");
+
+	free(outindex);
+	exit_main(0);
+}
+
diff --git a/ureadasn.c b/ureadasn.c
new file mode 100644
index 0000000..1548594
--- /dev/null
+++ b/ureadasn.c
@@ -0,0 +1,324 @@
+/* ureadasn.c
+  -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading
+  -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit)
+*/
+
+#ifdef NCBI
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+/* NCBI toolkit :include: must be on lib path */
+#include <ncbi.h>
+#include <seqport.h>
+
+#define UREADASN
+#include "ureadseq.h"
+
+#pragma segment ureadasn
+
+/* this stuff is hacked up from tofasta.c of ncbitools */
+#define kBaseAny 0
+#define kBaseNucleic 1
+#define kBaseAmino 2
+
+/* Context block handed (as the opaque `data' pointer) through
+   SeqEntryExplore() to SeqEntryRawseq(); its members are forwarded
+   one-for-one as the arguments of BioseqRawToRaw(). */
+typedef struct tofasta {
+	Boolean idonly;	     /* forwarded as BioseqRawToRaw's idonly */
+	short *seqnum;	     /* running count, incremented per raw Bioseq */
+	short whichSeq;	     /* wanted sequence number, 0 = every one */
+	char **seq, **seqid; /* malloc'd result strings (may start NULL) */
+	long *seqlen;	     /* receives the length stored in *seq */
+} FastaDat, PNTR FastaPtr;
+
+/*
+ * BioseqRawToRaw -- extract one raw/const Bioseq into plain C strings.
+ *
+ *   bsp       Bioseq to convert; NULL or other representations are ignored
+ *   idonly    true: only append " N)  id title\n" to *seqid (listing mode)
+ *   whichSeq  number of the wanted sequence, 0 = accept every one
+ *   seqnum    running counter, incremented for each raw/const Bioseq seen
+ *   seq       in/out malloc'd residue string; new residues are appended
+ *   seqid     in/out malloc'd id string; new ids are appended
+ *   seqlen    out: total length of *seq
+ *
+ * May be called several times for one logical sequence (SeqEntryExplore
+ * visits the parts), so *seq / *seqid may already hold data or be NULL.
+ * On allocation failure *seqid keeps its previous contents; a failure
+ * while collecting residues frees *seq and reports length 0, so the
+ * caller never sees a stale pointer or a silently truncated sequence.
+ */
+void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly, short whichSeq,
+		    short *seqnum, char **seq, char **seqid, long *seqlen)
+{
+	SeqPortPtr spp;
+	SeqIdPtr bestid;
+	Uint1 repr, code, residue;
+	CharPtr tmp, title;
+	long outlen, outmax;
+	char localid[256], *sp, *grown;
+
+	if (bsp == NULL) return;
+	repr = Bioseq_repr(bsp);
+	if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;
+
+	(*seqnum)++;
+	if (!(whichSeq == *seqnum || whichSeq == 0)) return;
+
+	/* zero-fill so localid stays terminated even if the bounded title
+	   copy below stops without writing a NUL */
+	memset(localid, 0, sizeof(localid));
+	bestid = SeqIdFindBest(bsp->id, (Uint1)0);
+	title = BioseqGetTitle(bsp);
+	if (idonly) {
+		sprintf(localid, " %d)  ", *seqnum);
+		/* back up one so the id overwrites the last blank */
+		tmp = localid + strlen(localid) - 1;
+	}
+	else {
+		strcpy(localid, " ");
+		tmp = localid;
+	}
+	tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
+	tmp = StringMove(tmp, " ");
+	StringNCpy(tmp, title, 200);
+	/* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */
+
+	/* seqid is variable sized: +3 covers "\n", NUL and one spare */
+	outmax = strlen(localid) + 3;
+	if (*seqid == NULL) {
+		*seqid = (char *)malloc(outmax);
+		if (*seqid == NULL) return;
+		strcpy(*seqid, localid);
+	}
+	else {
+		outmax += strlen(*seqid) + 2; /* +2 for the "; " joiner */
+		grown = (char *)realloc(*seqid, outmax);
+		if (grown == NULL) return; /* old *seqid is still valid */
+		*seqid = grown;
+		if (!idonly) strcat(*seqid, "; ");
+		strcat(*seqid, localid);
+	}
+
+	if (idonly) {
+		strcat(*seqid, "\n");
+		return;
+	}
+
+	if (ISA_na(bsp->mol))
+		code = Seq_code_iupacna;
+	else
+		code = Seq_code_iupacaa;
+	spp = SeqPortNew(bsp, 0, -1, 0, code);
+	if (spp == NULL) return;
+	SeqPortSeek(spp, 0, SEEK_SET);
+
+	sp = *seq;
+	outlen = (sp == NULL) ? 0 : (long)strlen(sp);
+	outmax = outlen + 500;
+	/* realloc(NULL, n) behaves as malloc(n); +1 reserves the NUL */
+	grown = (char *)realloc(sp, outmax + 1);
+	if (grown == NULL) {
+		SeqPortFree(spp);
+		return; /* *seq untouched and still valid */
+	}
+	sp = grown;
+	*seq = sp; /* buffer may have moved: keep the caller's copy fresh */
+
+	while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
+		if (outlen >= outmax) {
+			outmax = outlen + 500;
+			grown = (char *)realloc(sp, outmax + 1);
+			if (grown == NULL) {
+				free(sp);
+				*seq = NULL;
+				*seqlen = 0;
+				SeqPortFree(spp);
+				return;
+			}
+			sp = grown;
+		}
+		sp[outlen++] = residue;
+	}
+	sp[outlen] = '\0';
+
+	/* trim the slack; keep the larger buffer if trimming fails */
+	grown = (char *)realloc(sp, outlen + 1);
+	if (grown != NULL) sp = grown;
+
+	*seq = sp;
+	*seqlen = outlen;
+	SeqPortFree(spp);
+	return;
+}
+
+/* SeqEntryExplore() callback: for entries that are Bioseqs, unpack the
+   FastaDat context in `data' and hand the Bioseq to BioseqRawToRaw().
+   index and indent are not used. */
+static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index,
+			   Int2 indent)
+{
+	FastaPtr ctx;
+
+	if (IS_Bioseq(sep)) {
+		ctx = (FastaPtr)data;
+		BioseqRawToRaw((BioseqPtr)sep->data.ptrvalue, ctx->idonly,
+			       ctx->whichSeq, ctx->seqnum, ctx->seq,
+			       ctx->seqid, ctx->seqlen);
+	}
+}
+
+/* Walk every Bioseq below `sep' with SeqEntryExplore(), passing the
+   remaining arguments to SeqEntryRawseq() bundled in a FastaDat.
+   A NULL entry is ignored. */
+void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq,
+		   short *seqnum, char **seq, char **seqid, long *seqlen)
+{
+	FastaDat ctx;
+
+	if (sep != NULL) {
+		ctx.seq = seq;
+		ctx.seqid = seqid;
+		ctx.seqlen = seqlen;
+		ctx.seqnum = seqnum;
+		ctx.whichSeq = whichSeq;
+		ctx.idonly = idonly;
+		SeqEntryExplore(sep, (Pointer)&ctx, SeqEntryRawseq);
+	}
+}
+
+/*
+ * listASNSeqs -- build the " N)  id title\n" listing for an ASN.1 file.
+ *
+ *   filename   file to open (text mode)
+ *   skiplines  number of leading lines read and discarded first
+ *   format     kASNseqentry (one Seq-entry) or kASNseqset (Bioseq-set)
+ *   nseq       out: number of raw/const Bioseqs counted
+ *   error      out: 0, or eASNerr when setup or opening fails
+ *
+ * Returns the malloc'd listing (caller frees), or NULL on error.
+ */
+char *listASNSeqs(
+    const char *filename, const long skiplines,
+    const short format, /* note: this is kASNseqentry or kASNseqset */
+    short *nseq, short *error)
+{
+	AsnIoPtr asnio = NULL;
+	SeqEntryPtr entry;
+	AsnTypePtr settype, elemtype;
+	AsnModulePtr modules;
+	Boolean inIsBinary =
+	    FALSE; /* damn, why can't asn routines test this? */
+	char *residues = NULL;
+	char *idlist = NULL, linebuf[256];
+	long reslen;
+	int nskipped;
+
+	*error = 0;
+	*nseq = 0;
+
+	/* asn dictionary setups: sequence alphabets and parse trees, the
+	   loaded module list, and the two type pointers we match on */
+	if (!SeqEntryLoad()) goto errxit;
+	if ((modules = AsnAllModPtr()) == NULL) goto errxit;
+	if ((settype = AsnFind("Bioseq-set")) == NULL) goto errxit;
+	if ((elemtype = AsnFind("Bioseq-set.seq-set.E")) == NULL) goto errxit;
+
+	/* open the ASN.1 input file in the right mode */
+	/* !!!! THIS FAILS when filename has MAC PATH (& other paths?)
+	 * (:folder:filename) */
+	asnio = AsnIoOpen(filename, inIsBinary ? "rb" : "r");
+	if (asnio == NULL) goto errxit;
+	nskipped = 0;
+	while (nskipped < skiplines) {
+		/* this may mess up asn routines... */
+		fgets(linebuf, 255, asnio->fp);
+		nskipped++;
+	}
+
+	if (!ErrSetLog("stderr")) goto errxit;
+	ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*??  log errors instead of die */
+
+	if (format != kASNseqentry) {
+		/* read Seq-entry's from a Bioseq-set */
+		for (;;) {
+			settype = AsnReadId(asnio, modules, settype);
+			if (settype == NULL) break;
+			if (settype != elemtype) {
+				AsnReadVal(asnio, settype, NULL);
+				continue;
+			}
+			/* top level Seq-entry */
+			entry = SeqEntryAsnRead(asnio, settype);
+			SeqEntryToRaw(entry, true, 0, nseq, &residues, &idlist,
+				      &reslen);
+			SeqEntryFree(entry);
+			if (residues) free(residues);
+			residues = NULL;
+		}
+	}
+	else {
+		/* read one Seq-entry */
+		entry = SeqEntryAsnRead(asnio, NULL);
+		SeqEntryToRaw(entry, true, 0, nseq, &residues, &idlist,
+			      &reslen);
+		if (residues) free(residues);
+		residues = NULL;
+		SeqEntryFree(entry);
+	}
+
+	AsnIoClose(asnio);
+	*error = 0;
+	return idlist;
+
+errxit:
+	AsnIoClose(asnio);
+	if (idlist) free(idlist);
+	*error = eASNerr;
+	return NULL;
+}
+
+/*
+ * readASNSeq -- read sequence number whichEntry from an ASN.1 file.
+ *
+ *   whichEntry  1-based number of the wanted raw/const Bioseq
+ *   filename    file to open (text mode)
+ *   skiplines   number of leading lines read and discarded first
+ *   format      kASNseqentry (one Seq-entry) or kASNseqset (Bioseq-set)
+ *   seqlen      out: length of the returned residues
+ *   nseq        out: number of Bioseqs counted before stopping
+ *   error       out: 0, or eASNerr when setup or opening fails
+ *   seqid       in/out: malloc'd id string filled in by SeqEntryToRaw
+ *
+ * Returns the malloc'd residue string (caller frees); NULL on error or
+ * when nothing was collected.
+ */
+char *readASNSeq(
+    const short whichEntry, const char *filename, const long skiplines,
+    const short format, /* note: this is kASNseqentry or kASNseqset */
+    long *seqlen, short *nseq, short *error, char **seqid)
+{
+	AsnIoPtr aip = NULL;
+	SeqEntryPtr the_set;
+	AsnTypePtr atp, atp2;
+	AsnModulePtr amp;
+	Boolean inIsBinary =
+	    FALSE; /* damn, why can't asn routines test this? */
+	/* must hold the 255 bytes fgets() is allowed to store below; the
+	   former 200-byte buffer could be overrun by a long line */
+	char *seq, stemp[256];
+	int i;
+
+	*seqlen = 0;
+	*nseq = 0;
+	*error = 0;
+	seq = NULL;
+
+	/*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/
+	/* asn dictionary setups */
+	if (!SeqEntryLoad())
+		goto errxit;  /*  sequence alphabets (and sequence parse trees)
+			       */
+	amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
+	if (amp == NULL) goto errxit;
+	atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
+	if (atp == NULL) goto errxit;
+	atp2 = AsnFind("Bioseq-set.seq-set.E");
+	if (atp2 == NULL) goto errxit;
+
+	/* open the ASN.1 input file in the right mode */
+	/*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/
+	if ((aip = AsnIoOpen(filename, inIsBinary ? "rb" : "r")) == NULL)
+		goto errxit;
+	/* same 255-byte read as listASNSeqs, so both skip identically */
+	for (i = 0; i < skiplines; i++)
+		fgets(stemp, 255,
+		      aip->fp); /* this may mess up asn routines... */
+
+	if (!ErrSetLog("stderr"))
+		goto errxit;
+	else
+		ErrSetOpts(ERR_CONTINUE,
+			   ERR_LOG_ON); /*??  log errors instead of die */
+
+	seq = NULL;
+	if (format == kASNseqentry) { /* read one Seq-entry */
+		/*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/
+		the_set = SeqEntryAsnRead(aip, NULL);
+		SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid,
+			      seqlen);
+		SeqEntryFree(the_set);
+		goto goodexit;
+	}
+
+	else { /* read Seq-entry's from a Bioseq-set */
+		/*fprintf(stderr,"readASNseq: AsnReadId\n");*/
+		while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
+			if (atp == atp2) { /* top level Seq-entry */
+				the_set = SeqEntryAsnRead(aip, atp);
+				SeqEntryToRaw(the_set, false, whichEntry, nseq,
+					      &seq, seqid, seqlen);
+				SeqEntryFree(the_set);
+				/* stop as soon as the wanted entry is passed */
+				if (*nseq >= whichEntry) goto goodexit;
+			}
+			else
+				AsnReadVal(aip, atp, NULL);
+		}
+	}
+
+goodexit:
+	AsnIoClose(aip);
+	*error = 0;
+	return seq;
+
+errxit:
+	AsnIoClose(aip);
+	*error = eASNerr;
+	if (seq) free(seq);
+	return NULL;
+}
+
+#endif /*NCBI*/
diff --git a/ureadseq.c b/ureadseq.c
new file mode 100644
index 0000000..dbd93b8
--- /dev/null
+++ b/ureadseq.c
@@ -0,0 +1,2121 @@
+/* File: ureadseq.c
+ *
+ * Reads and writes nucleic/protein sequence in various
+ * formats. Data files may have multiple sequences.
+ *
+ * Copyright 1990 by d.g.gilbert
+ * biology dept., indiana university, bloomington, in 47405
+ * e-mail: gilbertd@bio.indiana.edu
+ *
+ * This program may be freely copied and used by anyone.
+ * Developers are encouraged to incorporate parts in their
+ * programs, rather than devise their own private sequence
+ * format.
+ *
+ * This should compile and run with any ANSI C compiler.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#define UREADSEQ_G
+#include "ureadseq.h"
+
+#pragma segment ureadseq
+
+/* Case-insensitive strcmp (from Nlm_StrICmp): returns 0 when a and b
+   match ignoring case, otherwise the difference of the first pair of
+   upper-cased characters that differ. */
+int Strcasecmp(const char *a, const char *b) /* from Nlm_StrICmp */
+{
+	int delta;
+
+	if (a == b) return 0;
+	for (;; a++, b++) {
+		delta = to_upper(*a) - to_upper(*b);
+		if (delta) return delta;
+		/* equal so far; both strings end here */
+		if (*a == '\0') return 0;
+	}
+}
+
+/* Case-insensitive strncmp (from Nlm_StrNICmp): compares at most maxn
+   characters.  Returns 0 when they match ignoring case, otherwise the
+   difference of the first pair of upper-cased characters that differ.
+   maxn == 0 compares nothing and returns 0 (it used to fall through
+   and compare the whole strings). */
+int Strncasecmp(const char *a, const char *b, long maxn) /* from Nlm_StrNICmp */
+{
+	int diff, done;
+	if (a == b || maxn == 0) return 0;
+	done = 0;
+	while (!done) {
+		diff = to_upper(*a) - to_upper(*b);
+		if (diff) return diff;
+		if (*a == '\0')
+			done = 1;
+		else {
+			a++;
+			b++;
+			maxn--;
+			if (!maxn) done = 1;
+		}
+	}
+	return 0;
+}
+
+/* Local marks file-private functions; it can be predefined to change
+   their linkage. */
+#ifndef Local
+#define Local static /* local functions */
+#endif
+
+/* addseq() grows the sequence buffer by this many bytes at a time */
+#define kStartLength 500
+
+/* residue alphabets -- NOTE(review): not referenced in this part of the
+   file; presumably used for sequence-type detection, confirm at use */
+const char *aminos = "ABCDEFGHIKLMNPQRSTVWXYZ*";
+const char *primenuc = "ACGTU";
+const char *protonly = "EFIPQZ";
+
+/* symbol sets; each array is sized to its text plus the terminating NUL */
+const char kNocountsymbols[5] = "_.-?";
+const char stdsymbols[6] = "_.-*?";
+const char allsymbols[32] = "_.-*?<>{}[]()!@#$%^&=+;:'/|`~\"\\";
+/* the set isSeqChar()/isSeqNumChar() accept besides letters/digits */
+static const char *seqsymbols = allsymbols;
+
+/* ten digits and ten same-position substitute characters */
+const char nummask[11] = "0123456789";
+const char nonummask[11] = "~!@#$%^&*(";
+
+/*
+    use general form of isseqchar -- all chars + symbols.
+    no formats except nbrf (?) use symbols in data area as
+    anything other than sequence chars.
+*/
+
+/* Local variables for readSeq: */
+/* One of these is threaded through every format reader below. */
+struct ReadSeqVars {
+	/* choice: wanted sequence number, or kListSequences to list ids;
+	   err: error code (addseq stores eMemFull here);
+	   nseq: sequences seen so far, bumped by readLoop */
+	short choice, err, nseq;
+	/* seqlen: chars stored in seq; maxseq: capacity addseq grows;
+	   seqlencount: chars tallied by countseq */
+	long seqlen, maxseq, seqlencount;
+	short topnseq;
+	long topseqlen;
+	const char *fname;
+	/* seq: growing output buffer; seqid: id text of current sequence */
+	char *seq, *seqid, matchchar;
+	/* allDone: stop reading the file; done: current sequence ended;
+	   addit: addseq stores chars only while this is set */
+	boolean allDone, done, filestart, addit;
+	FILE *f; /* input stream */
+	long linestart; /* ftell() of the line now in s, for ungetline */
+	char s[256], *sp; /* s: current input line, newline removed */
+
+	/* predicate deciding which chars of a line are sequence data */
+	int (*isseqchar)();
+	/* int  (*isseqchar)(int c);  << sgi cc hates (int c) */
+};
+
+/* Nonzero when c is a letter or one of the seqsymbols characters.
+   NUL is rejected explicitly (strchr would match the terminator), and c
+   is narrowed to unsigned char because callers pass plain char values,
+   which may be negative and are then undefined for <ctype.h>. */
+int isSeqChar(int c)
+{
+	if (c == 0) return 0;
+	return (isalpha((unsigned char)c) || strchr(seqsymbols, c) != NULL);
+}
+
+/* Nonzero when c is a letter, a digit or one of the seqsymbols
+   characters.  NUL is rejected explicitly (strchr would match the
+   terminator), and c is narrowed to unsigned char because callers pass
+   plain char values, which may be negative and are then undefined for
+   <ctype.h>. */
+int isSeqNumChar(int c)
+{
+	if (c == 0) return 0;
+	return (isalnum((unsigned char)c) || strchr(seqsymbols, c) != NULL);
+}
+
+/* Function wrapper around isascii(), so its address can be stored in
+   ReadSeqVars.isseqchar even where isascii is only a macro. */
+int isAnyChar(int c)
+{
+	return isascii(c);
+}
+
+/* Read the next line of f into s (a buffer of at least 256 bytes),
+   cutting it at the first newline.  *linestart receives the file
+   position the line began at.  At end of file s becomes "". */
+Local void readline(FILE *f, char *s, long *linestart)
+{
+	*linestart = ftell(f);
+	if (fgets(s, 256, f) != NULL)
+		s[strcspn(s, "\n")] = 0; /* no newline: rewrites the NUL */
+	else
+		*s = 0;
+}
+
+Local void getline(struct ReadSeqVars *V)
+{
+	readline(V->f, V->s, &V->linestart);
+}
+
+/* Push the current line back: seek V->f to the offset saved by the
+   last readline() (whence value 0), so the line is read again. */
+Local void ungetline(struct ReadSeqVars *V)
+{
+	long where = V->linestart;
+
+	fseek(V->f, where, 0);
+}
+
+/* Append to V->seq every character of s accepted by V->isseqchar.
+   Does nothing unless V->addit is set.  The buffer grows by
+   kStartLength bytes as needed; if that fails V->err becomes eMemFull
+   and the rest of s is dropped. */
+Local void addseq(char *s, struct ReadSeqVars *V)
+{
+	char *grown;
+
+	if (!V->addit) return;
+
+	for (; *s != 0; s++) {
+		if (!(V->isseqchar)(*s)) continue;
+		if (V->seqlen >= V->maxseq) {
+			V->maxseq += kStartLength;
+			grown = (char *)realloc(V->seq, V->maxseq + 1);
+			if (grown == NULL) {
+				V->err = eMemFull;
+				return;
+			}
+			V->seq = grown;
+		}
+		V->seq[(V->seqlen)++] = *s;
+	}
+}
+
+/* Add to V->seqlencount the number of characters in s accepted by
+   V->isseqchar. */
+Local void countseq(char *s, struct ReadSeqVars *V)
+/* this must count all valid seq chars, for some formats (paup-sequential) even
+   if we are skipping seq... */
+{
+	char *scan;
+
+	for (scan = s; *scan != 0; scan++)
+		if ((V->isseqchar)(*scan)) V->seqlencount++;
+}
+
+/* Append one listing line, " N)  <s>\n" with N = V->nseq, to V->seq.
+   Leading blanks of s are skipped and s is cut so the line always fits
+   the local buffer (an unbounded sprintf could overrun it for long
+   ids).  V->addit is forced on for the append and then restored;
+   V->isseqchar is left set to isSeqChar.
+   NOTE(review): the previous isseqchar is not restored -- confirm no
+   caller depends on a different predicate afterwards. */
+Local void addinfo(char *s, struct ReadSeqVars *V)
+{
+	char s2[256], *si;
+	boolean saveadd;
+	size_t used;
+
+	si = s2;
+	while (*s == ' ') s++;
+	sprintf(si, " %d)  ", V->nseq);
+	used = strlen(si);
+	/* room left for the text, less the newline and the NUL */
+	sprintf(si + used, "%.*s\n", (int)(sizeof(s2) - used - 2), s);
+
+	saveadd = V->addit;
+	V->addit = true;
+	V->isseqchar = isAnyChar;
+	addseq(si, V);
+	V->addit = saveadd;
+	V->isseqchar = isSeqChar;
+}
+
+/*
+ * readLoop -- consume the lines of one sequence.
+ *
+ *   margin    leading characters of each line to skip before adding
+ *   addfirst  also add the line already held in V->s
+ *   endTest   called on every line; returns true at the sequence end
+ *             and says whether that line is still data (*addend) and
+ *             whether it must be re-read later (*ungetend)
+ *
+ * Counts the sequence in V->nseq.  Lines are stored only when this is
+ * the sequence selected by V->choice; in listing mode the id line is
+ * appended through addinfo() instead.
+ */
+Local void readLoop(short margin, boolean addfirst,
+		    boolean (*endTest)(boolean *addend, boolean *ungetend,
+				       struct ReadSeqVars *V),
+		    struct ReadSeqVars *V)
+{
+	boolean addend = false;
+	boolean ungetend = false;
+	boolean listing;
+
+	V->nseq++;
+	listing = (V->choice == kListSequences);
+	V->addit = (!listing && V->nseq == V->choice);
+	if (V->addit) V->seqlen = 0;
+
+	if (addfirst) addseq(V->s, V);
+	for (;;) {
+		getline(V);
+		V->done = feof(V->f);
+		/* endTest always runs: it also sets addend/ungetend */
+		V->done |= (*endTest)(&addend, &ungetend, V);
+		if (V->addit && (addend || !V->done) &&
+		    (strlen(V->s) > margin)) {
+			addseq((V->s) + margin, V);
+		}
+		if (V->done) break;
+	}
+
+	if (!listing) {
+		V->allDone = (V->nseq >= V->choice);
+		if (V->allDone && ungetend) ungetline(V);
+	}
+	else
+		addinfo(V->seqid, V);
+}
+
+/* IG end test: a line containing '1' or '2' ends the sequence; that
+   line still carries bases, so it is added and never re-read. */
+Local boolean endIG(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
+{
+	const char *line = V->s;
+
+	*ungetend = false;
+	*addend = true; /* 1 or 2 occur in line w/ bases */
+	return ((strchr(line, '1') != NULL) || (strchr(line, '2') != NULL));
+}
+
+/* IG reader: skip blank and ';' comment lines, take the next line as
+   the sequence id, then let readLoop() collect the sequence. */
+Local void readIG(struct ReadSeqVars *V)
+{
+	/* 18Aug92: new IG format -- ^L between sequences in place of ";" */
+	char *si;
+
+	while (!V->allDone) {
+		for (;;) {
+			getline(V);
+			si = V->s;
+			while (*si != 0 && *si < ' ')
+				*si++ = ' ';	 /* drop controls */
+			if (*si == 0) *V->s = 0; /* chop line to empty */
+			if (feof(V->f)) break;
+			if ((*V->s != 0) && (*V->s != ';')) break;
+		}
+		if (!feof(V->f)) {
+			strcpy(V->seqid, V->s);
+			readLoop(0, false, endIG, V);
+		}
+		else
+			V->allDone = true;
+	}
+}
+
+/* Strider end test: a line containing "//" ends the sequence; that
+   line is neither added nor re-read. */
+Local boolean endStrider(boolean *addend, boolean *ungetend,
+			 struct ReadSeqVars *V)
+{
+	char *mark = strstr(V->s, "//");
+
+	*ungetend = false;
+	*addend = false;
+	return (mark != NULL);
+}
+
+/* Strider reader: the id comes from the first line (text after
+   "; DNA sequence  ", else after the first character); the remaining
+   ';' header lines are skipped and readLoop() collects the data,
+   starting with the first non-header line. */
+Local void readStrider(struct ReadSeqVars *V)
+{ /* ? only 1 seq/file ? */
+	int idstart;
+
+	while (!V->allDone) {
+		getline(V);
+		/* 16 == length of the "; DNA sequence  " prefix */
+		idstart = (strncmp(V->s, "; DNA sequence  ", 16) == 0) ? 16 : 1;
+		strcpy(V->seqid, (V->s) + idstart);
+		for (;;) {
+			if (feof(V->f)) break;
+			if (*V->s != ';') break;
+			getline(V);
+		}
+		if (!feof(V->f))
+			readLoop(0, true, endStrider, V);
+		else
+			V->allDone = true;
+	}
+}
+
+/* PIR end test: the sequence ends at a line containing "///" or at a
+   line starting with "ENTRY"; the latter belongs to the next record
+   and is flagged for re-reading.  The end line is never added. */
+Local boolean endPIR(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
+{
+	boolean nextentry = (strncmp(V->s, "ENTRY", 5) == 0);
+
+	*addend = false;
+	*ungetend = nextentry;
+	return ((strstr(V->s, "///") != NULL) || nextentry);
+}
+
+/* PIR reader: find an ENTRY (or SEQUENCE) line, take the id from
+   column 16 of it, advance to the line starting with "SEQUENCE", let
+   readLoop() collect the data, then move on to the next ENTRY line. */
+Local void readPIR(struct ReadSeqVars *V)
+{ /*PIR -- many seqs/file */
+
+	while (!V->allDone) {
+		while (!feof(V->f) && !strstr(V->s, "ENTRY") &&
+		       !strstr(V->s, "SEQUENCE"))
+			getline(V);
+		strcpy(V->seqid, (V->s) + 16);
+		while (!feof(V->f) && strncmp(V->s, "SEQUENCE", 8) != 0)
+			getline(V);
+		readLoop(0, false, endPIR, V);
+
+		if (!V->allDone) {
+			while (!feof(V->f) && strncmp(V->s, "ENTRY", 5) != 0)
+				getline(V);
+		}
+		if (feof(V->f)) V->allDone = true;
+	}
+}
+
+/* GenBank end test: the sequence ends at a line containing "//" or at
+   a line starting with "LOCUS"; the latter belongs to the next record
+   and is flagged for re-reading.  The end line is never added. */
+Local boolean endGB(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
+{
+	boolean nextlocus = (strncmp(V->s, "LOCUS", 5) == 0);
+
+	*addend = false;
+	*ungetend = nextlocus;
+	return ((strstr(V->s, "//") != NULL) || nextlocus);
+}
+
+/* GenBank reader: the id is taken from column 12 of the current
+   (LOCUS) line; lines are skipped up to the one starting with
+   "ORIGIN", readLoop() collects the data, then the next LOCUS line is
+   located. */
+Local void readGenBank(struct ReadSeqVars *V)
+{ /*GenBank -- many seqs/file */
+
+	while (!V->allDone) {
+		strcpy(V->seqid, (V->s) + 12);
+		while (!feof(V->f) && strncmp(V->s, "ORIGIN", 6) != 0)
+			getline(V);
+		readLoop(0, false, endGB, V);
+
+		if (!V->allDone) {
+			while (!feof(V->f) && strncmp(V->s, "LOCUS", 5) != 0)
+				getline(V);
+		}
+		if (feof(V->f)) V->allDone = true;
+	}
+}
+
+Local boolean endNBRF(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
+{ /* NBRF end test: '*' closes this sequence, a leading '>' opens the next */
+	char *star = strchr(V->s, '*');
+
+	if (star != NULL) {
+		/* '*' can be a valid base symbol, so the line is cut there */
+		*star = 0;
+		*addend = true;
+		*ungetend = false;
+		return (true);
+	}
+	if (*V->s == '>') {
+		/* header line of the following entry */
+		*addend = false;
+		*ungetend = true;
+		return (true);
+	}
+	return (false); /* plain data line: both flags are left untouched */
+}
+
+Local void readNBRF(struct ReadSeqVars *V)
+{ /* NBRF reader: '>' header line, one title line, then data up to '*' */
+	while (!V->allDone) {
+		strcpy(V->seqid, V->s + 4); /* id follows the first 4 chars */
+		getline(V);		    /* discard the title line */
+		readLoop(0, false, endNBRF, V);
+		if (!V->allDone)
+			while (!feof(V->f) && *V->s != '>')
+				getline(V); /* seek the next '>' header */
+		if (feof(V->f))
+			V->allDone = true;
+	}
+}
+
+Local boolean endPearson(boolean *addend, boolean *ungetend,
+			 struct ReadSeqVars *V)
+{ /* Pearson/FASTA end test: a line starting with '>' is the next header */
+	*addend = false;
+	*ungetend = true;
+	return ('>' == V->s[0]);
+}
+
+Local void readPearson(struct ReadSeqVars *V)
+{ /* Pearson/FASTA reader: each entry is a '>' header line plus data lines */
+	while (!V->allDone) {
+		strcpy(V->seqid, V->s + 1); /* header text after the '>' */
+		readLoop(0, false, endPearson, V);
+		if (!V->allDone) {
+			/* move on to the next header unless the file ended */
+			while (!feof(V->f) && *V->s != '>')
+				getline(V);
+		}
+		if (feof(V->f)) V->allDone = true;
+	}
+}
+
+Local boolean endEMBL(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
+{ /* EMBL entry ends at "//" or at a line that starts with "ID   " */
+	*addend = false;
+	*ungetend = (V->s == strstr(V->s, "ID   ")); /* next entry's header */
+	return (*ungetend || NULL != strstr(V->s, "//"));
+}
+
+Local void readEMBL(struct ReadSeqVars *V)
+{ /* EMBL reader: "ID   " starts an entry, "SQ   " precedes its data */
+	while (!V->allDone) {
+		strcpy(V->seqid, V->s + 5); /* text after the 5-char tag */
+		/* always read at least one line, then stop on the SQ line */
+		do {
+			getline(V);
+		} while (!feof(V->f) && strstr(V->s, "SQ   ") != V->s);
+
+		readLoop(0, false, endEMBL, V);
+		if (!V->allDone) {
+			/* skip to the next ID line unless the file ended */
+			while (!feof(V->f) && strstr(V->s, "ID   ") != V->s)
+				getline(V);
+		}
+		if (feof(V->f)) V->allDone = true;
+	}
+}
+
+Local boolean endZuker(boolean *addend, boolean *ungetend,
+		       struct ReadSeqVars *V)
+{ /* Zuker end test: a line starting with '(' begins the next entry */
+	*addend = false;
+	*ungetend = true;
+	return ('(' == V->s[0]);
+}
+
+Local void readZuker(struct ReadSeqVars *V)
+{ /* Zuker reader: the 1st string of an entry is Zuker's Fortran format */
+	while (!V->allDone) {
+		/* the line after the format line reads "seqLen seqid ..." */
+		getline(V);
+		strcpy(V->seqid, V->s + 6); /* id is taken from column 6 */
+		readLoop(0, false, endZuker, V);
+		if (!V->allDone) {
+			/* look for the next '(' line unless the file ended */
+			while (!feof(V->f) && *V->s != '(')
+				getline(V);
+		}
+		if (feof(V->f))
+			V->allDone = true;
+	}
+}
+
+Local boolean endFitch(boolean *addend, boolean *ungetend,
+		       struct ReadSeqVars *V)
+{
+	/* Heuristic and somewhat shaky end test: a line whose first
+	 * character is not a blank is taken to be the title line of the
+	 * next sequence. */
+	*addend = false;
+	*ungetend = true;
+	return (' ' != V->s[0]);
+}
+
+Local void readFitch(struct ReadSeqVars *V)
+{ /* Fitch reader; readSeqMain stores the first title in seqid beforehand */
+	boolean firstEntry = true;
+
+	while (!V->allDone) {
+		/* later titles are the lines left in V->s by the last pass */
+		if (!firstEntry) strcpy(V->seqid, V->s);
+		readLoop(0, firstEntry, endFitch, V);
+		firstEntry = false;
+		if (feof(V->f)) V->allDone = true;
+	}
+}
+
+Local void readPlain(struct ReadSeqVars *V)
+{ /* Unformatted input: everything up to EOF is added as one sequence */
+	V->nseq += 1;
+	V->addit = (V->choice > 0);
+	if (V->addit) V->seqlen = 0;
+	addseq(V->seqid, V); /* whatever the caller left in seqid */
+	if (V->fname == NULL)
+		sprintf(V->seqid, "  [Unknown form]");
+	else
+		sprintf(V->seqid, "%s  [Unknown form]", V->fname);
+	do {
+		addseq(V->s, V); /* current line first, then fetch the next */
+		V->done = feof(V->f);
+		getline(V);
+	} while (!V->done);
+	if (V->choice == kListSequences) addinfo(V->seqid, V);
+	V->allDone = true;
+}
+
+Local void readUWGCG(struct ReadSeqVars *V)
+{
+	/*
+	 * UW-GCG reader for one sequence.  V->s holds the header line that
+	 * writeseq emits as "    %s  Length: %d  (today)  Check: %d  ..\n".
+	 * 10nov91: the last line used to be duplicated when EOF followed
+	 * it; the fix was that getline now sets *V->s = 0.
+	 */
+	char *cut;
+
+	V->nseq += 1;
+	V->addit = (V->choice > 0);
+	if (V->addit) V->seqlen = 0;
+	strcpy(V->seqid, V->s);
+	/* keep only the name: chop at "  Length: " or, failing that, ".." */
+	cut = strstr(V->seqid, "  Length: ");
+	if (cut == NULL) cut = strstr(V->seqid, "..");
+	if (cut != NULL) *cut = 0;
+	for (;;) {
+		V->done = feof(V->f); /* EOF state as seen before the read */
+		getline(V);
+		if (V->done) break;
+		addseq(V->s, V);
+	}
+	if (V->choice == kListSequences) addinfo(V->seqid, V);
+	V->allDone = true;
+}
+
+Local void readOlsen(struct ReadSeqVars *V)
+{ /* G. Olsen /print output from multiple sequence editor */
+	/*
+	 * The file is scanned once from the top.  Header lines such as
+	 *     3 (Agr.tume):  agrobacterium.prna  18-JUN-1987 16:12
+	 * are counted in V->nseq; for the chosen entry the name goes to sid
+	 * and the leading number to snum (18aug92: the number tells apart
+	 * sequences that share a name).  After the "identity:   Data:" line,
+	 * rows carrying both snum and sid are appended to the sequence.
+	 * sid and snum start out empty, which matches no data row.
+	 */
+	char *si, *sj, *sk, *sm, sid[40] = "", snum[20] = "";
+	boolean indata = false;
+	int snumlen = 0;
+
+	V->addit = (V->choice > 0);
+	if (V->addit) V->seqlen = 0;
+	rewind(V->f);
+	V->nseq = 0;
+	do {
+		getline(V);
+		V->done = feof(V->f);
+
+		if (V->done && !(*V->s))
+			break;
+		else if (indata) {
+			/* wanted rows: number more than snumlen columns left
+			 * of the name, and 10+ columns from the name onward */
+			if ((si = strstr(V->s, sid)) != NULL &&
+			    (sm = strstr(V->s, snum)) != NULL &&
+			    (si - sm > snumlen) && strlen(si) >= 10) {
+				/*
+				 * Spaces are valid alignment data !!
+				 * 17Oct91: the left margin is 21, not 22.
+				 * The right id margin varies, so the row is
+				 * cut 2 columns before the trailing name.
+				 * A fixed index (V->s + 21) chopped the first
+				 * nuc under VMS CC (VAXC031); use the strstr
+				 * result plus an offset instead.
+				 */
+				si += 10;
+				sk = strstr(si, sid);
+				if (sk != NULL) *(sk - 2) = 0;
+				for (sk = si; *sk != 0; sk++) {
+					if (*sk == ' ') *sk = '.';
+					/* 18aug92: !! some olsen masks are
+					 * NUMBERS !! which addseq eats */
+					else if (isdigit(*sk))
+						*sk = nonummask[*sk - '0'];
+				}
+				addseq(si, V);
+			}
+		}
+
+		/* seq info header line; "): " with no '(' in front of it is
+		 * not one and must stay away from the pointer math below */
+		else if ((sk = strstr(V->s, "): ")) != NULL &&
+			 (si = strchr(V->s, '(')) != NULL && si < sk) {
+			(V->nseq)++;
+			si++; /* the name starts right after the '(' */
+			*sk = ' ';
+			if (V->choice == kListSequences)
+				addinfo(si, V);
+			else if (V->nseq == V->choice) {
+				strcpy(V->seqid, si);
+				/* blanks inside the name become '_' in the
+				 * id; neither scan may leave the buffer */
+				sj = strchr(V->seqid, ':');
+				while (sj > V->seqid && *(--sj) == ' ')
+					;
+				while (sj > V->seqid && --sj != V->seqid) {
+					if (*sj == ' ') *sj = '_';
+				}
+				/* trim the name in V->s; sid takes a copy
+				 * that is truncated to fit if necessary */
+				*sk = 0;
+				while (*(--sk) == ' ') *sk = 0;
+				sid[0] = 0;
+				strncat(sid, si, sizeof(sid) - 1);
+				/* leading number; one byte is kept for NUL */
+				si = V->s;
+				while ((*si <= ' ') && (*si != 0)) si++;
+				snumlen = 0;
+				while (si[snumlen] > ' ' &&
+				       snumlen < (int)sizeof(snum) - 1) {
+					snum[snumlen] = si[snumlen];
+					snumlen++;
+				}
+				snum[snumlen] = 0;
+			}
+		}
+
+		else if (strstr(V->s, "identity:   Data:")) {
+			indata = true;
+			if (V->choice == kListSequences) V->done = true;
+		}
+	} while (!V->done);
+
+	V->allDone = true;
+} /*readOlsen*/
+
+Local void readMSF(struct ReadSeqVars *V)
+{ /* gcg's MSF, mult. sequence format, interleaved ! */
+	/* One scan from the top: "Name: " header lines are counted in
+	 * V->nseq and the chosen name is kept in sid; after the "//" line
+	 * each row starting with that name adds its remainder to the
+	 * sequence.  sid starts out empty, which matches no row. */
+	char *si, *sj, sid[128] = "";
+	boolean indata = false;
+
+	V->addit = (V->choice > 0);
+	if (V->addit) V->seqlen = 0;
+	rewind(V->f);
+	V->nseq = 0;
+	do {
+		getline(V);
+		V->done = feof(V->f);
+
+		if (V->done && !(*V->s))
+			break;
+		else if (indata) {
+			/*somename  ...gpvedai .......t.. aaigr..vad tvgtgptnse
+			 * aipaltaaet */
+			/*       E  gvenae.kgv tentna.tad fvaqpvylpe .nqt......
+			 * kv.affynrs */
+			si = V->s;
+			skipwhitespace(si);
+			/* names may hold "-", "_" and other punctuation, so
+			 * a name ends at the first blank or control char */
+			for (sj = si; *sj > ' '; sj++)
+				;
+			/* cut the name off and step onto the data; if the
+			 * row ends right after the name, sj stays on the
+			 * terminator instead of moving past it */
+			if (*sj != 0) *sj++ = 0;
+			if (*si && (0 == strcmp(si, sid)))
+				addseq(sj, V);
+		}
+
+		else if (NULL !=
+			 (si = strstr(V->s,
+				      "Name: "))) { /* seq info header line */
+			/* Name: somename      Len:   100  Check: 7009
+			 * Weight:  1.00 */
+
+			(V->nseq)++;
+			si += 6;
+			if (V->choice == kListSequences)
+				addinfo(si, V);
+			else if (V->nseq == V->choice) {
+				strcpy(V->seqid, si);
+				si = V->seqid;
+				skipwhitespace(si);
+				for (sj = si; *sj > ' '; sj++)
+					;
+				*sj = 0;
+				/* the name is file input: bound the copy */
+				sid[0] = 0;
+				strncat(sid, si, sizeof(sid) - 1);
+			}
+		}
+
+		else if (strstr(V->s, "//") /*== V->s*/) {
+			indata = true;
+			if (V->choice == kListSequences) V->done = true;
+		}
+
+	} while (!V->done);
+
+	V->allDone = true;
+} /*readMSF*/
+
+Local void readPAUPinterleaved(struct ReadSeqVars *V)
+{ /* PAUP matrix, interleaved layout; readSeqMain calls this once the "matrix" line was read */
+
+	char *si, *sj, *send, sid[40], sid1[40], saveseq[255]; /* sid: chosen name; sid1: copy made when the chosen entry is entry 1 */
+	boolean first = true, indata = false, domatch;
+	int atseq = 0, iline = 0, ifmc, saveseqlen = 0; /* atseq is never used; iline is only written */
+
+#define fixmatchchar(s)                                                       \
+	{                                                                     \
+		for (ifmc = 0; ifmc < saveseqlen; ifmc++)                     \
+			if (s[ifmc] == V->matchchar) s[ifmc] = saveseq[ifmc]; \
+	}
+
+	V->addit = (V->choice > 0);
+	V->seqlencount = 0;
+	if (V->addit) V->seqlen = 0;
+	/* rewind(V->f); V->nseq= 0;  << do in caller !*/
+	indata = true; /* call here after we find "matrix" */
+	domatch = (V->matchchar > 0); /* matchchar is 0 unless the caller found "matchchar=" */
+
+	do {
+		getline(V);
+		V->done = feof(V->f);
+
+		if (V->done && !(*V->s))
+			break;
+		else if (indata) {
+			/* [         1                    1                    1
+			 * ]*/
+			/* human     aagcttcaccggcgcagtca ttctcataatcgcccacggR
+			 * cttacatcct*/
+			/* chimp     ................a.t. .c.................a
+			 * ..........*/
+			/* rows may hold V->matchchar; fixmatchchar swaps in the saved entry-1 chars */
+			si = V->s;
+			skipwhitespace(si);
+			if (strchr(si, ';')) indata = false; /* a ';' ends the matrix; this line is still processed */
+
+			if (isalnum(*si)) {
+				/* valid data line starts w/ a left-justified
+				 * seq name in columns [0..8] */
+				if (first) { /* first block: one row per entry, up to V->topnseq rows */
+					(V->nseq)++;
+					if (V->nseq >= V->topnseq)
+						first = false;
+					for (sj = si; isalnum(*sj); sj++)
+						;
+					send = sj; /* end of the name */
+					skipwhitespace(sj); /* sj: start of the data */
+					if (V->choice == kListSequences) {
+						*send = 0;
+						addinfo(si, V);
+					}
+					else if (V->nseq == V->choice) {
+						if (domatch) {
+							if (V->nseq == 1) {
+								strcpy(saveseq,
+								       sj);
+								saveseqlen =
+								    strlen(
+									saveseq);
+							}
+							else
+								fixmatchchar(
+								    sj);
+						}
+						addseq(sj, V);
+						*send = 0;
+						strcpy(V->seqid, si);
+						strcpy(sid, si); /* NOTE(review): unbounded copy into sid[40] */
+						if (V->nseq == 1)
+							strcpy(sid1, sid);
+					}
+				}
+
+				else if ((strstr(si, sid) == si)) { /* NOTE(review): sid is unset if no entry matched V->choice */
+					while (isalnum(*si)) si++;
+					skipwhitespace(si);
+					if (domatch) {
+						if (V->nseq == 1) {
+							strcpy(saveseq, si);
+							saveseqlen =
+							    strlen(saveseq);
+						}
+						else
+							fixmatchchar(si);
+					}
+					addseq(si, V);
+				}
+
+				else if (domatch && (strstr(si, sid1) == si)) { /* NOTE(review): sid1 is only set when the chosen entry is entry 1 */
+					strcpy(saveseq, si);
+					saveseqlen = strlen(saveseq);
+				}
+
+				iline++;
+			}
+		}
+
+		else if (strstr(V->s, "matrix")) { /* only reachable after a ';' cleared indata */
+			indata = true;
+			iline = 0;
+			if (V->choice == kListSequences) V->done = true;
+		}
+
+	} while (!V->done);
+
+	V->allDone = true;
+} /*readPAUPinterleaved*/
+
+Local void readPAUPsequential(struct ReadSeqVars *V)
+{ /* PAUP matrix, sequential layout; readSeqMain calls this once the "matrix" line was read */
+	char *si, *sj;
+	boolean atname = true, indata = false; /* atname: the next data line starts with a name */
+
+	V->addit = (V->choice > 0);
+	if (V->addit) V->seqlen = 0;
+	V->seqlencount = 0;
+	/* rewind(V->f); V->nseq= 0;  << do in caller !*/
+	indata = true; /* call here after we find "matrix" */
+	do {
+		getline(V);
+		V->done = feof(V->f);
+
+		if (V->done && !(*V->s))
+			break;
+		else if (indata) {
+			/* [         1                    1                    1
+			 * ]*/
+			/* human     aagcttcaccggcgcagtca ttctcataatcgcccacggR
+			 * cttacatcct*/
+			/*           aagcttcaccggcgcagtca ttctcataatcgcccacggR
+			 * cttacatcct*/
+			/* chimp     ................a.t. .c.................a
+			 * ..........*/
+			/*           ................a.t. .c.................a
+			 * ..........*/
+
+			si = V->s;
+			skipwhitespace(si);
+			if (strchr(si, ';')) indata = false; /* a ';' ends the matrix; this line is still processed */
+			if (isalnum(*si)) {
+				/* valid data line starts w/ a left-justified
+				 * seq name in columns [0..8] */
+				if (atname) {
+					(V->nseq)++;
+					V->seqlencount = 0;
+					atname = false;
+					sj = si + 1;
+					while (isalnum(*sj)) sj++; /* sj: first char after the name */
+					if (V->choice == kListSequences) {
+						/* !! we must count bases to
+						 * know when topseqlen is
+						 * reached ! */
+						countseq(sj, V);
+						if (V->seqlencount >=
+						    V->topseqlen)
+							atname = true;
+						*sj = 0;
+						addinfo(si, V);
+					}
+					else if (V->nseq == V->choice) {
+						addseq(sj, V);
+						V->seqlencount = V->seqlen;
+						if (V->seqlencount >=
+						    V->topseqlen)
+							atname = true;
+						*sj = 0;
+						strcpy(V->seqid, si);
+					}
+					else { /* some other entry: count its bases only */
+						countseq(sj, V);
+						if (V->seqlencount >=
+						    V->topseqlen)
+							atname = true;
+					}
+				}
+
+				else if (V->nseq == V->choice) { /* continuation row of the chosen entry */
+					addseq(V->s, V);
+					V->seqlencount = V->seqlen;
+					if (V->seqlencount >= V->topseqlen)
+						atname = true;
+				}
+				else { /* continuation row of another entry */
+					countseq(V->s, V);
+					if (V->seqlencount >= V->topseqlen)
+						atname = true;
+				}
+			}
+		}
+
+		else if (strstr(V->s, "matrix")) { /* only reachable after a ';' cleared indata */
+			indata = true;
+			atname = true;
+			if (V->choice == kListSequences) V->done = true;
+		}
+
+	} while (!V->done);
+
+	V->allDone = true;
+} /*readPAUPsequential*/
+
+Local void readPhylipInterleaved(struct ReadSeqVars *V)
+{ /* Phylip interleaved reader; V->s holds the line with the two counts */
+	char *text, *data;
+	boolean inNameBlock = true;
+	int row = 0;
+
+	V->addit = (V->choice > 0);
+	if (V->addit) V->seqlen = 0;
+	V->seqlencount = 0;
+	/* the two counts are parsed by hand: sscanf(V->s, "%d%d", ...) was
+	 * seen to leave topnseq at 0 */
+	text = V->s;
+	skipwhitespace(text);
+	V->topnseq = atoi(text);
+	while (isdigit(*text)) text++;
+	skipwhitespace(text);
+	V->topseqlen = atol(text);
+	/* the first topnseq non-blank rows carry a name in columns 0..9;
+	 * later rows are bare data and cycle through the sequences */
+
+	do {
+		getline(V);
+		V->done = feof(V->f);
+
+		if (V->done && !(*V->s)) break;
+		text = V->s;
+		skipwhitespace(text);
+		if (*text != 0) {
+			if (inNameBlock) { /* name + seq rows, written as
+					fprintf(outf,"%-10s  ",seqname); */
+				V->nseq += 1;
+				if (V->nseq >= V->topnseq) inNameBlock = false;
+				data = V->s + 10; /* past name, start of data */
+				if (V->choice == kListSequences) {
+					*data = 0;
+					addinfo(text, V);
+				}
+				else if (V->nseq == V->choice) {
+					addseq(data, V);
+					*data = 0;
+					strcpy(V->seqid, text);
+				}
+			}
+			else if (row % V->nseq == V->choice - 1) {
+				addseq(text, V);
+			}
+			row++;
+		}
+	} while (!V->done);
+
+	V->allDone = true;
+} /*readPhylipInterleaved*/
+
+Local boolean endPhylipSequential(boolean *addend, boolean *ungetend,
+				  struct ReadSeqVars *V)
+{ /* sequential Phylip: the entry is complete at topseqlen counted bases */
+	*addend = false;
+	*ungetend = false;
+	countseq(V->s, V); /* presumably advances V->seqlencount */
+	return (V->topseqlen <= V->seqlencount);
+}
+
+Local void readPhylipSequential(struct ReadSeqVars *V)
+{ /* Phylip sequential reader; V->s holds the line with the two counts */
+	short col;
+	char *text = V->s;
+	/* counts are parsed by hand rather than with sscanf("%d%d") */
+	skipwhitespace(text);
+	V->topnseq = atoi(text);
+	while (isdigit(*text)) text++;
+	skipwhitespace(text);
+	V->topseqlen = atol(text);
+	getline(V);
+	while (!V->allDone) {
+		V->seqlencount = 0;
+		/* columns 0..9 hold the name: copy it out, then blank it */
+		strncpy(V->seqid, V->s, 10);
+		V->seqid[10] = 0;
+		for (col = 0; col < 10 && V->s[col]; col++) V->s[col] = ' ';
+		readLoop(0, true, endPhylipSequential, V);
+		if (feof(V->f)) V->allDone = true;
+	}
+}
+
+Local void readSeqMain(struct ReadSeqVars *V, const long skiplines_,
+		       const short format_)
+{ /* skips skiplines_ lines and leading blank lines, then runs the reader for format_ */
+#define tolowerstr(s)                                   \
+	{                                               \
+		long Itlwr, Ntlwr = strlen(s);          \
+		for (Itlwr = 0; Itlwr < Ntlwr; Itlwr++) \
+			s[Itlwr] = to_lower(s[Itlwr]);  \
+	}
+
+	boolean gotuw;
+	long l;
+
+	V->linestart = 0;
+	V->matchchar = 0;
+	if (V->f == NULL)
+		V->err = eFileNotFound;
+	else {
+		for (l = skiplines_; l > 0; l--) getline(V);
+
+		do { /* find the first line that is not empty or all blanks */
+			getline(V);
+			for (l = strlen(V->s); (l > 0) && (V->s[l - 1] == ' '); l--)
+				; /* index l is the terminator, so test l - 1 */
+		} while ((l == 0) && !feof(V->f));
+
+		if (feof(V->f))
+			V->err = eNoData;
+
+		else
+			switch (format_) {
+				case kPlain:
+					readPlain(V);
+					break;
+				case kIG:
+					readIG(V);
+					break;
+				case kStrider:
+					readStrider(V);
+					break;
+				case kGenBank:
+					readGenBank(V);
+					break;
+				case kPIR:
+					readPIR(V);
+					break;
+				case kNBRF:
+					readNBRF(V);
+					break;
+				case kPearson:
+					readPearson(V);
+					break;
+				case kEMBL:
+					readEMBL(V);
+					break;
+				case kZuker:
+					readZuker(V);
+					break;
+				case kOlsen:
+					readOlsen(V);
+					break;
+				case kMSF:
+					readMSF(V);
+					break;
+
+				case kPAUP: {
+					boolean done = false;
+					boolean interleaved = false;
+					char *cp;
+					/* scan the header up to the "matrix" line; stop at EOF
+					 * too, or a file without that line never ends the loop */
+					while (!done && !feof(V->f)) {
+						getline(V);
+						tolowerstr(V->s);
+						if (strstr(V->s, "matrix"))
+							done = true;
+						if (strstr(V->s, "interleav"))
+							interleaved = true;
+						if (NULL !=
+						    (cp =
+							 strstr(V->s, "ntax=")))
+							V->topnseq =
+							    atoi(cp + 5);
+						if (NULL !=
+						    (cp = strstr(V->s,
+								 "nchar=")))
+							V->topseqlen =
+							    atoi(cp + 6);
+						if (NULL !=
+						    (cp = strstr(
+							 V->s, "matchchar="))) {
+							cp += 10;
+							if (*cp == '\'')
+								cp++;
+							else if (*cp == '"')
+								cp++;
+							V->matchchar = *cp;
+						}
+					}
+					if (interleaved)
+						readPAUPinterleaved(V);
+					else
+						readPAUPsequential(V);
+				} break;
+
+				/* kPhylip: ! can't determine in middle of file
+				 * which type it is...*/
+				/* test for interleave or sequential and use
+				 * Phylip4(ileave) or Phylip2 */
+				case kPhylip2:
+					readPhylipSequential(V);
+					break;
+				case kPhylip4: /* == kPhylip3 */
+					readPhylipInterleaved(V);
+					break;
+
+				default:
+					V->err = eUnknownFormat;
+					break;
+
+				case kFitch: /* first title goes to seqid here, not in readFitch */
+					strcpy(V->seqid, V->s);
+					getline(V);
+					readFitch(V);
+					break;
+
+				case kGCG: /* look for the ".." header line, then read one sequence */
+					do {
+						gotuw = (strstr(V->s, "..") !=
+							 NULL);
+						if (gotuw) readUWGCG(V);
+						getline(V);
+					} while (!(feof(V->f) || V->allDone));
+					break;
+			}
+	}
+
+	V->filestart = false;
+	V->seq[V->seqlen] = 0; /* stick a string terminator on it */
+}
+
+char *readSeqFp(const short whichEntry_, /* index to sequence in file */
+		FILE *fp_,		 /* pointer to open seq file */
+		const long skiplines_,
+		const short format_, /* sequence file format */
+		long *seqlen_,	     /* return seq size */
+		short *nseq_,  /* number of seqs in file, for listSeqs() */
+		short *error_, /* return error */
+		char *seqid_)  /* return seq name/info */
+{
+	/* Reads entry whichEntry_ (or the listing) from an open stream and
+	 * returns V.seq; returns NULL when format_ is out of range. */
+	struct ReadSeqVars V;
+
+	if (format_ < kMinFormat || format_ > kMaxFormat) {
+		*error_ = eUnknownFormat;
+		*seqlen_ = 0;
+		return NULL;
+	}
+
+	V.choice = whichEntry_;
+	V.fname = NULL; /* don't know */
+	/* NOTE(review): the calloc result is not checked before use */
+	V.seq = (char *)calloc(1, kStartLength + 1);
+	V.maxseq = kStartLength;
+	V.seqlen = 0;
+	V.seqid = seqid_;
+
+	V.f = fp_;
+	/* a NULL stream is reported by readSeqMain as eFileNotFound, so it
+	 * must not reach ftell() here */
+	V.filestart = (fp_ == NULL) || (ftell(fp_) == 0);
+
+	/* *nseq_ is output only: entries are counted from 0 on every call,
+	 * also when the stream is not at its start, so the caller's value
+	 * (which may be unset) is never read */
+	*V.seqid = '\0';
+	V.err = 0;
+	V.nseq = 0;
+	V.isseqchar = isSeqChar;
+	if (V.choice != kListSequences && V.choice <= 0)
+		V.choice = 1; /* default ?? */
+	V.addit = (V.choice > 0);
+	V.allDone = false;
+
+	readSeqMain(&V, skiplines_, format_);
+
+	*error_ = V.err;
+	*seqlen_ = V.seqlen;
+	*nseq_ = V.nseq;
+	return V.seq;
+}
+
+char *readSeq(const short whichEntry_, /* index to sequence in file */
+	      const char *filename_,   /* file name */
+	      const long skiplines_,
+	      const short format_, /* sequence file format */
+	      long *seqlen_,	   /* return seq size */
+	      short *nseq_,	   /* number of seqs in file, for listSeqs() */
+	      short *error_,	   /* return error */
+	      char *seqid_)	   /* return seq name/info */
+{
+	/* File-name front end of readSeqMain: opens filename_, reads entry
+	 * whichEntry_ (or the listing), closes the file, returns V.seq. */
+	struct ReadSeqVars V;
+
+	if (format_ > kMaxFormat || format_ < kMinFormat) {
+		*seqlen_ = 0;
+		*error_ = eUnknownFormat;
+		return NULL;
+	}
+
+	V.seq = (char *)calloc(1, kStartLength + 1);
+	V.maxseq = kStartLength;
+	V.seqlen = 0;
+	V.seqid = seqid_;
+	*V.seqid = '\0';
+
+	/* which entry: kListSequences is kept, other values <= 0 mean 1 */
+	V.choice = whichEntry_;
+	if (V.choice <= 0 && V.choice != kListSequences)
+		V.choice = 1;
+	V.addit = (V.choice > 0);
+	V.err = 0;
+	V.nseq = 0;
+	V.isseqchar = isSeqChar;
+	V.allDone = false;
+
+	/* a failed fopen leaves V.f NULL, which readSeqMain reports */
+	V.fname = filename_; /* pointer is shared, not copied */
+	V.f = fopen(V.fname, "r");
+	V.filestart = true;
+
+	readSeqMain(&V, skiplines_, format_);
+	if (V.f != NULL) fclose(V.f);
+	*nseq_ = V.nseq;
+	*seqlen_ = V.seqlen;
+	*error_ = V.err;
+	return V.seq;
+}
+
+char *listSeqs(const char *filename_, /* file name */
+	       const long skiplines_,
+	       const short format_, /* sequence file format */
+	       short *nseq_,	    /* number of seqs in file, for listSeqs() */
+	       short *error_)	    /* return error */
+{ /* readSeq in kListSequences mode; id and length outputs are discarded */
+	long ignoredLen;
+	char ignoredId[256];
+	char *listing = readSeq(kListSequences, filename_, skiplines_, format_,
+				&ignoredLen, nseq_, error_, ignoredId);
+	return listing;
+}
+
+short seqFileFormat(/* return sequence format number, see ureadseq.h */
+		    const char *filename,
+		    long *skiplines, /* return #lines to skip any junk like mail
+					header */
+		    short *error)    /* return any error value or 0 */
+{ /* opens filename and lets seqFileFormatFp do the detection */
+	short detected;
+	FILE *in = fopen(filename, "r");
+
+	/* a NULL stream is passed on: seqFileFormatFp reports it */
+	detected = seqFileFormatFp(in, skiplines, error);
+	if (in != NULL) fclose(in);
+	return detected;
+}
+
+short seqFileFormatFp(
+    FILE *fseq,
+    long *skiplines, /* return #lines to skip any junk like mail header */
+    short *error)    /* return any error value or 0 */
+{ /* guesses the file format from its first lines; returns a k... format code */
+	boolean foundDNA = false, foundIG = false, foundStrider = false,
+		foundGB = false, foundPIR = false, foundEMBL = false,
+		foundNBRF = false, foundPearson = false, foundFitch = false,
+		foundPhylip = false, foundZuker = false, gotolsen = false,
+		gotpaup = false, gotasn1 = false, gotuw = false, gotMSF = false,
+		isfitch = false, isphylip = false, done = false;
+	short format = kUnknown;
+	int nlines = 0, k, splen = 0, otherlines = 0, aminolines = 0,
+	    dnalines = 0;
+	char sp[256];
+	long linestart = 0;
+	int maxlines2check = 500;
+
+#define ReadOneLine(sp)                         \
+	{                                       \
+		done |= (feof(fseq));           \
+		readline(fseq, sp, &linestart); \
+		if (!done) {                    \
+			splen = strlen(sp);     \
+			++nlines;               \
+		}                               \
+	}
+
+	*skiplines = 0;
+	*error = 0;
+	if (fseq == NULL) {
+		*error = eFileNotFound;
+		return kNoformat;
+	}
+
+	while (!done) { /* one line per pass: collect evidence, then try to decide */
+		ReadOneLine(sp);
+
+		/* check for mailer head & skip past if found */
+		if (nlines < 4 && !done) {
+			if ((strstr(sp, "From ") == sp) ||
+			    (strstr(sp, "Received:") == sp)) {
+				do {
+					/* skip all lines until find one blank
+					 * line */
+					ReadOneLine(sp);
+					if (!done)
+						for (k = 0; (k < splen) &&
+							    (sp[k] == ' ');
+						     k++)
+							;
+				} while ((!done) && (k < splen));
+				*skiplines = nlines; /* !? do we want #lines or
+							#bytes ?? */
+			}
+		}
+
+		if (sp == NULL || *sp == 0)
+			; /* nada */
+
+		/* high probability identities: */
+
+		else if (strstr(sp, "MSF:") && strstr(sp, "Type:") &&
+			 strstr(sp, "Check:"))
+			gotMSF = true;
+
+		else if ((strstr(sp, "..") != NULL) &&
+			 (strstr(sp, "Check:") != NULL))
+			gotuw = true;
+
+		else if (strstr(sp, "identity:   Data:") != NULL)
+			gotolsen = true;
+
+		else if (strstr(sp, "::=") &&
+			 (strstr(sp, "Bioseq") || /* Bioseq or Bioseq-set */
+			  strstr(sp, "Seq-entry") ||
+			  strstr(
+			      sp,
+			      "Seq-submit"))) /* can we read submit format? */
+			gotasn1 = true;
+
+		else if (strstr(sp, "#NEXUS") == sp)
+			gotpaup = true;
+
+		/* uncertain identities: */
+
+		else if (*sp == ';') {
+			if (strstr(sp, "Strider") != NULL)
+				foundStrider = true;
+			else
+				foundIG = true;
+		}
+
+		else if (strstr(sp, "LOCUS") == sp)
+			foundGB = true;
+		else if (strstr(sp, "ORIGIN") == sp)
+			foundGB = true;
+
+		else if (strstr(sp, "ENTRY   ") ==
+			 sp) /* ? also (strcmp(sp,"\\\\\\")==0) */
+			foundPIR = true;
+		else if (strstr(sp, "SEQUENCE") == sp)
+			foundPIR = true;
+
+		else if (*sp == '>') {
+			if (sp[3] == ';')
+				foundNBRF = true;
+			else
+				foundPearson = true;
+		}
+
+		else if (strstr(sp, "ID   ") == sp)
+			foundEMBL = true;
+		else if (strstr(sp, "SQ   ") == sp)
+			foundEMBL = true;
+
+		else if (*sp == '(')
+			foundZuker = true;
+
+		else {
+			if (nlines - *skiplines == 1) {
+				int ispp = 0, ilen = 0;
+				sscanf(sp, "%d%d", &ispp, &ilen);
+				if (ispp > 0 && ilen > 0) isphylip = true;
+			}
+			else if (isphylip && nlines - *skiplines == 2) {
+				int tseq;
+				tseq = getseqtype(sp + 10, strlen(sp + 10)); /* NOTE(review): assumes the line has 10+ chars */
+				if (isalpha(*sp) /* 1st letter in 2nd line must
+						    be of a name */
+				    && (tseq != kOtherSeq)) /* sequence section
+							       must be okay */
+					foundPhylip = true;
+			}
+
+			for (k = 0, isfitch = true; isfitch & (k < splen);
+			     k++) {
+				if (k % 4 == 0)
+					isfitch &= (sp[k] == ' ');
+				else
+					isfitch &= (sp[k] != ' ');
+			}
+			if (isfitch & (splen > 20)) foundFitch = true;
+
+			/* kRNA && kDNA are fairly certain...*/
+			switch (getseqtype(sp, splen)) {
+				case kOtherSeq:
+					otherlines++;
+					break;
+				case kAmino:
+					if (splen > 20) aminolines++;
+					break;
+				case kDNA:
+				case kRNA:
+					if (splen > 20) dnalines++;
+					break;
+				case kNucleic:
+					break; /* not much info ? */
+			}
+		}
+
+		/* pretty certain */
+		if (gotolsen) {
+			format = kOlsen;
+			done = true;
+		}
+		else if (gotMSF) {
+			format = kMSF;
+			done = true;
+		}
+		else if (gotasn1) {
+			/* !! we need to look further and return  kASNseqentry |
+			 * kASNseqset */
+			/*
+			  seqentry key is Seq-entry ::=
+			  seqset key is Bioseq-set ::=
+			  ?? can't read these yet w/ ncbi tools ??
+			    Seq-submit ::=
+			    Bioseq ::=  << fails both bioseq-seq and seq-entry
+			  parsers !
+			*/
+			if (strstr(sp, "Bioseq-set"))
+				format = kASNseqset;
+			else if (strstr(sp, "Seq-entry"))
+				format = kASNseqentry;
+			else
+				format = kASN1; /* other form, we can't yet
+						   read... */
+			done = true;
+		}
+		else if (gotpaup) {
+			format = kPAUP;
+			done = true;
+		}
+
+		else if (gotuw) {
+			if (foundIG)
+				format =
+				    kIG; /* a TOIG file from GCG for certain */
+			else
+				format = kGCG;
+			done = true;
+		}
+
+		else if ((dnalines > 1) || done || (nlines > maxlines2check)) {
+			/* decide on most likely format */
+			/* multichar idents: */
+			if (foundStrider)
+				format = kStrider;
+			else if (foundGB)
+				format = kGenBank;
+			else if (foundPIR)
+				format = kPIR;
+			else if (foundEMBL)
+				format = kEMBL;
+			else if (foundNBRF)
+				format = kNBRF;
+			/* single char idents: */
+			else if (foundIG)
+				format = kIG;
+			else if (foundPearson)
+				format = kPearson;
+			else if (foundZuker)
+				format = kZuker;
+			/* digit ident: */
+			else if (foundPhylip)
+				format = kPhylip;
+			/* spacing ident: */
+			else if (foundFitch)
+				format = kFitch;
+			/* no format chars: */
+			else if (otherlines > 0)
+				format = kUnknown;
+			else if (dnalines > 1)
+				format = kPlain;
+			else if (aminolines > 1)
+				format = kPlain;
+			else
+				format = kUnknown;
+
+			done = true;
+		}
+
+		/* need this for possible long header in olsen format */
+		else if (strstr(sp, "): ") != NULL)
+			maxlines2check++;
+	}
+
+	if (format == kPhylip) {
+		/* check for interleaved or sequential -- really messy */
+		int tname, tseq;
+		long i, j, nspp = 0, nlen = 0, ilen, leaf = 0, seq = 0; /* leaf/seq: votes for interleaved/sequential */
+		char *ps;
+
+		rewind(fseq);
+		for (i = 0; i < *skiplines; i++) ReadOneLine(sp);
+		nlines = 0;
+		ReadOneLine(sp);
+		sscanf(sp, "%ld%ld", &nspp, &nlen); /* both targets are long, so %ld is required */
+		ReadOneLine(sp); /* 1st seq line */
+		for (ps = sp + 10, ilen = 0; *ps != 0; ps++)
+			if (isprint(*ps)) ilen++;
+
+		for (i = 1; i < nspp; i++) {
+			ReadOneLine(sp);
+
+			tseq = getseqtype(sp + 10, strlen(sp + 10));
+			tname = getseqtype(sp, 10);
+			for (j = 0, ps = sp; isspace(*ps) && j < 10; ps++, j++)
+				;
+			for (ps = sp; *ps != 0; ps++)
+				if (isprint(*ps)) ilen++;
+
+			/* find probable interleaf or sequential ... */
+			if (j >= 9)
+				seq += 10; /* pretty certain not ileaf */
+			else {
+				if (tseq != tname)
+					leaf++;
+				else
+					seq++;
+				if (tname == kDNA || tname == kRNA)
+					seq++;
+				else
+					leaf++;
+			}
+
+			if (ilen <= nlen && j < 9) {
+				if (tname == kOtherSeq)
+					leaf += 10;
+				else if (tname == kAmino || tname == kDNA ||
+					 tname == kRNA)
+					seq++;
+				else
+					leaf++;
+			}
+			else if (ilen > nlen) {
+				ilen = 0;
+			}
+		}
+		for (nspp *= 2; i < nspp;
+		     i++) { /* this should be only bases if interleaf */
+			ReadOneLine(sp);
+
+			tseq = getseqtype(sp + 10, strlen(sp + 10));
+			tname = getseqtype(sp, 10);
+			for (ps = sp; *ps != 0; ps++)
+				if (isprint(*ps)) ilen++;
+			for (j = 0, ps = sp; isspace(*ps) && j < 10; ps++, j++)
+				;
+			if (j < 9) {
+				if (tname == kOtherSeq) seq += 10;
+				if (tseq != tname)
+					seq++;
+				else
+					leaf++;
+				if (tname == kDNA || tname == kRNA)
+					leaf++;
+				else
+					seq++;
+			}
+			if (ilen > nlen) {
+				if (j > 9)
+					leaf += 10; /* must be a name here for
+						       sequent */
+				else if (tname == kOtherSeq)
+					seq += 10;
+				ilen = 0;
+			}
+		}
+
+		if (leaf > seq)
+			format = kPhylip4;
+		else
+			format = kPhylip2;
+	}
+
+	return (format);
+#undef ReadOneLine
+} /* SeqFileFormat */
+
+unsigned long GCGchecksum(const char *seq, const long seqlen,
+			  unsigned long *checktotal)
+/* GCG checksum of seq; also folds it into *checktotal (both mod 10000) */
+{
+	long pos, sum = 0;
+
+	for (pos = 0; pos < seqlen; pos++) {
+		/* upper-cased char times a position weight that cycles
+		 * 1, 2, ... 57, 1, 2, ... */
+		sum += (pos % 57 + 1) * to_upper(seq[pos]);
+	}
+	sum = sum % 10000;
+	/* running total kept by the caller across sequences */
+	*checktotal = (*checktotal + sum) % 10000;
+	return sum;
+}
+
+/* CRC-32 lookup table: 256 entries, one per possible byte value (made by
+ * makecrc.c of the ZIP source).  CRC32checksum indexes it one byte at a time. */
+const unsigned long crctab[] = {
+    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+    0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+    0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+    0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+    0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+    0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+    0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+    0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+    0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+    0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+    0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+    0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+    0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+    0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+    0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+    0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+    0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+    0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+    0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+    0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+    0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+    0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+    0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+    0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+    0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+    0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+    0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+    0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+    0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+    0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+    0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+    0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+    0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+    0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+    0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+    0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+    0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+    0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+    0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+    0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+    0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+    0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+    0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+    0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+    0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+    0x2d02ef8dL};
+
+/* CRC-32 (table-driven, modified from the ZIP source) over the upper-cased
+ * bytes of seq; returns it and adds it to *checktotal.  A seqlen <= 0 now gives
+ * the empty-input CRC (0) instead of reading past the buffer. */
+unsigned long CRC32checksum(const char *seq, const long seqlen,
+			    unsigned long *checktotal)
+{
+	unsigned long crc = 0xffffffffUL;
+	long i;
+	for (i = 0; i < seqlen; i++) {
+		unsigned long byte = (unsigned char)to_upper(seq[i]);
+		crc = crctab[(crc ^ byte) & 0xffUL] ^ (crc >> 8);
+	}
+	crc ^= 0xffffffffUL;
+	*checktotal += crc;
+	return crc;
+}
+
+/* Guess what kind of sequence seq holds from (at most) its first 300
+ * characters.  Each character is upper-cased and tallied against the
+ * protonly, primenuc, aminos and seqsymbols tables (defined elsewhere in this
+ * file), tried in that order; any other alphabetic character counts as
+ * unrecognised.  Returns kOtherSeq, kAmino, kRNA, kDNA or kNucleic. */
+short getseqtype(const char *seq, const long seqlen)
+{
+	char c;
+	short i, maxtest;
+	/* ns is tallied but takes no part in the decision below */
+	short na = 0, aa = 0, po = 0, nt = 0, nu = 0, ns = 0, no = 0;
+
+	maxtest = min(300, seqlen);
+	for (i = 0; i < maxtest; i++) {
+		c = to_upper(seq[i]);
+		if (strchr(protonly, c))
+			po++;
+		else if (strchr(primenuc, c)) {
+			na++;
+			if (c == 'T')
+				nt++;
+			else if (c == 'U')
+				nu++;
+		}
+		else if (strchr(aminos, c))
+			aa++;
+		else if (strchr(seqsymbols, c))
+			ns++;
+		/* <ctype.h> needs an unsigned char value: a plain (signed)
+		 * char above 0x7f would be a negative, out-of-range argument */
+		else if (isalpha((unsigned char)c))
+			no++;
+	}
+
+	/* anything unrecognised, or nothing recognised at all */
+	if (no > 0 || po + aa + na == 0) return kOtherSeq;
+	/* ?? test for probability of kOtherSeq ?, e.g.,
+	   if ((po+aa+na) / maxtest < 0.70) return kOtherSeq; */
+	if (po > 0) return kAmino; /* one protonly letter settles it */
+	if (aa == 0) return (nu > nt) ? kRNA : kDNA;
+	if (na > aa) return kNucleic;
+	return kAmino;
+} /* getseqtype */
+
+/* Return a malloc'd, NUL-terminated copy of seq with every gapc removed and
+ * store its length in *newlen (caller frees).  Copying stops at the first NUL
+ * or after seqlen characters, so the seqlen + 1 byte buffer cannot overrun.
+ * Returns NULL, *newlen == 0, if seq is NULL, seqlen < 0 or malloc fails. */
+char *compressSeq(const char gapc, const char *seq, const long seqlen,
+		  long *newlen)
+{
+	char *newseq, *shrunk;
+	long i, kept = 0;
+	*newlen = 0;
+	if (!seq || seqlen < 0) return NULL;
+	newseq = (char *)malloc((size_t)seqlen + 1);
+	if (!newseq) return NULL;
+	for (i = 0; i < seqlen && seq[i] != '\0'; i++)
+		if (seq[i] != gapc) newseq[kept++] = seq[i];
+	newseq[kept] = '\0';
+	shrunk = (char *)realloc(newseq, (size_t)kept + 1); /* may fail: */
+	if (shrunk) newseq = shrunk; /* then keep the original, larger block */
+	*newlen = kept;
+	return newseq;
+}
+
+/***
+char *rtfhead = "{\\rtf1\\defformat\\mac\\deff2 \
+{\\fonttbl\
+  {\\f1\\fmodern Courier;}{\\f2\\fmodern Monaco;}\
+  {\\f3\\fswiss Helvetica;}{\\f4\\fswiss Geneva;}\
+  {\\f5\\froman Times;}{\\f6\\froman Palatino;}\
+  {\\f7\\froman New Century Schlbk;}{\\f8\\ftech Symbol;}}\
+{\\stylesheet\
+  {\\s1 \\f5\\fs20 \\sbasedon0\\snext1 name;}\
+  {\\s2 \\f3\\fs20 \\sbasedon0\\snext2 num;}\
+  {\\s3 \\f1\\f21 \\sbasedon0\\snext3 seq;}}";
+
+char *rtftail = "}";
+****/
+
+/* Write one sequence to outf in the format selected by outform.
+ *
+ *   seq, seqlen  the residues and how many of them to write
+ *   outform      a k* format code; kPretty takes its layout from the global
+ *                gPretty, and unlisted codes are written like kPearson
+ *   seqid        title text: leading whitespace is skipped, at most 127
+ *                characters are kept, and its first word (up to 30
+ *                characters) becomes the short id
+ *
+ * A per-format header is printed, then the residues in lines of `width`
+ * symbols with optional names/numbers on either side, and finally the
+ * per-format terminator.  Increments gPretty.atseq.  Returns linesout, the
+ * running tally of output lines. */
+short writeSeq(FILE *outf, const char *seq, const long seqlen,
+	       const short outform, const char *seqid)
+{
+	const short kSpaceAll = -9;
+#define kMaxseqwidth 250
+
+	boolean baseonlynum = false; /* count only true bases, not "-" */
+	short numline = 0; /* nonzero only while writing a number line */
+	boolean numright = false, numleft = false;
+	boolean nameright = false, nameleft = false;
+	short namewidth = 8, numwidth = 8;
+	short spacer = 0, width = 50, tab = 0;
+	/* new parameters: width, spacer, those above... */
+
+	short linesout = 0, seqtype = kNucleic;
+	long i, j, l, l1, ibase;
+	char idword[31], endstr[16]; /* longest endstr (ASN.1) is 13 chars */
+	char seqnamestore[128], *seqname = seqnamestore;
+	char s[2 * kMaxseqwidth + 1], *cp; /* symbol + spacer each, + NUL */
+	char nameform[24], numform[24], nocountsymbols[10];
+	unsigned long checksum = 0, checktotal = 0;
+
+	gPretty.atseq++;
+	skipwhitespace(seqid);
+	l = min(sizeof(seqnamestore) - 1, strlen(seqid)); /* room for NUL */
+	strncpy(seqnamestore, seqid, l);
+	seqname[l] = 0;
+
+	idword[0] = nameform[0] = '\0'; /* idword stays "" for a blank title */
+	sscanf(seqname, "%30s", idword);
+	sprintf(numform, "%ld", seqlen);
+	numwidth = strlen(numform) + 1;
+
+	if (strstr(seqname, "checksum") != NULL) { /* cut an old ", N bases" tail */
+		cp = strstr(seqname, "bases");
+		if (cp != NULL) {
+			for (; (cp != seqname) && (*cp != ','); cp--)
+				;
+			if (cp != seqname) *cp = 0;
+		}
+	}
+
+	strcpy(endstr, "");
+	l1 = 0;
+
+	if (outform == kGCG || outform == kMSF)
+		checksum = GCGchecksum(seq, seqlen, &checktotal);
+	else
+		checksum = seqchecksum(seq, seqlen, &checktotal);
+
+	switch (outform) {
+		case kPlain:
+		case kUnknown:		      /* no header, just sequence */
+			strcpy(endstr, "\n"); /* end w/ extra blank line */
+			break;
+
+		case kOlsen: /* Olsen seq. editor takes plain nucs OR Genbank */
+		case kGenBank:
+			fprintf(outf, "LOCUS       %s       %ld bp\n", idword,
+				seqlen);
+			fprintf(outf,
+				"DEFINITION  %s, %ld bases, %lX checksum.\n",
+				seqname, seqlen, checksum);
+			/* fprintf(outf,"ACCESSION   %s\n", accnum); */
+			fprintf(outf, "ORIGIN      \n");
+			spacer = 11;
+			numleft = true;
+			numwidth = 8; /* dgg. 1Feb93: GDE fails on short numwidth */
+			strcpy(endstr, "\n//");
+			linesout += 4;
+			break;
+
+		case kPIR:
+			/* somewhat like genbank... \\\*/
+			/* fprintf(outf,"\\\\\\\n"); << top of file only, not each entry */
+			fprintf(outf, "ENTRY           %s \n", idword);
+			fprintf(outf,
+				"TITLE           %s, %ld bases, %lX checksum.\n",
+				seqname, seqlen, checksum);
+			/* fprintf(outf,"ACCESSION       %s\n", accnum); */
+			fprintf(outf, "SEQUENCE        \n");
+			numwidth = 7;
+			width = 30;
+			spacer = kSpaceAll;
+			numleft = true;
+			strcpy(endstr, "\n///");
+			/* run a top number line for PIR */
+			for (j = 0; j < numwidth; j++) fputc(' ', outf);
+			for (j = 5; j <= width; j += 5)
+				fprintf(outf, "%10ld", j);
+			fputc('\n', outf);
+			linesout += 5;
+			break;
+
+		case kNBRF:
+			if (getseqtype(seq, seqlen) == kAmino)
+				fprintf(outf, ">P1;%s\n", idword);
+			else
+				fprintf(outf, ">DL;%s\n", idword);
+			fprintf(outf, "%s, %ld bases, %lX checksum.\n", seqname,
+				seqlen, checksum);
+			spacer = 11;
+			strcpy(endstr, "*\n");
+			linesout += 3;
+			break;
+
+		case kEMBL:
+			fprintf(outf, "ID   %s\n", idword);
+			/*  fprintf(outf,"AC   %s\n", accnum); */
+			fprintf(outf, "DE   %s, %ld bases, %lX checksum.\n",
+				seqname, seqlen, checksum);
+			fprintf(outf, "SQ             %ld BP\n", seqlen);
+			strcpy(endstr, "\n//"); /* 11Oct90: bug fix*/
+			tab = 4;		/** added 31jan91 */
+			spacer = 11;		/** added 31jan91 */
+			width = 60;
+			linesout += 4;
+			break;
+
+		case kGCG:
+			fprintf(outf, "%s\n", seqname);
+			/* fprintf(outf,"ACCESSION   %s\n", accnum); */
+			fprintf(outf,
+				"    %s  Length: %ld  (today)  Check: %lu  ..\n",
+				idword, seqlen, checksum);
+			spacer = 11;
+			numleft = true;
+			strcpy(endstr, "\n"); /* insurance against misreads at eof */
+			linesout += 3;
+			break;
+
+		case kStrider: /* ?? map ?*/
+			fprintf(outf, "; ### from DNA Strider ;-)\n");
+			fprintf(outf,
+				"; DNA sequence  %s, %ld bases, %lX checksum.\n;\n",
+				seqname, seqlen, checksum);
+			strcpy(endstr, "\n//");
+			linesout += 3;
+			break;
+
+		case kFitch:
+			fprintf(outf, "%s, %ld bases, %lX checksum.\n", seqname,
+				seqlen, checksum);
+			spacer = 4;
+			width = 60;
+			linesout += 1;
+			break;
+
+		case kPhylip2:
+		case kPhylip4:
+			/* this is version 3.2/3.4 -- simplest way to write version 3.3
+			   is to write as 3.2, then re-read and interleave the lines */
+			if (strlen(idword) > 10) idword[10] = 0;
+			fprintf(outf, "%-10s  ", idword);
+			l1 = -1;
+			tab = 12;
+			spacer = 11;
+			break;
+
+		case kASN1:
+			seqtype = getseqtype(seq, seqlen);
+			switch (seqtype) {
+				case kDNA:
+					cp = "dna";
+					break;
+				case kRNA:
+					cp = "rna";
+					break;
+				case kNucleic:
+					cp = "na";
+					break;
+				case kAmino:
+					cp = "aa";
+					break;
+				default: /* kOtherSeq; also keeps cp always set */
+					cp = "not-set";
+					break;
+			}
+			fprintf(outf, "  seq {\n");
+			fprintf(outf, "    id { local id %d },\n",
+				gPretty.atseq);
+			fprintf(outf, "    descr { title \"%s\" },\n", seqid);
+			fprintf(outf, "    inst {\n");
+			fprintf(outf,
+				"      repr raw, mol %s, length %ld, topology linear,\n",
+				cp, seqlen);
+			fprintf(outf, "      seq-data\n");
+			if (seqtype == kAmino)
+				fprintf(outf, "        iupacaa \"");
+			else
+				fprintf(outf, "        iupacna \"");
+			l1 = 17;
+			spacer = 0;
+			width = 78;
+			tab = 0;
+			strcpy(endstr, "\"\n      } } ,");
+			linesout += 7;
+			break;
+
+		case kPAUP:
+			nameleft = true;
+			namewidth = 9;
+			spacer = 21;
+			width = 100;
+			tab = 0; /* 1; */
+			/* strcpy(endstr,";\nend;"); << this is end of all seqs.. */
+			/* do a header comment line for paup */
+			fprintf(outf, "[Name: %-16s  Len:%6ld  Check: %8lX]\n",
+				idword, seqlen, checksum);
+			linesout += 1;
+			break;
+
+		case kPretty:
+			numline = gPretty.numline;
+			baseonlynum = gPretty.baseonlynum;
+			namewidth = gPretty.namewidth;
+			numright = gPretty.numright;
+			numleft = gPretty.numleft;
+			nameright = gPretty.nameright;
+			nameleft = gPretty.nameleft;
+			spacer = gPretty.spacer + 1;
+			width = gPretty.seqwidth;
+			tab = gPretty.tab;
+			/* also add rtf formatting w/ font, size, style */
+			if (gPretty.nametop) {
+				fprintf(outf,
+					"Name: %-16s  Len:%6ld  Check: %8lX\n",
+					idword, seqlen, checksum);
+				linesout++;
+			}
+			break;
+
+		case kMSF:
+			fprintf(outf,
+				" Name: %-16s Len:%6ld  Check: %5lu  Weight:  1.00\n",
+				idword, seqlen, checksum);
+			linesout++;
+			nameleft = true;
+			namewidth = 15; /* need MAX namewidth here... */
+			sprintf(nameform, "%%+%ds ", namewidth);
+			spacer = 11;
+			width = 50;
+			tab = 0; /* 1; */
+			break;
+
+		case kIG:
+			fprintf(outf, ";%s, %ld bases, %lX checksum.\n", seqname,
+				seqlen, checksum);
+			fprintf(outf, "%s\n", idword);
+			strcpy(endstr, "1"); /* == linear dna */
+			linesout += 2;
+			break;
+
+		default:
+		case kZuker: /* don't attempt Zuker's ftn format */
+		case kPearson:
+			fprintf(outf, ">%s, %ld bases, %lX checksum.\n", seqname,
+				seqlen, checksum);
+			linesout += 1;
+			break;
+	}
+
+	if (*nameform == 0)
+		sprintf(nameform, "%%%d.%ds ", namewidth, namewidth);
+	if (numline)
+		sprintf(numform, "%%%ds ", numwidth);
+	else
+		sprintf(numform, "%%%dld ", numwidth);
+	strcpy(nocountsymbols, kNocountsymbols);
+	if (baseonlynum) {
+		if (strchr(nocountsymbols, gPretty.gapchar) == NULL) {
+			strcat(nocountsymbols, " ");
+			nocountsymbols[strlen(nocountsymbols) - 1] =
+			    gPretty.gapchar;
+		}
+		if (gPretty.domatch &&
+		    (cp = strchr(nocountsymbols, gPretty.matchchar)) != NULL) {
+			*cp = ' ';
+		}
+	}
+
+	if (numline) {
+		*idword = 0;
+	}
+
+	width = min(width, kMaxseqwidth);
+	for (i = 0, l = 0, ibase = 1; i < seqlen;) {
+		if (l1 < 0)
+			l1 = 0;
+		else if (l1 == 0) {
+			if (nameleft) fprintf(outf, nameform, idword);
+			if (numleft) {
+				if (numline)
+					fprintf(outf, numform, "");
+				else
+					fprintf(outf, numform, ibase);
+			}
+			for (j = 0; j < tab; j++) fputc(' ', outf);
+		}
+
+		l1++; /* don't count spaces for width*/
+		if (numline) {
+			if (spacer == kSpaceAll ||
+			    (spacer != 0 && (l + 1) % spacer == 1)) {
+				if (numline == 1) fputc(' ', outf);
+				s[l++] = ' ';
+			}
+			if (l1 % 10 == 1 || l1 == width) {
+				if (numline == 1) fprintf(outf, "%-9ld ", i + 1);
+				s[l++] = '|'; /* == put a number here */
+			}
+			else
+				s[l++] = ' ';
+			i++;
+		}
+
+		else {
+			if (spacer == kSpaceAll ||
+			    (spacer != 0 && (l + 1) % spacer == 1))
+				s[l++] = ' ';
+			if (!baseonlynum)
+				ibase++;
+			else if (0 == strchr(nocountsymbols, seq[i]))
+				ibase++;
+			s[l++] = seq[i++];
+		}
+
+		if (l1 == width || i == seqlen) {
+			if (outform == kPretty)
+				for (; l1 < width; l1++) {
+					if (spacer == kSpaceAll ||
+					    (spacer != 0 &&
+					     (l + 1) % spacer == 1))
+						s[l++] = ' ';
+					s[l++] = ' '; /* pad w/ blanks */
+				}
+			s[l] = '\0';
+			l = 0;
+			l1 = 0;
+
+			if (numline) {
+				if (numline == 2)
+					fprintf(outf, "%s", s); /* finish numberline ! and | */
+			}
+			else {
+				if (i == seqlen)
+					fprintf(outf, "%s%s", s, endstr);
+				else
+					fprintf(outf, "%s", s);
+				if (numright || nameright) fputc(' ', outf);
+				if (numright) fprintf(outf, numform, ibase - 1);
+				if (nameright) fprintf(outf, nameform, idword);
+			}
+			fputc('\n', outf);
+			linesout++;
+		}
+	}
+	return linesout;
+} /*writeSeq*/
+
+/* End file: ureadseq.c */
diff --git a/ureadseq.h b/ureadseq.h
new file mode 100644
index 0000000..29d71b5
--- /dev/null
+++ b/ureadseq.h
@@ -0,0 +1,172 @@
+/* File: ureadseq.h
+ *
+ * Header for module UReadSeq
+ */
+
+#ifndef UREADSEQ_H
+#define UREADSEQ_H
+
+typedef char boolean;
+#define NEWLINE '\n'
+#define false 0
+#define true 1
+/* parenthesised to compose inside larger expressions; args are evaluated twice */
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+/* advance the pointer `string` past leading chars <= ' '; stops at the NUL */
+#define skipwhitespace(string) \
+	{ while (*(string) <= ' ' && *(string) != 0) (string)++; }
+
+/* NLM strings: range tests on 'A'..'Z' / 'a'..'z' and a shift by ' '; c is evaluated more than once */
+#define is_upper(c) ('A' <= (c) && (c) <= 'Z')
+#define is_lower(c) ('a' <= (c) && (c) <= 'z')
+#define to_lower(c) ((char)(is_upper(c) ? (c) + ' ' : (c)))
+#define to_upper(c) ((char)(is_lower(c) ? (c) - ' ' : (c)))
+
+/* readSeq errors (all negative) */
+#define eFileNotFound -1
+#define eNoData -2
+#define eMemFull -3
+#define eItemNotFound -4
+#define eOneFormat -5
+#define eUnequalSize -6
+#define eFileCreate -7
+#define eUnknownFormat -8
+#define eOptionBad -9
+#define eASNerr -10
+
+/* magic number for readSeq(whichEntry) to give seq list */
+#define kListSequences -1
+
+/* sequence kinds returned by getseqtype */
+#define kOtherSeq 0
+#define kDNA 1
+#define kRNA 2
+#define kNucleic 3
+#define kAmino 4
+
+/* formats known to readSeq; also the outform codes writeSeq switches on */
+#define kIG 1
+#define kGenBank 2
+#define kNBRF 3
+#define kEMBL 4
+#define kGCG 5
+#define kStrider 6
+#define kFitch 7
+#define kPearson 8
+#define kZuker 9
+#define kOlsen 10
+#define kPhylip2 11
+#define kPhylip4 12
+#define kPhylip3 kPhylip4 /* alias of kPhylip4 */
+#define kPhylip kPhylip4 /* alias of kPhylip4 */
+#define kPlain 13 /* keep this at #13 */
+#define kPIR 14
+#define kMSF 15
+#define kASN1 16
+#define kPAUP 17
+#define kPretty 18
+
+#define kMaxFormat 18 /* same value as kPretty */
+#define kMinFormat 1 /* same value as kIG */
+#define kNoformat -1 /* format not tested */
+#define kUnknown 0   /* format not determinable */
+
+/* subsidiary types */
+#define kASNseqentry 51
+#define kASNseqset 52
+
+#define kPhylipInterleave 61
+#define kPhylipSequential 62
+
+typedef struct { /* layout options; writeSeq copies them for kPretty output */
+	boolean isactive, baseonlynum; /* baseonlynum: no-count symbols do not advance the base number */
+	boolean numright, numleft, numtop, numbot; /* numleft/numright: number at line start/end */
+	boolean nameright, nameleft, nametop; /* nametop: print a Name/Len/Check line first */
+	boolean noleaves, domatch, degap;
+	char matchchar, gapchar;
+	short numline, atseq; /* atseq: incremented by every writeSeq call */
+	short namewidth, numwidth;
+	short interline, spacer, seqwidth, tab; /* seqwidth: symbols per line; tab: blanks before them */
+} prettyopts;
+
+/* Reset the prettyopts object p to its defaults.  p is parenthesised so any
+ * lvalue expression (e.g. *ptr) works; it is expanded once per field. */
+#define gPrettyInit(p)                                                        \
+	{                                                                     \
+		(p).isactive = false;                                         \
+		(p).baseonlynum = true;                                       \
+		(p).numline = (p).atseq = 0;                                  \
+		(p).numright = (p).numleft = (p).numtop = (p).numbot = false; \
+		(p).nameright = (p).nameleft = (p).nametop = false;           \
+		(p).noleaves = (p).domatch = (p).degap = false;               \
+		(p).matchchar = '.'; (p).gapchar = '-';                       \
+		(p).namewidth = 8; (p).numwidth = 5;                          \
+		(p).interline = 1;                                            \
+		(p).spacer = 10;                                              \
+		(p).seqwidth = 50;                                            \
+		(p).tab = 0;                                                  \
+	}
+
+#ifdef UREADSEQ_G /* with UREADSEQ_G defined this header defines gPretty... */
+prettyopts gPretty;
+#else /* ...otherwise it only declares it */
+extern prettyopts gPretty;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* NOTE: these prototypes use FILE, but this header includes nothing itself; include <stdio.h> first */
+extern short seqFileFormat(const char *filename, long *skiplines, short *error);
+extern short seqFileFormatFp(FILE *fseq, long *skiplines, short *error);
+
+extern char *listSeqs(const char *filename, const long skiplines,
+		      const short format, short *nseq, short *error);
+
+extern char *readSeq(const short whichEntry, const char *filename,
+		     const long skiplines, const short format, long *seqlen,
+		     short *nseq, short *error, char *seqid);
+
+extern char *readSeqFp(const short whichEntry_, FILE *fp_,
+		       const long skiplines_, const short format_,
+		       long *seqlen_, short *nseq_, short *error_,
+		       char *seqid_);
+/* writeSeq: writes seq to outf in format outform; returns its tally of output lines */
+extern short writeSeq(FILE *outf, const char *seq, const long seqlen,
+		      const short outform, const char *seqid);
+/* checksums: each returns the checksum of seq and also adds it into *checktotal */
+extern unsigned long CRC32checksum(const char *seq, const long seqlen,
+				   unsigned long *checktotal);
+extern unsigned long GCGchecksum(const char *seq, const long seqlen,
+				 unsigned long *checktotal);
+#ifdef SMALLCHECKSUM /* seqchecksum: what writeSeq uses outside the GCG and MSF formats */
+#define seqchecksum GCGchecksum
+#else
+#define seqchecksum CRC32checksum
+#endif
+/* getseqtype returns a kOtherSeq..kAmino code; compressSeq returns a malloc'd copy without gapc */
+extern short getseqtype(const char *seq, const long seqlen);
+extern char *compressSeq(const char gapc, const char *seq, const long seqlen,
+			 long *newlen);
+
+#ifdef NCBI
+/* ASN.1 entry points, declared only when NCBI is defined (the Makefile's NCFLAGS pass -DNCBI) */
+extern char *listASNSeqs(const char *filename, const long skiplines,
+			 const short format, short *nseq, short *error);
+
+extern char *readASNSeq(const short whichEntry, const char *filename,
+			const long skiplines, const short format, long *seqlen,
+			short *nseq, short *error, char **seqid);
+#endif
+
+/* patches for some missing string.h stuff */
+extern int Strcasecmp(const char *a, const char *b);
+extern int Strncasecmp(const char *a, const char *b, long maxn);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*UREADSEQ_H*/
+