2023-04-16 07:33:28 +08:00
|
|
|
|
/* File: readseq.c
|
|
|
|
|
* main() program for ureadseq.c, ureadseq.h
|
|
|
|
|
*
|
|
|
|
|
* Reads and writes nucleic/protein sequence in various
|
|
|
|
|
* formats. Data files may have multiple sequences.
|
|
|
|
|
*
|
|
|
|
|
* Copyright 1990 by d.g.gilbert
|
|
|
|
|
* biology dept., indiana university, bloomington, in 47405
|
|
|
|
|
* e-mail: gilbertd@bio.indiana.edu
|
|
|
|
|
*
|
|
|
|
|
* This program may be freely copied and used by anyone.
|
|
|
|
|
* Developers are encouraged to incorporate parts in their
|
|
|
|
|
* programs, rather than devise their own private sequence
|
|
|
|
|
* format.
|
|
|
|
|
*
|
|
|
|
|
* This should compile and run with any ANSI C compiler.
|
|
|
|
|
* Please advise me of any bugs, additions or corrections.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* Program banner (name, version date, one-line description); written to
 * stderr by usage() and by main(). */
const char *title = "readSeq (1Feb93), multi-format molbio sequence reader.\n";
|
|
|
|
|
|
|
|
|
|
/* History
|
|
|
|
|
27 Feb 90. 1st release to public.
|
|
|
|
|
4 Mar 90. + Gary Olsen format
|
|
|
|
|
+ case change
|
|
|
|
|
* minor corrections to NBRF,EMBL,others
|
|
|
|
|
* output 1 file per sequence for gcg, unknown
|
|
|
|
|
* define -DNOSTR for c-libraries w/o strstr
|
|
|
|
|
- readseq.p, pascal version, becomes out-of-date
|
|
|
|
|
24 May 90. + Phylip 3.2 output format (no input)
|
|
|
|
|
20 Jul 90. + Phylip 3.3 output (no input yet)
|
|
|
|
|
+ interactive output re-direction
|
|
|
|
|
+ verbose progress info
|
|
|
|
|
* interactive help output
|
|
|
|
|
* dropped line no.s on NBRF output
|
|
|
|
|
* patched in HyperGCG XCMD corrections,
|
|
|
|
|
- except for seq. documentation handling
|
|
|
|
|
* dropped the IG special nuc codes, as IG has
|
|
|
|
|
adopted the standard IUB codes (now if only
|
|
|
|
|
everyone would adopt a standard format !)
|
|
|
|
|
11 Oct 90. * corrected bug in reading/writing of EMBL format
|
|
|
|
|
|
|
|
|
|
17 Oct 91. * corrected bug in reading Olsen format
|
|
|
|
|
(serious-deletion)
|
|
|
|
|
10 Nov 91. * corrected bug in reading some GCG format files
|
|
|
|
|
(serious-last line duplicated)
|
|
|
|
|
+ add format name parsing (-fgb, -ffasta, ...)
|
|
|
|
|
+ Phylip v3.4 output format (== v3.2, sequential)
|
|
|
|
|
+ add checksum output to all forms that have document
|
|
|
|
|
+ skip mail headers in seq file
|
|
|
|
|
+ add pipe for standard input == seq file (with -p)
|
|
|
|
|
* fold in parts of MacApp Seq object
|
|
|
|
|
* strengthen format detection
|
|
|
|
|
* clarify program structure
|
|
|
|
|
* remove fixed sequence size limit (now dynamic, sizeof memory)
|
|
|
|
|
* check and fold in accumulated bug reports:
|
|
|
|
|
* Now ANSI-C fopen(..,"w") & check open failure
|
|
|
|
|
* Define -DFIXTOUPPER for nonANSI C libraries that mess
|
|
|
|
|
up toupper/tolower
|
|
|
|
|
= No command-line changes; callers of readseq main() should be okay
|
|
|
|
|
- ureadseq.h functions have changed; client programs need to note.
|
|
|
|
|
+ added Unix and VMS Make scripts, including validation tests
|
|
|
|
|
|
|
|
|
|
4 May 92. + added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
|
|
|
|
|
(-DBIGCHECKSUM)
|
|
|
|
|
Aug 92 = fixed Olsen format input to handle files w/ more sequences,
|
|
|
|
|
not to mess up when more than one seq has same identifier,
|
|
|
|
|
and to convert number masks to symbols.
|
|
|
|
|
= IG format fix to understand ^L
|
|
|
|
|
|
|
|
|
|
25-30 Dec 92
|
|
|
|
|
* revised command-line & interactive interface. Suggested form is
|
|
|
|
|
now readseq infile -format=genbank -output=outfile -item=1,3,4 ... but remains
|
|
|
|
|
compatible with prior commandlines: readseq infile -f2 -ooutfile -i3 ...
|
|
|
|
|
+ added GCG MSF multi sequence file format
|
|
|
|
|
+ added PIR/CODATA format
|
|
|
|
|
+ added NCBI ASN.1 sequence file format
|
|
|
|
|
+ added Pretty, multi sequence pretty output (only)
|
|
|
|
|
+ added PAUP multi seq format
|
|
|
|
|
+ added degap option
|
|
|
|
|
+ added Gary Williams (GWW, G.Williams@CRC.AC.UK)
|
|
|
|
|
reverse-complement option.
|
|
|
|
|
+ added support for reading Phylip formats (interleave &
|
|
|
|
|
sequential)
|
|
|
|
|
* string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER,
|
|
|
|
|
NEEDSTRCASECMP
|
|
|
|
|
* changed 32bit checksum to default, -DSMALLCHECKSUM for GCG
|
|
|
|
|
version
|
|
|
|
|
|
|
|
|
|
1Feb93
|
|
|
|
|
= revert GenBank output to a fixed left number width which
|
|
|
|
|
other software depends on.
|
|
|
|
|
= fix for MSF input to handle symbols in names
|
|
|
|
|
= fix bug for possible memory overrun when truncating seqs for
|
|
|
|
|
Phylip or Paup formats (thanks Anthony Persechini)
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Readseq has been tested with:
|
|
|
|
|
Macintosh MPW C
|
|
|
|
|
GNU gcc
|
|
|
|
|
SGI cc
|
|
|
|
|
VAX-VMS cc
|
|
|
|
|
Any ANSI C compiler should be able to handle this.
|
|
|
|
|
Old-style C compilers barf all over the source.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
How do I build the readseq program if I have an Ansi C compiler?
|
|
|
|
|
#--------------------
|
|
|
|
|
# Unix ANSI C
|
|
|
|
|
# Use the supplied Makefile this way:
|
|
|
|
|
% make CC=name-of-c-compiler
|
|
|
|
|
# OR do this...
|
|
|
|
|
% gcc readseq.c ureadseq.c -o readseq
|
|
|
|
|
|
|
|
|
|
#--------------------
|
|
|
|
|
$!VAX-VMS cc
|
|
|
|
|
$! Use the supplied Make.Com this way:
|
|
|
|
|
$ @make
|
|
|
|
|
$! OR, do this:
|
|
|
|
|
$ cc readseq, ureadseq
|
|
|
|
|
$ link readseq, ureadseq, sys$library:vaxcrtl/lib
|
|
|
|
|
$ readseq :== $ MyDisk:[myacct]readseq
|
|
|
|
|
|
|
|
|
|
#--------------------
|
|
|
|
|
# Macintosh Simple Input/Output Window application
|
|
|
|
|
# requires MPW-C and SIOW library (from APDA)
|
|
|
|
|
# also uses files macinit.c, macinit.r, readseqSIOW.make
|
|
|
|
|
#
|
|
|
|
|
Buildprogram readseqSIOW
|
|
|
|
|
|
|
|
|
|
#--------------------
|
|
|
|
|
#MPW-C v3 tool
|
|
|
|
|
C ureadseq.c
|
|
|
|
|
C readseq.c
|
|
|
|
|
link -w -o readseq -t MPST -c 'MPS ' ∂
|
|
|
|
|
readseq.c.o Ureadseq.c.o ∂
|
|
|
|
|
"{Libraries}"Interface.o ∂
|
|
|
|
|
"{Libraries}"ToolLibs.o ∂
|
|
|
|
|
"{Libraries}"Runtime.o ∂
|
|
|
|
|
"{CLibraries}"StdClib.o
|
|
|
|
|
readseq -i1 ig.seq
|
|
|
|
|
|
|
|
|
|
# MPW-C with NCBI tools
|
|
|
|
|
|
|
|
|
|
set NCBI "{Boot}@molbio:ncbi:"; EXPORT NCBI
|
|
|
|
|
set NCBILIB1 "{NCBI}"lib:libncbi.o; export NCBILIB1
|
|
|
|
|
set NCBILIB2 "{NCBI}"lib:libncbiobj.o; export NCBILIB2
|
|
|
|
|
set NCBILIB3 "{NCBI}"lib:libncbicdr.o; export NCBILIB3
|
|
|
|
|
set NCBILIB4 "{NCBI}"lib:libvibrant.o; export NCBILIB4
|
|
|
|
|
|
|
|
|
|
C ureadseq.c
|
|
|
|
|
C -d NCBI -i "{NCBI}"include: ureadasn.c
|
|
|
|
|
C -d NCBI -i "{NCBI}"include: readseq.c
|
|
|
|
|
link -w -o readseq -t MPST -c 'MPS ' ∂
|
|
|
|
|
ureadseq.c.o ureadasn.c.o readseq.c.o ∂
|
|
|
|
|
{NCBILIB4} {NCBILIB2} {NCBILIB1} ∂
|
|
|
|
|
"{Libraries}"Interface.o ∂
|
|
|
|
|
"{Libraries}"ToolLibs.o ∂
|
|
|
|
|
"{Libraries}"Runtime.o ∂
|
|
|
|
|
"{CLibraries}"CSANELib.o ∂
|
|
|
|
|
"{CLibraries}"Math.o ∂
|
|
|
|
|
"{CLibraries}"StdClib.o
|
|
|
|
|
|
|
|
|
|
===========================================================*/
|
|
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <stdio.h>
|
2023-04-16 13:50:30 +08:00
|
|
|
|
#include <stdlib.h>
|
2023-04-16 07:33:28 +08:00
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
#include "ureadseq.h"
|
|
|
|
|
|
|
|
|
|
#pragma segment readseq
|
|
|
|
|
|
|
|
|
|
/* Name of the input file currently being processed.  `inputfile` initially
 * points at the fixed 256-byte store; main() may repoint it (e.g. at a
 * temporary file name when reading from a pipe). */
static char inputfilestore[256], *inputfile = inputfilestore;
|
|
|
|
|
|
|
|
|
|
/* Menu labels for the numbered formats.  Entry [k - 1] is the label for
 * format number k; the trailing empty string fills the last slot so that
 * the two-column menus in usage() and chooseFormat() can always print a
 * right-hand entry. */
const char *formats[kMaxFormat + 1] = {
    " 1. IG/Stanford",     " 2. GenBank/GB",
    " 3. NBRF",            " 4. EMBL",
    " 5. GCG",             " 6. DNAStrider",
    " 7. Fitch",           " 8. Pearson/Fasta",
    " 9. Zuker (in-only)", "10. Olsen (in-only)",
    "11. Phylip3.2",       "12. Phylip",
    "13. Plain/Raw",       "14. PIR/CODATA",
    "15. MSF",             "16. ASN.1",
    "17. PAUP/NEXUS",      "18. Pretty (out-only)",
    ""
};
|
|
|
|
|
|
|
|
|
|
#define kFormCount 30
|
|
|
|
|
#define kMaxFormName 15
|
|
|
|
|
|
|
|
|
|
const struct formatTable {
|
|
|
|
|
char *name;
|
|
|
|
|
short num;
|
|
|
|
|
} formname[] = {
|
|
|
|
|
{"ig", kIG},
|
|
|
|
|
{"stanford", kIG},
|
|
|
|
|
{"genbank", kGenBank},
|
|
|
|
|
{"gb", kGenBank},
|
|
|
|
|
{"nbrf", kNBRF},
|
|
|
|
|
{"embl", kEMBL},
|
|
|
|
|
{"gcg", kGCG},
|
|
|
|
|
{"uwgcg", kGCG},
|
|
|
|
|
{"dnastrider", kStrider},
|
|
|
|
|
{"strider", kStrider},
|
|
|
|
|
{"fitch", kFitch},
|
|
|
|
|
{"pearson", kPearson},
|
|
|
|
|
{"fasta", kPearson},
|
|
|
|
|
{"zuker", kZuker},
|
|
|
|
|
{"olsen", kOlsen},
|
|
|
|
|
{"phylip", kPhylip},
|
|
|
|
|
{"phylip3.2", kPhylip2},
|
|
|
|
|
{"phylip3.3", kPhylip3},
|
|
|
|
|
{"phylip3.4", kPhylip4},
|
|
|
|
|
{"phylip-interleaved", kPhylip4},
|
|
|
|
|
{"phylip-sequential", kPhylip2},
|
|
|
|
|
{"plain", kPlain},
|
|
|
|
|
{"raw", kPlain},
|
|
|
|
|
{"pir", kPIR},
|
|
|
|
|
{"codata", kPIR},
|
|
|
|
|
{"asn.1", kASN1},
|
|
|
|
|
{"msf", kMSF},
|
|
|
|
|
{"paup", kPAUP},
|
|
|
|
|
{"nexus", kPAUP},
|
|
|
|
|
{"pretty", kPretty},
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Opening lines of an ASN.1 Bioseq-set. */
const char *kASN1headline = "Bioseq-set ::= {\nseq-set {\n";
|
|
|
|
|
|
|
|
|
|
/* GWW: nucleotide complement table (IUB codes).
 *
 * The table covers the printable ASCII characters ' ' (space) through '~'
 * in order, so entry 0 corresponds to ' '.  Letters map to the IUB code of
 * their complement, in matching case; every other character maps to
 * itself.  Presumably indexed as compl[ch - ' '] -- the lookup itself is
 * not in this part of the file.
 */
const char compl [] =
    " !\"#$%&'()*+,-./"             /* ' ' .. '/' : unchanged   */
    "0123456789:;<=>?@"             /* '0' .. '@' : unchanged   */
    "TVGHNNCDNNMNKNNYRYSAABWNRN"    /* 'A' .. 'Z' : complements */
    "[\\]^_`"                       /* '[' .. '`' : unchanged   */
    "tvghnncdnnmnknnyrysaabwnrn"    /* 'a' .. 'z' : complements */
    "{|}~";                         /* '{' .. '~' : unchanged   */
|
|
|
|
|
|
|
|
|
|
char *formatstr(short format)
|
|
|
|
|
{
|
|
|
|
|
if (format < 1 || format > kMaxFormat) {
|
|
|
|
|
switch (format) {
|
|
|
|
|
case kASNseqentry:
|
|
|
|
|
case kASNseqset:
|
|
|
|
|
return formats[kASN1 - 1];
|
|
|
|
|
case kPhylipInterleave:
|
|
|
|
|
case kPhylipSequential:
|
|
|
|
|
return formats[kPhylip - 1];
|
|
|
|
|
default:
|
|
|
|
|
return "(unknown)";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return formats[format - 1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int parseformat(char *name)
|
|
|
|
|
{
|
|
|
|
|
#define kDupmatch -2
|
|
|
|
|
int namelen, maxlen, i, match, matchat;
|
|
|
|
|
char lname[kMaxFormName + 1];
|
|
|
|
|
|
|
|
|
|
skipwhitespace(name);
|
|
|
|
|
namelen = strlen(name);
|
|
|
|
|
if (namelen == 0)
|
|
|
|
|
return kNoformat;
|
|
|
|
|
else if (isdigit(*name)) {
|
|
|
|
|
i = atol(name);
|
|
|
|
|
if (i<kMinFormat | i> kMaxFormat)
|
|
|
|
|
return kNoformat;
|
|
|
|
|
else
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* else match character name */
|
|
|
|
|
maxlen = min(kMaxFormName, namelen);
|
|
|
|
|
for (i = 0; i < maxlen; i++) lname[i] = to_lower(name[i]);
|
|
|
|
|
lname[maxlen] = 0;
|
|
|
|
|
matchat = kNoformat;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < kFormCount; i++) {
|
|
|
|
|
match = strncmp(lname, formname[i].name, maxlen);
|
|
|
|
|
if (match == 0) {
|
|
|
|
|
if (strlen(formname[i].name) == namelen)
|
|
|
|
|
return (formname[i].num);
|
|
|
|
|
else if (matchat == kNoformat)
|
|
|
|
|
matchat = i;
|
|
|
|
|
else
|
|
|
|
|
matchat =
|
|
|
|
|
kDupmatch; /* 2 or more partial matches */
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (matchat == kNoformat || matchat == kDupmatch)
|
|
|
|
|
return kNoformat;
|
|
|
|
|
else
|
|
|
|
|
return formname[matchat].num;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void dumpSeqList(char *list, short format)
|
|
|
|
|
{
|
|
|
|
|
long i, l, listlen;
|
|
|
|
|
char s[256];
|
|
|
|
|
|
|
|
|
|
listlen = strlen(list);
|
|
|
|
|
printf("Sequences in %s (format is %s)\n", inputfile,
|
|
|
|
|
formatstr(format));
|
|
|
|
|
for (i = 0, l = 0; i < listlen; i++) {
|
|
|
|
|
if (list[i] == (char)NEWLINE) {
|
|
|
|
|
s[l] = '\0';
|
|
|
|
|
l = 0;
|
|
|
|
|
puts(s);
|
|
|
|
|
}
|
|
|
|
|
else if (l < 255)
|
|
|
|
|
s[l++] = list[i];
|
|
|
|
|
}
|
|
|
|
|
putchar('\n');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void usage()
|
|
|
|
|
{
|
|
|
|
|
short i, midi;
|
|
|
|
|
|
|
|
|
|
fprintf(stderr, title);
|
|
|
|
|
fprintf(stderr, "usage: readseq [-options] in.seq > out.seq\n");
|
|
|
|
|
fprintf(stderr, " options\n");
|
|
|
|
|
/* ? add -d[igits] to allow digits in sequence data, &/or option to
|
|
|
|
|
* specify seq charset !? */
|
|
|
|
|
fprintf(stderr, " -a[ll] select All sequences\n");
|
|
|
|
|
fprintf(stderr, " -c[aselower] change to lower case\n");
|
|
|
|
|
fprintf(stderr, " -C[ASEUPPER] change to UPPER CASE\n");
|
|
|
|
|
fprintf(stderr, " -degap[=-] remove gap symbols\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -i[tem=2,3,4] select Item number(s) from several\n");
|
|
|
|
|
fprintf(stderr, " -l[ist] List sequences only\n");
|
|
|
|
|
fprintf(stderr, " -o[utput=]out.seq redirect Output\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -p[ipe] Pipe (command line, <stdin, >stdout)\n");
|
|
|
|
|
fprintf(stderr, " -r[everse] change to Reverse-complement\n");
|
|
|
|
|
fprintf(stderr, " -v[erbose] Verbose progress\n");
|
|
|
|
|
fprintf(stderr, " -f[ormat=]# Format number for output, or\n");
|
|
|
|
|
fprintf(stderr, " -f[ormat=]Name Format name for output:\n");
|
|
|
|
|
midi = (kMaxFormat + 1) / 2;
|
|
|
|
|
for (i = kMinFormat - 1; i < midi; i++)
|
|
|
|
|
fprintf(stderr, " %-20s %-20s\n", formats[i],
|
|
|
|
|
formats[midi + i]);
|
|
|
|
|
|
|
|
|
|
/* new output format options, esp. for pretty format: */
|
|
|
|
|
fprintf(stderr, " \n");
|
|
|
|
|
fprintf(stderr, " Pretty format options: \n");
|
|
|
|
|
fprintf(stderr, " -wid[th]=# sequence line width\n");
|
|
|
|
|
fprintf(stderr, " -tab=# left indent\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -col[space]=# column space within sequence line "
|
|
|
|
|
"on output\n");
|
|
|
|
|
fprintf(
|
|
|
|
|
stderr,
|
|
|
|
|
" -gap[count] count gap chars in sequence numbers\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -nameleft, -nameright[=#] name on left/right side [=max "
|
|
|
|
|
"width]\n");
|
|
|
|
|
fprintf(stderr, " -nametop name at top/bottom\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -numleft, -numright seq index on left/right side\n");
|
|
|
|
|
fprintf(stderr, " -numtop, -numbot index on top/bottom\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -match[=.] use match base for 2..n species\n");
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
" -inter[line=#] blank line(s) between sequence "
|
|
|
|
|
"blocks\n");
|
|
|
|
|
|
|
|
|
|
/****** not ready yet
|
|
|
|
|
fprintf(stderr, " -code=none,rtf,postscript,ps code syntax\n");
|
|
|
|
|
fprintf(stderr, " -namefont=, -numfont=, -seqfont=font font
|
|
|
|
|
choice\n"); fprintf(stderr, " font suggestions include
|
|
|
|
|
times,courier,helvetica\n"); fprintf(stderr, " -namefontsize=,
|
|
|
|
|
-numfontsize=, -seqfontsize=#\n"); fprintf(stderr, " fontsize
|
|
|
|
|
suggestions include 9,10,12,14\n"); fprintf(stderr, " -namefontstyle=,
|
|
|
|
|
-numfontstyle=, -seqfontstyle= style fontstyle for names\n");
|
|
|
|
|
fprintf(stderr, " fontstyle options are
|
|
|
|
|
plain,italic,bold,bold-italic\n");
|
|
|
|
|
******/
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void erralert(short err)
|
|
|
|
|
{
|
|
|
|
|
switch (err) {
|
|
|
|
|
case 0:
|
|
|
|
|
break;
|
|
|
|
|
case eFileNotFound:
|
|
|
|
|
fprintf(stderr, "File not found: %s\n", inputfile);
|
|
|
|
|
break;
|
|
|
|
|
case eFileCreate:
|
|
|
|
|
fprintf(stderr, "Can't open output file.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eASNerr:
|
|
|
|
|
fprintf(stderr, "Error in ASN.1 sequence routines.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eNoData:
|
|
|
|
|
fprintf(stderr, "No data in file.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eItemNotFound:
|
|
|
|
|
fprintf(stderr, "Specified item not in file.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eUnequalSize:
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"This format requires equal length "
|
|
|
|
|
"sequences.\nSequence truncated or padded to "
|
|
|
|
|
"fit.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eUnknownFormat:
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"Error: this format is unknown to me.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eOneFormat:
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"Warning: This format permits only 1 sequence "
|
|
|
|
|
"per file.\n");
|
|
|
|
|
break;
|
|
|
|
|
case eMemFull:
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"Out of storage memory. Sequence truncated.\n");
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
fprintf(stderr, "readSeq error = %d\n", err);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} /* erralert */
|
|
|
|
|
|
|
|
|
|
/* chooseFormat: decide the output format when none was given as an option.
 *
 *   quietly  when true, no prompt is shown and the default is returned
 *   returns  the format number parsed from the user's reply, or kPearson
 *            when running quietly, on an empty/unrecognised reply, or
 *            when stdin is at end-of-file.
 *
 * The reply is read with a bounded fgets(); the previous gets() could
 * overrun sform[] and left it uninitialised on end-of-file.
 */
int chooseFormat(boolean quietly)
{
    char sform[128];
    int midi, i, outform;

    if (quietly)
        return kPearson; /* default */

    /* format menu in two columns: first half left, second half right */
    midi = (kMaxFormat + 1) / 2;
    for (i = kMinFormat - 1; i < midi; i++)
        fprintf(stderr, " %-20s %-20s\n", formats[i], formats[midi + i]);
    fprintf(stderr, "\nChoose an output format (name or #): \n");

    if (fgets(sform, sizeof sform, stdin) == NULL)
        sform[0] = '\0';                      /* EOF or read error */
    else
        sform[strcspn(sform, "\r\n")] = '\0'; /* drop the line ending */

    outform = parseformat(sform);
    if (outform == kNoformat)
        outform = kPearson;
    return outform;
}
|
|
|
|
|
|
|
|
|
|
/* read parameter(s) */
|
|
|
|
|
|
|
|
|
|
/* checkopt: test whether a typed option matches an option name.
 *
 *   casesense  true to compare case-sensitively
 *   sopt       the option as typed (e.g. "-form")
 *   smatch     the full option name (e.g. "-format")
 *   minword    minimum number of leading characters that must agree
 *   returns    true when the first N characters are equal, where N is the
 *              larger of minword and the shorter of the two lengths; so
 *              abbreviations of at least minword characters are accepted.
 */
boolean checkopt(boolean casesense, char *sopt, const char *smatch,
                 short minword)
{
    long optlen = strlen(sopt);
    long matchlen = strlen(smatch);
    short ncompare = max(minword, min(optlen, matchlen));
    int differs;

    if (casesense)
        differs = strncmp(sopt, smatch, ncompare);
    else
        differs = Strncasecmp(sopt, smatch, ncompare);

    return (boolean)(differs == 0);
}
|
|
|
|
|
|
|
|
|
|
#define kMaxwhichlist 50
|
|
|
|
|
|
|
|
|
|
/* global for readopt(), main() */
|
|
|
|
|
boolean chooseall = false, quietly = false, gotinputfile = false,
|
|
|
|
|
listonly = false, closeout = false, verbose = false, manyout = false,
|
|
|
|
|
dolower = false, doupper = false, doreverse = false, askout = true,
|
|
|
|
|
dopipe = false, interleaved = false;
|
|
|
|
|
short nfile = 0, iwhichlist = 0, nwhichlist = 0;
|
|
|
|
|
short whichlist[kMaxwhichlist + 1];
|
|
|
|
|
long whichSeq = 0, outform = kNoformat;
|
|
|
|
|
char onamestore[128], *oname = onamestore;
|
|
|
|
|
FILE *foo = NULL;
|
|
|
|
|
|
|
|
|
|
void resetGlobals()
|
|
|
|
|
/* need this when used from SIOW, as these globals are not reinited
|
|
|
|
|
automatically between calls to local main() */
|
|
|
|
|
{
|
|
|
|
|
chooseall = false;
|
|
|
|
|
quietly = false;
|
|
|
|
|
gotinputfile = false;
|
|
|
|
|
listonly = false;
|
|
|
|
|
closeout = false;
|
|
|
|
|
verbose = false;
|
|
|
|
|
manyout = false;
|
|
|
|
|
dolower = false;
|
|
|
|
|
doupper = false;
|
|
|
|
|
doreverse = false;
|
|
|
|
|
askout = true;
|
|
|
|
|
dopipe = false;
|
|
|
|
|
interleaved = false;
|
|
|
|
|
nfile = 0;
|
|
|
|
|
iwhichlist = 0;
|
|
|
|
|
nwhichlist = 0;
|
|
|
|
|
whichSeq = 0;
|
|
|
|
|
outform = kNoformat;
|
|
|
|
|
oname = onamestore;
|
|
|
|
|
foo = NULL;
|
|
|
|
|
|
|
|
|
|
gPrettyInit(gPretty);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define kOptOkay 1
|
|
|
|
|
#define kOptNone 0
|
|
|
|
|
|
|
|
|
|
int readopt(char *sopt)
|
|
|
|
|
{
|
|
|
|
|
char sparamstore[256], *sparam = sparamstore;
|
|
|
|
|
short n, slen = strlen(sopt);
|
|
|
|
|
|
|
|
|
|
/* fprintf(stderr,"readopt( %s) == ", sopt); */
|
|
|
|
|
|
|
|
|
|
if (*sopt == '?') {
|
|
|
|
|
usage();
|
|
|
|
|
return kOptNone; /*? eOptionBad or kOptNone */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
else if (*sopt == '-') {
|
|
|
|
|
char *cp = strchr(sopt, '=');
|
|
|
|
|
*sparam = '\0';
|
|
|
|
|
if (cp) {
|
|
|
|
|
strcpy(sparam, cp + 1);
|
|
|
|
|
*cp = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-help", 2)) {
|
|
|
|
|
usage();
|
|
|
|
|
return kOptNone;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-all", 2)) {
|
|
|
|
|
whichSeq = 1;
|
|
|
|
|
chooseall = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-colspace",
|
|
|
|
|
4)) { /* test before -c[ase] */
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
gPretty.spacer = n;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(true, sopt, "-caselower", 2)) {
|
|
|
|
|
dolower = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(true, sopt, "-CASEUPPER", 2)) {
|
|
|
|
|
doupper = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-pipe", 2)) {
|
|
|
|
|
dopipe = true;
|
|
|
|
|
askout = false;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-list", 2)) {
|
|
|
|
|
listonly = true;
|
|
|
|
|
askout = false;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-reverse", 2)) {
|
|
|
|
|
doreverse = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-verbose", 2)) {
|
|
|
|
|
verbose = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-match", 5)) {
|
|
|
|
|
gPretty.domatch = true;
|
|
|
|
|
if (*sparam >= ' ') gPretty.matchchar = *sparam;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-degap", 4)) {
|
|
|
|
|
gPretty.degap = true;
|
|
|
|
|
if (*sparam >= ' ') gPretty.gapchar = *sparam;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-interline", 4)) {
|
|
|
|
|
gPretty.interline = atoi(sparam);
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-item", 2)) {
|
|
|
|
|
char *cp = sparam;
|
|
|
|
|
nwhichlist = 0;
|
|
|
|
|
whichlist[0] = 0;
|
|
|
|
|
if (*cp == 0) cp = sopt + 2; /* compatible w/ old way */
|
|
|
|
|
do {
|
|
|
|
|
while (*cp != 0 && !isdigit(*cp)) cp++;
|
|
|
|
|
if (*cp != 0) {
|
|
|
|
|
n = atoi(cp);
|
|
|
|
|
whichlist[nwhichlist++] = n;
|
|
|
|
|
while (*cp != 0 && isdigit(*cp)) cp++;
|
|
|
|
|
}
|
|
|
|
|
} while (*cp != 0 && n > 0 &&
|
|
|
|
|
nwhichlist < kMaxwhichlist);
|
|
|
|
|
whichlist[nwhichlist++] =
|
|
|
|
|
0; /* 0 == stopsign for loop */
|
|
|
|
|
whichSeq = max(1, whichlist[0]);
|
|
|
|
|
iwhichlist = 1;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-format",
|
|
|
|
|
5)) { /* -format=phylip, -f2, -form=phylip */
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2; isalpha(*sparam);
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
outform = parseformat(sparam);
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-f",
|
|
|
|
|
2)) { /* compatible w/ -fphylip prior version */
|
|
|
|
|
if (*sparam == 0) sparam = sopt + 2;
|
|
|
|
|
outform = parseformat(sparam);
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-output", 3)) { /* -output=myseq */
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 3; isalpha(*sparam);
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
strcpy(oname, sparam);
|
|
|
|
|
foo = fopen(oname, "w");
|
|
|
|
|
if (!foo) {
|
|
|
|
|
erralert(eFileCreate);
|
|
|
|
|
return eFileCreate;
|
|
|
|
|
}
|
|
|
|
|
closeout = true;
|
|
|
|
|
askout = false;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-o",
|
|
|
|
|
2)) { /* compatible w/ -omyseq prior version */
|
|
|
|
|
if (*sparam == 0) sparam = sopt + 2;
|
|
|
|
|
strcpy(oname, sparam);
|
|
|
|
|
foo = fopen(oname, "w");
|
|
|
|
|
if (!foo) {
|
|
|
|
|
erralert(eFileCreate);
|
|
|
|
|
return eFileCreate;
|
|
|
|
|
}
|
|
|
|
|
closeout = true;
|
|
|
|
|
askout = false;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-width", 2)) {
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2;
|
|
|
|
|
!isdigit(*sparam) && *sparam != 0;
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
if (n > 0) gPretty.seqwidth = n;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-tab", 4)) {
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2;
|
|
|
|
|
!isdigit(*sparam) && *sparam != 0;
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
gPretty.tab = n;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-gapcount", 4)) {
|
|
|
|
|
gPretty.baseonlynum = false;
|
|
|
|
|
/* if (*sparam >= ' ') gPretty.gapchar= *sparam; */
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-nointerleave", 8)) {
|
|
|
|
|
gPretty.noleaves = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-nameleft", 7)) {
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2;
|
|
|
|
|
!isdigit(*sparam) && *sparam != 0;
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
if (n > 0 && n < 50) gPretty.namewidth = n;
|
|
|
|
|
gPretty.nameleft = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-nameright", 7)) {
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2;
|
|
|
|
|
!isdigit(*sparam) && *sparam != 0;
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
if (n > 0 && n < 50) gPretty.namewidth = n;
|
|
|
|
|
gPretty.nameright = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-nametop", 6)) {
|
|
|
|
|
gPretty.nametop = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-numleft", 6)) {
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2;
|
|
|
|
|
!isdigit(*sparam) && *sparam != 0;
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
if (n > 0 && n < 50) gPretty.numwidth = n;
|
|
|
|
|
gPretty.numleft = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-numright", 6)) {
|
|
|
|
|
if (*sparam == 0) {
|
|
|
|
|
for (sparam = sopt + 2;
|
|
|
|
|
!isdigit(*sparam) && *sparam != 0;
|
|
|
|
|
sparam++)
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
n = atoi(sparam);
|
|
|
|
|
if (n > 0 && n < 50) gPretty.numwidth = n;
|
|
|
|
|
gPretty.numright = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (checkopt(false, sopt, "-numtop", 6)) {
|
|
|
|
|
gPretty.numtop = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
if (checkopt(false, sopt, "-numbottom", 6)) {
|
|
|
|
|
gPretty.numbot = true;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
usage();
|
|
|
|
|
return eOptionBad;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
strcpy(inputfile, sopt);
|
|
|
|
|
gotinputfile = (*inputfile != 0);
|
|
|
|
|
nfile++;
|
|
|
|
|
return kOptOkay;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* return kOptNone; -- never here */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* this program suffers some as it tries to be a quiet translator pipe
|
|
|
|
|
_and_ a noisy user interactor
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* return is best for SIOW, okay for others */
|
|
|
|
|
#ifdef SIOW
|
|
|
|
|
#define Exit(a) return (a)
|
|
|
|
|
siow_main(int argc, char *argv[])
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
#define Exit(a) exit(a)
|
|
|
|
|
|
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
|
#endif
|
|
|
|
|
{
|
|
|
|
|
boolean closein = false;
|
|
|
|
|
short ifile, nseq, atseq, format, err = 0, seqtype = kDNA, nlines,
|
|
|
|
|
seqout = 0, phylvers = 2;
|
|
|
|
|
long i, skiplines, seqlen, seqlen0;
|
|
|
|
|
unsigned long checksum = 0, checkall = 0;
|
|
|
|
|
char *seq, *cp, *firstseq = NULL, *seqlist, *progname, tempname[256];
|
|
|
|
|
char seqid[256], *seqidptr = seqid;
|
|
|
|
|
char stempstore[256], *stemp = stempstore;
|
|
|
|
|
FILE *ftmp, *fin, *fout;
|
|
|
|
|
long outindexmax = 0, noutindex = 0, *outindex = NULL;
|
|
|
|
|
|
|
|
|
|
#define exit_main(err) \
|
|
|
|
|
{ \
|
|
|
|
|
if (closeout) fclose(fout); \
|
|
|
|
|
if (closein) fclose(fin); \
|
|
|
|
|
if (*tempname != 0) remove(tempname); \
|
|
|
|
|
Exit(err); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define indexout() \
|
|
|
|
|
if (interleaved) { \
|
|
|
|
|
if (noutindex >= outindexmax) { \
|
|
|
|
|
outindexmax = noutindex + 20; \
|
|
|
|
|
outindex = (long *)realloc( \
|
|
|
|
|
outindex, sizeof(long) * outindexmax); \
|
|
|
|
|
if (outindex == NULL) { \
|
|
|
|
|
err = eMemFull; \
|
|
|
|
|
erralert(err); \
|
|
|
|
|
exit_main(err); \
|
|
|
|
|
} \
|
|
|
|
|
} \
|
|
|
|
|
outindex[noutindex++] = ftell(fout); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
resetGlobals();
|
|
|
|
|
foo = stdout;
|
|
|
|
|
progname = argv[0];
|
|
|
|
|
*oname = 0;
|
|
|
|
|
*tempname = 0;
|
|
|
|
|
/* initialize gPretty ?? -- done in header */
|
|
|
|
|
|
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
|
err = readopt(argv[i]);
|
|
|
|
|
if (err <= 0) exit_main(err);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* pipe input from stdin !? */
|
|
|
|
|
if (dopipe && !gotinputfile) {
|
|
|
|
|
int c;
|
|
|
|
|
tmpnam(tempname);
|
|
|
|
|
inputfile = tempname;
|
|
|
|
|
ftmp = fopen(inputfile, "w");
|
|
|
|
|
if (!ftmp) {
|
|
|
|
|
erralert(eFileCreate);
|
|
|
|
|
exit_main(eFileCreate);
|
|
|
|
|
}
|
|
|
|
|
while ((c = getc(stdin)) != EOF) fputc(c, ftmp);
|
|
|
|
|
fclose(ftmp);
|
|
|
|
|
gotinputfile = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
quietly = (dopipe || (gotinputfile && (listonly || whichSeq != 0)));
|
|
|
|
|
|
|
|
|
|
if (verbose || (!quietly && !gotinputfile)) fprintf(stderr, title);
|
|
|
|
|
ifile = 1;
|
|
|
|
|
|
|
|
|
|
/* UI: Choose output */
|
|
|
|
|
if (askout && !closeout && !quietly) {
|
|
|
|
|
askout = false;
|
|
|
|
|
fprintf(
|
|
|
|
|
stderr,
|
|
|
|
|
"\nName of output file (?=help, defaults to display): \n");
|
|
|
|
|
gets(oname = onamestore);
|
|
|
|
|
skipwhitespace(oname);
|
|
|
|
|
if (*oname == '?') {
|
|
|
|
|
usage();
|
|
|
|
|
exit_main(0);
|
|
|
|
|
}
|
|
|
|
|
else if (*oname != 0) {
|
|
|
|
|
closeout = true;
|
|
|
|
|
foo = fopen(oname, "w");
|
|
|
|
|
if (!foo) {
|
|
|
|
|
erralert(eFileCreate);
|
|
|
|
|
exit_main(eFileCreate);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fout = foo;
|
|
|
|
|
if (outform == kNoformat) outform = chooseFormat(quietly);
|
|
|
|
|
|
|
|
|
|
/* set up formats ... */
|
|
|
|
|
switch (outform) {
|
|
|
|
|
case kPhylip2:
|
|
|
|
|
interleaved = false;
|
|
|
|
|
phylvers = 2;
|
|
|
|
|
outform = kPhylip;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case kPhylip4:
|
|
|
|
|
interleaved = true;
|
|
|
|
|
phylvers = 4;
|
|
|
|
|
outform = kPhylip;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case kMSF:
|
|
|
|
|
case kPAUP:
|
|
|
|
|
interleaved = true;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case kPretty:
|
|
|
|
|
gPretty.isactive = true;
|
|
|
|
|
interleaved = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (gPretty.isactive && gPretty.noleaves) interleaved = false;
|
|
|
|
|
if (interleaved) {
|
|
|
|
|
fout = ftmp = tmpfile();
|
|
|
|
|
outindexmax = 30;
|
|
|
|
|
noutindex = 0;
|
|
|
|
|
outindex = (long *)malloc(outindexmax * sizeof(long));
|
|
|
|
|
if (outindex == NULL) {
|
|
|
|
|
err = eMemFull;
|
|
|
|
|
erralert(err);
|
|
|
|
|
exit_main(err);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* big loop over all input files */
|
|
|
|
|
do {
|
|
|
|
|
/* select next input file */
|
|
|
|
|
gotinputfile = (*tempname != 0);
|
|
|
|
|
while ((ifile < argc) && (!gotinputfile)) {
|
|
|
|
|
if (*argv[ifile] != '-') {
|
|
|
|
|
strcpy(inputfile, argv[ifile]);
|
|
|
|
|
gotinputfile = (*inputfile != 0);
|
|
|
|
|
--nfile;
|
|
|
|
|
}
|
|
|
|
|
ifile++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (!gotinputfile) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"\nName an input sequence or -option: \n");
|
|
|
|
|
inputfile = inputfilestore;
|
|
|
|
|
|
|
|
|
|
gets(stemp = stempstore);
|
|
|
|
|
if (*stemp == 0)
|
|
|
|
|
goto fini; /* !! need this to finish work during
|
|
|
|
|
interactive use */
|
|
|
|
|
stemp = strtok(stempstore, " \n\r\t");
|
|
|
|
|
while (stemp) {
|
|
|
|
|
err = readopt(stemp); /* will read inputfile if
|
|
|
|
|
it exists */
|
|
|
|
|
if (err < 0) exit_main(err);
|
|
|
|
|
stemp = strtok(NULL, " \n\r\t");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
/* thanks to AJB@UK.AC.DARESBURY.DLVH for this PHYLIP3 fix: */
|
|
|
|
|
/* head for end (interleave if needed) */
|
|
|
|
|
if (*inputfile == 0) break;
|
|
|
|
|
|
|
|
|
|
format = seqFileFormat(inputfile, &skiplines, &err);
|
|
|
|
|
|
|
|
|
|
if (err == 0) {
|
|
|
|
|
#ifdef NCBI
|
|
|
|
|
if (format == kASNseqentry || format == kASNseqset)
|
|
|
|
|
seqlist = listASNSeqs(inputfile, skiplines,
|
|
|
|
|
format, &nseq, &err);
|
|
|
|
|
else
|
|
|
|
|
#endif
|
|
|
|
|
seqlist = listSeqs(inputfile, skiplines, format,
|
|
|
|
|
&nseq, &err);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (err != 0)
|
|
|
|
|
erralert(err);
|
|
|
|
|
|
|
|
|
|
else if (listonly) {
|
|
|
|
|
dumpSeqList(seqlist, format);
|
|
|
|
|
free(seqlist);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
/* choose whichSeq if needed */
|
|
|
|
|
if (nseq == 1 || chooseall ||
|
|
|
|
|
(quietly && whichSeq == 0)) {
|
|
|
|
|
chooseall = true;
|
|
|
|
|
whichSeq = 1;
|
|
|
|
|
quietly = true; /* no loop */
|
|
|
|
|
}
|
|
|
|
|
else if (whichSeq > nseq && quietly) {
|
|
|
|
|
erralert(eItemNotFound);
|
|
|
|
|
err = eItemNotFound;
|
|
|
|
|
}
|
|
|
|
|
else if (whichSeq > nseq || !quietly) {
|
|
|
|
|
dumpSeqList(seqlist, format);
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"\nChoose a sequence (# or All): \n");
|
|
|
|
|
gets(stemp = stempstore);
|
|
|
|
|
skipwhitespace(stemp);
|
|
|
|
|
if (to_lower(*stemp) == 'a') {
|
|
|
|
|
chooseall = true;
|
|
|
|
|
whichSeq = 1;
|
|
|
|
|
quietly =
|
|
|
|
|
true; /* !? this means we don't ask
|
|
|
|
|
for another file as well as
|
|
|
|
|
no more whichSeqs... */
|
|
|
|
|
}
|
|
|
|
|
else if (isdigit(*stemp))
|
|
|
|
|
whichSeq = atol(stemp);
|
|
|
|
|
else
|
|
|
|
|
whichSeq = 1; /* default */
|
|
|
|
|
}
|
|
|
|
|
free(seqlist);
|
|
|
|
|
|
|
|
|
|
if (false /*chooseall*/) { /* this isn't debugged
|
|
|
|
|
yet...*/
|
|
|
|
|
fin = fopen(inputfile, "r");
|
|
|
|
|
closein = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (whichSeq > 0 && whichSeq <= nseq) {
|
|
|
|
|
/* need to open multiple output files ? */
|
|
|
|
|
manyout =
|
|
|
|
|
((chooseall || nwhichlist > 1) &&
|
|
|
|
|
nseq > 1 &&
|
|
|
|
|
(outform == kPlain || outform == kGCG));
|
|
|
|
|
if (manyout) {
|
|
|
|
|
if (whichSeq == 1)
|
|
|
|
|
erralert(eOneFormat);
|
|
|
|
|
else if (closeout) {
|
|
|
|
|
sprintf(stemp, "%s_%d", oname,
|
|
|
|
|
whichSeq);
|
|