1412 lines
34 KiB
C
1412 lines
34 KiB
C
/* File: readseq.c
|
||
* main() program for ureadseq.c, ureadseq.h
|
||
*
|
||
* Reads and writes nucleic/protein sequence in various
|
||
* formats. Data files may have multiple sequences.
|
||
*
|
||
* Copyright 1990 by d.g.gilbert
|
||
* biology dept., indiana university, bloomington, in 47405
|
||
* e-mail: gilbertd@bio.indiana.edu
|
||
*
|
||
* This program may be freely copied and used by anyone.
|
||
* Developers are encouraged to incorporate parts in their
|
||
* programs, rather than devise their own private sequence
|
||
* format.
|
||
*
|
||
* This should compile and run with any ANSI C compiler.
|
||
* Please advise me of any bugs, additions or corrections.
|
||
*
|
||
*/
|
||
|
||
/* Program banner; printed to stderr by usage() and, in verbose or
 * interactive runs, by main(). */
const char *title = "readSeq (1Feb93), multi-format molbio sequence reader.\n";
|
||
|
||
/* History
|
||
27 Feb 90. 1st release to public.
|
||
4 Mar 90. + Gary Olsen format
|
||
+ case change
|
||
* minor corrections to NBRF,EMBL,others
|
||
* output 1 file per sequence for gcg, unknown
|
||
* define -DNOSTR for c-libraries w/o strstr
|
||
- readseq.p, pascal version, becomes out-of-date
|
||
24 May 90. + Phylip 3.2 output format (no input)
|
||
20 Jul 90. + Phylip 3.3 output (no input yet)
|
||
+ interactive output re-direction
|
||
+ verbose progress info
|
||
* interactive help output
|
||
* dropped line no.s on NBRF output
|
||
* patched in HyperGCG XCMD corrections,
|
||
- except for seq. documentation handling
|
||
* dropped the IG special nuc codes, as IG has
|
||
adopted the standard IUB codes (now if only
|
||
everyone would adopt a standard format !)
|
||
11 Oct 90. * corrected bug in reading/writing of EMBL format
|
||
|
||
17 Oct 91. * corrected bug in reading Olsen format
|
||
(serious-deletion)
|
||
10 Nov 91. * corrected bug in reading some GCG format files
|
||
(serious-last line duplicated)
|
||
+ add format name parsing (-fgb, -ffasta, ...)
|
||
+ Phylip v3.4 output format (== v3.2, sequential)
|
||
+ add checksum output to all forms that have document
|
||
+ skip mail headers in seq file
|
||
+ add pipe for standard input == seq file (with -p)
|
||
* fold in parts of MacApp Seq object
|
||
* strengthen format detection
|
||
* clarify program structure
|
||
* remove fixed sequence size limit (now dynamic, sizeof memory)
|
||
* check and fold in accumulated bug reports:
|
||
* Now ANSI-C fopen(..,"w") & check open failure
|
||
* Define -DFIXTOUPPER for nonANSI C libraries that mess
|
||
up toupper/tolower
|
||
= No command-line changes; callers of readseq main() should be okay
|
||
- ureadseq.h functions have changed; client programs need to note.
|
||
+ added Unix and VMS Make scripts, including validation tests
|
||
|
||
4 May 92. + added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
|
||
(-DBIGCHECKSUM)
|
||
Aug 92 = fixed Olsen format input to handle files w/ more sequences,
|
||
not to mess up when more than one seq has same identifier,
|
||
and to convert number masks to symbols.
|
||
= IG format fix to understand ^L
|
||
|
||
25-30 Dec 92
|
||
* revised command-line & interactive interface. Suggested form is
|
||
now readseq infile -format=genbank -output=outfile -item=1,3,4 ... but remains
|
||
compatible with prior commandlines: readseq infile -f2 -ooutfile -i3 ...
|
||
+ added GCG MSF multi sequence file format
|
||
+ added PIR/CODATA format
|
||
+ added NCBI ASN.1 sequence file format
|
||
+ added Pretty, multi sequence pretty output (only)
|
||
+ added PAUP multi seq format
|
||
+ added degap option
|
||
+ added Gary Williams (GWW, G.Williams@CRC.AC.UK)
|
||
reverse-complement option.
|
||
+ added support for reading Phylip formats (interleave &
|
||
sequential)
|
||
* string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER,
|
||
NEEDSTRCASECMP
|
||
* changed 32bit checksum to default, -DSMALLCHECKSUM for GCG
|
||
version
|
||
|
||
1Feb93
|
||
= revert GenBank output to a fixed left number width which
|
||
other software depends on.
|
||
= fix for MSF input to handle symbols in names
|
||
= fix bug for possible memory overrun when truncating seqs for
|
||
Phylip or Paup formats (thanks Anthony Persechini)
|
||
|
||
*/
|
||
|
||
/*
|
||
Readseq has been tested with:
|
||
Macintosh MPW C
|
||
GNU gcc
|
||
SGI cc
|
||
VAX-VMS cc
|
||
Any ANSI C compiler should be able to handle this.
|
||
Old-style C compilers barf all over the source.
|
||
|
||
|
||
How do I build the readseq program if I have an Ansi C compiler?
|
||
#--------------------
|
||
# Unix ANSI C
|
||
# Use the supplied Makefile this way:
|
||
% make CC=name-of-c-compiler
|
||
# OR do this...
|
||
% gcc readseq.c ureadseq.c -o readseq
|
||
|
||
#--------------------
|
||
$!VAX-VMS cc
|
||
$! Use the supplied Make.Com this way:
|
||
$ @make
|
||
$! OR, do this:
|
||
$ cc readseq, ureadseq
|
||
$ link readseq, ureadseq, sys$library:vaxcrtl/lib
|
||
$ readseq :== $ MyDisk:[myacct]readseq
|
||
|
||
#--------------------
|
||
# Macintosh Simple Input/Output Window application
|
||
# requires MPW-C and SIOW library (from APDA)
|
||
# also uses files macinit.c, macinit.r, readseqSIOW.make
|
||
#
|
||
Buildprogram readseqSIOW
|
||
|
||
#--------------------
|
||
#MPW-C v3 tool
|
||
C ureadseq.c
|
||
C readseq.c
|
||
link -w -o readseq -t MPST -c 'MPS ' <20>
|
||
readseq.c.o Ureadseq.c.o <20>
|
||
"{Libraries}"Interface.o <20>
|
||
"{Libraries}"ToolLibs.o <20>
|
||
"{Libraries}"Runtime.o <20>
|
||
"{CLibraries}"StdClib.o
|
||
readseq -i1 ig.seq
|
||
|
||
# MPW-C with NCBI tools
|
||
|
||
set NCBI "{Boot}@molbio:ncbi:"; EXPORT NCBI
|
||
set NCBILIB1 "{NCBI}"lib:libncbi.o; export NCBILIB1
|
||
set NCBILIB2 "{NCBI}"lib:libncbiobj.o; export NCBILIB2
|
||
set NCBILIB3 "{NCBI}"lib:libncbicdr.o; export NCBILIB3
|
||
set NCBILIB4 "{NCBI}"lib:libvibrant.o; export NCBILIB4
|
||
|
||
C ureadseq.c
|
||
C -d NCBI -i "{NCBI}"include: ureadasn.c
|
||
C -d NCBI -i "{NCBI}"include: readseq.c
|
||
link -w -o readseq -t MPST -c 'MPS ' <20>
|
||
ureadseq.c.o ureadasn.c.o readseq.c.o <20>
|
||
{NCBILIB4} {NCBILIB2} {NCBILIB1} <20>
|
||
"{Libraries}"Interface.o <20>
|
||
"{Libraries}"ToolLibs.o <20>
|
||
"{Libraries}"Runtime.o <20>
|
||
"{CLibraries}"CSANELib.o <20>
|
||
"{CLibraries}"Math.o <20>
|
||
"{CLibraries}"StdClib.o
|
||
|
||
===========================================================*/
|
||
|
||
#include <ctype.h>
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
#include "ureadseq.h"
|
||
|
||
#pragma segment readseq
|
||
|
||
/* Storage for the current input file name, plus the pointer the rest of
 * the file uses to reach it.  main() may repoint inputfile at another
 * buffer (its temporary-file name when reading from a pipe). */
static char inputfilestore[256], *inputfile = inputfilestore;
|
||
|
||
/* Menu labels for the sequence formats.  formatstr() indexes this table
 * with (format number - 1); usage() and chooseFormat() print it in two
 * columns.  The last initializer is an empty string. */
const char *formats[kMaxFormat + 1] = {
    " 1. IG/Stanford",
    " 2. GenBank/GB",
    " 3. NBRF",
    " 4. EMBL",
    " 5. GCG",
    " 6. DNAStrider",
    " 7. Fitch",
    " 8. Pearson/Fasta",
    " 9. Zuker (in-only)",
    "10. Olsen (in-only)",
    "11. Phylip3.2",
    "12. Phylip",
    "13. Plain/Raw",
    "14. PIR/CODATA",
    "15. MSF",
    "16. ASN.1",
    "17. PAUP/NEXUS",
    "18. Pretty (out-only)",
    ""
};
|
||
|
||
#define kFormCount 30
|
||
#define kMaxFormName 15
|
||
|
||
const struct formatTable {
|
||
char *name;
|
||
short num;
|
||
} formname[] = {
|
||
{"ig", kIG},
|
||
{"stanford", kIG},
|
||
{"genbank", kGenBank},
|
||
{"gb", kGenBank},
|
||
{"nbrf", kNBRF},
|
||
{"embl", kEMBL},
|
||
{"gcg", kGCG},
|
||
{"uwgcg", kGCG},
|
||
{"dnastrider", kStrider},
|
||
{"strider", kStrider},
|
||
{"fitch", kFitch},
|
||
{"pearson", kPearson},
|
||
{"fasta", kPearson},
|
||
{"zuker", kZuker},
|
||
{"olsen", kOlsen},
|
||
{"phylip", kPhylip},
|
||
{"phylip3.2", kPhylip2},
|
||
{"phylip3.3", kPhylip3},
|
||
{"phylip3.4", kPhylip4},
|
||
{"phylip-interleaved", kPhylip4},
|
||
{"phylip-sequential", kPhylip2},
|
||
{"plain", kPlain},
|
||
{"raw", kPlain},
|
||
{"pir", kPIR},
|
||
{"codata", kPIR},
|
||
{"asn.1", kASN1},
|
||
{"msf", kMSF},
|
||
{"paup", kPAUP},
|
||
{"nexus", kPAUP},
|
||
{"pretty", kPretty},
|
||
};
|
||
|
||
/* Opening lines written by main() ahead of the first sequence when the
 * output format is kASN1. */
const char *kASN1headline = "Bioseq-set ::= {\nseq-set {\n";
|
||
|
||
/* GWW table for getting the complement of a nucleotide (IUB codes).
 *
 * The table covers the printable ASCII characters ' ' (space) through
 * '~' and is indexed by (character - ' ').  Letters map to their
 * complement code, preserving case; every other character maps to
 * itself. */
const char compl [] =
    " !\"#$%&'()*+,-./"           /* ' ' .. '/' */
    "0123456789:;<=>?@"           /* '0' .. '@' */
    "TVGHNNCDNNMNKNNYRYSAABWNRN"  /* 'A' .. 'Z' */
    "[\\]^_`"                     /* '[' .. '`' */
    "tvghnncdnnmnknnyrysaabwnrn"  /* 'a' .. 'z' */
    "{|}~";                       /* '{' .. '~' */
|
||
|
||
char *formatstr(short format)
|
||
{
|
||
if (format < 1 || format > kMaxFormat) {
|
||
switch (format) {
|
||
case kASNseqentry:
|
||
case kASNseqset:
|
||
return formats[kASN1 - 1];
|
||
case kPhylipInterleave:
|
||
case kPhylipSequential:
|
||
return formats[kPhylip - 1];
|
||
default:
|
||
return "(unknown)";
|
||
}
|
||
}
|
||
else
|
||
return formats[format - 1];
|
||
}
|
||
|
||
int parseformat(char *name)
|
||
{
|
||
#define kDupmatch -2
|
||
int namelen, maxlen, i, match, matchat;
|
||
char lname[kMaxFormName + 1];
|
||
|
||
skipwhitespace(name);
|
||
namelen = strlen(name);
|
||
if (namelen == 0)
|
||
return kNoformat;
|
||
else if (isdigit(*name)) {
|
||
i = atol(name);
|
||
if (i<kMinFormat | i> kMaxFormat)
|
||
return kNoformat;
|
||
else
|
||
return i;
|
||
}
|
||
|
||
/* else match character name */
|
||
maxlen = min(kMaxFormName, namelen);
|
||
for (i = 0; i < maxlen; i++) lname[i] = to_lower(name[i]);
|
||
lname[maxlen] = 0;
|
||
matchat = kNoformat;
|
||
|
||
for (i = 0; i < kFormCount; i++) {
|
||
match = strncmp(lname, formname[i].name, maxlen);
|
||
if (match == 0) {
|
||
if (strlen(formname[i].name) == namelen)
|
||
return (formname[i].num);
|
||
else if (matchat == kNoformat)
|
||
matchat = i;
|
||
else
|
||
matchat =
|
||
kDupmatch; /* 2 or more partial matches */
|
||
}
|
||
}
|
||
if (matchat == kNoformat || matchat == kDupmatch)
|
||
return kNoformat;
|
||
else
|
||
return formname[matchat].num;
|
||
}
|
||
|
||
static void dumpSeqList(char *list, short format)
|
||
{
|
||
long i, l, listlen;
|
||
char s[256];
|
||
|
||
listlen = strlen(list);
|
||
printf("Sequences in %s (format is %s)\n", inputfile,
|
||
formatstr(format));
|
||
for (i = 0, l = 0; i < listlen; i++) {
|
||
if (list[i] == (char)NEWLINE) {
|
||
s[l] = '\0';
|
||
l = 0;
|
||
puts(s);
|
||
}
|
||
else if (l < 255)
|
||
s[l++] = list[i];
|
||
}
|
||
putchar('\n');
|
||
}
|
||
|
||
void usage()
|
||
{
|
||
short i, midi;
|
||
|
||
fprintf(stderr, title);
|
||
fprintf(stderr, "usage: readseq [-options] in.seq > out.seq\n");
|
||
fprintf(stderr, " options\n");
|
||
/* ? add -d[igits] to allow digits in sequence data, &/or option to
|
||
* specify seq charset !? */
|
||
fprintf(stderr, " -a[ll] select All sequences\n");
|
||
fprintf(stderr, " -c[aselower] change to lower case\n");
|
||
fprintf(stderr, " -C[ASEUPPER] change to UPPER CASE\n");
|
||
fprintf(stderr, " -degap[=-] remove gap symbols\n");
|
||
fprintf(stderr,
|
||
" -i[tem=2,3,4] select Item number(s) from several\n");
|
||
fprintf(stderr, " -l[ist] List sequences only\n");
|
||
fprintf(stderr, " -o[utput=]out.seq redirect Output\n");
|
||
fprintf(stderr,
|
||
" -p[ipe] Pipe (command line, <stdin, >stdout)\n");
|
||
fprintf(stderr, " -r[everse] change to Reverse-complement\n");
|
||
fprintf(stderr, " -v[erbose] Verbose progress\n");
|
||
fprintf(stderr, " -f[ormat=]# Format number for output, or\n");
|
||
fprintf(stderr, " -f[ormat=]Name Format name for output:\n");
|
||
midi = (kMaxFormat + 1) / 2;
|
||
for (i = kMinFormat - 1; i < midi; i++)
|
||
fprintf(stderr, " %-20s %-20s\n", formats[i],
|
||
formats[midi + i]);
|
||
|
||
/* new output format options, esp. for pretty format: */
|
||
fprintf(stderr, " \n");
|
||
fprintf(stderr, " Pretty format options: \n");
|
||
fprintf(stderr, " -wid[th]=# sequence line width\n");
|
||
fprintf(stderr, " -tab=# left indent\n");
|
||
fprintf(stderr,
|
||
" -col[space]=# column space within sequence line "
|
||
"on output\n");
|
||
fprintf(
|
||
stderr,
|
||
" -gap[count] count gap chars in sequence numbers\n");
|
||
fprintf(stderr,
|
||
" -nameleft, -nameright[=#] name on left/right side [=max "
|
||
"width]\n");
|
||
fprintf(stderr, " -nametop name at top/bottom\n");
|
||
fprintf(stderr,
|
||
" -numleft, -numright seq index on left/right side\n");
|
||
fprintf(stderr, " -numtop, -numbot index on top/bottom\n");
|
||
fprintf(stderr,
|
||
" -match[=.] use match base for 2..n species\n");
|
||
fprintf(stderr,
|
||
" -inter[line=#] blank line(s) between sequence "
|
||
"blocks\n");
|
||
|
||
/****** not ready yet
|
||
fprintf(stderr, " -code=none,rtf,postscript,ps code syntax\n");
|
||
fprintf(stderr, " -namefont=, -numfont=, -seqfont=font font
|
||
choice\n"); fprintf(stderr, " font suggestions include
|
||
times,courier,helvetica\n"); fprintf(stderr, " -namefontsize=,
|
||
-numfontsize=, -seqfontsize=#\n"); fprintf(stderr, " fontsize
|
||
suggestions include 9,10,12,14\n"); fprintf(stderr, " -namefontstyle=,
|
||
-numfontstyle=, -seqfontstyle= style fontstyle for names\n");
|
||
fprintf(stderr, " fontstyle options are
|
||
plain,italic,bold,bold-italic\n");
|
||
******/
|
||
}
|
||
|
||
void erralert(short err)
|
||
{
|
||
switch (err) {
|
||
case 0:
|
||
break;
|
||
case eFileNotFound:
|
||
fprintf(stderr, "File not found: %s\n", inputfile);
|
||
break;
|
||
case eFileCreate:
|
||
fprintf(stderr, "Can't open output file.\n");
|
||
break;
|
||
case eASNerr:
|
||
fprintf(stderr, "Error in ASN.1 sequence routines.\n");
|
||
break;
|
||
case eNoData:
|
||
fprintf(stderr, "No data in file.\n");
|
||
break;
|
||
case eItemNotFound:
|
||
fprintf(stderr, "Specified item not in file.\n");
|
||
break;
|
||
case eUnequalSize:
|
||
fprintf(stderr,
|
||
"This format requires equal length "
|
||
"sequences.\nSequence truncated or padded to "
|
||
"fit.\n");
|
||
break;
|
||
case eUnknownFormat:
|
||
fprintf(stderr,
|
||
"Error: this format is unknown to me.\n");
|
||
break;
|
||
case eOneFormat:
|
||
fprintf(stderr,
|
||
"Warning: This format permits only 1 sequence "
|
||
"per file.\n");
|
||
break;
|
||
case eMemFull:
|
||
fprintf(stderr,
|
||
"Out of storage memory. Sequence truncated.\n");
|
||
break;
|
||
default:
|
||
fprintf(stderr, "readSeq error = %d\n", err);
|
||
break;
|
||
}
|
||
} /* erralert */
|
||
|
||
/* chooseFormat: decide the output format when none was given as an
 * option.
 *
 * quietly  when true, nothing is printed or read and the default
 *          (kPearson) is returned.
 *
 * Otherwise the format menu is printed on stderr in two columns and one
 * line is read from stdin.  The line is read with fgets() into a bounded
 * buffer (the previous gets() call could overrun it) and its line ending
 * is removed before parsing.  End of input, an empty line or an
 * unrecognized answer all select kPearson.
 */
int chooseFormat(boolean quietly)
{
    char sform[128];
    int midi, i, outform;

    if (quietly)
        return kPearson; /* default */

    midi = (kMaxFormat + 1) / 2;
    for (i = kMinFormat - 1; i < midi; i++)
        fprintf(stderr, " %-20s %-20s\n", formats[i],
                formats[midi + i]);
    fprintf(stderr, "\nChoose an output format (name or #): \n");

    if (fgets(sform, sizeof sform, stdin) == NULL)
        sform[0] = '\0'; /* EOF or read error: fall back to default */
    sform[strcspn(sform, "\r\n")] = '\0';

    outform = parseformat(sform);
    if (outform == kNoformat)
        outform = kPearson;
    return outform;
}
|
||
|
||
/* read parameter(s) */
|
||
|
||
/* checkopt: test whether the option text sopt selects the option named
 * smatch.
 *
 * casesense  true compares with strncmp, false with Strncasecmp
 * sopt       option text as typed
 * smatch     full option name, e.g. "-format"
 * minword    least number of leading characters that must agree
 *
 * The number of characters compared is the length of the shorter of the
 * two strings, raised to minword if that is larger.  Returns true when
 * that many leading characters agree.
 */
boolean checkopt(boolean casesense, char *sopt, const char *smatch,
                 short minword)
{
    long optlen = strlen(sopt);
    long matchlen = strlen(smatch);
    long shorter = (optlen < matchlen) ? optlen : matchlen;
    short ncompare = (minword > shorter) ? minword : shorter;

    if (casesense)
        return !strncmp(sopt, smatch, ncompare);
    return !Strncasecmp(sopt, smatch, ncompare);
}
|
||
|
||
#define kMaxwhichlist 50
|
||
|
||
/* global for readopt(), main() */
|
||
boolean chooseall = false, quietly = false, gotinputfile = false,
|
||
listonly = false, closeout = false, verbose = false, manyout = false,
|
||
dolower = false, doupper = false, doreverse = false, askout = true,
|
||
dopipe = false, interleaved = false;
|
||
short nfile = 0, iwhichlist = 0, nwhichlist = 0;
|
||
short whichlist[kMaxwhichlist + 1];
|
||
long whichSeq = 0, outform = kNoformat;
|
||
char onamestore[128], *oname = onamestore;
|
||
FILE *foo = NULL;
|
||
|
||
void resetGlobals()
|
||
/* need this when used from SIOW, as these globals are not reinited
|
||
automatically between calls to local main() */
|
||
{
|
||
chooseall = false;
|
||
quietly = false;
|
||
gotinputfile = false;
|
||
listonly = false;
|
||
closeout = false;
|
||
verbose = false;
|
||
manyout = false;
|
||
dolower = false;
|
||
doupper = false;
|
||
doreverse = false;
|
||
askout = true;
|
||
dopipe = false;
|
||
interleaved = false;
|
||
nfile = 0;
|
||
iwhichlist = 0;
|
||
nwhichlist = 0;
|
||
whichSeq = 0;
|
||
outform = kNoformat;
|
||
oname = onamestore;
|
||
foo = NULL;
|
||
|
||
gPrettyInit(gPretty);
|
||
}
|
||
|
||
#define kOptOkay 1
|
||
#define kOptNone 0
|
||
|
||
int readopt(char *sopt)
|
||
{
|
||
char sparamstore[256], *sparam = sparamstore;
|
||
short n, slen = strlen(sopt);
|
||
|
||
/* fprintf(stderr,"readopt( %s) == ", sopt); */
|
||
|
||
if (*sopt == '?') {
|
||
usage();
|
||
return kOptNone; /*? eOptionBad or kOptNone */
|
||
}
|
||
|
||
else if (*sopt == '-') {
|
||
char *cp = strchr(sopt, '=');
|
||
*sparam = '\0';
|
||
if (cp) {
|
||
strcpy(sparam, cp + 1);
|
||
*cp = 0;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-help", 2)) {
|
||
usage();
|
||
return kOptNone;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-all", 2)) {
|
||
whichSeq = 1;
|
||
chooseall = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-colspace",
|
||
4)) { /* test before -c[ase] */
|
||
n = atoi(sparam);
|
||
gPretty.spacer = n;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(true, sopt, "-caselower", 2)) {
|
||
dolower = true;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(true, sopt, "-CASEUPPER", 2)) {
|
||
doupper = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-pipe", 2)) {
|
||
dopipe = true;
|
||
askout = false;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-list", 2)) {
|
||
listonly = true;
|
||
askout = false;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-reverse", 2)) {
|
||
doreverse = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-verbose", 2)) {
|
||
verbose = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-match", 5)) {
|
||
gPretty.domatch = true;
|
||
if (*sparam >= ' ') gPretty.matchchar = *sparam;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-degap", 4)) {
|
||
gPretty.degap = true;
|
||
if (*sparam >= ' ') gPretty.gapchar = *sparam;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-interline", 4)) {
|
||
gPretty.interline = atoi(sparam);
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-item", 2)) {
|
||
char *cp = sparam;
|
||
nwhichlist = 0;
|
||
whichlist[0] = 0;
|
||
if (*cp == 0) cp = sopt + 2; /* compatible w/ old way */
|
||
do {
|
||
while (*cp != 0 && !isdigit(*cp)) cp++;
|
||
if (*cp != 0) {
|
||
n = atoi(cp);
|
||
whichlist[nwhichlist++] = n;
|
||
while (*cp != 0 && isdigit(*cp)) cp++;
|
||
}
|
||
} while (*cp != 0 && n > 0 &&
|
||
nwhichlist < kMaxwhichlist);
|
||
whichlist[nwhichlist++] =
|
||
0; /* 0 == stopsign for loop */
|
||
whichSeq = max(1, whichlist[0]);
|
||
iwhichlist = 1;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-format",
|
||
5)) { /* -format=phylip, -f2, -form=phylip */
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2; isalpha(*sparam);
|
||
sparam++)
|
||
;
|
||
}
|
||
outform = parseformat(sparam);
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-f",
|
||
2)) { /* compatible w/ -fphylip prior version */
|
||
if (*sparam == 0) sparam = sopt + 2;
|
||
outform = parseformat(sparam);
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-output", 3)) { /* -output=myseq */
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 3; isalpha(*sparam);
|
||
sparam++)
|
||
;
|
||
}
|
||
strcpy(oname, sparam);
|
||
foo = fopen(oname, "w");
|
||
if (!foo) {
|
||
erralert(eFileCreate);
|
||
return eFileCreate;
|
||
}
|
||
closeout = true;
|
||
askout = false;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-o",
|
||
2)) { /* compatible w/ -omyseq prior version */
|
||
if (*sparam == 0) sparam = sopt + 2;
|
||
strcpy(oname, sparam);
|
||
foo = fopen(oname, "w");
|
||
if (!foo) {
|
||
erralert(eFileCreate);
|
||
return eFileCreate;
|
||
}
|
||
closeout = true;
|
||
askout = false;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-width", 2)) {
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2;
|
||
!isdigit(*sparam) && *sparam != 0;
|
||
sparam++)
|
||
;
|
||
}
|
||
n = atoi(sparam);
|
||
if (n > 0) gPretty.seqwidth = n;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-tab", 4)) {
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2;
|
||
!isdigit(*sparam) && *sparam != 0;
|
||
sparam++)
|
||
;
|
||
}
|
||
n = atoi(sparam);
|
||
gPretty.tab = n;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-gapcount", 4)) {
|
||
gPretty.baseonlynum = false;
|
||
/* if (*sparam >= ' ') gPretty.gapchar= *sparam; */
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-nointerleave", 8)) {
|
||
gPretty.noleaves = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-nameleft", 7)) {
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2;
|
||
!isdigit(*sparam) && *sparam != 0;
|
||
sparam++)
|
||
;
|
||
}
|
||
n = atoi(sparam);
|
||
if (n > 0 && n < 50) gPretty.namewidth = n;
|
||
gPretty.nameleft = true;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-nameright", 7)) {
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2;
|
||
!isdigit(*sparam) && *sparam != 0;
|
||
sparam++)
|
||
;
|
||
}
|
||
n = atoi(sparam);
|
||
if (n > 0 && n < 50) gPretty.namewidth = n;
|
||
gPretty.nameright = true;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-nametop", 6)) {
|
||
gPretty.nametop = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-numleft", 6)) {
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2;
|
||
!isdigit(*sparam) && *sparam != 0;
|
||
sparam++)
|
||
;
|
||
}
|
||
n = atoi(sparam);
|
||
if (n > 0 && n < 50) gPretty.numwidth = n;
|
||
gPretty.numleft = true;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-numright", 6)) {
|
||
if (*sparam == 0) {
|
||
for (sparam = sopt + 2;
|
||
!isdigit(*sparam) && *sparam != 0;
|
||
sparam++)
|
||
;
|
||
}
|
||
n = atoi(sparam);
|
||
if (n > 0 && n < 50) gPretty.numwidth = n;
|
||
gPretty.numright = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
if (checkopt(false, sopt, "-numtop", 6)) {
|
||
gPretty.numtop = true;
|
||
return kOptOkay;
|
||
}
|
||
if (checkopt(false, sopt, "-numbottom", 6)) {
|
||
gPretty.numbot = true;
|
||
return kOptOkay;
|
||
}
|
||
|
||
else {
|
||
usage();
|
||
return eOptionBad;
|
||
}
|
||
}
|
||
|
||
else {
|
||
strcpy(inputfile, sopt);
|
||
gotinputfile = (*inputfile != 0);
|
||
nfile++;
|
||
return kOptOkay;
|
||
}
|
||
|
||
/* return kOptNone; -- never here */
|
||
}
|
||
|
||
/* this program suffers some as it tries to be a quiet translator pipe
|
||
_and_ a noisy user interactor
|
||
*/
|
||
|
||
/* return is best for SIOW, okay for others */
|
||
#ifdef SIOW
|
||
#define Exit(a) return (a)
|
||
siow_main(int argc, char *argv[])
|
||
|
||
#else
|
||
#define Exit(a) exit(a)
|
||
|
||
main(int argc, char *argv[])
|
||
#endif
|
||
{
|
||
boolean closein = false;
|
||
short ifile, nseq, atseq, format, err = 0, seqtype = kDNA, nlines,
|
||
seqout = 0, phylvers = 2;
|
||
long i, skiplines, seqlen, seqlen0;
|
||
unsigned long checksum = 0, checkall = 0;
|
||
char *seq, *cp, *firstseq = NULL, *seqlist, *progname, tempname[256];
|
||
char seqid[256], *seqidptr = seqid;
|
||
char stempstore[256], *stemp = stempstore;
|
||
FILE *ftmp, *fin, *fout;
|
||
long outindexmax = 0, noutindex = 0, *outindex = NULL;
|
||
|
||
#define exit_main(err) \
|
||
{ \
|
||
if (closeout) fclose(fout); \
|
||
if (closein) fclose(fin); \
|
||
if (*tempname != 0) remove(tempname); \
|
||
Exit(err); \
|
||
}
|
||
|
||
#define indexout() \
|
||
if (interleaved) { \
|
||
if (noutindex >= outindexmax) { \
|
||
outindexmax = noutindex + 20; \
|
||
outindex = (long *)realloc( \
|
||
outindex, sizeof(long) * outindexmax); \
|
||
if (outindex == NULL) { \
|
||
err = eMemFull; \
|
||
erralert(err); \
|
||
exit_main(err); \
|
||
} \
|
||
} \
|
||
outindex[noutindex++] = ftell(fout); \
|
||
}
|
||
|
||
resetGlobals();
|
||
foo = stdout;
|
||
progname = argv[0];
|
||
*oname = 0;
|
||
*tempname = 0;
|
||
/* initialize gPretty ?? -- done in header */
|
||
|
||
for (i = 1; i < argc; i++) {
|
||
err = readopt(argv[i]);
|
||
if (err <= 0) exit_main(err);
|
||
}
|
||
|
||
/* pipe input from stdin !? */
|
||
if (dopipe && !gotinputfile) {
|
||
int c;
|
||
tmpnam(tempname);
|
||
inputfile = tempname;
|
||
ftmp = fopen(inputfile, "w");
|
||
if (!ftmp) {
|
||
erralert(eFileCreate);
|
||
exit_main(eFileCreate);
|
||
}
|
||
while ((c = getc(stdin)) != EOF) fputc(c, ftmp);
|
||
fclose(ftmp);
|
||
gotinputfile = true;
|
||
}
|
||
|
||
quietly = (dopipe || (gotinputfile && (listonly || whichSeq != 0)));
|
||
|
||
if (verbose || (!quietly && !gotinputfile)) fprintf(stderr, title);
|
||
ifile = 1;
|
||
|
||
/* UI: Choose output */
|
||
if (askout && !closeout && !quietly) {
|
||
askout = false;
|
||
fprintf(
|
||
stderr,
|
||
"\nName of output file (?=help, defaults to display): \n");
|
||
gets(oname = onamestore);
|
||
skipwhitespace(oname);
|
||
if (*oname == '?') {
|
||
usage();
|
||
exit_main(0);
|
||
}
|
||
else if (*oname != 0) {
|
||
closeout = true;
|
||
foo = fopen(oname, "w");
|
||
if (!foo) {
|
||
erralert(eFileCreate);
|
||
exit_main(eFileCreate);
|
||
}
|
||
}
|
||
}
|
||
|
||
fout = foo;
|
||
if (outform == kNoformat) outform = chooseFormat(quietly);
|
||
|
||
/* set up formats ... */
|
||
switch (outform) {
|
||
case kPhylip2:
|
||
interleaved = false;
|
||
phylvers = 2;
|
||
outform = kPhylip;
|
||
break;
|
||
|
||
case kPhylip4:
|
||
interleaved = true;
|
||
phylvers = 4;
|
||
outform = kPhylip;
|
||
break;
|
||
|
||
case kMSF:
|
||
case kPAUP:
|
||
interleaved = true;
|
||
break;
|
||
|
||
case kPretty:
|
||
gPretty.isactive = true;
|
||
interleaved = true;
|
||
break;
|
||
}
|
||
|
||
if (gPretty.isactive && gPretty.noleaves) interleaved = false;
|
||
if (interleaved) {
|
||
fout = ftmp = tmpfile();
|
||
outindexmax = 30;
|
||
noutindex = 0;
|
||
outindex = (long *)malloc(outindexmax * sizeof(long));
|
||
if (outindex == NULL) {
|
||
err = eMemFull;
|
||
erralert(err);
|
||
exit_main(err);
|
||
}
|
||
}
|
||
|
||
/* big loop over all input files */
|
||
do {
|
||
/* select next input file */
|
||
gotinputfile = (*tempname != 0);
|
||
while ((ifile < argc) && (!gotinputfile)) {
|
||
if (*argv[ifile] != '-') {
|
||
strcpy(inputfile, argv[ifile]);
|
||
gotinputfile = (*inputfile != 0);
|
||
--nfile;
|
||
}
|
||
ifile++;
|
||
}
|
||
|
||
while (!gotinputfile) {
|
||
fprintf(stderr,
|
||
"\nName an input sequence or -option: \n");
|
||
inputfile = inputfilestore;
|
||
|
||
gets(stemp = stempstore);
|
||
if (*stemp == 0)
|
||
goto fini; /* !! need this to finish work during
|
||
interactive use */
|
||
stemp = strtok(stempstore, " \n\r\t");
|
||
while (stemp) {
|
||
err = readopt(stemp); /* will read inputfile if
|
||
it exists */
|
||
if (err < 0) exit_main(err);
|
||
stemp = strtok(NULL, " \n\r\t");
|
||
}
|
||
}
|
||
/* thanks to AJB@UK.AC.DARESBURY.DLVH for this PHYLIP3 fix: */
|
||
/* head for end (interleave if needed) */
|
||
if (*inputfile == 0) break;
|
||
|
||
format = seqFileFormat(inputfile, &skiplines, &err);
|
||
|
||
if (err == 0) {
|
||
#ifdef NCBI
|
||
if (format == kASNseqentry || format == kASNseqset)
|
||
seqlist = listASNSeqs(inputfile, skiplines,
|
||
format, &nseq, &err);
|
||
else
|
||
#endif
|
||
seqlist = listSeqs(inputfile, skiplines, format,
|
||
&nseq, &err);
|
||
}
|
||
|
||
if (err != 0)
|
||
erralert(err);
|
||
|
||
else if (listonly) {
|
||
dumpSeqList(seqlist, format);
|
||
free(seqlist);
|
||
}
|
||
|
||
else {
|
||
/* choose whichSeq if needed */
|
||
if (nseq == 1 || chooseall ||
|
||
(quietly && whichSeq == 0)) {
|
||
chooseall = true;
|
||
whichSeq = 1;
|
||
quietly = true; /* no loop */
|
||
}
|
||
else if (whichSeq > nseq && quietly) {
|
||
erralert(eItemNotFound);
|
||
err = eItemNotFound;
|
||
}
|
||
else if (whichSeq > nseq || !quietly) {
|
||
dumpSeqList(seqlist, format);
|
||
fprintf(stderr,
|
||
"\nChoose a sequence (# or All): \n");
|
||
gets(stemp = stempstore);
|
||
skipwhitespace(stemp);
|
||
if (to_lower(*stemp) == 'a') {
|
||
chooseall = true;
|
||
whichSeq = 1;
|
||
quietly =
|
||
true; /* !? this means we don't ask
|
||
for another file as well as
|
||
no more whichSeqs... */
|
||
}
|
||
else if (isdigit(*stemp))
|
||
whichSeq = atol(stemp);
|
||
else
|
||
whichSeq = 1; /* default */
|
||
}
|
||
free(seqlist);
|
||
|
||
if (false /*chooseall*/) { /* this isn't debugged
|
||
yet...*/
|
||
fin = fopen(inputfile, "r");
|
||
closein = true;
|
||
}
|
||
|
||
while (whichSeq > 0 && whichSeq <= nseq) {
|
||
/* need to open multiple output files ? */
|
||
manyout =
|
||
((chooseall || nwhichlist > 1) &&
|
||
nseq > 1 &&
|
||
(outform == kPlain || outform == kGCG));
|
||
if (manyout) {
|
||
if (whichSeq == 1)
|
||
erralert(eOneFormat);
|
||
else if (closeout) {
|
||
sprintf(stemp, "%s_%d", oname,
|
||
whichSeq);
|
||
freopen(stemp, "w", fout);
|
||
fprintf(stderr,
|
||
"Writing sequence %d "
|
||
"to file %s\n",
|
||
whichSeq, stemp);
|
||
}
|
||
}
|
||
|
||
if (closein) {
|
||
/* !! this fails... skips most seqs...
|
||
*/
|
||
/* !! in sequential read, must count
|
||
* seqs already read from whichSeq ...
|
||
*/
|
||
/* need major revision of ureadseq
|
||
* before we can do this */
|
||
atseq = whichSeq - 1;
|
||
seqidptr = seqid;
|
||
seq = readSeqFp(
|
||
whichSeq, fin, skiplines, format,
|
||
&seqlen, &atseq, &err, seqidptr);
|
||
skiplines = 0;
|
||
}
|
||
else {
|
||
atseq = 0;
|
||
seqidptr = seqid;
|
||
#ifdef NCBI
|
||
if (format == kASNseqentry ||
|
||
format == kASNseqset) {
|
||
seqidptr = NULL;
|
||
seq = readASNSeq(
|
||
whichSeq, inputfile,
|
||
skiplines, format, &seqlen,
|
||
&atseq, &err, &seqidptr);
|
||
}
|
||
else
|
||
#endif
|
||
seq = readSeq(
|
||
whichSeq, inputfile,
|
||
skiplines, format, &seqlen,
|
||
&atseq, &err, seqidptr);
|
||
}
|
||
|
||
if (gPretty.degap) {
|
||
char *newseq;
|
||
long newlen;
|
||
newseq =
|
||
compressSeq(gPretty.gapchar, seq,
|
||
seqlen, &newlen);
|
||
if (newseq) {
|
||
free(seq);
|
||
seq = newseq;
|
||
seqlen = newlen;
|
||
}
|
||
}
|
||
|
||
if (outform == kMSF)
|
||
checksum =
|
||
GCGchecksum(seq, seqlen, &checkall);
|
||
else if (verbose)
|
||
checksum =
|
||
seqchecksum(seq, seqlen, &checkall);
|
||
if (verbose)
|
||
fprintf(stderr,
|
||
"Sequence %d, length= %d, "
|
||
"checksum= %X, format= %s, id= "
|
||
"%s\n",
|
||
whichSeq, seqlen, checksum,
|
||
formatstr(format), seqidptr);
|
||
|
||
if (err != 0)
|
||
erralert(err);
|
||
else {
|
||
/* format fixes that writeseq doesn't do
|
||
*/
|
||
switch (outform) {
|
||
case kPIR:
|
||
if (seqout == 0)
|
||
fprintf(
|
||
foo,
|
||
"\\\\\\\n");
|
||
break;
|
||
case kASN1:
|
||
if (seqout == 0)
|
||
fprintf(
|
||
foo,
|
||
kASN1headline);
|
||
break;
|
||
|
||
case kPhylip:
|
||
if (seqout == 0) {
|
||
if (!interleaved) { /* bug, nseq is for 1st infile only */
|
||
if (chooseall)
|
||
i = nseq;
|
||
else
|
||
i = 1;
|
||
if (phylvers >=
|
||
4)
|
||
fprintf(
|
||
foo,
|
||
" %d %d\n",
|
||
i,
|
||
seqlen);
|
||
else
|
||
fprintf(
|
||
foo,
|
||
" %d %d YF\n",
|
||
i,
|
||
seqlen);
|
||
}
|
||
seqlen0 =
|
||
seqlen;
|
||
}
|
||
else if (seqlen !=
|
||
seqlen0) {
|
||
erralert(
|
||
eUnequalSize);
|
||
if (seqlen <
|
||
seqlen0)
|
||
seq = (char
|
||
*)
|
||
realloc(
|
||
seq,
|
||
seqlen0);
|
||
for (i = seqlen;
|
||
i <
|
||
seqlen0;
|
||
i++)
|
||
seq[i] =
|
||
gPretty
|
||
.gapchar;
|
||
seqlen =
|
||
seqlen0;
|
||
seq[seqlen] = 0;
|
||
}
|
||
break;
|
||
|
||
case kPAUP:
|
||
if (seqout == 0) {
|
||
seqtype =
|
||
getseqtype(
|
||
seq,
|
||
seqlen);
|
||
seqlen0 =
|
||
seqlen;
|
||
}
|
||
else if (seqlen !=
|
||
seqlen0) {
|
||
erralert(
|
||
eUnequalSize);
|
||
if (seqlen <
|
||
seqlen0)
|
||
seq = (char
|
||
*)
|
||
realloc(
|
||
seq,
|
||
seqlen0);
|
||
for (i = seqlen;
|
||
i <
|
||
seqlen0;
|
||
i++)
|
||
seq[i] =
|
||
gPretty
|
||
.gapchar;
|
||
seqlen =
|
||
seqlen0;
|
||
seq[seqlen] = 0;
|
||
}
|
||
break;
|
||
}
|
||
|
||
if (doupper)
|
||
for (i = 0; i < seqlen; i++)
|
||
seq[i] =
|
||
to_upper(seq[i]);
|
||
else if (dolower)
|
||
for (i = 0; i < seqlen; i++)
|
||
seq[i] =
|
||
to_lower(seq[i]);
|
||
|
||
if (doreverse) {
|
||
long j, k;
|
||
char ctemp;
|
||
for (j = 0, k = seqlen - 1;
|
||
j <= k; j++, k--) {
|
||
ctemp = compl [seq[j] -
|
||
' '];
|
||
seq[j] = compl [seq[k] -
|
||
' '];
|
||
seq[k] = ctemp;
|
||
}
|
||
}
|
||
|
||
if ((gPretty.isactive ||
|
||
outform == kPAUP) &&
|
||
gPretty.domatch &&
|
||
firstseq != NULL) {
|
||
for (i = 0; i < seqlen; i++)
|
||
if (seq[i] ==
|
||
firstseq[i])
|
||
seq[i] =
|
||
gPretty
|
||
.matchchar;
|
||
}
|
||
|
||
if (gPretty.isactive &&
|
||
gPretty.numtop && seqout == 0) {
|
||
gPretty.numline = 1;
|
||
indexout();
|
||
(void)writeSeq(fout, seq,
|
||
seqlen, outform,
|
||
seqidptr);
|
||
gPretty.numline = 2;
|
||
indexout();
|
||
(void)writeSeq(fout, seq,
|
||
seqlen, outform,
|
||
seqidptr);
|
||
gPretty.numline = 0;
|
||
}
|
||
|
||
indexout();
|
||
nlines = writeSeq(fout, seq, seqlen,
|
||
outform, seqidptr);
|
||
seqout++;
|
||
}
|
||
|
||
if ((gPretty.isactive || outform == kPAUP) &&
|
||
gPretty.domatch && firstseq == NULL) {
|
||
firstseq = seq;
|
||
seq = NULL;
|
||
}
|
||
else if (seq != NULL) {
|
||
free(seq);
|
||
seq = NULL;
|
||
}
|
||
|
||
#ifdef NCBI
|
||
if ((format == kASNseqentry ||
|
||
format == kASNseqset) &&
|
||
seqidptr && seqidptr != seqid)
|
||
free(seqidptr);
|
||
#endif
|
||
if (chooseall)
|
||
whichSeq++;
|
||
else if (iwhichlist < nwhichlist)
|
||
whichSeq = whichlist[iwhichlist++];
|
||
else
|
||
whichSeq = 0;
|
||
}
|
||
if (closein) {
|
||
fclose(fin);
|
||
closein = false;
|
||
}
|
||
}
|
||
whichSeq = 0;
|
||
} while (nfile > 0 || !quietly);
|
||
|
||
fini:
|
||
if (firstseq) {
|
||
free(firstseq);
|
||
firstseq = NULL;
|
||
}
|
||
if (err || listonly) exit_main(err);
|
||
|
||
if (gPretty.isactive && gPretty.numbot) {
|
||
gPretty.numline = 2;
|
||
indexout();
|
||
(void)writeSeq(fout, seq, seqlen, outform, seqidptr);
|
||
gPretty.numline = 1;
|
||
indexout();
|
||
(void)writeSeq(fout, seq, seqlen, outform, seqidptr);
|
||
gPretty.numline = 0;
|
||
}
|
||
|
||
if (outform == kMSF) {
|
||
if (*oname)
|
||
cp = oname;
|
||
else
|
||
cp = inputfile;
|
||
fprintf(foo,
|
||
"\n %s MSF: %d Type: N January 01, 1776 12:00 "
|
||
"Check: %d ..\n\n",
|
||
cp, seqlen, checkall);
|
||
}
|
||
|
||
if (outform == kPAUP) {
|
||
fprintf(foo, "#NEXUS\n");
|
||
if (*oname)
|
||
cp = oname;
|
||
else
|
||
cp = inputfile;
|
||
fprintf(foo, "[%s -- data title]\n\n", cp);
|
||
/* ! now have header lines for each sequence... put them before
|
||
* "begin data;... */
|
||
}
|
||
|
||
if (outform == kPhylip && interleaved) {
|
||
if (phylvers >= 4)
|
||
fprintf(foo, " %d %d\n", seqout, seqlen);
|
||
else
|
||
fprintf(foo, " %d %d YF\n", seqout, seqlen);
|
||
}
|
||
|
||
if (interleaved) {
|
||
/* interleave species lines in true output */
|
||
/* nlines is # lines / sequence */
|
||
short iline, j, leaf, iseq;
|
||
char *s = stempstore;
|
||
|
||
indexout();
|
||
noutindex--; /* mark eof */
|
||
|
||
for (leaf = 0; leaf < nlines; leaf++) {
|
||
if (outform == kMSF && leaf == 1) {
|
||
fputs("//\n\n", foo);
|
||
}
|
||
if (outform == kPAUP && leaf == 1) {
|
||
switch (seqtype) {
|
||
case kDNA:
|
||
cp = "dna";
|
||
break;
|
||
case kRNA:
|
||
cp = "rna";
|
||
break;
|
||
case kNucleic:
|
||
cp = "dna";
|
||
break;
|
||
case kAmino:
|
||
cp = "protein";
|
||
break;
|
||
case kOtherSeq:
|
||
cp = "dna";
|
||
break;
|
||
}
|
||
fprintf(foo, "\nbegin data;\n");
|
||
fprintf(foo, " dimensions ntax=%d nchar=%d;\n",
|
||
seqout, seqlen);
|
||
fprintf(
|
||
foo,
|
||
" format datatype=%s interleave missing=%c",
|
||
cp, gPretty.gapchar);
|
||
if (gPretty.domatch)
|
||
fprintf(foo, " matchchar=%c",
|
||
gPretty.matchchar);
|
||
fprintf(foo, ";\n matrix\n");
|
||
}
|
||
|
||
for (iseq = 0; iseq < noutindex; iseq++) {
|
||
fseek(ftmp, outindex[iseq], 0);
|
||
for (iline = 0; iline <= leaf; iline++)
|
||
if (!fgets(s, 256, ftmp)) *s = 0;
|
||
if (ftell(ftmp) <= outindex[iseq + 1])
|
||
fputs(s, foo);
|
||
}
|
||
|
||
for (j = 0; j < gPretty.interline; j++)
|
||
fputs("\n", foo); /* some want spacer line */
|
||
}
|
||
fclose(ftmp); /* tmp disappears */
|
||
fout = foo;
|
||
}
|
||
|
||
if (outform == kASN1) fprintf(foo, "} }\n");
|
||
if (outform == kPAUP) fprintf(foo, ";\n end;\n");
|
||
|
||
if (outindex != NULL) free(outindex);
|
||
exit_main(0);
|
||
}
|
||
|