503 lines
12 KiB
C
Executable file
503 lines
12 KiB
C
Executable file
#include <malloc.h>
|
|
#include <stdio.h>
|
|
#include <sys/time.h>
|
|
#include <time.h>
|
|
#include <xview/panel.h>
|
|
#include <xview/xview.h>
|
|
|
|
#include "defines.h"
|
|
#include "menudefs.h"
|
|
|
|
/*
|
|
Copyright (c) 1989-1990, University of Illinois board of trustees. All
|
|
rights reserved. Written by Steven Smith at the Center for Prokaryote Genome
|
|
Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr.
|
|
Carl Woese.
|
|
|
|
Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
|
|
all rights reserved.
|
|
|
|
Copyright (c) 1993, Steven Smith, all rights reserved.
|
|
|
|
*/
|
|
|
|
ReadGen(filename, dataset, type) char *filename;
|
|
NA_Alignment *dataset;
|
|
int type;
|
|
{
|
|
register int done = FALSE, len = 0, j = 0;
|
|
int count, IS_REALLY_AA = FALSE;
|
|
char Inline[GBUFSIZ], c;
|
|
char *buffer, *gencomments = NULL, fields[8][GBUFSIZ];
|
|
int buflen = 0, genclen = 0, curelem = 0, n = 0, flag = 0;
|
|
int start_col = -1;
|
|
|
|
NA_Sequence *this_elem;
|
|
FILE *file;
|
|
extern int Default_DNA_Trans[], Default_RNA_Trans[];
|
|
extern int Default_NA_RTrans[];
|
|
extern int Default_PROColor_LKUP[], Default_NAColor_LKUP[];
|
|
|
|
ErrorOut("No such file", file = fopen(filename, "r"));
|
|
|
|
for (; fgets(Inline, GBUFSIZ, file) != 0;) {
|
|
if (Inline[strlen(Inline) - 1] == '\n')
|
|
Inline[strlen(Inline) - 1] = '\0';
|
|
if (Find(Inline, "LOCUS")) {
|
|
curelem = dataset->numelements++;
|
|
if (curelem == 0) {
|
|
dataset->element = (NA_Sequence *)Calloc(
|
|
5, sizeof(NA_Sequence));
|
|
dataset->maxnumelements = 5;
|
|
}
|
|
else if (curelem == dataset->maxnumelements) {
|
|
(dataset->maxnumelements) *= 2;
|
|
dataset->element = (NA_Sequence *)Realloc(
|
|
dataset->element, dataset->maxnumelements *
|
|
sizeof(NA_Sequence));
|
|
}
|
|
this_elem = &(dataset->element[curelem]);
|
|
n = sscanf(Inline, "%s %s %s %s %s %s %s %s", fields[0],
|
|
fields[1], fields[2], fields[3], fields[4],
|
|
fields[5], fields[6], fields[7]);
|
|
if (IS_REALLY_AA) {
|
|
InitNASeq(this_elem, PROTEIN);
|
|
}
|
|
else if (Find(Inline, "DNA")) {
|
|
InitNASeq(this_elem, DNA);
|
|
}
|
|
else if (Find(Inline, "RNA")) {
|
|
InitNASeq(this_elem, RNA);
|
|
}
|
|
else if (Find(Inline, "MASK")) {
|
|
InitNASeq(this_elem, MASK);
|
|
}
|
|
else if (Find(Inline, "TEXT")) {
|
|
InitNASeq(this_elem, TEXT);
|
|
}
|
|
else if (Find(Inline, "PROT")) {
|
|
InitNASeq(this_elem, PROTEIN);
|
|
}
|
|
else
|
|
InitNASeq(this_elem, DNA);
|
|
|
|
strncpy(this_elem->short_name, fields[1], 31);
|
|
AsciiTime(&(this_elem->t_stamp.origin), fields[n - 1]);
|
|
this_elem->attr = DEFAULT_X_ATTR;
|
|
|
|
if (Find(Inline, "Circular"))
|
|
this_elem->attr |= IS_CIRCULAR;
|
|
|
|
gencomments = NULL;
|
|
genclen = 0;
|
|
}
|
|
else if (Find(Inline, "DEFINITION"))
|
|
strncpy(this_elem->description, &(Inline[12]), 79);
|
|
|
|
else if (Find(Inline, "AUTHOR"))
|
|
strncpy(this_elem->authority, &(Inline[12]), 79);
|
|
|
|
else if (Find(Inline, " ORGANISM"))
|
|
strncpy(this_elem->seq_name, &(Inline[12]), 79);
|
|
|
|
else if (Find(Inline, "ACCESSION"))
|
|
strncpy(this_elem->id, &(Inline[12]), 79);
|
|
|
|
else if (Find(Inline, "ORIGIN")) {
|
|
done = FALSE;
|
|
len = 0;
|
|
for (; done == FALSE &&
|
|
fgets(Inline, GBUFSIZ, file) != 0;) {
|
|
if (Inline[0] != '/') {
|
|
if (buflen == 0) {
|
|
buflen = GBUFSIZ;
|
|
buffer = Calloc(sizeof(char),
|
|
buflen);
|
|
}
|
|
|
|
else if (len + strlen(Inline) >=
|
|
buflen) {
|
|
buflen += GBUFSIZ;
|
|
buffer = Realloc(
|
|
buffer,
|
|
sizeof(char) * buflen);
|
|
for (j = buflen - GBUFSIZ;
|
|
j < buflen; j++)
|
|
buffer[j] = '\0';
|
|
}
|
|
/*
|
|
* Search for the fist column of
|
|
*data
|
|
*(whitespace-number-whitespace)data
|
|
*/
|
|
if (start_col == -1) {
|
|
for (start_col = 0;
|
|
Inline[start_col] == ' ' ||
|
|
Inline[start_col] == '\t';
|
|
start_col++)
|
|
;
|
|
|
|
for (start_col++;
|
|
strchr(
|
|
"1234567890",
|
|
Inline[start_col]) !=
|
|
NULL;
|
|
start_col++)
|
|
;
|
|
|
|
for (start_col++;
|
|
Inline[start_col] == ' ' ||
|
|
Inline[start_col] == '\t';
|
|
start_col++)
|
|
;
|
|
}
|
|
for (j = start_col;
|
|
(c = Inline[j]) != '\0'; j++) {
|
|
if ((c != '\n') &&
|
|
((j - start_col + 1) % 11 !=
|
|
0))
|
|
buffer[len++] = c;
|
|
}
|
|
}
|
|
else {
|
|
AppendNA(buffer, len,
|
|
&(dataset->element[curelem]));
|
|
for (j = 0; j < len; j++)
|
|
buffer[j] = '\0';
|
|
len = 0;
|
|
done = TRUE;
|
|
dataset->element[curelem].comments =
|
|
gencomments;
|
|
dataset->element[curelem].comments_len =
|
|
genclen - 1;
|
|
dataset->element[curelem]
|
|
.comments_maxlen = genclen;
|
|
|
|
gencomments = NULL;
|
|
genclen = 0;
|
|
}
|
|
}
|
|
/*
|
|
* Test if sequence should be converted by
|
|
*the translation table If it looks like a protein...
|
|
*/
|
|
if (dataset->element[curelem].rmatrix &&
|
|
IS_REALLY_AA == FALSE) {
|
|
IS_REALLY_AA = CheckType(
|
|
dataset->element[curelem].sequence,
|
|
dataset->element[curelem].seqlen);
|
|
|
|
if (IS_REALLY_AA == FALSE)
|
|
Ascii2NA(
|
|
dataset->element[curelem].sequence,
|
|
dataset->element[curelem].seqlen,
|
|
dataset->element[curelem].rmatrix);
|
|
else
|
|
/*
|
|
* Force the sequence to be AA
|
|
*/
|
|
{
|
|
dataset->element[curelem].elementtype =
|
|
PROTEIN;
|
|
dataset->element[curelem].rmatrix =
|
|
NULL;
|
|
dataset->element[curelem].tmatrix =
|
|
NULL;
|
|
dataset->element[curelem].col_lut =
|
|
Default_PROColor_LKUP;
|
|
}
|
|
}
|
|
}
|
|
else if (Find(Inline, "ZZZZZ")) {
|
|
Cfree(gencomments);
|
|
genclen = 0;
|
|
}
|
|
else {
|
|
if (gencomments == NULL) {
|
|
gencomments = String(Inline);
|
|
genclen = strlen(gencomments) + 1;
|
|
}
|
|
else {
|
|
genclen += strlen(Inline) + 1;
|
|
gencomments = Realloc(gencomments,
|
|
genclen * sizeof(char));
|
|
strncat(gencomments, Inline, GBUFSIZ);
|
|
strncat(gencomments, "\n", GBUFSIZ);
|
|
}
|
|
}
|
|
}
|
|
Cfree(buffer);
|
|
fclose(file);
|
|
for (j = 0; j < dataset->numelements; j++)
|
|
dataset->maxlen =
|
|
MAX(dataset->maxlen, dataset->element[j].seqlen +
|
|
dataset->element[j].offset);
|
|
return;
|
|
}
|
|
|
|
typedef struct mya {
|
|
int yy;
|
|
int mm;
|
|
int dd;
|
|
int hr;
|
|
int mn;
|
|
int sc;
|
|
} sA;
|
|
|
|
AsciiTime(sA *a, char *asciitime)
|
|
{
|
|
int j;
|
|
char temp[GBUFSIZ];
|
|
extern char month[12][6];
|
|
|
|
a->dd = 0;
|
|
a->yy = 0;
|
|
a->mm = 0;
|
|
sscanf(asciitime, "%d%5c%d", &(a->dd), temp, &(a->yy));
|
|
temp[5] = '\0';
|
|
for (j = 0; j < 12; j++)
|
|
if (strcmp(temp, month[j]) == 0) a->mm = j + 1;
|
|
if (a->dd < 0 || a->dd > 31 || a->yy < 0 || a->mm > 11) SetTime(a);
|
|
return;
|
|
}
|
|
|
|
WriteGen(aln, filename, method, maskable) NA_Alignment *aln;
|
|
char *filename;
|
|
int method, maskable;
|
|
{
|
|
int i, j, k, mask = -1;
|
|
FILE *file;
|
|
NA_Sequence *this_elem;
|
|
extern char month[12][6];
|
|
char c;
|
|
if (aln == NULL) return;
|
|
if (aln->na_ddata == NULL) return;
|
|
|
|
file = fopen(filename, "w");
|
|
if (file == NULL) {
|
|
Warning("Cannot open file for output");
|
|
return (1);
|
|
}
|
|
|
|
if (maskable && method != SELECT_REGION)
|
|
for (j = 0; j < aln->numelements; j++)
|
|
if (aln->element[j].elementtype == MASK &&
|
|
aln->element[j].selected)
|
|
mask = j;
|
|
|
|
for (j = 0; j < aln->numelements; j++) {
|
|
if ((aln->element[j].selected && j != mask &&
|
|
method != SELECT_REGION) ||
|
|
(aln->element[j].subselected && method == SELECT_REGION) ||
|
|
(method == ALL)) {
|
|
this_elem = &(aln->element[j]);
|
|
fprintf(
|
|
file,
|
|
"LOCUS %10s%8d bp %4s %10s %2d%5s%4d\n",
|
|
this_elem->short_name,
|
|
this_elem->seqlen + this_elem->offset,
|
|
(this_elem->elementtype == DNA) ? "DNA"
|
|
: (this_elem->elementtype == RNA) ? "RNA"
|
|
: (this_elem->elementtype == MASK) ? "MASK"
|
|
: (this_elem->elementtype == PROTEIN) ? "PROT"
|
|
: "TEXT",
|
|
this_elem->attr & IS_CIRCULAR ? "Circular" : "",
|
|
this_elem->t_stamp.origin.dd,
|
|
month[this_elem->t_stamp.origin.mm - 1],
|
|
this_elem->t_stamp.origin.yy > 1900
|
|
? this_elem->t_stamp.origin.yy
|
|
: this_elem->t_stamp.origin.yy + 1900);
|
|
if (this_elem->description[0])
|
|
fprintf(file, "DEFINITION %s\n",
|
|
this_elem->description);
|
|
if (this_elem->seq_name[0])
|
|
fprintf(file, " ORGANISM %s\n",
|
|
this_elem->seq_name);
|
|
if (this_elem->id[0])
|
|
fprintf(file, " ACCESSION %s\n",
|
|
this_elem->id);
|
|
if (this_elem->authority[0])
|
|
fprintf(file, " AUTHORS %s\n",
|
|
this_elem->authority);
|
|
if (this_elem->comments)
|
|
fprintf(file, "%s\n", this_elem->comments);
|
|
fprintf(file, "ORIGIN");
|
|
if (this_elem->tmatrix) {
|
|
if (mask == -1) {
|
|
for (i = 0, k = 0;
|
|
k < this_elem->seqlen +
|
|
this_elem->offset;
|
|
k++) {
|
|
if (method == SELECT_REGION) {
|
|
if (aln->selection_mask
|
|
[k] == '1') {
|
|
if (i % 60 == 0)
|
|
fprintf(
|
|
file,
|
|
"\n"
|
|
"%9"
|
|
"d",
|
|
i + 1);
|
|
if (i % 10 == 0)
|
|
fprintf(
|
|
file,
|
|
" ");
|
|
fprintf(
|
|
file, "%c",
|
|
this_elem->tmatrix
|
|
[getelem(
|
|
this_elem,
|
|
k)]);
|
|
i++;
|
|
}
|
|
}
|
|
else {
|
|
if (i % 60 == 0)
|
|
fprintf(file,
|
|
"\n%9d",
|
|
i + 1);
|
|
if (i % 10 == 0)
|
|
fprintf(file,
|
|
" ");
|
|
fprintf(
|
|
file, "%c",
|
|
this_elem->tmatrix
|
|
[getelem(
|
|
this_elem,
|
|
k)]);
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
for (k = 0; k < this_elem->seqlen +
|
|
this_elem->offset;
|
|
k++) {
|
|
c = (char)getelem(
|
|
&(aln->element[mask]), k);
|
|
if (c != '0' && c != '-') {
|
|
if (k % 60 == 0)
|
|
fprintf(file,
|
|
"\n%9d",
|
|
k + 1);
|
|
if (k % 10 == 0)
|
|
fprintf(file,
|
|
" ");
|
|
fprintf(
|
|
file, "%c",
|
|
this_elem->tmatrix
|
|
[getelem(
|
|
this_elem,
|
|
k)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (mask == -1) {
|
|
for (i = 0, k = 0;
|
|
k < this_elem->seqlen +
|
|
this_elem->offset;
|
|
k++) {
|
|
if (method == SELECT_REGION) {
|
|
if (aln->selection_mask
|
|
[k] == '1') {
|
|
if (i % 60 == 0)
|
|
fprintf(
|
|
file,
|
|
"\n"
|
|
"%9"
|
|
"d",
|
|
i + 1);
|
|
if (i % 10 == 0)
|
|
fprintf(
|
|
file,
|
|
" ");
|
|
fprintf(
|
|
file, "%c",
|
|
getelem(
|
|
this_elem,
|
|
k));
|
|
i++;
|
|
}
|
|
}
|
|
else {
|
|
if (i % 60 == 0)
|
|
fprintf(file,
|
|
"\n%9d",
|
|
i + 1);
|
|
if (i % 10 == 0)
|
|
fprintf(file,
|
|
" ");
|
|
fprintf(
|
|
file, "%c",
|
|
getelem(this_elem,
|
|
k));
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
for (k = 0; k < this_elem->seqlen +
|
|
this_elem->offset;
|
|
k++) {
|
|
c = (char)getelem(
|
|
&(aln->element[mask]), k);
|
|
if (c != '0' && c != '-') {
|
|
if (k % 60 == 0)
|
|
fprintf(file,
|
|
"\n%9d",
|
|
k + 1);
|
|
if (k % 10 == 0)
|
|
fprintf(file,
|
|
" ");
|
|
fprintf(
|
|
file, "%c",
|
|
getelem(this_elem,
|
|
k));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
fprintf(file, "\n//\n");
|
|
}
|
|
}
|
|
fclose(file);
|
|
return;
|
|
}
|
|
|
|
SetTime(sA *a)
|
|
{
|
|
struct tm *tim, *localtime();
|
|
long clock;
|
|
|
|
clock = time(0);
|
|
tim = localtime(&clock);
|
|
|
|
a->yy = tim->tm_year;
|
|
a->mm = tim->tm_mon + 1;
|
|
a->dd = tim->tm_mday;
|
|
a->hr = tim->tm_hour;
|
|
a->mn = tim->tm_min;
|
|
a->sc = tim->tm_sec;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* CheckType: Check base composition to see if the sequence
|
|
* appears to be an amino acid sequence. If it is, pass back
|
|
* TRUE, else FALSE.
|
|
*/
|
|
CheckType(seq, len) char *seq;
|
|
int len;
|
|
{
|
|
int j, count1 = 0, count2 = 0;
|
|
|
|
for (j = 0; j < len; j++)
|
|
if (((seq[j] | 32) < 'z') && ((seq[j] | 32) > 'a')) {
|
|
count1++;
|
|
if (index("ACGTUNacgtun", seq[j]) == NULL) count2++;
|
|
}
|
|
|
|
return ((count2 > count1 / 4) ? TRUE : FALSE);
|
|
}
|