1020 lines
23 KiB
C
Executable file
1020 lines
23 KiB
C
Executable file
#include <malloc.h>
|
|
#include <stdio.h>
|
|
#include <xview/notice.h>
|
|
#include <xview/panel.h>
|
|
#include <xview/xview.h>
|
|
|
|
#include "defines.h"
|
|
#include "menudefs.h"
|
|
|
|
/*
|
|
LoadData():
|
|
Load a data set from the command line argument.
|
|
|
|
Copyright (c) 1989, University of Illinois board of trustees. All rights
|
|
reserved. Written by Steven Smith at the Center for Prokaryote Genome
|
|
Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr.
|
|
Carl Woese.
|
|
|
|
Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
|
|
All rights reserved.
|
|
|
|
*/
|
|
|
|
LoadData(filename) char *filename;
|
|
{
|
|
extern NA_Alignment *DataSet;
|
|
extern int DataType, FileFormat, Default_DNA_Trans[],
|
|
Default_RNA_Trans[];
|
|
extern int Default_NA_RTrans[], Default_PROColor_LKUP[],
|
|
Default_NAColor_LKUP[];
|
|
|
|
extern Frame frame;
|
|
extern Canvas EditCan, EditNameCan;
|
|
extern char FileName[];
|
|
FILE *file;
|
|
NA_Alignment *DataNaAln;
|
|
char temp[1024];
|
|
/*
|
|
* Get file name, determine the file type, and away we go..
|
|
*/
|
|
if (Find2(filename, "gde") != 0) strcpy(FileName, filename);
|
|
if ((file = fopen(filename, "r")) != 0) {
|
|
FindType(filename, &DataType, &FileFormat);
|
|
switch (DataType) {
|
|
case NASEQ_ALIGN:
|
|
if (DataSet == NULL) {
|
|
DataSet = (NA_Alignment *)Calloc(
|
|
1, sizeof(NA_Alignment));
|
|
DataNaAln = (NA_Alignment *)DataSet;
|
|
DataSet->rel_offset = 0;
|
|
}
|
|
else
|
|
DataNaAln = (NA_Alignment *)DataSet;
|
|
|
|
LoadFile(filename, DataNaAln, DataType,
|
|
FileFormat);
|
|
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
fclose(file);
|
|
sprintf(temp, "Genetic Data Environment 2.2");
|
|
xv_set(frame, FRAME_LABEL, temp, 0);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
LoadFile():
|
|
Load the given filename into the given dataset. Handle any
|
|
type conversion needed to get the data into the specified data type.
|
|
This routine is used in situations where the format and datatype is known.
|
|
|
|
Copyright (c) 1989-1990, University of Illinois board of trustees. All
|
|
rights reserved. Written by Steven Smith at the Center for Prokaryote Genome
|
|
Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr.
|
|
Carl Woese.
|
|
|
|
Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory.
|
|
All rights reserved.
|
|
*/
|
|
|
|
LoadFile(filename, dataset, type, format) char *filename;
|
|
char *dataset;
|
|
int type, format;
|
|
{
|
|
extern int DataType;
|
|
|
|
if (DataType != type)
|
|
fprintf(stderr, "Warning, datatypes do not match.\n");
|
|
/*
|
|
Handle the overwrite/create/merge dialog here.
|
|
*/
|
|
switch (format) {
|
|
case NA_FLAT:
|
|
ReadNA_Flat(filename, dataset, type);
|
|
((NA_Alignment *)dataset)->format = GDE;
|
|
break;
|
|
|
|
case GENBANK:
|
|
ReadGen(filename, dataset, type);
|
|
((NA_Alignment *)dataset)->format = GENBANK;
|
|
break;
|
|
|
|
case GDE:
|
|
ReadGDE(filename, dataset, type);
|
|
((NA_Alignment *)dataset)->format = GDE;
|
|
break;
|
|
case COLORMASK:
|
|
ReadCMask(filename);
|
|
|
|
default:
|
|
break;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
 * ErrorOut(): terminate the program when a check has failed.
 *
 * code   - outcome of the check; zero means failure.
 * string - text printed on stderr after "Error:" before exiting.
 *
 * Returns (with no meaningful value) only when code is non-zero; otherwise
 * the process exits with status 1.
 */
ErrorOut(code, string) int code;
char *string;
{
    if (code != 0) return;

    fprintf(stderr, "Error:%s\n", string);
    exit(1);
}
|
|
|
|
/*
|
|
* More robust memory management routines
|
|
*/
|
|
/*
 * Calloc(): calloc() wrapper that aborts instead of returning NULL.
 *
 * count, size - element count and element size, forwarded to calloc().
 *
 * Returns the zero-filled block.  When calloc() fails, ErrorOut() prints
 * "Cannot allocate memory" and exits.  With SeeAlloc defined, every request
 * is also logged to stderr and added to the global TotalCalloc.
 */
char *Calloc(count, size)
int count, size;
{
    char *temp;
#ifdef SeeAlloc
    extern int TotalCalloc;
    TotalCalloc += count * size;
    fprintf(stderr, "Calloc %d %d\n", count * size, TotalCalloc);
#endif
    temp = calloc(count, size);
    /*
     * ErrorOut() takes an int flag.  Pass the result of the comparison
     * rather than the pointer itself: a pointer squeezed into an int can
     * lose its upper bits and look like zero on 64-bit systems.
     */
    ErrorOut(temp != NULL, "Cannot allocate memory");
    return (temp);
}
|
|
|
|
/*
 * Realloc(): realloc() wrapper that aborts instead of returning NULL.
 *
 * block - block to resize.
 * size  - new size in bytes.
 *
 * Returns the resized block.  When realloc() returns NULL, ErrorOut()
 * prints "Cannot change memory size" and exits.  With SeeAlloc defined,
 * the running total TotalRealloc is updated and logged to stderr.
 */
char *Realloc(block, size)
char *block;
int size;
{
    char *temp;
#ifdef SeeAlloc
    extern int TotalRealloc;
    TotalRealloc += size;
    fprintf(stderr, "Realloc %d\n", TotalRealloc);
#endif
    temp = realloc(block, size);
    /*
     * Hand ErrorOut() a real truth value; passing the pointer where an int
     * is expected can truncate it to zero on 64-bit systems.
     */
    ErrorOut(temp != NULL, "Cannot change memory size");
    return (temp);
}
|
|
|
|
/*
 * Cfree(): release a block obtained from Calloc()/Realloc().
 *
 * block - block to free.  A NULL block is reported through Warning() and
 *         otherwise ignored.
 */
Cfree(block) char *block;
{
    if (block == NULL) {
        Warning("Error in Cfree, NULL block");
        return;
    }

    /*
     * rtm 18.III.98: the result of free() used to be tested here, but
     * free() returns void ("void value not ignored as it ought to be"),
     * so the block is simply released.
     */
    free(block);
    return;
}
|
|
|
|
/*
 * String(): return a newly allocated copy of a NUL-terminated string
 * (the same job as strdup).  The storage comes from Calloc().
 */
char *String(string)
char *string;
{
    int length;
    char *copy;

    length = strlen(string);
    copy = Calloc(length + 1, sizeof(char));
    strcpy(copy, string);
    return (copy);
}
|
|
|
|
FindType(name, dtype, ftype) char *name;
|
|
int *dtype, *ftype;
|
|
{
|
|
FILE *file;
|
|
char Inline[GBUFSIZ];
|
|
|
|
file = fopen(name, "r");
|
|
*dtype = 0;
|
|
*ftype = 0;
|
|
|
|
if (file == NULL) return (1);
|
|
|
|
/*
|
|
* Is this a flat file?
|
|
* Get the first non blank line, see if a type marker shows up.
|
|
*/
|
|
fgets(Inline, GBUFSIZ, file);
|
|
for (; strlen(Inline) < 2 && fgets(Inline, GBUFSIZ, file) != NULL;)
|
|
;
|
|
if (Inline[0] == '#' || Inline[0] == '%' || Inline[0] == '"' ||
|
|
Inline[0] == '@') {
|
|
*dtype = NASEQ_ALIGN;
|
|
*ftype = NA_FLAT;
|
|
}
|
|
|
|
/*
|
|
* Else, try genbank
|
|
*/
|
|
else {
|
|
fclose(file);
|
|
file = fopen(name, "r");
|
|
*dtype = 0;
|
|
*ftype = 0;
|
|
|
|
if (file == NULL) return (1);
|
|
|
|
for (; fgets(Inline, GBUFSIZ, file) != NULL;)
|
|
if (Find(Inline, "LOCUS")) {
|
|
*dtype = NASEQ_ALIGN;
|
|
*ftype = GENBANK;
|
|
fclose(file);
|
|
return (0);
|
|
}
|
|
/*
|
|
* and last, try GDE
|
|
*/
|
|
else if (Find(Inline, "sequence")) {
|
|
*dtype = NASEQ_ALIGN;
|
|
*ftype = GDE;
|
|
fclose(file);
|
|
return (0);
|
|
}
|
|
else if (Find(Inline, "start:")) {
|
|
*dtype = NASEQ_ALIGN;
|
|
*ftype = COLORMASK;
|
|
fclose(file);
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
fclose(file);
|
|
return (0);
|
|
}
|
|
|
|
AppendNA(buffer, len, seq) NA_Base *buffer;
|
|
int len;
|
|
NA_Sequence *seq;
|
|
{
|
|
int curlen = 0, j;
|
|
NA_Base *temp;
|
|
|
|
if (seq->seqlen + len >= seq->seqmaxlen) {
|
|
if (seq->seqlen > 0)
|
|
seq->sequence = (NA_Base *)Realloc(
|
|
seq->sequence,
|
|
(seq->seqlen + len + GBUFSIZ) * sizeof(NA_Base));
|
|
else
|
|
seq->sequence = (NA_Base *)Calloc(
|
|
1, (seq->seqlen + len + GBUFSIZ) * sizeof(NA_Base));
|
|
seq->seqmaxlen = seq->seqlen + len + GBUFSIZ;
|
|
}
|
|
/*
|
|
* seqlen is the length, and the index of the next free
|
|
* base
|
|
*/
|
|
curlen = seq->seqlen + seq->offset;
|
|
for (j = 0; j < len; j++) putelem(seq, j + curlen, buffer[j]);
|
|
|
|
seq->seqlen += len;
|
|
return;
|
|
}
|
|
|
|
/*
 * Ascii2NA(): encode a buffer in place through a translation table.
 *
 * buffer - `len` characters to translate; each one is replaced by
 *          matrix[character].
 * len    - number of characters in buffer.
 * matrix - table indexed by character value, or NULL to leave the buffer
 *          untouched.  The declared extent of 16 is not what is indexed:
 *          the table needs an entry for every byte value that can occur
 *          in buffer (presumably 256 entries -- TODO confirm against the
 *          table definitions).
 *
 * Each character is converted to unsigned char before it is used as an
 * index, so bytes above 127 can no longer produce a negative index on
 * platforms where plain char is signed.
 *
 * Always returns 0.
 */
int Ascii2NA(buffer, len, matrix) char *buffer;
int len;
int matrix[16];
{
    register int i;

    /*
     * if the translation matrix exists, use it to
     * encode the buffer.
     */
    if (matrix == NULL) return (0);

    for (i = 0; i < len; i++)
        buffer[i] = (char)matrix[(unsigned char)buffer[i]];
    return (0);
}
|
|
|
|
WriteNA_Flat(aln, filename, method, maskable) NA_Alignment *aln;
|
|
char *filename;
|
|
int method, maskable;
|
|
{
|
|
int j, kk, mask = -1, k, offset, min_offset = -999999;
|
|
char offset_str[100], buf[100];
|
|
NA_Sequence *seqs;
|
|
FILE *file;
|
|
if (aln == (NA_Alignment *)NULL) return;
|
|
if (aln->numelements == (int)NULL) return;
|
|
seqs = aln->element;
|
|
|
|
file = fopen(filename, "w");
|
|
if (file == NULL) {
|
|
Warning("Cannot open file for output");
|
|
return (1);
|
|
}
|
|
if (maskable && (method != SELECT_REGION)) {
|
|
for (j = 0; j < aln->numelements; j++)
|
|
if (seqs[j].elementtype == MASK && seqs[j].selected)
|
|
mask = j;
|
|
}
|
|
for (j = 0; j < aln->numelements; j++) {
|
|
SeqNorm(&(seqs[j]));
|
|
}
|
|
|
|
for (j = 0; j < aln->numelements; j++) {
|
|
if (method != SELECT_REGION)
|
|
offset = seqs[j].offset;
|
|
else
|
|
for (offset = seqs[j].offset;
|
|
aln->selection_mask[offset] == '0'; offset++)
|
|
;
|
|
|
|
if (offset + aln->rel_offset != 0)
|
|
sprintf(offset_str, "(%d)", offset + aln->rel_offset);
|
|
else
|
|
offset_str[0] = '\0';
|
|
|
|
if (((j != mask) && (seqs[j].selected) &&
|
|
method != SELECT_REGION) ||
|
|
(method == SELECT_REGION && seqs[j].subselected) ||
|
|
method == ALL) {
|
|
fprintf(
|
|
file, "%c%s%s\n",
|
|
seqs[j].elementtype == DNA ? '#'
|
|
: seqs[j].elementtype == RNA ? '#'
|
|
: seqs[j].elementtype == PROTEIN ? '%'
|
|
: seqs[j].elementtype == TEXT ? '"'
|
|
: seqs[j].elementtype == MASK ? '@'
|
|
: '"',
|
|
seqs[j].short_name,
|
|
(offset + aln->rel_offset == 0) ? "" : offset_str);
|
|
if (seqs[j].tmatrix) {
|
|
if (mask == -1)
|
|
for (k = 0, kk = 0; kk < seqs[j].seqlen;
|
|
kk++) {
|
|
if ((k) % 60 == 0 && k > 0) {
|
|
buf[60] = '\0';
|
|
fputs(buf, file);
|
|
putc('\n', file);
|
|
}
|
|
if (method == SELECT_REGION) {
|
|
if (aln->selection_mask
|
|
[kk + offset] ==
|
|
'1') {
|
|
buf[k % 60] =
|
|
((char)seqs[j]
|
|
.tmatrix[(int)getelem(
|
|
&(seqs[j]),
|
|
kk +
|
|
offset)]);
|
|
k++;
|
|
}
|
|
}
|
|
else {
|
|
buf[k % 60] =
|
|
((char)seqs[j].tmatrix
|
|
[(int)getelem(
|
|
&(seqs[j]),
|
|
kk +
|
|
offset)]);
|
|
k++;
|
|
}
|
|
}
|
|
else
|
|
for (k = 0, kk = 0; kk < seqs[j].seqlen;
|
|
kk++) {
|
|
if (getelem(&(seqs[mask]),
|
|
kk + seqs[mask]
|
|
.offset) !=
|
|
'0' &&
|
|
(getelem(
|
|
&(seqs[mask]),
|
|
kk + seqs[mask]
|
|
.offset) !=
|
|
'-')) {
|
|
if ((k++) % 60 == 0 &&
|
|
k > 1) {
|
|
buf[60] = '\0';
|
|
fputs(buf,
|
|
file);
|
|
putc('\n',
|
|
file);
|
|
}
|
|
buf[k % 60] =
|
|
((char)seqs[j].tmatrix
|
|
[getelem(
|
|
&(seqs[j]),
|
|
kk +
|
|
offset)]);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (mask == -1)
|
|
for (k = 0, kk = 0; kk < seqs[j].seqlen;
|
|
kk++) {
|
|
if ((k) % 60 == 0 && k > 0) {
|
|
buf[60] = '\0';
|
|
fputs(buf, file);
|
|
putc('\n', file);
|
|
}
|
|
if (method == SELECT_REGION) {
|
|
if (aln->selection_mask
|
|
[kk + offset] ==
|
|
'1') {
|
|
buf[k % 60] = (getelem(
|
|
&(seqs[j]),
|
|
kk +
|
|
offset));
|
|
k++;
|
|
}
|
|
}
|
|
else {
|
|
buf[k % 60] = (getelem(
|
|
&(seqs[j]),
|
|
kk + offset));
|
|
k++;
|
|
}
|
|
}
|
|
else
|
|
for (k = 0, kk = 0; kk < seqs[j].seqlen;
|
|
kk++) {
|
|
if (getelem(&(seqs[mask]),
|
|
kk + offset) ==
|
|
'1') {
|
|
if ((k++) % 60 == 0 &&
|
|
k > 1) {
|
|
buf[60] = '\0';
|
|
fputs(buf,
|
|
file);
|
|
putc('\n',
|
|
file);
|
|
}
|
|
buf[k % 60] =
|
|
((char)getelem(
|
|
&(seqs[j]),
|
|
kk + offset));
|
|
}
|
|
}
|
|
}
|
|
buf[(k % 60) > 0 ? (k % 60) : 60] = '\0';
|
|
fputs(buf, file);
|
|
putc('\n', file);
|
|
}
|
|
}
|
|
fclose(file);
|
|
return (0);
|
|
}
|
|
|
|
Warning(s) char *s;
|
|
{
|
|
extern Frame frame;
|
|
extern Panel_item left_foot, right_foot;
|
|
Beep();
|
|
xv_set(frame, FRAME_RIGHT_FOOTER, s, 0);
|
|
xv_set(right_foot, PANEL_LABEL_STRING, s, 0);
|
|
}
|
|
|
|
InitNASeq(seq, type) NA_Sequence *seq;
|
|
int type;
|
|
{
|
|
extern int Default_RNA_Trans[]; /* rtm 18.III.98 */
|
|
extern int Default_DNA_Trans[], Default_NA_RTrans[];
|
|
extern int Default_NA_RTrans[], Default_PROColor_LKUP[],
|
|
Default_NAColor_LKUP[];
|
|
|
|
SetTime(&(seq->t_stamp.origin));
|
|
SetTime(&(seq->t_stamp.modify));
|
|
strncpy(seq->id, uniqueID(), 79);
|
|
seq->seq_name[0] = '\0';
|
|
seq->barcode[0] = '\0';
|
|
seq->contig[0] = '\0';
|
|
seq->membrane[0] = '\0';
|
|
seq->authority[0] = '\0';
|
|
seq->short_name[0] = '\0';
|
|
seq->sequence = NULL;
|
|
seq->offset = 0;
|
|
seq->baggage = NULL;
|
|
seq->baggage_len = 0;
|
|
seq->baggage_maxlen = 0;
|
|
seq->comments = NULL;
|
|
seq->comments_len = 0;
|
|
seq->comments_maxlen = 0;
|
|
seq->description[0] = '\0';
|
|
seq->mask = NULL;
|
|
seq->seqlen = 0;
|
|
seq->seqmaxlen = 0;
|
|
seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION;
|
|
#ifdef HGL
|
|
seq->attr = 0;
|
|
#else
|
|
seq->attr = IS_5_TO_3 + IS_PRIMARY;
|
|
#endif
|
|
seq->elementtype = type;
|
|
seq->groupid = 0;
|
|
seq->groupb = NULL;
|
|
seq->groupf = NULL;
|
|
seq->cmask = NULL;
|
|
seq->selected = 0;
|
|
seq->subselected = 0;
|
|
|
|
switch (type) {
|
|
case DNA:
|
|
seq->tmatrix = Default_DNA_Trans;
|
|
seq->rmatrix = Default_NA_RTrans;
|
|
seq->col_lut = Default_NAColor_LKUP;
|
|
break;
|
|
case RNA:
|
|
seq->tmatrix = Default_RNA_Trans;
|
|
seq->rmatrix = Default_NA_RTrans;
|
|
seq->col_lut = Default_NAColor_LKUP;
|
|
break;
|
|
case PROTEIN:
|
|
seq->tmatrix = NULL;
|
|
seq->rmatrix = NULL;
|
|
seq->col_lut = Default_PROColor_LKUP;
|
|
break;
|
|
case MASK:
|
|
case TEXT:
|
|
default:
|
|
seq->tmatrix = NULL;
|
|
seq->rmatrix = NULL;
|
|
seq->col_lut = NULL;
|
|
break;
|
|
}
|
|
return;
|
|
}
|
|
|
|
ReadCMask(filename) char *filename;
|
|
{
|
|
extern Frame frame;
|
|
extern NA_Alignment *DataSet;
|
|
|
|
char Inline[GBUFSIZ], head[GBUFSIZ], curname[GBUFSIZ], temp[GBUFSIZ];
|
|
int IGNORE_DASH = FALSE, offset;
|
|
NA_DisplayData *NAdd;
|
|
NA_Alignment *aln;
|
|
|
|
int i, j, k, curlen = 0, *colors, orig_ctype, jj, indx = 0;
|
|
FILE *file;
|
|
|
|
if (DataSet == NULL) return;
|
|
|
|
NAdd = (NA_DisplayData *)((NA_Alignment *)DataSet)->na_ddata;
|
|
|
|
if (NAdd == NULL) return;
|
|
|
|
aln = (NA_Alignment *)DataSet;
|
|
|
|
curname[0] = '\0';
|
|
orig_ctype = NAdd->color_type;
|
|
file = fopen(filename, "r");
|
|
if (file == NULL) {
|
|
Warning("File not found");
|
|
Warning(filename);
|
|
return;
|
|
}
|
|
|
|
NAdd->color_type = COLOR_ALN_MASK;
|
|
for (; fgets(Inline, GBUFSIZ, file) != 0;) {
|
|
if (Find(Inline, "offset:")) {
|
|
crop(Inline, head, temp);
|
|
sscanf(temp, "%d", &(aln->cmask_offset));
|
|
}
|
|
else if (Find(Inline, "nodash:"))
|
|
IGNORE_DASH = TRUE;
|
|
else if (Find(Inline, "dash:"))
|
|
IGNORE_DASH = TRUE;
|
|
else if (Find(Inline, "name:")) {
|
|
crop(Inline, head, curname);
|
|
curname[strlen(curname) - 1] = '\0';
|
|
for (j = 0; j < strlen(curname); j++)
|
|
if (curname[j] == '(') curname[j] = '\0';
|
|
}
|
|
else if (Find(Inline, "length:")) {
|
|
crop(Inline, head, temp);
|
|
sscanf(temp, "%d", &curlen);
|
|
}
|
|
else if (Find(Inline, "start:")) {
|
|
indx = -1;
|
|
if (curlen == 0) {
|
|
Warning("illegal format in colormask");
|
|
NAdd->color_type = orig_ctype;
|
|
return;
|
|
}
|
|
if (strlen(curname) != 0) {
|
|
indx = -1;
|
|
for (j = 0; j < aln->numelements; j++)
|
|
if (Find(aln->element[j].short_name,
|
|
curname) ||
|
|
Find(aln->element[j].id, curname)) {
|
|
if (aln->element[j].cmask !=
|
|
NULL)
|
|
Cfree(aln->element[j]
|
|
.cmask);
|
|
colors = (int *)Calloc(
|
|
aln->element[j].seqmaxlen +
|
|
1 +
|
|
aln->element[j].offset,
|
|
sizeof(int));
|
|
aln->element[j].cmask = colors;
|
|
NAdd->color_type =
|
|
COLOR_SEQ_MASK;
|
|
indx = j;
|
|
j = aln->numelements;
|
|
}
|
|
if (indx == -1) colors = NULL;
|
|
}
|
|
else {
|
|
if (aln->cmask != NULL) Cfree(aln->cmask);
|
|
colors = (int *)Calloc(curlen, sizeof(int));
|
|
aln->cmask = colors;
|
|
aln->cmask_len = curlen;
|
|
NAdd->color_type = COLOR_ALN_MASK;
|
|
for (j = 0; j < curlen; j++) colors[j] = 12;
|
|
}
|
|
|
|
if (IGNORE_DASH && (indx != -1)) {
|
|
for (jj = 0, j = 0;
|
|
(j < curlen) &&
|
|
(jj < aln->element[indx].seqlen);
|
|
j++, jj++) {
|
|
offset = aln->element[indx].offset;
|
|
if (fgets(Inline, GBUFSIZ, file) ==
|
|
NULL) {
|
|
Warning(
|
|
"illegal format in "
|
|
"colormask");
|
|
NAdd->color_type = orig_ctype;
|
|
return;
|
|
}
|
|
/*
|
|
* Fixed so that the keyword nodash
|
|
*causes the colormask to be mapped to
|
|
*the sequence, not the alignment.
|
|
*
|
|
* The allocated space is equal the
|
|
*seqlen of the matched sequence.
|
|
*
|
|
*/
|
|
if (aln->element[indx].tmatrix)
|
|
for (;
|
|
(getelem(
|
|
&(aln->element[indx]),
|
|
jj + offset) ==
|
|
(aln->element[indx]
|
|
.tmatrix['-']) ||
|
|
(getelem(&(aln->element
|
|
[indx]),
|
|
jj + offset) ==
|
|
aln->element[indx]
|
|
.tmatrix['~'])) &&
|
|
jj < aln->element[indx]
|
|
.seqlen;)
|
|
colors[jj++] = 12;
|
|
else
|
|
for (;
|
|
getelem(
|
|
&(aln->element[indx]),
|
|
jj + offset) == '-' &&
|
|
jj < aln->element[indx]
|
|
.seqlen;)
|
|
colors[jj++] = 12;
|
|
|
|
sscanf(Inline, "%d", &(colors[jj]));
|
|
}
|
|
}
|
|
else if ((indx == -1) && (strlen(curname) != 0))
|
|
for (j = 0; j < curlen; j++)
|
|
fgets(Inline, GBUFSIZ, file);
|
|
else
|
|
for (j = 0; j < curlen; j++) {
|
|
if (fgets(Inline, GBUFSIZ, file) ==
|
|
NULL) {
|
|
Warning(
|
|
"illegal format in "
|
|
"colormask");
|
|
NAdd->color_type = orig_ctype;
|
|
return;
|
|
}
|
|
sscanf(Inline, "%d", &(colors[j]));
|
|
}
|
|
IGNORE_DASH = FALSE;
|
|
curname[0] = '\0';
|
|
}
|
|
}
|
|
RepaintAll(TRUE);
|
|
return;
|
|
}
|
|
|
|
ReadNA_Flat(filename, dataset, type) char *filename;
|
|
char *dataset;
|
|
int type;
|
|
{
|
|
int i, j, jj, c, curelem, offset;
|
|
char name[GBUFSIZ];
|
|
char buffer[GBUFSIZ];
|
|
char origin[GBUFSIZ], ref[GBUFSIZ];
|
|
char Inline[GBUFSIZ], head[GBUFSIZ], tail[GBUFSIZ], temp[GBUFSIZ];
|
|
char curname[GBUFSIZ];
|
|
|
|
NA_Sequence *this_elem;
|
|
NA_Alignment *data;
|
|
extern int Default_DNA_Trans[], Default_RNA_Trans[],
|
|
Default_NA_RTrans[];
|
|
|
|
FILE *file;
|
|
|
|
curname[0] = '\0';
|
|
data = (NA_Alignment *)dataset;
|
|
|
|
file = fopen(filename, "r");
|
|
if (file == NULL) {
|
|
fprintf(stderr, "Cannot open %s.\n", filename);
|
|
return;
|
|
}
|
|
for (; fgets(Inline, GBUFSIZ, file) != 0;) {
|
|
if (Inline[0] == '#' || Inline[0] == '%' || Inline[0] == '"' ||
|
|
Inline[0] == '@') {
|
|
offset = 0;
|
|
for (j = 0; j < strlen(Inline); j++) {
|
|
if (Inline[j] == '(') {
|
|
sscanf((char *)&(Inline[j + 1]), "%d",
|
|
&offset);
|
|
Inline[j] = '\0';
|
|
}
|
|
}
|
|
|
|
curelem = data->numelements++;
|
|
if (curelem == 0) {
|
|
data->element = (NA_Sequence *)Calloc(
|
|
5, sizeof(NA_Sequence));
|
|
data->maxnumelements = 5;
|
|
}
|
|
else if (curelem == data->maxnumelements) {
|
|
(data->maxnumelements) *= 2;
|
|
data->element = (NA_Sequence *)Realloc(
|
|
data->element,
|
|
data->maxnumelements * sizeof(NA_Sequence));
|
|
}
|
|
|
|
InitNASeq(&(data->element[curelem]),
|
|
Inline[0] == '#' ? DNA
|
|
: Inline[0] == '%' ? PROTEIN
|
|
: Inline[0] == '"' ? TEXT
|
|
: Inline[0] == '@' ? MASK
|
|
: TEXT);
|
|
this_elem = &(data->element[curelem]);
|
|
if (Inline[strlen(Inline) - 1] == '\n')
|
|
Inline[strlen(Inline) - 1] = '\0';
|
|
strncpy(this_elem->short_name, (char *)&(Inline[1]),
|
|
31);
|
|
this_elem->offset = offset;
|
|
}
|
|
else if (Inline[0] != '\n') {
|
|
for (j = 0, jj = 0; j < strlen(Inline); j++)
|
|
if (Inline[j] != ' ' && Inline[j] != '\n' &&
|
|
Inline[j] != '\t')
|
|
buffer[jj++] = Inline[j];
|
|
|
|
if (data->element[curelem].rmatrix)
|
|
Ascii2NA(buffer, jj,
|
|
data->element[curelem].rmatrix);
|
|
AppendNA(buffer, jj, &(data->element[curelem]));
|
|
}
|
|
}
|
|
|
|
for (j = 0; j < data->numelements; j++)
|
|
data->maxlen = MAX(data->maxlen, data->element[j].seqlen +
|
|
data->element[j].offset);
|
|
|
|
for (j = 0; j < data->numelements; j++)
|
|
if (data->element[j].seqlen == 0)
|
|
data->element[j].protect =
|
|
PROT_BASE_CHANGES + PROT_GREY_SPACE +
|
|
PROT_WHITE_SPACE + PROT_TRANSLATION;
|
|
|
|
NormalizeOffset(data);
|
|
Regroup(data);
|
|
return;
|
|
}
|
|
|
|
WriteStatus(aln, filename, method) NA_Alignment *aln;
|
|
char *filename;
|
|
int method;
|
|
{
|
|
extern int EditMode, FileFormat;
|
|
extern NA_Alignment *DataSet;
|
|
NA_DisplayData *NAdd;
|
|
NA_Sequence *this_seq;
|
|
int j;
|
|
FILE *file;
|
|
|
|
if (DataSet == NULL) return;
|
|
|
|
NAdd = (NA_DisplayData *)((NA_Alignment *)DataSet)->na_ddata;
|
|
if (NAdd == NULL) return;
|
|
|
|
file = fopen(filename, "w");
|
|
if (file == NULL) {
|
|
Warning("Cannot open status file.");
|
|
return (1);
|
|
}
|
|
fprintf(file, "File_format: %s\n",
|
|
FileFormat == GENBANK ? "genbank" : "flat");
|
|
/*
|
|
fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert":
|
|
"check");
|
|
*/
|
|
|
|
this_seq = &(aln->element[NAdd->cursor_y]);
|
|
if (this_seq->id != NULL)
|
|
fprintf(file, "sequence-ID %s\n", this_seq->id);
|
|
fprintf(file, "Column: %d\nPos:%d\n", NAdd->cursor_x, NAdd->position);
|
|
switch (this_seq->elementtype) {
|
|
case DNA:
|
|
case RNA:
|
|
fprintf(file, "#%s\n", this_seq->short_name);
|
|
break;
|
|
case PROTEIN:
|
|
fprintf(file, "%%%s\n", this_seq->short_name);
|
|
break;
|
|
case MASK:
|
|
fprintf(file, "@%s\n", this_seq->short_name);
|
|
break;
|
|
case TEXT:
|
|
fprintf(file, "%c%s\n", '"', this_seq->short_name);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (this_seq->tmatrix)
|
|
for (j = 0; j < this_seq->seqlen; j++)
|
|
putc(this_seq->tmatrix[getelem(this_seq, j)], file);
|
|
else
|
|
for (j = 0; j < this_seq->seqlen; j++)
|
|
putc(getelem(this_seq, j), file);
|
|
|
|
fclose(file);
|
|
return;
|
|
}
|
|
|
|
/*
 * ReadStatus(): placeholder for reading back a status file.
 *
 * filename - status file; currently ignored.
 *
 * The function does nothing.  A disabled draft used to live here: it read
 * the file line by line with fgets(), took the first word of each line
 * and compared it against "Col" and "Pos" in order to fill in the cursor
 * fields of the display data.  That draft was never completed and was
 * kept only as a comment.
 */
ReadStatus(filename) char *filename;
{
    /* intentionally empty */
}
|
|
|
|
NormalizeOffset(aln) NA_Alignment *aln;
|
|
{
|
|
int i, j, offset = 99999999;
|
|
|
|
for (j = 0; j < aln->numelements; j++)
|
|
offset = MIN(offset, aln->element[j].offset);
|
|
|
|
for (j = 0; j < aln->numelements; j++) aln->element[j].offset -= offset;
|
|
|
|
aln->maxlen = -999999999;
|
|
for (j = 0; j < aln->numelements; j++)
|
|
aln->maxlen =
|
|
MAX(aln->element[j].seqlen + aln->element[j].offset,
|
|
aln->maxlen);
|
|
|
|
aln->rel_offset += offset;
|
|
|
|
if (aln->numelements == 0) aln->rel_offset = 0;
|
|
|
|
return;
|
|
}
|
|
|
|
WriteCMask(aln, filename, method, maskable) NA_Alignment *aln;
|
|
char *filename;
|
|
int method, maskable;
|
|
{
|
|
int j, kk, mask = -1, k, offset, min_offset = -999999;
|
|
char offset_str[100];
|
|
int *buf;
|
|
NA_Sequence *seqs;
|
|
FILE *file;
|
|
if (aln == NULL) return;
|
|
if (aln->numelements == (int)NULL) return;
|
|
seqs = aln->element;
|
|
|
|
file = fopen(filename, "w");
|
|
if (file == NULL) {
|
|
Warning("Cannot open file for output");
|
|
return (1);
|
|
}
|
|
if (maskable && (method != SELECT_REGION)) {
|
|
for (j = 0; j < aln->numelements; j++)
|
|
if (seqs[j].elementtype == MASK && seqs[j].selected)
|
|
mask = j;
|
|
}
|
|
for (j = 0; j < aln->numelements; j++) {
|
|
SeqNorm(&(seqs[j]));
|
|
}
|
|
|
|
for (j = 0; j < aln->numelements; j++) {
|
|
if (method != SELECT_REGION)
|
|
offset = seqs[j].offset;
|
|
else
|
|
for (offset = seqs[j].offset;
|
|
aln->selection_mask[offset] == '0'; offset++)
|
|
;
|
|
|
|
if (offset + aln->rel_offset != 0)
|
|
sprintf(offset_str, "(%d)", offset + aln->rel_offset);
|
|
else
|
|
offset_str[0] = '\0';
|
|
|
|
if (((j != mask) && (seqs[j].selected) &&
|
|
method != SELECT_REGION) ||
|
|
(method == SELECT_REGION && seqs[j].subselected) ||
|
|
method == ALL) {
|
|
fprintf(
|
|
file, "%c%s%s\n",
|
|
seqs[j].elementtype == DNA ? '#'
|
|
: seqs[j].elementtype == RNA ? '#'
|
|
: seqs[j].elementtype == PROTEIN ? '%'
|
|
: seqs[j].elementtype == TEXT ? '"'
|
|
: seqs[j].elementtype == MASK ? '@'
|
|
: '"',
|
|
seqs[j].short_name,
|
|
(offset + aln->rel_offset == 0) ? "" : offset_str);
|
|
|
|
if (seqs[j].cmask != NULL) {
|
|
buf =
|
|
(int *)Calloc(seqs[j].seqlen, sizeof(int));
|
|
|
|
if (mask == -1) {
|
|
for (k = 0, kk = 0; kk < seqs[j].seqlen;
|
|
kk++) {
|
|
if (method == SELECT_REGION) {
|
|
if (aln->selection_mask
|
|
[kk + offset] ==
|
|
'1')
|
|
buf[k++] = (getcmask(
|
|
&(seqs[j]),
|
|
kk +
|
|
offset));
|
|
}
|
|
|
|
else
|
|
buf[k++] = (getcmask(
|
|
&(seqs[j]),
|
|
kk + offset));
|
|
}
|
|
}
|
|
else {
|
|
for (k = 0, kk = 0; kk < seqs[j].seqlen;
|
|
kk++)
|
|
if (getelem(&(seqs[mask]),
|
|
kk + offset) == '1')
|
|
buf[k++] = (getcmask(
|
|
&(seqs[j]),
|
|
kk + offset));
|
|
/*
|
|
* Looks like k
|
|
*might be one behind?
|
|
*/
|
|
}
|
|
fprintf(
|
|
file,
|
|
"name:%s\noffset:%d\nlength:%d\nstart:\n",
|
|
seqs[j].short_name, seqs[j].offset, k);
|
|
|
|
for (kk = 0; kk < k; kk++)
|
|
fprintf(file, "%d\n", buf[kk]);
|
|
|
|
Cfree(buf);
|
|
}
|
|
}
|
|
}
|
|
fclose(file);
|
|
return (0);
|
|
}
|