staden-lg/src/newted/seqIOSCF.c

287 lines
6.7 KiB
C

/*
Title: seqIOSCF
File: seqIOSCF.c
Purpose: IO of Standard Chromatogram Format sequences
Last update: Tue Nov 10 1992
Change log:
4 Feb 1992, Now draft proposal version 2
20 Feb 1992, Grab info from comment lines
19 Aug 1992, If SCF file has clip information, don't clip automatically
10 Nov 1992 SCF comments now stored in seq data structure
*/
/* ---- Imports ---- */
#include "scf.h" /* SCF structures */
#include "seq.h" /* IMPORT: Seq, BasesAndTraces, NULLSeq,
newSeq, freeSeq */
#include "seqIOEdit.h" /* IMPORT: writeEdSeq,
readEdSEq */
#include <ctype.h>
#include <stdio.h> /* IMPORT: fopen, fclose, fseek, ftell, fgetc,
EOF */
#include <stdlib.h>
#include "mach-io.h"
/* ---- Internal functions ---- */
static int read_scf_header(FILE *fp, Header *h)
{
int i;
if (be_read_int_4(fp,&h->magic_number)==False) return (False);
if (be_read_int_4(fp,&h->samples)==False) return (False);
if (be_read_int_4(fp,&h->samples_offset)==False) return (False);
if (be_read_int_4(fp,&h->bases)==False) return (False);
if (be_read_int_4(fp,&h->bases_left_clip)==False) return (False);
if (be_read_int_4(fp,&h->bases_right_clip)==False) return (False);
if (be_read_int_4(fp,&h->bases_offset)==False) return (False);
if (be_read_int_4(fp,&h->comments_size)==False) return (False);
if (be_read_int_4(fp,&h->comments_offset)==False) return (False);
if (fread(&h->version[0],sizeof(h->version),1,fp)!=1) return (False);
if (be_read_int_4(fp,&h->sample_size)==False) return (False);
if (be_read_int_4(fp,&h->code_set)==False) return (False);
for (i=0;i<20;i++)
if (be_read_int_4(fp,&h->spare[i])==False) return (False);
return (True);
}
static int read_scf_sample1(FILE *fp, Samples1 *s)
{
if (be_read_int_1(fp,&s->sample_A)==False) return (False);
if (be_read_int_1(fp,&s->sample_C)==False) return (False);
if (be_read_int_1(fp,&s->sample_G)==False) return (False);
if (be_read_int_1(fp,&s->sample_T)==False) return (False);
return (True);
}
static int read_scf_sample2(FILE *fp, Samples2 *s)
{
if (be_read_int_2(fp,&s->sample_A)==False) return (False);
if (be_read_int_2(fp,&s->sample_C)==False) return (False);
if (be_read_int_2(fp,&s->sample_G)==False) return (False);
if (be_read_int_2(fp,&s->sample_T)==False) return (False);
return (True);
}
static int read_scf_base(FILE *fp, Bases *b)
{
if (be_read_int_4(fp,&b->peak_index)==False) return (False);
if (be_read_int_1(fp,&b->prob_A)==False) return (False);
if (be_read_int_1(fp,&b->prob_C)==False) return (False);
if (be_read_int_1(fp,&b->prob_G)==False) return (False);
if (be_read_int_1(fp,&b->prob_T)==False) return (False);
if (be_read_int_1(fp,(byte *)&b->base)==False) return (False);
if (be_read_int_1(fp,&b->spare[0])==False) return (False);
if (be_read_int_1(fp,&b->spare[1])==False) return (False);
if (be_read_int_1(fp,&b->spare[2])==False) return (False);
return (True);
}
static float parse_version(char version[])
/*
*
*/
{
char v[5];
strncpy(v,version,4);v[4]='\0';
if (strspn(v,"0123456789. ")!=4) return 0.0;
return atof(v);
}
/* ---- Exports ---- */
int is_SCF(char *fn)
/*
* Check to see if file with name `fn' is in SCF format
*
* Returns:
* 1 - is SCF format
* 0 - not SCF format
*/
{
FILE *fp;
uint_4 magic;
int ok;
if ( (fp=fopen(fn,"rb")) == NULL) {
ok = 0;
} else {
if ( be_read_int_4(fp, &magic) != 1 ) {
ok = 0;
} else {
ok = (magic==SCF_MAGIC);
}
fclose(fp);
}
return ok;
}
Seq readSeqSCF(char *fn)
/*
* Read the SCF format sequence with name `fn' into `seq'.
* If bottom is True, we read the sequence in in reverse and
* complemented A NULL result indicates failure.
*/
{ Seq seq = NULLSeq;
FILE *fp;
Header header;
int i;
/* Open for: reading, binary */
if ((fp = fopen(fn, "rb")) == NULL) return(NULLSeq);
/* Read header */
if (fseek(fp, (off_t)0, 0) != 0) {
fclose(fp);
return(NULLSeq);
}
if (read_scf_header(fp,&header)==False) {
fclose(fp);
return(NULLSeq);
}
/* Allocate the sequence */
if ((seq = newSeq(BasesAndTraces, header.samples, header.bases)) == NULLSeq)
{ fclose(fp);
return(NULLSeq);
}
seq->mode = BasesAndTraces;
seq->format = SCFFormat;
seq->dirty = False;
seq->maxTraceVal = 0;
seq->NPoints = header.samples;
seq->NorigBases = header.bases;
seq->NedBases = header.bases;
seq->leftCutoff = header.bases_left_clip;
seq->rightCutoff = header.bases_right_clip;
seq->bottom = False;
/* set precision if necessary */
if (parse_version(header.version)<VERSION) {
/* old 8-bit format */
header.sample_size = 1;
}
/* read sample points */
if (fseek(fp, (off_t)(header.samples_offset), 0) != 0) {
fclose(fp);
return(NULLSeq);
}
for(i=0;i<seq->NPoints;i++) {
switch ( header.sample_size ) {
case 2:
{
Samples2 sample;
if (read_scf_sample2(fp,&sample)==False) {
fclose(fp);
return(NULLSeq);
}
seq->traceA[i] = sample.sample_A;
seq->traceC[i] = sample.sample_C;
seq->traceG[i] = sample.sample_G;
seq->traceT[i] = sample.sample_T;
break;
}
default:
{
Samples1 sample;
if (read_scf_sample1(fp,&sample)==False) {
fclose(fp);
return(NULLSeq);
}
seq->traceA[i] = sample.sample_A;
seq->traceC[i] = sample.sample_C;
seq->traceG[i] = sample.sample_G;
seq->traceT[i] = sample.sample_T;
break;
}
}
if (seq->maxTraceVal < seq->traceA[i]) seq->maxTraceVal = seq->traceA[i];
if (seq->maxTraceVal < seq->traceC[i]) seq->maxTraceVal = seq->traceC[i];
if (seq->maxTraceVal < seq->traceG[i]) seq->maxTraceVal = seq->traceG[i];
if (seq->maxTraceVal < seq->traceT[i]) seq->maxTraceVal = seq->traceT[i];
}
/* Read bases */
if (fseek(fp, (off_t)(header.bases_offset), 0) != 0) {
fclose(fp);
return(NULLSeq);
}
for(i=0;i<seq->NorigBases;i++) {
Bases base;
if (read_scf_base(fp,&base)==False) {
fclose(fp);
return(NULLSeq);
}
seq->base[i] = base.base;
seq->basePos[i] = base.peak_index;
}
/* Read selected comment information */
{
char *comments;
comments = (char *) malloc(header.comments_size+1);
if (fseek(fp,(off_t)(header.comments_offset), 0) != 0) {
free(comments);
fclose(fp);
return(NULLSeq);
}
if (fread(comments,header.comments_size,1,fp)==0) {
free(comments);
fclose(fp);
return(NULLSeq);
}
comments[header.comments_size] = '\0';
seq->info = comments;
}
/* Finished with the file */
fclose(fp);
return(seq);
}