/* Title: seqIOALF File: seqIOALF.c Purpose: IO of ALF sequences Last update: Tue Nov 10 1992 */
/* Change Log :-
   14.01.91 SD when complementing the sequence with an odd number of bases, the middle base position was not adjusted.
   15.01.91 SD Put StLouis stuff on compilation flag
   15.01.91 SD New include file (opp.h)
   02.08.91 SD Changed the mapping of uncertainty codes so that we now only generate A C G T and -
   Previously... bug in interpreting ALF integer fields. We now treat them as unsigned.
   17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS and AUTO_CLIP
   25.10.91 SD Machine independent I/O...removed BIGENDIAN flag
   25.11.91 SD There was a hard limit (of 1024) for allocation of space for number of bases, yet program would read in more if there were any, causing nasties to happen. */
/* RMD I made substantial changes to this file 12/28/90 so as to read sequence data more freely (necessary when reading data from multiple trace files). The affected area is indicated by comments starting RMD, like this one. */
/* This file was adapted by LFW from seqIOABI.c. At the moment, the `maxTraceVal' of the sequence is hardwired as 1200. This fudge worked for the ABI, but is not really good for the ALF. To keep compatibility we keep 1200 as a max and scale to fit (search for scaleFactor). This needs work.
   The ALF results file is a concatenation of many files with an index structure at the beginning, consisting of a 512 byte block that we ignore, followed by 128 byte blocks describing each file. All files, including the header region, are rounded up to a multiple of 512 bytes long.
   The getIndexEntry routines identify the 128 byte index component of interest by matching 4 chars of its ASCII label, then extract the field of choice from that entry.
   Note that the SUN and PC are of opposite endian-ness, so that we have to provide special routines to read words and longwords from the results file. Luckily the floating point numbers are written out in ASCII.
All references to the seq->bottom, the bottom strand of the sequence, were added by lfw. */

/* ---- Imports ---- */

#include "seq.h" /* IMPORT: Seq, BasesAndTraces, NULLSeq, newSeq, freeSeq */
/*#include "seqIOABI.h"*/
#include "seqIOEdit.h" /* IMPORT: writeEdSeq, readEdSEq */
/* NOTE(review): the next two #include directives carry no header name in
   this copy of the file; the names appear to have been lost.  Judging by
   the identifiers used below (off_t, FILE, fseek, ftell) they were system
   headers -- restore them from a clean copy of the source. */
#include
#include /* IMPORT: fopen, fclose, fseek, ftell, fgetc, EOF */
#include "mach-io.h" /* presumably declares be_read_int_4 and le_read_int_2 -- TODO confirm */

/* ---- Constants ---- */

#define BasesPerLine 50 /* For output formatting */
#define IndexEntryLength ((off_t)128) /* distance in bytes between consecutive index entries */

/*
    Here are some labels we will be looking for, four chars packed
    into a long word.  Each macro places its first character in the
    most significant byte, giving the tags "ALF ", "Sequ" and "Proc".
*/
#define EntryLabel ((uint_4) ((((('A'<<8)+'L')<<8)+'F')<<8)+' ')
#define BaseEntryLabel ((uint_4) ((((('S'<<8)+'e')<<8)+'q')<<8)+'u')
#define DataEntryLabel ((uint_4) ((((('P'<<8)+'r')<<8)+'o')<<8)+'c')

/* ---- Internal functions ---- */

static Boolean getIndexEntryLW(FILE *fp, off_t indexO, uint_4 label, int lw, uint_4 *val)
/*
    From the ALF results file connected to `fp' whose index starts at
    byte offset `indexO', return in `val' the `lw'th long word from
    the entry labelled `label'.  The result indicates success.

    The index is scanned one entry (IndexEntryLength bytes) at a time,
    reading the leading 4-byte label of each entry with be_read_int_4
    until it equals `label'.

    NOTE(review): the tail of this routine -- the loop that steps to the
    `lw'th long word and stores it through `val' -- is missing from this
    copy of the file.  See the marker further down.
*/
{
  off_t entryNum=-1;
  int i;
  uint_4 entryLabel;

  /* Search the index for the entry whose label matches.
     NOTE(review): there is no upper bound on the number of entries
     examined; the loop only ends on a match, a failed seek or a failed
     read. */
  do {
    entryNum++;
    /* Third fseek argument 0 is presumably SEEK_SET -- TODO confirm.
       NOTE(review): this failure path returns -1 from a function declared
       Boolean, while the read failure just below returns False.  -1 is
       non-zero, so a caller testing the result for truth would probably
       see success here; looks like it should be False -- confirm against
       the definition of Boolean. */
    if (fseek(fp, indexO+(entryNum*IndexEntryLength), 0) != 0) return(-1);
    if (!be_read_int_4(fp, &entryLabel)) return(False);
  } while (!(entryLabel == label));

  /* NOTE(review): SOURCE IS DAMAGED ON THE NEXT LINE.  Everything from the
     loop condition of this `for' up to the `->' of an assignment to the
     mode field of `seq' is missing.  The lost text evidently held the rest
     of getIndexEntryLW and the opening of a separate sequence-reading
     routine (its signature, its local declarations, and whatever opens
     `fp', creates `seq' and computes numPoints, baseO, dataO, header_size
     and actBaseDataSize).  Nothing below this point belongs to
     getIndexEntryLW.  The line is reproduced exactly as found. */
  for(i=2; imode = BasesAndTraces;

  /* Remaining initialisation of the new sequence record. */
  seq->format = ALFFormat;
  seq->dirty = False;
  seq->maxTraceVal = 0; /* raised to the largest sample seen while reading the traces */
  seq->NPoints = numPoints;
  seq->bottom = False;
  /* RMD set ->NorigBases and ->NedBases after reading them in */

  /* read in the sequence */
  if (!(fseek(fp, (off_t)baseO, 0) == 0)) goto abort ;
  {
    /* new locals introduced by LFW and/or RMD for the ALF */
    int numBases; /* number of nucleotides read in */
    float bp ; /* value stored in seq->basePos[] for the current base */
    char ch; /* value stored in seq->base[] for the current base */

    /* Read base records until the file position reaches the end of the
       base data (baseO plus actBaseDataSize truncated to unsigned short),
       subject to a further limit on numBases.
       NOTE(review): SOURCE IS DAMAGED ON THE NEXT LINE.  The text between
       `numBases' and `base[numBases]' is missing: the rest of the loop
       condition, the code that obtains `ch' and `bp', and apparently the
       opening of an inner block.  The line is reproduced exactly as
       found. */
    for (numBases = 0 ; ftell(fp) < baseO+(unsigned short)actBaseDataSize && numBasesbase[numBases] = ch;
        seq->basePos[numBases] = bp;
        ++numBases ;
      }
    }
    /* Original and edited base counts both start as the number read. */
    seq->NorigBases = numBases;
    seq->NedBases = numBases;
  }

  /* read in the traces , stored in 2 byte integers in records in the order A C G T A C G T A C G T ...*/
  if (fseek(fp, (off_t)(dataO+header_size), 0) != 0) goto abort ;
  num_points = 0;
  for (i=0;i<(seq->NPoints);i++) {
    /* One record per trace point: four values read with le_read_int_2
       (presumably little-endian -- TODO confirm in mach-io.h), in the
       order A, C, G, T, each folded into the running maximum.
       NOTE(review): the read-failure exits below repeat the cleanup of
       the `abort:' label inline instead of jumping to it. */
    if (!le_read_int_2(fp, &(seq->traceA[i]))) {fclose(fp);freeSeq(seq);return(NULLSeq);}
    if (seq->maxTraceVal < seq->traceA[i]) seq->maxTraceVal = seq->traceA[i];
    if (!le_read_int_2(fp, &(seq->traceC[i]))) {fclose(fp);freeSeq(seq);return(NULLSeq);}
    if (seq->maxTraceVal < seq->traceC[i]) seq->maxTraceVal = seq->traceC[i];
    if (!le_read_int_2(fp, &(seq->traceG[i]))) {fclose(fp);freeSeq(seq);return(NULLSeq);}
    if (seq->maxTraceVal < seq->traceG[i]) seq->maxTraceVal = seq->traceG[i];
    if (!le_read_int_2(fp, &(seq->traceT[i]))) {fclose(fp);freeSeq(seq);return(NULLSeq);}
    if (seq->maxTraceVal < seq->traceT[i]) seq->maxTraceVal = seq->traceT[i];
    /* Stop early at a record whose four samples are all zero once i is
       past numPoints-64; presumably this skips zero padding at the end of
       the trace data -- TODO confirm.
       NOTE(review): if numPoints has an unsigned type and is below 64 the
       subtraction wraps -- its declaration is not visible here. */
    if (seq->traceA[i]==0 && seq->traceT[i]==0 && seq->traceC[i]==0 && seq->traceG[i]==0 && i > (numPoints-64)) break;
    /* NOTE(review): num_points counts the records kept before the early
       break but is not read again in the visible code, and seq->NPoints
       is not adjusted after the loop. */
    num_points++;
  }

  /* Finished with the file */
  fclose(fp);
  return(seq);

  /* Common failure exit: close the file, release the partly built
     sequence and report failure with NULLSeq. */
abort:
  fclose(fp);
  freeSeq(seq);
  return(NULLSeq);
}