384 lines
10 KiB
C
384 lines
10 KiB
C
/* alfsplit.c
|
|
Written by Richard Durbin 12/28/90.
|
|
Takes big combined alf results file, and splits it into
|
|
separate files for each clone.
|
|
Only keep processed data, sequence data and experimental notes.
|
|
Although the format of the small files is based on that of an
|
|
ALF file officially split on the PC, they are unfortunately
|
|
not reaadable by ALFManager software on the PC.
|
|
*/
|
|
|
|
/* first give full function prototypes for system functions
|
|
these are incomplete in the Sun /usr/include/...
|
|
|
|
Modified by Simon Dear 21 August 1991.
|
|
Ignore value of s3 in readIndexEntry in check on sensible values
|
|
This value is DirEntry.fType in Pharmacia documentation
|
|
|
|
Modified by Simon Dear 25 October 1991.
|
|
Machine independant I/O
|
|
|
|
24 August 1992 [Simon Dear]
|
|
MAJOR HACK - to allow for readings which have no clone name
|
|
*/
|
|
|
|
typedef int mysize_t;
|
|
typedef long mytime_t;
|
|
|
|
#include <stdio.h> /* because need FILE definition for prototypes */
|
|
#include <sys/types.h>
|
|
#include <stdarg.h> /* varargs needed for v*printf() prototypes */
|
|
#include "mach-io.h"
|
|
|
|
/* stdio function prototypes */
|
|
int fclose (FILE *stream);
|
|
int fgetc (FILE *stream);
|
|
int _filbuf (FILE *stream) ;
|
|
/* Causes Alliant gyp
|
|
int _flsbuf (unsigned char x, FILE *stream) ;
|
|
*/
|
|
FILE * fopen (const char *path, const char *mode);
|
|
/* incompat. with Alpha
|
|
int fprintf (FILE *stream, const char *format, ...);
|
|
*/
|
|
/* Causes Alliant gyp
|
|
mysize_t fread (void *ptr, mysize_t size, mysize_t n, FILE *stream);
|
|
*/
|
|
int fseek (FILE *stream, long offset, int whence);
|
|
long ftell (FILE *stream);
|
|
/* Causes Alliant gyp
|
|
mysize_t fwrite (const void *ptr, mysize_t size, mysize_t n,
|
|
FILE *stream);
|
|
*/
|
|
/* incompat. with Alpha
|
|
int printf (const char *format, ...);
|
|
*/
|
|
/* sprintf has incompatible declarations in different SUN OS releases!
|
|
void sprintf (char *buffer, const char *format, ...);
|
|
*/
|
|
/*
|
|
int vfprintf (FILE *stream, const char *format, va_list arglist);
|
|
*/
|
|
|
|
/* allocation prototypes that we use */
|
|
/* void * malloc (mysize_t size);*/
|
|
|
|
/* string.h prototypes that we use */
|
|
int strcmp (const char *s1, const char *s2);
|
|
int strncmp (const char *s1, const char *s2, mysize_t maxlen);
|
|
int atoi (const char *s) ;
|
|
|
|
/* system prototypes */
|
|
void exit (int status);
|
|
|
|
#include <ctype.h>
|
|
|
|
typedef int BOOL ;
|
|
#define TRUE 1
|
|
#define FALSE 0
|
|
|
|
/********** routines to read and write Index entries ***********/
|
|
|
|
static char junk[512] ; /* for when we want to read/write junk */
|
|
|
|
/***** architecture independant reads ******/
|
|
static int_4 read_int_4(FILE *fp)
|
|
|
|
{
|
|
unsigned char buf[sizeof(int_4)];
|
|
|
|
if (fread(buf, sizeof(buf), 1, fp) != 1) return 0;
|
|
return (int_4)
|
|
(((uint_4)buf[0]) +
|
|
((uint_4)buf[1]<<8) +
|
|
((uint_4)buf[2]<<16) +
|
|
((uint_4)buf[3]<<24));
|
|
}
|
|
|
|
static int_2 read_int_2(FILE *fp)
|
|
|
|
{
|
|
unsigned char buf[sizeof(int_2)];
|
|
|
|
if (fread(buf, sizeof(buf), 1, fp) != 1) return 0;
|
|
return (int_2)
|
|
(((uint_2)buf[0]) +
|
|
((uint_2)buf[1]<<8));
|
|
}
|
|
|
|
static void write_int_4(FILE *fp, int_4 l)
|
|
{
|
|
unsigned char buf[sizeof(int_4)];
|
|
|
|
buf[0] = (unsigned char)(l&255);
|
|
buf[1] = (unsigned char)(l>>8&255);
|
|
buf[2] = (unsigned char)(l>>16&255);
|
|
buf[3] = (unsigned char)(l>>24&255);
|
|
|
|
fwrite(buf, sizeof(buf), 1, fp);
|
|
}
|
|
|
|
static void write_int_2(FILE *fp, int_2 l)
|
|
{
|
|
unsigned char buf[sizeof(int_2)];
|
|
|
|
buf[0] = (unsigned char)(l&255);
|
|
buf[1] = (unsigned char)(l>>8&255);
|
|
|
|
fwrite(buf, sizeof(buf), 1, fp);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct IndexEntryStruct
|
|
{ int_4 isTraces ;
|
|
char label[40] ;
|
|
int_4 dataLen ;
|
|
int_4 blockLen ;
|
|
int_4 offset ;
|
|
} *IndexEntry ;
|
|
|
|
static BOOL readIndexEntry (FILE *fil, IndexEntry ent)
|
|
{
|
|
short s1,s2,s3 ;
|
|
|
|
#define readInt() (read_int_4(fil))
|
|
#define readShort() (read_int_2(fil))
|
|
|
|
clearerr (fil) ;
|
|
|
|
s1 = readShort() ;
|
|
s2 = readShort() ;
|
|
s3 = readShort() ;
|
|
/* Was this, but s3 can be 0 now
|
|
if ((s1 != 1 || s3 != 1) && (s1 || s2 || s3))
|
|
*/
|
|
if ((s1 != 1) && (s1 || s2))
|
|
return FALSE ;
|
|
ent->isTraces = (s2 == 4) ;
|
|
|
|
fread (ent->label,40,1,fil) ;
|
|
ent->dataLen = readInt() ;
|
|
ent->blockLen = readInt() ;
|
|
ent->offset = readInt() ;
|
|
|
|
fread (junk,70,1,fil) ;
|
|
|
|
return !ferror (fil) ;
|
|
}
|
|
|
|
static BOOL writeIndexEntry (FILE *fil, IndexEntry ent)
|
|
{
|
|
|
|
#define writeInt(xx) (write_int_4(fil,xx))
|
|
#define writeShort(xx) (write_int_2(fil,xx))
|
|
|
|
clearerr (fil) ;
|
|
|
|
writeShort(1) ;
|
|
if (ent->isTraces)
|
|
writeShort(4) ;
|
|
else
|
|
writeShort(2) ;
|
|
writeShort(1) ;
|
|
|
|
fwrite (ent->label,40,1,fil) ;
|
|
writeInt(ent->dataLen) ;
|
|
writeInt(ent->blockLen) ;
|
|
writeInt(ent->offset) ;
|
|
|
|
fwrite (junk,70,1,fil) ;
|
|
|
|
return !ferror (fil) ;
|
|
}
|
|
|
|
/************************************************************/
|
|
|
|
void crash (char* format,...)
|
|
{
|
|
va_list args ;
|
|
|
|
va_start (args,format) ;
|
|
vfprintf (stderr,format,args) ;
|
|
va_end (args) ;
|
|
|
|
exit (1) ;
|
|
}
|
|
|
|
/*****************/
|
|
|
|
static void readLine (FILE *fil, char* cp)
|
|
{
|
|
while ((*cp = fgetc(fil)) && *cp != EOF && *cp != '\n')
|
|
++cp ;
|
|
*cp = 0 ;
|
|
}
|
|
|
|
/*****************/
|
|
#define MAXCLONES 10
|
|
void main (int argc, char* *argv)
|
|
{
|
|
FILE *inEnt, *inData, *outEnt[MAXCLONES], *outData[MAXCLONES] ;
|
|
/* open two pointers in each file - index and data */
|
|
IndexEntry EN ;
|
|
IndexEntry ent ;
|
|
char expLine[4][20],name[MAXCLONES][20],note[MAXCLONES][80],fname[25];
|
|
off_t seqOffset[MAXCLONES], dataOffset[MAXCLONES];
|
|
int_4 seqDataLen[MAXCLONES], dataDataLen[MAXCLONES];
|
|
int_4 seqBlockLen[MAXCLONES], dataBlockLen[MAXCLONES];
|
|
char buf[512] ;
|
|
int i,j,len ;
|
|
size_t lastDot,lastSlash;
|
|
|
|
if (argc != 2)
|
|
crash ("Usage: alfsplit rawfilename\n") ;
|
|
|
|
inData = fopen (argv[1],"r") ;
|
|
if (!(inEnt = fopen (argv[1],"r")))
|
|
crash ("Could not open file '%s'\n",argv[1]) ;
|
|
|
|
/* first find the experimental notes entry and extract file names */
|
|
|
|
ent = (IndexEntry) malloc (sizeof (struct IndexEntryStruct)) ;
|
|
EN = (IndexEntry) malloc (sizeof (struct IndexEntryStruct)) ;
|
|
if (fseek (inEnt,(off_t)512,0))
|
|
crash ("Could not seek to index in raw file\n") ;
|
|
while (TRUE)
|
|
{ if (!readIndexEntry (inEnt,EN))
|
|
crash ("Can't find Experimental Notes index entry\n") ;
|
|
if (!strcmp (EN->label,"ALF Experimental notes"))
|
|
break ;
|
|
}
|
|
|
|
if (fseek (inData,(off_t)EN->offset,0))
|
|
crash ("Can't seek to Experimental notes\n") ;
|
|
for (i = 0 ; i < 4 ; ++i)
|
|
readLine (inData,expLine[i]) ;
|
|
|
|
/* determine default root name from argv[1]:
|
|
** I assume this has the format {path}/{name}.alf
|
|
** Default names will be {name}.1, {name}.2, ..., {name}.MAXCLONES
|
|
*/
|
|
lastDot = (size_t)0;
|
|
for (i = strlen(argv[1])-1;i>=0 && argv[1][i] != '/'; i--)
|
|
if (lastDot==0 && argv[1][i] == '.') lastDot = (size_t)i;
|
|
if (lastDot==0) lastDot = strlen(argv[1]);
|
|
lastSlash = (size_t)i;
|
|
|
|
for (i = 0 ; i < MAXCLONES ; ++i) {
|
|
readLine (inData,name[i]) ;
|
|
if (!strcmp (name[i],"blank") || *name[i]==0 ) /* clone not named */
|
|
/* assume default */
|
|
sprintf(name[i],
|
|
"%.*s.%d",
|
|
(int)(lastDot-lastSlash-1),
|
|
argv[1]+lastSlash+1,
|
|
(i>10)?i:(i+1)%10);
|
|
}
|
|
for (i = 0 ; i < MAXCLONES ; ++i)
|
|
readLine (inData,note[i]) ;
|
|
|
|
for (i = 0 ; i < MAXCLONES ; ++i)
|
|
seqOffset[i] = dataOffset[i] = 0;
|
|
|
|
/* gather offset information */
|
|
fseek (inEnt,(off_t)512,0) ;
|
|
while (readIndexEntry (inEnt,ent)) {
|
|
printf ("%s: %d\n",ent->label,ent->offset/512) ;
|
|
if (!strncmp (ent->label,"ALF Sequence data Clone ",24))
|
|
len = 24 ;
|
|
else if (!strncmp (ent->label,"ALF Processed data Clone ",25))
|
|
len = 25 ;
|
|
else
|
|
continue ;
|
|
/* fall through to here if sequence or processed */
|
|
i = atoi (&ent->label[len]) - 1 ;
|
|
|
|
if (len == 24) {
|
|
seqOffset[i] = (off_t)ent->offset;
|
|
seqBlockLen[i] = ent->blockLen;
|
|
seqDataLen[i] = ent->dataLen;
|
|
} else {
|
|
dataOffset[i] = (off_t)ent->offset;
|
|
dataBlockLen[i] = ent->blockLen;
|
|
dataDataLen[i] = ent->dataLen;
|
|
}
|
|
}
|
|
|
|
/* initialise output files for clones */
|
|
for (i = 0 ; i < MAXCLONES ; ++i) {
|
|
if (seqOffset[i]==0 && dataOffset[i]==0) {
|
|
/* we are missing sequence and/or trace data */
|
|
printf ("Clone %d: %s - NOT MAKING BECAUSE THERE IS NO TRACE AND SEQUENCE DATA\n",i+1,name[i]);
|
|
} else if (seqOffset[i]==0) {
|
|
/* we are missing sequence and/or trace data */
|
|
printf ("Clone %d: %s - NOT MAKING BECAUSE THERE IS NO SEQUENCE DATA\n",i+1,name[i]);
|
|
} else if (dataOffset[i]==0) {
|
|
/* we are missing sequence and/or trace data */
|
|
printf ("Clone %d: %s - NOT MAKING BECAUSE THERE IS NO TRACE DATA\n",i+1,name[i]);
|
|
} else {
|
|
printf ("Clone %d: %s - %s\n",i+1,name[i],note[i]) ;
|
|
/* create the file and write the notes */
|
|
sprintf (fname,"%sALF",name[i]) ;
|
|
outData[i] = fopen (fname,"w") ;
|
|
if(!fwrite (junk,512,1,outData[i]) )
|
|
fprintf(stderr, "could not write file: %s\n", fname);
|
|
if(!fwrite (junk,512,1,outData[i]) )
|
|
fprintf(stderr, "could not write file: %s\n", fname);
|
|
len = 0 ;
|
|
for (j = 0 ; j < 4 ; ++j)
|
|
len += fprintf (outData[i],"%s\n",expLine[j]) ;
|
|
len += fprintf (outData[i],"%s\n\n\n\n\n\n\n\n\n\n",name[i]) ;
|
|
len += fprintf (outData[i],"%s\n\n\n\n\n\n\n\n\n\n",note[i]) ;
|
|
fwrite (junk,512-len,1,outData[i]) ;
|
|
/* now write the index entry */
|
|
if (!(outEnt[i] = fopen (fname,"a")))
|
|
crash ("Couldn't open output file %s\n",fname) ;
|
|
fseek (outEnt[i],(off_t)512,0) ;
|
|
EN->offset = 1024 ;
|
|
EN->dataLen = len ;
|
|
writeIndexEntry (outEnt[i],EN) ;
|
|
|
|
/*
|
|
** Copy sequence and trace data
|
|
*/
|
|
/* trace data */
|
|
strcpy(ent->label,"ALF Processed data Clone 1");
|
|
ent->isTraces = 1;
|
|
ent->offset = (int_4)ftell (outData[i]) ;
|
|
ent->blockLen = dataBlockLen[i];
|
|
ent->dataLen = dataDataLen[i];
|
|
|
|
fseek (inData,dataOffset[i],0) ;
|
|
len = ent->blockLen/512 ;
|
|
for (j = 0 ; j < len ; ++j) {
|
|
fread (buf,512,1,inData) ;
|
|
fwrite (buf,512,1,outData[i]) ;
|
|
}
|
|
|
|
writeIndexEntry (outEnt[i],ent) ;
|
|
|
|
/* sequence data*/
|
|
strcpy(ent->label,"ALF Sequence data Clone 1");
|
|
ent->isTraces = 0;
|
|
ent->offset = (int_4)ftell (outData[i]) ;
|
|
ent->blockLen = seqBlockLen[i];
|
|
ent->dataLen = seqDataLen[i];
|
|
|
|
fseek (inData,seqOffset[i],0) ;
|
|
len = ent->blockLen/512 ;
|
|
for (j = 0 ; j < len ; ++j) {
|
|
fread (buf,512,1,inData) ;
|
|
fwrite (buf,512,1,outData[i]) ;
|
|
}
|
|
|
|
writeIndexEntry (outEnt[i],ent) ;
|
|
|
|
}
|
|
}
|
|
}
|