staden-lg/src/frog/frog.c

273 lines
6.3 KiB
C

/*
* File: frog.c
*
* Author: Simon Dear
* MRC Laboratory of Molecular Biology
* Hills Road
* Cambridge CB2 2QH
* United Kingdom
*
* Description: utility to alter base lane ordering of an ABI file
*
* Created: 8 October 1992
* Updated:
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include "mach-io.h"
typedef struct {
char id[12];
uint_2 s1;
uint_2 s2;
uint_2 s3;
uint_2 s4;
uint_2 recs;
uint_4 l1;
uint_4 offset;
uint_4 l2;
} Header;
typedef struct {
char id[4];
uint_4 index;
uint_2 type;
uint_2 size;
uint_4 N;
uint_4 length;
uint_4 ptr1;
uint_4 ptr2;
} Rec;
/*
** Meaningful ids: (Explanations c/o jes)
**
** GMBF Gel type
** DATA.1-4 Raw data block
** DATA.5-8 Central (rather featureless) data block
** DATA.9-12 Processed data block
** FWO_ Base order (see seqIOABI.c)
** PBAS.1-2 Base sequence
** PLOC.1-2 Base positions
** S/N% Signal strengths (array of 4 shorts floats)
** SMPL Sample name
** SPAC Base spacing (float)
**
** It appears that if the value for an id occupies less than or equal four
** bytes, it is shoe-horned into the ptr1 field. Otherwise, ptr1 holds
** the byte offset in the file where the data can be found.
**
**
**
**
*/
int read_header(FILE *fp, Header *h)
{
if (fread(&h->id[0],12,1,fp) == 0) return 1;
if (be_read_int_2(fp,&h->s1)==0) return 1;
if (be_read_int_2(fp,&h->s2)==0) return 1;
if (be_read_int_2(fp,&h->s3)==0) return 1;
if (be_read_int_2(fp,&h->s4)==0) return 1;
if (be_read_int_2(fp,&h->recs)==0) return 1;
if (be_read_int_4(fp,&h->l1)==0) return 1;
if (be_read_int_4(fp,&h->offset)==0) return 1;
if (be_read_int_4(fp,&h->l2)==0) return 1;
return 0;
}
int read_record(FILE *fp, Rec *r)
{
if (fread(&r->id[0],4,1,fp) == 0) return 1;
if (be_read_int_4(fp,&r->index)==0) return 1;
if (be_read_int_2(fp,&r->type)==0) return 1;
if (be_read_int_2(fp,&r->size)==0) return 1;
if (be_read_int_4(fp,&r->N)==0) return 1;
if (be_read_int_4(fp,&r->length)==0) return 1;
if (be_read_int_4(fp,&r->ptr1)==0) return 1;
if (be_read_int_4(fp,&r->ptr2)==0) return 1;
return 0;
}
int write_record(FILE *fp, Rec *r)
{
if (fwrite(&r->id[0],4,1,fp) == 0) return 1;
if (be_write_int_4(fp,&r->index)==0) return 1;
if (be_write_int_2(fp,&r->type)==0) return 1;
if (be_write_int_2(fp,&r->size)==0) return 1;
if (be_write_int_4(fp,&r->N)==0) return 1;
if (be_write_int_4(fp,&r->length)==0) return 1;
if (be_write_int_4(fp,&r->ptr1)==0) return 1;
if (be_write_int_4(fp,&r->ptr2)==0) return 1;
return 0;
}
int fix_abi(char *fn,char *old, char *new)
{
FILE *fp;
Header header;
Rec rec;
int i;
off_t pl,pn;
if ( (fp=fopen(fn,"r+b")) == NULL ) {
fprintf(stderr,"frog: %s: error opening file\n",fn);
return 1;
}
fseek(fp,0,0);
if (read_header(fp,&header)) {
fprintf(stderr,"frog: %s: error reading header\n",fn);
fclose(fp);
return 2;
}
pl = 0;
pn = header.offset;
for(i=0;i<header.recs;i++) {
pl = pn;
fseek(fp,pn,0);
if(read_record(fp,&rec)) {
fprintf(stderr,"frog: %s: error reading record\n",fn);
fclose(fp);
return 2;
}
pn = ftell(fp);
if(strncmp(rec.id,"FWO_",4)==0) {
char map[4];
char tmap[4];
#define ind(base) ((base)=='C'?0:(base)=='A'?1:(base)=='G'?2:3)
/*
printf("Changing base to trace mapping (FWO_)\n");
*/
/*
* map of lanes to old bases
*/
tmap[ind('C')] = (rec.ptr1>>24)&255;
tmap[ind('A')] = (rec.ptr1>>16)&255;
tmap[ind('G')] = (rec.ptr1>> 8)&255;
tmap[ind('T')] = rec.ptr1&255;
/*
* map of old bases to new bases
*/
map[ind(old[0])] = new[0];
map[ind(old[1])] = new[1];
map[ind(old[2])] = new[2];
map[ind(old[3])] = new[3];
/*
* map lanes to new bases
*/
rec.ptr1 = (((map[ind(tmap[ind('C')])]<<8)+
map[ind(tmap[ind('A')])]<<8)+
map[ind(tmap[ind('G')])]<<8)+
map[ind(tmap[ind('T')])];
fseek(fp,pl,0);
if (write_record(fp,&rec)) {
fprintf(stderr,"frog: %s: error writing record\n",fn);
fclose(fp);
return 2;
}
} else if(strncmp(rec.id,"PBAS",4)==0) {
char *seq;
char map[4];
int k;
/*
printf("Changing bases (PBAS)\n");
*/
seq = (char *) malloc(rec.length);
/* read sequence */
fseek(fp,rec.ptr1,0);
if (fread(seq,rec.length,1,fp)!=1) {
fprintf(stderr,"frog: %s: error reading sequence\n",fn);
fclose(fp);
return 2;
}
/* swap bases */
map[ind(old[0])] = new[0];
map[ind(old[1])] = new[1];
map[ind(old[2])] = new[2];
map[ind(old[3])] = new[3];
for (k=0;k<rec.length;k++) if (strchr("ACGT",seq[k])) seq[k] = map[ind(seq[k])];
/* write back */
fseek(fp,rec.ptr1,0);
if (fwrite(seq,rec.length,1,fp)!=1) {
fprintf(stderr,"frog: %s: error writing sequence\n",fn);
fclose(fp);
return 2;
}
}
}
fclose(fp);
return 0;
}
void usage()
{
fprintf(stderr,"Usage: frog old new [ABI_files...]\n\n");
fprintf(stderr,"examples\n");
fprintf(stderr,"To swap all Gs with Ts and Ts with Gs\n frog CAGT CATG trace.abi\n");
fprintf(stderr,"To swap all As with Cs, Cs with Gs and Gs with As\n frog ACGT CGAT trace.abi\n");
}
int check(char *bases)
{
int i;
int map[4];
if (strlen(bases)!=4) {
fprintf(stderr,"frog: should be four bases %s\n",bases);
return 1;
}
for(i=0;i<4;i++) map[i]=0;
for(i=0;i<4;i++) {
if (islower(bases[i])) bases[i]=toupper(bases[i]);
if(strchr("ACGT",bases[i])==NULL) {
fprintf(stderr,"frog: invalid base '%c' in bases \"%s\"\n",bases[i],bases);
return 1;
}
map[ind(bases[i])] = 1;
}
for(i=0;i<4;i++) {
if (map[i]==0) {
fprintf(stderr,"frog: repeated bases in \"%s\"\n",bases);
return 1;
}
}
return 0;
}
int main(int argc, char **argv)
{
int i;
if (argc<3) {usage(); exit(1);}
if (check(argv[1])) exit(1);
if (check(argv[2])) exit(1);
for(i=3;i<argc;i++) {
int err;
if (err=fix_abi(argv[i],argv[1],argv[2])) printf("Couldn't read %s (%d)\n",argv[i],err);
}
return 0;
}