203 lines
4.8 KiB
C
203 lines
4.8 KiB
C
/*
|
|
* File: toad.c
|
|
*
|
|
* Author: Simon Dear
|
|
* MRC Laboratory of Molecular Biology
|
|
* Hills Road
|
|
* Cambridge CB2 2QH
|
|
* United Kingdom
|
|
*
|
|
* Description: utility to alter base lane ordering of an SCF file
|
|
*
|
|
* Created: 8 October 1992
|
|
* Updated:
|
|
*
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <sys/types.h>
|
|
#include "scfIO.h"
|
|
#include "mach-io.h"
|
|
|
|
#define ind(base) ((base)=='C'?0:(base)=='A'?1:(base)=='G'?2:3)
|
|
|
|
int fix_scf(char *fn,char *old, char *new)
|
|
{
|
|
FILE *fp;
|
|
Header header;
|
|
Samples *samples;
|
|
Bases *bases;
|
|
int i;
|
|
char map[4];
|
|
|
|
if ( (fp=fopen(fn,"r+b")) == NULL ) {
|
|
fprintf(stderr,"toad: %s: error opening file\n",fn);
|
|
return 1;
|
|
}
|
|
|
|
fseek(fp,0,0);
|
|
if (read_scf_header(fp,&header)==0) {
|
|
fprintf(stderr,"toad: %s: error reading header\n",fn);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
|
|
/* swap samples */
|
|
samples = (Samples *)malloc(sizeof(Samples)*header.samples);
|
|
if (samples==NULL) {
|
|
fprintf(stderr,"toad: %s: out of memory\n",fn);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
fseek(fp,header.samples_offset,0);
|
|
for(i=0;i<header.samples;i++) {
|
|
if (read_scf_sample(fp,&samples[i])==0) {
|
|
fprintf(stderr,"toad: %s: error reading sample %d\n",fn,i);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
}
|
|
for(i=0;i<header.samples;i++) {
|
|
uint_1 smap[4];
|
|
uint_1 osmap[4];
|
|
osmap[ind('A')] = samples[i].sample_A;
|
|
osmap[ind('C')] = samples[i].sample_C;
|
|
osmap[ind('G')] = samples[i].sample_G;
|
|
osmap[ind('T')] = samples[i].sample_T;
|
|
smap[ind(new[0])] = osmap[ind(old[0])];
|
|
smap[ind(new[1])] = osmap[ind(old[1])];
|
|
smap[ind(new[2])] = osmap[ind(old[2])];
|
|
smap[ind(new[3])] = osmap[ind(old[3])];
|
|
samples[i].sample_A = smap[ind('A')];
|
|
samples[i].sample_C = smap[ind('C')];
|
|
samples[i].sample_G = smap[ind('G')];
|
|
samples[i].sample_T = smap[ind('T')];
|
|
}
|
|
fseek(fp,header.samples_offset,0);
|
|
for(i=0;i<header.samples;i++) {
|
|
if (write_scf_sample(fp,&samples[i])==0) {
|
|
fprintf(stderr,"toad: %s: error writing sample %d\n",fn,i);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
}
|
|
free(samples);
|
|
|
|
|
|
/* swap bases */
|
|
bases = (Bases *)malloc(sizeof(Bases)*header.bases);
|
|
|
|
if (bases==NULL) {
|
|
fprintf(stderr,"toad: %s: out of memory\n",fn);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
fseek(fp,header.bases_offset,0);
|
|
for(i=0;i<header.bases;i++) {
|
|
if (read_scf_base(fp,&bases[i])==0) {
|
|
fprintf(stderr,"toad: %s: error reading bases %d\n",fn,i);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
}
|
|
map[ind(old[0])] = new[0];
|
|
map[ind(old[1])] = new[1];
|
|
map[ind(old[2])] = new[2];
|
|
map[ind(old[3])] = new[3];
|
|
for(i=0;i<header.bases;i++) {
|
|
uint_1 smap[4];
|
|
uint_1 osmap[4];
|
|
osmap[ind('A')] = bases[i].prob_A;
|
|
osmap[ind('C')] = bases[i].prob_C;
|
|
osmap[ind('G')] = bases[i].prob_G;
|
|
osmap[ind('T')] = bases[i].prob_T;
|
|
smap[ind(new[0])] = osmap[ind(old[0])];
|
|
smap[ind(new[1])] = osmap[ind(old[1])];
|
|
smap[ind(new[2])] = osmap[ind(old[2])];
|
|
smap[ind(new[3])] = osmap[ind(old[3])];
|
|
bases[i].prob_A = smap[ind('A')];
|
|
bases[i].prob_C = smap[ind('C')];
|
|
bases[i].prob_G = smap[ind('G')];
|
|
bases[i].prob_T = smap[ind('T')];
|
|
if (strchr("ACGT",bases[i].base)) bases[i].base = map[ind(bases[i].base)];
|
|
}
|
|
fseek(fp,header.bases_offset,0);
|
|
for(i=0;i<header.bases;i++) {
|
|
if (write_scf_base(fp,&bases[i])==0) {
|
|
fprintf(stderr,"toad: %s: error writing base %d\n",fn,i);
|
|
fclose(fp);
|
|
return 2;
|
|
}
|
|
}
|
|
free(bases);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fclose(fp);
|
|
return 0;
|
|
}
|
|
|
|
|
|
void usage()
|
|
{
|
|
fprintf(stderr,"Usage: toad old new [SCF_files...]\n\n");
|
|
fprintf(stderr,"examples\n");
|
|
fprintf(stderr,"To swap all Gs with Ts and Ts with Gs\n toad CAGT CATG trace.scf\n");
|
|
fprintf(stderr,"To swap all As with Cs, Cs with Gs and Gs with As\n toad ACGT CGAT trace.scf\n");
|
|
}
|
|
|
|
int check(char *bases)
|
|
{
|
|
int i;
|
|
int map[4];
|
|
if (strlen(bases)!=4) {
|
|
fprintf(stderr,"toad: should be four bases %s\n",bases);
|
|
return 1;
|
|
}
|
|
|
|
for(i=0;i<4;i++) map[i]=0;
|
|
for(i=0;i<4;i++) {
|
|
if (islower(bases[i])) bases[i]=toupper(bases[i]);
|
|
if(strchr("ACGT",bases[i])==NULL) {
|
|
fprintf(stderr,"toad: invalid base '%c' in bases \"%s\"\n",bases[i],bases);
|
|
return 1;
|
|
}
|
|
map[ind(bases[i])] = 1;
|
|
}
|
|
for(i=0;i<4;i++) {
|
|
if (map[i]==0) {
|
|
fprintf(stderr,"toad: repeated bases in \"%s\"\n",bases);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
int i;
|
|
|
|
/*
|
|
fprintf(stderr,"Eeek! Not finished yet.\n");
|
|
return 1;
|
|
*/
|
|
|
|
if (argc<3) {usage(); exit(1);}
|
|
if (check(argv[1])) exit(1);
|
|
if (check(argv[2])) exit(1);
|
|
|
|
for(i=3;i<argc;i++) {
|
|
int err;
|
|
if (err=fix_scf(argv[i],argv[1],argv[2])) printf("Couldn't read %s (%d)\n",argv[i],err);
|
|
}
|
|
return 0;
|
|
}
|