/* ureadasn.c -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit) */ #ifdef NCBI #include #include #include /* NCBI toolkit :include: must be on lib path */ #include #include #define UREADASN #include "ureadseq.h" #pragma segment ureadasn /* this stuff is hacked up from tofasta.c of ncbitools */ #define kBaseAny 0 #define kBaseNucleic 1 #define kBaseAmino 2 typedef struct tofasta { Boolean idonly; short *seqnum; short whichSeq; char **seq, **seqid; long *seqlen; } FastaDat, PNTR FastaPtr; void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly, short whichSeq, short *seqnum, char **seq, char **seqid, long *seqlen) { SeqPortPtr spp; SeqIdPtr bestid; Uint1 repr, code, residue; CharPtr tmp, title; long outlen, outmax; char localid[256], *sp; /* !!! this may be called several times for a single sequence because SeqEntryExplore looks for parts and joins them... assume seq, seqid, seqlen may contain data (or NULL) */ if (bsp == NULL) return; repr = Bioseq_repr(bsp); if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return; (*seqnum)++; if (!(whichSeq == *seqnum || whichSeq == 0)) return; bestid = SeqIdFindBest(bsp->id, (Uint1)0); title = BioseqGetTitle(bsp); if (idonly) { sprintf(localid, " %d) ", *seqnum); tmp = localid + strlen(localid) - 1; } else { strcpy(localid, " "); tmp = localid; } tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT); tmp = StringMove(tmp, " "); StringNCpy(tmp, title, 200); /* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */ /* < seqid is fixed storage */ /* strcpy( *seqid, localid); */ /* < seqid is variable sized */ outmax = strlen(localid) + 3; if (*seqid == NULL) { *seqid = (char *)malloc(outmax); if (*seqid == NULL) return; strcpy(*seqid, localid); } else { outmax += strlen(*seqid) + 2; *seqid = (char *)realloc(*seqid, outmax); if (*seqid == NULL) return; if (!idonly) strcat(*seqid, "; "); strcat(*seqid, localid); } if (idonly) { strcat(*seqid, "\n"); return; } if (ISA_na(bsp->mol)) code = Seq_code_iupacna; else code = Seq_code_iupacaa; spp = SeqPortNew(bsp, 0, -1, 0, code); SeqPortSeek(spp, 0, SEEK_SET); sp = *seq; if (sp == NULL) { /* this is always true now !? */ outlen = 0; outmax = 500; sp = (char *)malloc(outmax); } else { outlen = strlen(sp); outmax = outlen + 500; sp = (char *)realloc(sp, outmax); } if (sp == NULL) return; while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) { if (outlen >= outmax) { outmax = outlen + 500; sp = (char *)realloc(sp, outmax); if (sp == NULL) return; } sp[outlen++] = residue; } sp = (char *)realloc(sp, outlen + 1); if (sp != NULL) sp[outlen] = '\0'; *seq = sp; *seqlen = outlen; SeqPortFree(spp); return; } static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent) { FastaPtr tfa; BioseqPtr bsp; if (!IS_Bioseq(sep)) return; bsp = (BioseqPtr)sep->data.ptrvalue; tfa = (FastaPtr)data; BioseqRawToRaw(bsp, tfa->idonly, tfa->whichSeq, tfa->seqnum, tfa->seq, tfa->seqid, tfa->seqlen); } void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq, short *seqnum, char **seq, char **seqid, long *seqlen) { FastaDat tfa; if (sep == NULL) return; tfa.idonly = idonly; tfa.seqnum = seqnum; tfa.whichSeq = whichSeq; tfa.seq = seq; tfa.seqid = seqid; tfa.seqlen = seqlen; SeqEntryExplore(sep, (Pointer)&tfa, SeqEntryRawseq); } char *listASNSeqs( const char *filename, const long skiplines, const short format, /* note: this is kASNseqentry or kASNseqset */ short *nseq, short *error) { AsnIoPtr aip = NULL; SeqEntryPtr the_set; AsnTypePtr atp, atp2; AsnModulePtr amp; Boolean inIsBinary = FALSE; /* damn, why can't asn routines test this? */ char *seq = NULL; char *seqid = NULL, stemp[256]; long seqlen; int i, count; *nseq = 0; *error = 0; /* asn dictionary setups */ /*fprintf(stderr,"listASNSeqs: SeqEntryLoad\n");*/ if (!SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */ amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */ if (amp == NULL) goto errxit; atp = AsnFind("Bioseq-set"); /* get the initial type pointers */ if (atp == NULL) goto errxit; atp2 = AsnFind("Bioseq-set.seq-set.E"); if (atp2 == NULL) goto errxit; /*fprintf(stderr,"listASNSeqs: AsnIoOpen\n");*/ /* open the ASN.1 input file in the right mode */ /* !!!! THIS FAILS when filename has MAC PATH (& other paths?) * (:folder:filename) */ if ((aip = AsnIoOpen(filename, inIsBinary ? "rb" : "r")) == NULL) goto errxit; for (i = 0; i < skiplines; i++) fgets(stemp, 255, aip->fp); /* this may mess up asn routines... */ if (!ErrSetLog("stderr")) goto errxit; else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */ if (format == kASNseqentry) { /* read one Seq-entry */ /*fprintf(stderr,"listASNSeqs: SeqEntryAsnRead\n");*/ the_set = SeqEntryAsnRead(aip, NULL); SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen); if (seq) free(seq); seq = NULL; SeqEntryFree(the_set); } else { /* read Seq-entry's from a Bioseq-set */ count = 0; /*fprintf(stderr,"listASNSeqs: AsnReadId\n");*/ while ((atp = AsnReadId(aip, amp, atp)) != NULL) { if (atp == atp2) { /* top level Seq-entry */ the_set = SeqEntryAsnRead(aip, atp); SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen); SeqEntryFree(the_set); if (seq) free(seq); seq = NULL; } else AsnReadVal(aip, atp, NULL); count++; } } AsnIoClose(aip); *error = 0; return seqid; errxit: AsnIoClose(aip); if (seqid) free(seqid); *error = eASNerr; return NULL; } char *readASNSeq( const short whichEntry, const char *filename, const long skiplines, const short format, /* note: this is kASNseqentry or kASNseqset */ long *seqlen, short *nseq, short *error, char **seqid) { AsnIoPtr aip = NULL; SeqEntryPtr the_set; AsnTypePtr atp, atp2; AsnModulePtr amp; Boolean inIsBinary = FALSE; /* damn, why can't asn routines test this? */ char *seq, stemp[200]; int i, count; *seqlen = 0; *nseq = 0; *error = 0; seq = NULL; /*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/ /* asn dictionary setups */ if (!SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */ amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */ if (amp == NULL) goto errxit; atp = AsnFind("Bioseq-set"); /* get the initial type pointers */ if (atp == NULL) goto errxit; atp2 = AsnFind("Bioseq-set.seq-set.E"); if (atp2 == NULL) goto errxit; /* open the ASN.1 input file in the right mode */ /*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/ if ((aip = AsnIoOpen(filename, inIsBinary ? "rb" : "r")) == NULL) goto errxit; for (i = 0; i < skiplines; i++) fgets(stemp, 255, aip->fp); /* this may mess up asn routines... */ if (!ErrSetLog("stderr")) goto errxit; else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */ seq = NULL; if (format == kASNseqentry) { /* read one Seq-entry */ /*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/ the_set = SeqEntryAsnRead(aip, NULL); SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen); SeqEntryFree(the_set); goto goodexit; } else { /* read Seq-entry's from a Bioseq-set */ count = 0; /*fprintf(stderr,"readASNseq: AsnReadId\n");*/ while ((atp = AsnReadId(aip, amp, atp)) != NULL) { if (atp == atp2) { /* top level Seq-entry */ the_set = SeqEntryAsnRead(aip, atp); SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen); SeqEntryFree(the_set); if (*nseq >= whichEntry) goto goodexit; } else AsnReadVal(aip, atp, NULL); count++; } } goodexit: AsnIoClose(aip); *error = 0; return seq; errxit: AsnIoClose(aip); *error = eASNerr; if (seq) free(seq); return NULL; } #endif /*NCBI*/