118 lines
2.7 KiB
C
118 lines
2.7 KiB
C
/* emblaccess1 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#define MAXLINE 82 /* maximum input line length*/
|
|
|
|
/*
 * Program to create an index for the pir library in embl cdrom form.
 *
 * This one gets the accession numbers and entry names and writes them to a
 * file. This file is sorted on entry name, then the next program processes
 * the sorted file to add a number to each record that has a new entry name.
 * This file is then sorted on accession number. This file is then processed
 * to add the number of hits for each accession number: the first occurrence
 * for each number is given 1, the next 2 and so on. This file is then
 * processed to produce the final accession number target and hit files in
 * binary with the header.
 */
|
|
|
|
/*
 * Writes one record to fp for every accession number found in the text of an
 * accession line (second argument), each tagged with the entry name (third
 * argument). Returns the number of records written.
 */
int parseACCESSION(FILE *fp, char *, char *);
|
|
|
|
/* Line prefix that main() treats as the start of a new entry. */
char patternENTRY[] = "ID";
|
|
/* Line prefix that main() treats as the first accession line of an entry. */
char patternACCESSION[] = "AC";
|
|
/* Line prefix of follow-on accession lines; same text as patternACCESSION. */
char patternCONTINUE[] = "AC";
|
|
|
|
/* True when string L begins with string P. Note that P is evaluated twice. */
#define patmatch(L,P) (strncmp((L),(P),strlen(P))==0)
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
FILE *ifp;
|
|
FILE *ofp;
|
|
char line[MAXLINE];
|
|
char eName[10];
|
|
|
|
long entries = 0;
|
|
int leftChar = 5;
|
|
|
|
printf("emblaccess1 Version 1.0\n");
|
|
|
|
if (argc != 3) {
|
|
fprintf(stderr,"Usage: emblaccess1 filein fileout\n");
|
|
exit(2);
|
|
}
|
|
if ((ifp = fopen(argv[1],"r")) == NULL) {
|
|
fprintf(stderr,"emblaccess1: Cannot open input file %s\n",argv[1]);
|
|
exit(1);
|
|
}
|
|
if ((ofp = fopen(argv[2],"w")) == NULL) {
|
|
fprintf(stderr,"emblaccess1: Cannot open output file %s\n",argv[2]);
|
|
exit(1);
|
|
}
|
|
|
|
eName[0] = '\0';
|
|
while (fgets(line,MAXLINE,ifp)!=NULL) {
|
|
|
|
if (patmatch(line, patternENTRY)){
|
|
|
|
if (*eName)
|
|
printf(" No accession lines for %-10.10s\n",eName);
|
|
|
|
strncpy(eName,&line[leftChar],10);
|
|
entries++;
|
|
|
|
} else if (patmatch(line, patternACCESSION)){
|
|
|
|
if (parseACCESSION( ofp, &line[leftChar], eName) == 0)
|
|
printf(" Empty accession line !!\n");
|
|
|
|
/* Assuming we wont run into an ENTRY line !!!!!!!!!! */
|
|
while (fgets(line,MAXLINE,ifp)!=NULL){
|
|
|
|
if (patmatch(line, patternCONTINUE)){
|
|
|
|
if (parseACCESSION( ofp, &line[leftChar], eName) == 0)
|
|
printf(" Empty accession line !!\n");
|
|
|
|
} else
|
|
break;
|
|
}
|
|
eName[0] = '\0';
|
|
}
|
|
}
|
|
|
|
printf(" Number of entries = %ld\n\n",entries);
|
|
|
|
fclose(ifp);
|
|
fclose(ofp);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
 * Extract the accession numbers from the data part of an accession line.
 *
 * An accession number is a maximal run of alphanumeric characters; every
 * other character (blanks, ';', newline, ...) acts as a separator. For each
 * number one record is written to fp: the entry name left-justified in a
 * 10-character field (truncated to 10), a blank, and the accession number
 * left-justified in a field of at least 10 characters.
 *
 *   fp     output stream, open for writing
 *   line   NUL-terminated text to scan
 *   eName  name of the entry the accession numbers belong to
 *
 * Returns the number of accession numbers found (0 for an empty line).
 */
int parseACCESSION(FILE *fp, char *line, char *eName)
{
    char *s = line;
    int entries = 0;

    while (*s) {
        char *t;

        /*
         * Skip separators. The <ctype.h> functions require an argument that
         * is representable as unsigned char, hence the casts.
         */
        while (*s && !isalnum((unsigned char)*s))
            s++;

        if (*s == '\0')
            break;

        /* t marks the start of the accession number, s runs to its end */
        for (t = s; isalnum((unsigned char)*s); s++)
            ;

        entries++;
        /* a '*' precision consumes an int, not a ptrdiff_t */
        fprintf(fp, "%-10.10s %-10.*s\n", eName, (int)(s - t), t);
    }

    return entries;
}
|