staden-lg/src/indexseqlibs/author.c

248 lines
5.1 KiB
C

/*
** author.c
**
** Source for:
** pirauthor
** emblauthor
** genbauthor
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
** Size of the line buffer main() passes to fgets(); an input line longer
** than MAXLINE-1 characters is therefore delivered in several pieces.
*/
#define MAXLINE 100 /* maximum input line length*/
/*
** Line prefixes for the library format selected at compile time by
** defining PIR, EMBL or GENBANK:
**   entryPATTERN  - line that starts a new entry
**   authorPATTERN - first line of an author list
**   contPATTERN   - continuation line; only tested by main() while an
**                   author list is being collected
*/
#ifdef PIR
char *entryPATTERN = "ENTRY";
char *authorPATTERN = " #Authors";
char *contPATTERN = " ";
#endif /*PIR*/
#ifdef EMBL
char *entryPATTERN = "ID";
char *authorPATTERN = "RA";
char *contPATTERN = "RA";
#endif /*EMBL*/
#ifdef GENBANK
char *entryPATTERN = "LOCUS";
char *authorPATTERN = " AUTHORS";
char *contPATTERN = " ";
#endif /*GENBANK*/
/*
** Name of the entry currently being read. main() fills it with strncpy()
** using a count of 10, so it is NOT NUL-terminated when all 10 characters
** are used; it is printed with a "%-10.10s" conversion for that reason.
*/
char entryName[10];
/*
** patmatch(L,P): non-zero when string L begins with string P.
** Note that P is evaluated twice.
*/
#define patmatch(L,P) (strncmp((L),(P),strlen(P))==0)
/*
** Number(A): element count of A, which must be a true array (not a
** pointer). Not referenced by the code visible in this file.
*/
#define Number(A) ( sizeof(A) / sizeof((A)[0]))
static void parse(FILE *ofp, char *line)
/*
** Split an author list into individual names and write one output line
** per name to ofp.
**
** ofp  - stream the index lines are written to
** line - NUL-terminated author list; may be NULL or empty
**
** Each output line is the current entryName (left-justified, padded or
** truncated to 10 characters), one space, then the name in upper case with
** every run of white space collapsed to a single space. The name runs from
** the start of the surname to the end of the initials that follow it, so
** the initials are part of the output.
**
** A NULL or empty list produces a single placeholder line with the
** author "BLOGGS".
**
** All <ctype.h> calls receive an unsigned char value: passing a negative
** plain char (any byte >= 0x80 where char is signed) is undefined.
*/
{
    char *name_start;

    if (line == NULL || *line == '\0') {
        /* must force an entry */
        fprintf(ofp,"%-10.10s %s\n",entryName,"BLOGGS");
        return;
    }

    name_start = line;
    while (*name_start) {
        char *surname_end;   /* one past the first ',' or '.' of the name */
        char *name_end;      /* where the next name starts */
        char *initials_end;  /* last character that belongs to this name */
        int was_space;

        /* skip white space */
        while (isspace((unsigned char)*name_start))
            name_start++;

        /* skip to first "," or "." and step over it */
        surname_end = name_start;
        while (*surname_end && *surname_end != ',' && *surname_end != '.')
            surname_end++;
        if (*surname_end)
            surname_end++;

        /* skip over initials [-a-zA-Z.]* */
        name_end = surname_end;
        while (*name_end &&
               (isalpha((unsigned char)*name_end) ||
                *name_end == '-' || *name_end == '.'))
            name_end++;
        initials_end = name_end - 1;

        /* skip over white space */
        while (isspace((unsigned char)*name_end))
            name_end++;

        /* skip over name terminators "," ";" "and" */
        if (*name_end == ',' || *name_end == ';')
            name_end++;
        else if (strncmp(name_end, "and", 3) == 0)
            name_end += 3;

        /* skip over trailing white space */
        while (isspace((unsigned char)*name_end))
            name_end++;

        /* names of a single character (or nothing at all) are dropped */
        if (initials_end - name_start > 0) {
            fprintf(ofp,"%-10.10s ",entryName);
            was_space = 0;
            for (; name_start <= initials_end; name_start++) {
                unsigned char c = (unsigned char)*name_start;

                if (isspace(c)) {
                    /* emit only the first blank of a run */
                    if (!was_space)
                        putc(' ', ofp);
                    was_space = 1;
                } else {
                    putc(islower(c) ? toupper(c) : c, ofp);
                    was_space = 0;
                }
            }
            putc('\n', ofp);
        }

        name_start = name_end;
    }
}
/******************************************************/
/* A small toolkit to support variable length strings */
/******************************************************/
/*
** Growable string. create_str() starts it with no buffer; cat_str()
** allocates and enlarges the buffer as text is appended.
*/
typedef struct {
int length; /* characters held in str, not counting the terminating NUL */
int allocated; /* bytes allocated for str; 0 until the first allocation */
char *str; /* NUL-terminated text; NULL while allocated is 0 */
} STR;
static void init_str(STR *s)
/*
** Reset *s to the empty string. Any buffer already owned by *s is kept
** for reuse; only its first byte is overwritten.
*/
{
    /* There is no buffer to terminate until something has been allocated. */
    if (s->allocated != 0) {
        *s->str = '\0';
    }
    s->length = 0;
}
static void free_str(STR *s)
/*
** Release a string: first its text buffer, then the STR record itself.
** s must not be used afterwards.
*/
{
    char *buffer = s->str;

    free(buffer);
    free(s);
}
static void cat_str(STR *s, char *t)
/*
** Append string t to string *s, enlarging the buffer when necessary.
**
** s - string to extend
** t - NUL-terminated text to append; must not point into s->str
**
** The buffer grows to one and a half times the size actually needed so
** that a series of appends does not reallocate every time.
**
** Running out of memory is fatal: a message is written to stderr and the
** program exits with status 1.
*/
{
    /* A string that has never been allocated is empty by definition. */
    size_t old_len = s->allocated ? (size_t)s->length : 0;
    size_t add_len = strlen(t);
    size_t need = old_len + add_len + 1; /* +1 for the terminating NUL */

    if (need > (size_t)s->allocated) {
        /* create just that little bit more */
        size_t new_size = need + need / 2;
        /* Keep the old pointer until realloc is known to have succeeded. */
        char *grown = realloc(s->allocated ? s->str : NULL, new_size);

        if (grown == NULL) {
            fprintf(stderr, "cat_str: out of memory\n");
            exit(1);
        }
        s->str = grown;
        s->allocated = (int)new_size;
    }

    /* The length is tracked, so copy straight to the end (NUL included). */
    memcpy(s->str + old_len, t, add_len + 1);
    s->length = (int)(old_len + add_len);
}
static STR *create_str(void)
/*
** Create a new, empty string with no buffer allocated yet.
**
** Returns a pointer the caller releases with free_str(). Never returns
** NULL: if memory cannot be obtained a message is written to stderr and
** the program exits with status 1.
*/
{
    STR *fresh = malloc(sizeof *fresh);

    if (fresh == NULL) {
        fprintf(stderr, "create_str: out of memory\n");
        exit(1);
    }
    fresh->length = 0;
    fresh->allocated = 0;
    fresh->str = NULL;
    return fresh;
}
int main(int argc, char *argv[])
/*
** Read the sequence library file argv[1] and write one line per author
** name to argv[2], each prefixed with the name of the entry it came from.
**
** Lines are classified by prefix: an entry line sets entryName, an author
** line opens an author list, and continuation lines extend it. The list is
** handed to parse() as soon as a line that does not belong to it is read,
** and once more at end of file if a list is still open.
**
** Exit status: 0 on success, 2 on a usage error, 1 when a file cannot be
** opened or the output file cannot be written completely.
*/
{
    FILE *ifp;
    FILE *ofp;
    int entries = 0;
    char line[MAXLINE];
    STR *authors;
    int author_mode; /* non-zero while an author list is being collected */
#ifdef PIR
    char *progname = "pirauthor";
    int offset = 16; /* Entry name offset in Entry line */
#endif /*PIR*/
#ifdef EMBL
    char *progname = "emblauthor";
    int offset = 5; /* Entry name offset in Entry line */
#endif /*EMBL*/
#ifdef GENBANK
    char *progname = "genbauthor";
    int offset = 12; /* Entry name offset in Entry line */
#endif /*GENBANK*/

    printf("%s Version 1.0\n",progname);
    if (argc != 3) {
        fprintf(stderr,"Usage: %s filein fileout\n",progname);
        exit(2);
    }
    if ((ifp = fopen(argv[1],"r")) == NULL) {
        fprintf(stderr,"%s: cannot open input file %s\n",progname,argv[1]);
        exit(1);
    }
    if ((ofp = fopen(argv[2],"w")) == NULL) {
        fprintf(stderr,"%s: cannot open output file %s\n",progname,argv[2]);
        exit(1);
    }

    authors = create_str();
    author_mode = 0;
    while (fgets(line,MAXLINE,ifp) != NULL) {
        /*
        ** Determine line type
        */
        if (author_mode && patmatch(line,contPATTERN)) {
            cat_str(authors,line+strlen(contPATTERN));
        } else if (patmatch(line,authorPATTERN)) {
            author_mode = 1;
            cat_str(authors,line+strlen(authorPATTERN));
        } else {
            if (author_mode) {
                parse(ofp,authors->str);
                /*
                ** Start the next author list of this entry from scratch;
                ** otherwise every later list would emit the earlier
                ** names again.
                */
                init_str(authors);
            }
            author_mode = 0;
            if (patmatch(line,entryPATTERN)) {
                /*
                ** Only read the name when the line actually reaches the
                ** name column; beyond the terminator the buffer holds
                ** stale bytes from earlier reads.
                */
                if (strlen(line) > (size_t)offset)
                    strncpy(entryName,line+offset,sizeof entryName);
                else
                    entryName[0] = '\0';
                entries++;
                init_str(authors);
            }
        }
    }
    /* The input may end while an author list is still being collected. */
    if (author_mode)
        parse(ofp,authors->str);

    free_str(authors);
    printf(" Number of entries = %d\n\n",entries);
    fclose(ifp);
    /* Buffered output is flushed here, so this is where write errors show. */
    if (fclose(ofp) != 0) {
        fprintf(stderr,"%s: error writing output file %s\n",progname,argv[2]);
        return 1;
    }
    return 0;
}