247 lines
5.1 KiB
C
247 lines
5.1 KiB
C
/*
|
|
** author.c
|
|
**
|
|
** Source for:
|
|
** pirauthor
|
|
** emblauthor
|
|
** genbauthor
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#define MAXLINE 100 /* maximum input line length*/
|
|
|
|
#ifdef PIR
|
|
char *entryPATTERN = "ENTRY";
|
|
char *authorPATTERN = " #Authors";
|
|
char *contPATTERN = " ";
|
|
#endif /*PIR*/
|
|
#ifdef EMBL
|
|
char *entryPATTERN = "ID";
|
|
char *authorPATTERN = "RA";
|
|
char *contPATTERN = "RA";
|
|
#endif /*EMBL*/
|
|
#ifdef GENBANK
|
|
char *entryPATTERN = "LOCUS";
|
|
char *authorPATTERN = " AUTHORS";
|
|
char *contPATTERN = " ";
|
|
#endif /*GENBANK*/
|
|
|
|
char entryName[10];
|
|
|
|
#define patmatch(L,P) (strncmp((L),(P),strlen(P))==0)
|
|
|
|
#define Number(A) ( sizeof(A) / sizeof((A)[0]))
|
|
|
|
|
|
|
|
static void parse(FILE *ofp, char *line)
|
|
/*
|
|
** Pick out all Surnames
|
|
*/
|
|
{
|
|
char *name_start, *surname_end, *name_end, *initials_end;
|
|
char *end; /* end of entry */
|
|
if (line != NULL && *line) {
|
|
int was_space;
|
|
name_start = line;
|
|
while (*name_start) {
|
|
/* skip white space */
|
|
for(;isspace(*name_start);name_start++);
|
|
/* skip to first "," or "." */
|
|
for(surname_end=name_start;
|
|
*surname_end && *surname_end!=',' && *surname_end!='.';
|
|
surname_end++);
|
|
/* start looking for next surname */
|
|
if (*surname_end) surname_end++;
|
|
/* skip over initials [-a-zA-Z.]*/
|
|
for(name_end=surname_end;
|
|
*name_end && (isalpha(*name_end) || *name_end=='-' || *name_end=='.');
|
|
name_end++);
|
|
initials_end = name_end-1;
|
|
/* skip over white space */
|
|
for(;isspace(*name_end);name_end++);
|
|
/* skip over name terminators "," ";" "and" */
|
|
if (*name_end==',' || *name_end==';') name_end++;
|
|
else if (strncmp(name_end,"and",3)==0) name_end+=3;
|
|
/* skip over trailing white space */
|
|
for(;isspace(*name_end);name_end++);
|
|
|
|
/* nibble back on t to find end of Surname*/
|
|
if (*surname_end=='.') surname_end-=3;
|
|
else if (*surname_end==',') surname_end--;
|
|
|
|
end = initials_end;
|
|
if (end-name_start > 0) {
|
|
fprintf(ofp,"%-10.10s ",entryName);
|
|
was_space = 0;
|
|
for(;name_start<=end;name_start++) {
|
|
if (isspace(*name_start)) {
|
|
if (! was_space) putc(' ',ofp);
|
|
was_space = 1;
|
|
} else {
|
|
if (islower(*name_start))
|
|
putc(toupper(*name_start),ofp);
|
|
else
|
|
putc(*name_start,ofp);
|
|
was_space = 0;
|
|
}
|
|
}
|
|
putc('\n',ofp);
|
|
}
|
|
name_start = name_end;
|
|
}
|
|
} else {
|
|
/* must force an entry */
|
|
fprintf(ofp,"%-10.10s %s\n",entryName,"BLOGGS");
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/******************************************************/
|
|
/* A small toolkit to support variable length strings */
|
|
/******************************************************/
|
|
|
|
typedef struct {
|
|
int length;
|
|
int allocated;
|
|
char *str;
|
|
} STR;
|
|
|
|
static void init_str(STR *s)
|
|
/*
|
|
** initialise the string
|
|
*/
|
|
{
|
|
s->length = 0;
|
|
if(s->allocated) s->str[0] = '\0';
|
|
}
|
|
|
|
static void free_str(STR *s)
|
|
/*
|
|
** Destroy string (reclaim memory)
|
|
*/
|
|
{
|
|
free(s->str);
|
|
free(s);
|
|
}
|
|
|
|
static void cat_str(STR *s, char *t)
|
|
/*
|
|
** Join string t to string *s
|
|
*/
|
|
{
|
|
int need;
|
|
|
|
need = s->length + strlen(t) + 1;
|
|
if (need > s->allocated) {
|
|
if ( s->allocated )
|
|
/* create just that little bit more */
|
|
s->str = (char *)realloc(s->str,need+need/2);
|
|
else {
|
|
/* allocating for the first time */
|
|
s->str = (char *)malloc(need+need/2);
|
|
s->str[0] = '\0';
|
|
}
|
|
s->allocated = need+need/2;
|
|
}
|
|
s->length = need-1;
|
|
strcat(s->str,t);
|
|
|
|
}
|
|
|
|
|
|
static STR *create_str()
|
|
/*
|
|
** Create a new string
|
|
*/
|
|
{
|
|
STR *new;
|
|
|
|
new = (STR *) malloc(sizeof(STR));
|
|
new->length = 0;
|
|
new->allocated = 0;
|
|
new->str = NULL;
|
|
|
|
return new;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
FILE *ifp;
|
|
FILE *ofp;
|
|
int entries = 0;
|
|
char line[MAXLINE];
|
|
STR *authors;
|
|
int author_mode;
|
|
|
|
#ifdef PIR
|
|
char *progname = "pirauthor";
|
|
int offset = 16; /* Entry name offset in Entry line */
|
|
#endif /*PIR*/
|
|
#ifdef EMBL
|
|
char *progname = "emblauthor";
|
|
int offset = 5; /* Entry name offset in Entry line */
|
|
#endif /*EMBL*/
|
|
#ifdef GENBANK
|
|
char *progname = "genbauthor";
|
|
int offset = 12; /* Entry name offset in Entry line */
|
|
#endif /*GENBANK*/
|
|
|
|
printf("%s Version 1.0\n",progname);
|
|
|
|
if (argc != 3) {
|
|
fprintf(stderr,"Usage: %s filein fileout\n",progname);
|
|
exit(2);
|
|
}
|
|
|
|
if ((ifp = fopen(argv[1],"r")) == NULL) {
|
|
fprintf(stderr,"%s: cannot open input file %s\n",progname,argv[1]);
|
|
exit(1);
|
|
}
|
|
if ((ofp = fopen(argv[2],"w")) == NULL) {
|
|
fprintf(stderr,"%s: cannot open output file %s\n",progname,argv[2]);
|
|
exit(1);
|
|
}
|
|
|
|
authors = create_str();
|
|
author_mode = 0;
|
|
while (fgets(line,MAXLINE,ifp) != NULL) {
|
|
/*
|
|
** Determine line type
|
|
*/
|
|
if (author_mode && patmatch(line,contPATTERN)) {
|
|
cat_str(authors,line+strlen(contPATTERN));
|
|
} else if (patmatch(line,authorPATTERN)) {
|
|
author_mode = 1;
|
|
cat_str(authors,line+strlen(authorPATTERN));
|
|
} else {
|
|
if (author_mode) parse(ofp,authors->str);
|
|
author_mode = 0;
|
|
if ( patmatch(line,entryPATTERN) ) {
|
|
strncpy(entryName,line+offset,10);
|
|
entries++;
|
|
init_str(authors);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
free_str(authors);
|
|
printf(" Number of entries = %d\n\n",entries);
|
|
|
|
fclose(ifp);
|
|
fclose(ofp);
|
|
|
|
return 0;
|
|
}
|