/*
** author.c
**
**    Source for:
**	pirauthor
**	emblauthor
**	genbauthor
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
** Size of the line buffer handed to fgets() in main(); at most
** MAXLINE-1 characters are read per call.
*/
#define MAXLINE 100     /* maximum input line length*/

/*
** Per-format line prefixes.  Each of PIR, EMBL and GENBANK defines the
** same three names, so one of them is expected to be selected when the
** file is compiled.
**
**   entryPATTERN   prefix of the line that starts a new entry
**   authorPATTERN  prefix of the first line of an author list
**   contPATTERN    prefix of a continuation line of an author list
**
** main() compares these against the start of each input line using
** patmatch() and skips strlen(pattern) characters to reach the text.
*/
#ifdef PIR
char *entryPATTERN  = "ENTRY";
char *authorPATTERN = "   #Authors";
char *contPATTERN   = "           ";
#endif /*PIR*/
#ifdef EMBL
char *entryPATTERN  = "ID";
char *authorPATTERN = "RA";
char *contPATTERN   = "RA";
#endif /*EMBL*/
#ifdef GENBANK
char *entryPATTERN  = "LOCUS";
char *authorPATTERN = "  AUTHORS";
char *contPATTERN   = "         ";
#endif /*GENBANK*/

/*
** Name of the entry currently being read.  main() fills it with strncpy
** (up to 10 characters), so it is a fixed-width field that is NOT
** guaranteed to be NUL-terminated; parse() prints it with "%-10.10s",
** which never reads more than 10 characters.
*/
char entryName[10];

/*
** Non-zero when string L begins with string P.
** P is evaluated twice, so it must not have side effects.
*/
#define patmatch(L,P) (strncmp((L),(P),strlen(P))==0)

/*
** Number of elements in array A.  Only valid for true arrays, not
** pointers.  Not referenced in the code shown in this file.
*/
#define Number(A) ( sizeof(A) / sizeof((A)[0]))



static void parse(FILE *ofp, const char *line)
/*
** Split an author list into individual names and write one record per
** name to ofp.
**
** Each record is the current entryName, left-justified in a 10 column
** field, followed by a space, the name, and a newline.  The name is the
** surname together with its initials (not the surname alone): lower
** case letters are folded to upper case and every run of white space
** inside the name is collapsed to a single space.
**
** A name is taken to be:
**     text up to and including the first ',' or '.'
**     followed by any run of initials characters [-a-zA-Z.]
** and names are separated by white space and an optional ',' ';' or
** "and".  Names of fewer than two characters are dropped.
**
** If line is NULL or empty a single placeholder record ("BLOGGS") is
** written so that the entry still appears in the output.
**
** NOTE: "and" is matched as a bare three character prefix, so a name
** that itself begins with lower case "and" would lose those letters.
*/
{
    const char *name_start; /* first character of the current name */
    const char *scan;       /* working cursor through the current name */
    const char *last;       /* last character of the name to print */
    const char *p;
    int was_space;

    if (line == NULL || *line == '\0') {
        /* must force an entry */
        fprintf(ofp, "%-10.10s %s\n", entryName, "BLOGGS");
        return;
    }

    /*
    ** The <ctype.h> functions require a value representable as an
    ** unsigned char; plain char may be negative for 8-bit text, hence
    ** the casts below.
    */
    name_start = line;
    while (*name_start) {
        /* skip white space in front of the name */
        while (isspace((unsigned char)*name_start))
            name_start++;

        /* surname: up to and including the first ',' or '.' */
        scan = name_start;
        while (*scan && *scan != ',' && *scan != '.')
            scan++;
        if (*scan)
            scan++;

        /* initials: any run of [-a-zA-Z.] */
        while (*scan && (isalpha((unsigned char)*scan)
                         || *scan == '-' || *scan == '.'))
            scan++;

        /*
        ** The name to print ends here.  scan is always past the start
        ** of line at this point, so scan-1 stays inside the buffer.
        */
        last = scan - 1;

        /* skip white space, one name terminator, then more white space */
        while (isspace((unsigned char)*scan))
            scan++;
        if (*scan == ',' || *scan == ';')
            scan++;
        else if (strncmp(scan, "and", 3) == 0)
            scan += 3;
        while (isspace((unsigned char)*scan))
            scan++;

        /* only names of at least two characters are written */
        if (last > name_start) {
            fprintf(ofp, "%-10.10s ", entryName);
            was_space = 0;
            for (p = name_start; p <= last; p++) {
                unsigned char c = (unsigned char)*p;

                if (isspace(c)) {
                    /* collapse a run of white space to one blank */
                    if (!was_space)
                        putc(' ', ofp);
                    was_space = 1;
                } else {
                    /* toupper leaves non-lower-case characters alone */
                    putc(toupper(c), ofp);
                    was_space = 0;
                }
            }
            putc('\n', ofp);
        }

        name_start = scan;
    }
}






/******************************************************/
/* A small toolkit to support variable length strings */
/******************************************************/

/*
** A growable, NUL-terminated character buffer.
** Obtain one with create_str(), append with cat_str(), empty it for
** reuse with init_str() and release it with free_str().
*/
typedef struct {
    int length;     /* characters stored, not counting the terminator */
    int allocated;  /* bytes allocated for str; 0 = nothing allocated */
    char *str;      /* the text; NULL until the first cat_str() */
} STR;

static void init_str(STR *s)
/*
** Reset the string to empty.  Storage already allocated is kept so that
** it can be reused by later cat_str() calls.  A NULL s is ignored.
*/
{
    if (s == NULL)
        return;
    s->length = 0;
    if (s->allocated)
        s->str[0] = '\0';
}

static void free_str(STR *s)
/*
** Destroy string (reclaim memory).  Both the text and the STR itself
** are freed, so s must not be used afterwards.  A NULL s is ignored.
*/
{
    if (s == NULL)
        return;
    free(s->str);   /* free(NULL) is a no-op, so no guard is needed */
    free(s);
}

static void cat_str(STR *s, const char *t)
/*
** Append string t to string *s, growing the storage when required.
**
** Storage grows to one and a half times what is needed so that a run of
** small appends does not reallocate every time.  A NULL s or t is
** ignored.  The function has no way to report failure to its caller,
** so running out of memory prints a message on stderr and exits with
** status 1.
*/
{
    size_t used;      /* characters already held */
    size_t extra;     /* characters being appended */
    size_t need;      /* bytes required including the terminator */
    size_t capacity;
    char *grown;

    if (s == NULL || t == NULL)
        return;

    /* nothing allocated yet means nothing stored yet */
    used = s->allocated ? (size_t)s->length : 0;
    extra = strlen(t);
    need = used + extra + 1;

    if (need > (size_t)s->allocated) {
        /* create just that little bit more */
        capacity = need + need / 2;
        /*
        ** Keep the old pointer until the new one is known to be good:
        ** realloc leaves the original block untouched on failure.
        */
        grown = s->allocated ? realloc(s->str, capacity) : malloc(capacity);
        if (grown == NULL) {
            fprintf(stderr, "cat_str: out of memory\n");
            exit(1);
        }
        s->str = grown;
        s->allocated = (int)capacity;
    }

    /* the end is already known, so copy there instead of rescanning */
    memcpy(s->str + used, t, extra + 1);
    s->length = (int)(used + extra);
}


static STR *create_str(void)
/*
** Create a new, empty string with no storage allocated.
** Returns NULL if memory cannot be obtained.
*/
{
    STR *fresh = malloc(sizeof *fresh);

    if (fresh == NULL)
        return NULL;

    fresh->length = 0;
    fresh->allocated = 0;
    fresh->str = NULL;

    return fresh;
}





int main(int argc, char *argv[])
/*
** Usage: <progname> filein fileout
**
** Reads the flat file filein line by line and writes one record per
** author name to fileout (see parse()).  A line starting with
** entryPATTERN begins a new entry and supplies the entry name; a line
** starting with authorPATTERN begins an author list, which continues
** over following lines that start with contPATTERN.  The collected list
** is handed to parse() when the first line that is not part of it is
** seen, or when the input ends.
**
** Exit status: 0 on success, 2 on a usage error, 1 if a file cannot be
** opened, memory runs out, or the output file cannot be written.
*/
{
    FILE *ifp;
    FILE *ofp;
    int entries = 0;
    int status = 0;
    int write_failed;
    char line[MAXLINE];
    STR *authors;
    int author_mode;    /* non-zero while inside an author list */

#ifdef PIR
    char *progname = "pirauthor";
    int offset = 16; /* Entry name offset in Entry line */
#endif /*PIR*/
#ifdef EMBL
    char *progname = "emblauthor";
    int offset = 5; /* Entry name offset in Entry line */
#endif /*EMBL*/
#ifdef GENBANK
    char *progname = "genbauthor";
    int offset = 12; /* Entry name offset in Entry line */
#endif /*GENBANK*/

    printf("%s Version 1.0\n",progname);

    if (argc != 3) {
        fprintf(stderr,"Usage: %s filein fileout\n",progname);
        exit(2);
    }

    if ((ifp = fopen(argv[1],"r")) == NULL) {
        fprintf(stderr,"%s: cannot open input file %s\n",progname,argv[1]);
        exit(1);
    }
    if ((ofp = fopen(argv[2],"w")) == NULL) {
        fprintf(stderr,"%s: cannot open output file %s\n",progname,argv[2]);
        exit(1);
    }

    authors = create_str();
    if (authors == NULL) {
        fprintf(stderr,"%s: out of memory\n",progname);
        exit(1);
    }

    author_mode = 0;
    while (fgets(line,MAXLINE,ifp) != NULL) {
        /*
        ** Determine line type
        */
        if (author_mode && patmatch(line,contPATTERN)) {
            /* continuation of the current author list */
            cat_str(authors,line+strlen(contPATTERN));
        } else if (patmatch(line,authorPATTERN)) {
            /* first line of an author list */
            author_mode = 1;
            cat_str(authors,line+strlen(authorPATTERN));
        } else {
            if (author_mode) {
                parse(ofp,authors->str);
                /*
                ** Start the next author list of this entry from
                ** scratch; otherwise names already written would be
                ** written again with every later list.
                */
                init_str(authors);
            }
            author_mode = 0;
            if ( patmatch(line,entryPATTERN) ) {
                /*
                ** entryName is a fixed-width field: strncpy pads a
                ** short name with NULs and may leave a full-width one
                ** unterminated, which is what parse() expects.  Only
                ** copy when the line really reaches the name column.
                */
                if (strlen(line) > (size_t)offset)
                    strncpy(entryName,line+offset,sizeof entryName);
                else
                    memset(entryName,0,sizeof entryName);
                entries++;
                init_str(authors);
            }
        }
    }

    /* the input may end while an author list is still being collected */
    if (author_mode) parse(ofp,authors->str);

    free_str(authors);
    printf(" Number of entries = %d\n\n",entries);

    fclose(ifp);

    /* buffered output is only known to be safely written once closed */
    write_failed = ferror(ofp);
    if (fclose(ofp) != 0) write_failed = 1;
    if (write_failed) {
        fprintf(stderr,"%s: error writing output file %s\n",progname,argv[2]);
        status = 1;
    }

    return status;
}