#include #include #include #include #include "process.h" #include "bapIO.h" #include "list.h" #include "misc.h" /* ** Bap IO */ static BapIO io; static int cur_gel_index; static int cur_contig_index; static List *file_details() { return build_list( atom_str(db_files), build_list( atom_str(db_files_arch), atom_str(io.ar_file), nil), build_list( atom_str(db_files_rel), atom_str(io.rl_file), nil), build_list( atom_str(db_files_seq), atom_str(io.sq_file), nil), build_list( atom_str(db_files_tag), atom_str(io.tg_file), nil), build_list( atom_str(db_files_com), atom_str(io.cc_file), nil), nil ); } static List *db_details() { return build_list( build_list( atom_str(db_max_db_size), atom_int(io.max_db_size), nil), build_list( atom_str(db_max_gels), atom_int(io.max_gels), nil), build_list( atom_str(db_max_gel_length), atom_int(io.max_gel_length), nil), build_list( atom_str(db_data_class), atom_int(io.data_class), nil), build_list( atom_str(db_num_gels), atom_int(io.num_gels), nil), build_list( atom_str(db_num_contigs), atom_int(io.num_contigs), nil), nil); } void xdap_late_open_for_read(List *l) /* ** */ { char *name; char *version; name = assoc(l,db_name); if (! name) crash("No database name specified\n"); version = assoc(l,db_version); if (! version) crash("No version specified\n"); bap_open_for_read(&io,name,version); cur_gel_index = 1; cur_contig_index = 1; } List *xdap_late_read_header() /* ** */ { List *files; List *details; files = file_details(); details = db_details(); return join_list( build_list( atom_str(db_from), nil), details, build_list( files, nil), nil); } void xdap_late_close(List *l) /* ** Close all relevant files */ { bap_close_files(&io); } List *xdap_late_read_gel_data() { List *gel_details; if (cur_gel_index > io.num_gels) gel_details = nil; else { bap_ar_file_rec ar_line; bap_rl_file_rec rl_line; bap_sq_file_rec sq_line; bap_tg_file_rec tg_line; int index; char name[17]; char *seq; int length; int comp; int pos; int l_nbr; int r_nbr; sq_line = (char *) malloc(io.max_gel_length+1); bap_read_ar(&io,cur_gel_index,&ar_line); bap_read_rl(&io,cur_gel_index,&rl_line); bap_read_tg(&io,cur_gel_index,&tg_line); bap_read_sq(&io,cur_gel_index,sq_line); index = cur_gel_index; f2cstr(ar_line.lines.name,BAP_FILE_NAME_LENGTH,name, (size_t)BAP_FILE_NAME_LENGTH); length = abs(rl_line.lines.length); comp = (rl_line.lines.length < 0); seq = sq_line; seq[length] = '\0'; pos = rl_line.lines.rel_pos; l_nbr = rl_line.lines.left_nbr; r_nbr = rl_line.lines.right_nbr; gel_details = build_list( atom_str(gel_rec), build_list( atom_str(gel_index), atom_int(index), nil), build_list( atom_str(gel_name), atom_str(name), nil), build_list( atom_str(gel_length), atom_int(length), nil), build_list( atom_str(gel_comp), atom_int(comp), nil), build_list( atom_str(gel_seq), atom_str(seq), nil), build_list( atom_str(gel_pos), atom_int(pos), nil), build_list( atom_str(gel_l_nbr), atom_int(l_nbr), nil), build_list( atom_str(gel_r_nbr), atom_int(r_nbr), nil), nil); /* Get raw data details */ if (tg_line.lines.comment) { List *raw_data_details; char *rd; int rd_length; int rd_cut; int rd_ulen; char rd_type[5]; char rd_file[19]; rd = bap_read_comment(&io, tg_line.lines.comment); sscanf(rd,"%6d%6d%6d%*s",&rd_length, &rd_cut, &rd_ulen); f2cstr(&rd[18],4,rd_type,(size_t)4); f2cstr(&rd[22],18,rd_file,(size_t)18); raw_data_details = build_list( build_list( atom_str(gel_rd_length), atom_int(rd_length), nil), build_list( atom_str(gel_rd_cut), atom_int(rd_cut), nil), build_list( atom_str(gel_rd_ulen), atom_int(rd_ulen), nil), build_list( atom_str(gel_rd_type), atom_str(rd_type), nil), build_list( atom_str(gel_rd_file), atom_str(rd_file), nil), nil); join_list (gel_details, raw_data_details, nil); } /* ** Process tags, maintaining separate lists for ** (a) special tags ** (b) annotation ** (c) edits */ { List *specials; List *notes; List *edits; int_4 next; specials = nil; notes = build_list( atom_str(gel_annotation), nil); edits = build_list( atom_str(gel_edits), nil); while (tg_line.lines.next) { next = tg_line.lines.next; bap_read_tg(&io,next,&tg_line); if (strncmp(tg_line.lines.type.c,"*LC*",4)==0) { if (tg_line.lines.comment) { List *lc; lc = build_list( atom_str(gel_l_cut_seq), atom_str(bap_read_comment(&io,tg_line.lines.comment)), nil); if (isNil(specials)) specials = build_list(lc,nil); else specials = join_list(specials,build_list(lc,nil),nil); } } else if (strncmp(tg_line.lines.type.c,"*RC*",4)==0) { if (tg_line.lines.comment) { List *rc; rc = build_list( atom_str(gel_r_cut_seq), atom_str(bap_read_comment(&io,tg_line.lines.comment)), nil); if (isNil(specials)) specials = build_list(rc,nil); else specials = join_list(specials,build_list(rc,nil),nil); } } else if (strncmp(tg_line.lines.type.c,"*",1)==0) { List *ed; char base[2]; base[0] = tg_line.lines.type.c[3]; base[1] = '\0'; ed = build_list( build_list( atom_str(gel_ed_op), atom_str( (strncmp(tg_line.lines.type.c,"*IN",3))==0 ? gel_ed_insert : gel_ed_delete), nil), build_list( atom_str(gel_ed_base), atom_str(base), nil), build_list( atom_str(gel_ed_base_pos), atom_int( tg_line.lines.position ), nil), nil); edits = join_list(edits, build_list(ed,nil),nil); } else { List *an; char type[5]; char *com; strncpy(type,tg_line.lines.type.c,4); type[4]='\0'; com = bap_read_comment(&io,tg_line.lines.comment); an = build_list( build_list( atom_str(gel_an_pos), atom_int(tg_line.lines.position), nil), build_list( atom_str(gel_an_len), atom_int(tg_line.lines.length), nil), build_list( atom_str(gel_an_type), atom_str(type), nil), (com == NULL) ? nil : build_list( atom_str(gel_an_comment), atom_str(com), nil), nil); notes = join_list(notes,build_list(an,nil),nil); } } if (isNil(specials)) gel_details = join_list(gel_details,build_list(edits,nil),build_list(notes,nil),nil); else gel_details = join_list(gel_details,specials,build_list(edits,nil),build_list(notes,nil),nil); } cur_gel_index++; free(sq_line); } return gel_details; } List *xdap_late_read_contig_data() { List *contig_details; if (cur_contig_index > io.num_contigs) contig_details = nil; else { bap_rl_file_rec rl_line; int length; int index; int left_end; int right_end; index = io.max_gels-cur_contig_index; bap_read_rl(&io,index,&rl_line); length = rl_line.clines.length; left_end = rl_line.clines.left_end; right_end = rl_line.clines.right_end; contig_details = build_list( atom_str(contig_rec), build_list( atom_str(contig_index), atom_int(index), nil), build_list( atom_str(contig_length), atom_int(length), nil), build_list( atom_str(contig_left_end), atom_int(left_end), nil), build_list( atom_str(contig_right_end), atom_int(right_end), nil), nil); cur_contig_index++; } return contig_details; } void xdap_late_open_for_write(List *l) /* ** */ { char *name; char *version; name = assoc(l,db_name); if (! name) crash("No database name specified\n"); version = assoc(l,db_version); if (! version) crash("No version specified\n"); bap_open_for_write(&io,name,version); cur_gel_index = 1; cur_contig_index = 1; } void xdap_late_write_header(List *l) { char *a; bap_rl_file_rec rl_dbheader; bap_rl_file_rec rl_header; bap_ar_file_rec ar_line; bap_sq_file_rec sq_line; bap_tg_file_rec tg_line; bap_cc_file_rec cc_line; if ( (a = assoc(l,db_data_class)) == NULL) crash("Sequence type (DNA or protein) not specified\n"); else io.data_class = atoi(a); if ( (a = assoc(l,db_num_gels)) == NULL) crash("Number of gels not specified\n"); else io.num_gels = atoi(a); if ( (a = assoc(l,db_num_contigs)) == NULL) crash("Number of contigs not specified\n"); else io.num_contigs = atoi(a); if ( (a = assoc(l,db_max_gel_length)) == NULL) crash("Maximum length of a gel reading not specified\n"); else io.max_gel_length = atoi(a); if ( (a = assoc(l,db_max_gels)) == NULL) crash("Maximum number of gels not specified\n"); else io.max_gels = atoi(a); if ( (a = assoc(l,db_max_db_size)) == NULL ) { io.max_db_size = 1000; } else { io.max_db_size = atoi(a); } rl_dbheader.dbheader.maxdb = io.max_db_size; rl_dbheader.dbheader.idbsiz = io.max_gels; rl_dbheader.dbheader.maxgel = io.max_gel_length; rl_dbheader.dbheader.idm = io.data_class; bap_write_rl(&io,bap_rl_dbheader_rec(&io),&rl_dbheader); rl_header.header.num_gels = io.num_gels; rl_header.header.num_contigs = io.num_contigs; bap_write_rl(&io,bap_rl_header_rec(&io),&rl_header); bap_write_ar(&io,1,&ar_line); sq_line = (bap_sq_file_rec) malloc(io.max_gel_length); bap_write_sq(&io,1,sq_line); free(sq_line); tg_line.header.free_list = 0; tg_line.header.count = bap_tg_header_rec(&io); bap_write_tg(&io,bap_tg_header_rec(&io),&tg_line); cc_line.header.free_list = 0; cc_line.header.count = bap_cc_header_rec(&io); bap_write_cc(&io,bap_cc_header_rec(&io),&cc_line); } void xdap_late_write_gel_data(List *l) { char *a; int i; bap_rl_file_rec rl_line; bap_ar_file_rec ar_line; bap_sq_file_rec sq_line; bap_tg_file_rec tg_line; List *edits; List *notes; sq_line = (bap_sq_file_rec) malloc(io.max_gel_length); /* ** Relationship line */ if ( (a = assoc(l,gel_l_nbr)) == NULL) crash("No left neighbour for gel %d\n",cur_gel_index); else rl_line.lines.left_nbr = atoi(a); if ( (a = assoc(l,gel_r_nbr)) == NULL) crash("No right neighbour for gel %d\n",cur_gel_index); else rl_line.lines.right_nbr = atoi(a); if ( (a = assoc(l,gel_length)) == NULL) crash("Length of gel reading not specified for gel %d\n", cur_gel_index); else rl_line.lines.length = atoi(a); if ( (a = assoc(l,gel_comp)) == NULL) crash("Not known if gel %d complemented\n", cur_gel_index); else { i = atoi(a); if (i) rl_line.lines.length = -rl_line.lines.length; } if ( (a = assoc(l,gel_pos)) == NULL) crash("No position in contig specified for gel %d\n", cur_gel_index); else rl_line.lines.rel_pos = atoi(a); bap_write_rl(&io,cur_gel_index,&rl_line); /* ** Archive line */ if ( (a = assoc(l,gel_name)) == NULL) crash("No gel name specified for gel %d\n", cur_gel_index); else c2fstr(a,BAP_FILE_NAME_LENGTH,ar_line.lines.name, (size_t)BAP_FILE_NAME_LENGTH); bap_write_ar(&io,cur_gel_index,&ar_line); /* ** Sequence */ if ( (a = assoc(l,gel_seq)) == NULL) crash("No sequence for gel %d\n", cur_gel_index); else c2fstr(a,strlen(a), sq_line, (size_t)io.max_gel_length); bap_write_sq(&io,cur_gel_index,sq_line); /* ** Initialise tag fields */ tg_line.lines.next = 0; tg_line.lines.length = 0; tg_line.lines.comment = 0; tg_line.lines.position = 0; bap_write_tg(&io, cur_gel_index, &tg_line); /* ** Raw data */ if ( (a = assoc(l,gel_rd_length)) != NULL) { int length; int cut; int ulen; char type[4]; char file[18]; char s[41]; length = atoi(a); if ( (a = assoc(l,gel_rd_cut)) == NULL) crash ("No raw data left cutoff specified for gel %d\n",cur_gel_index); else cut = atoi(a); if ( (a = assoc(l,gel_rd_ulen)) == NULL) crash ("No raw data length specified for gel %d\n",cur_gel_index); else ulen = atoi(a); if ( (a = assoc(l,gel_rd_type)) == NULL) crash ("No raw data file type specified for gel %d\n",cur_gel_index); else c2fstr(a,strlen(a),type,sizeof(type)); if ( (a = assoc(l,gel_rd_file)) == NULL) crash ("No raw data file specified for gel %d\n",cur_gel_index); else c2fstr(a,strlen(a),file,sizeof(file)); sprintf(s,"%6d%6d%6d%4.4s%18.18s",length,cut,ulen,type,file); tg_line.lines.comment = bap_write_comment(&io,s); bap_write_tg(&io, cur_gel_index, &tg_line); } /* ** Edits */ edits = index_list_by_str(l,gel_edits); if ( ! isNil(edits)) { bap_tg_file_rec tg_last_ed; long last_ed_index; bap_tg_file_rec tg_ed; long ed_index; tg_last_ed = tg_ed = tg_line; last_ed_index = ed_index = cur_gel_index; for( edits = cdr(edits); ! isNil(edits) ; edits = cdr(edits)) { List *e; int pos; char type[4]; e = car(edits); if ( (a = assoc(e,gel_ed_op)) == NULL) crash ("No edit operation specified, gel %d\n",cur_gel_index); else { if (strcmp(a,gel_ed_delete)==0) strcpy(type,"*DE"); else strcpy(type,"*IN"); } if ( (a = assoc(e,gel_ed_base)) == NULL) crash ("No base specified for edit, gel %d\n",cur_gel_index); else type[3] = a[0]; if ( (a = assoc(e,gel_ed_base_pos)) == NULL) crash ("No base position specified for edit, gel %d\n",cur_gel_index); else pos = atoi(a); ed_index = bap_get_free_tag(&io); tg_last_ed.lines.next = ed_index; bap_write_tg(&io,last_ed_index,&tg_last_ed); tg_ed.lines.position = pos; tg_ed.lines.comment = 0; tg_ed.lines.next = 0; strncpy(tg_ed.lines.type.c,type,4); tg_last_ed = tg_ed; last_ed_index = ed_index; } bap_write_tg(&io,ed_index,&tg_ed); } /* ** Right cut offs */ if ( (a = assoc(l, gel_r_cut_seq)) != NULL) { bap_tg_file_rec rcut; rcut.lines.position = 0; rcut.lines.length = 0; rcut.lines.next = 0; strncpy(rcut.lines.type.c,"*RC*",4); rcut.lines.comment = bap_write_comment(&io,a); bap_insert_tag(&io,cur_gel_index,rcut); } /* ** Left cut offs */ if ( (a = assoc(l, gel_l_cut_seq)) != NULL) { bap_tg_file_rec lcut; lcut.lines.position = 0; lcut.lines.length = 0; lcut.lines.next = 0; strncpy(lcut.lines.type.c,"*LC*",4); lcut.lines.comment = bap_write_comment(&io,a); bap_insert_tag(&io,cur_gel_index,lcut); } /* ** Annotation */ notes = index_list_by_str(l,gel_annotation); if (!isNil(notes)){ for( notes = cdr(notes); ! isNil(notes) ; notes = cdr(notes)) { List *n; bap_tg_file_rec tg_rec; n = car(notes); if ( (a = assoc(n,gel_an_pos)) == NULL) crash("No position for annotation, gel %d\n",cur_gel_index); else tg_rec.lines.position = atoi(a); if ( (a = assoc(n,gel_an_len)) == NULL) crash("No length for annotation, gel %d\n",cur_gel_index); else tg_rec.lines.length = atoi(a); if ( (a = assoc(n,gel_an_type)) == NULL) crash("No type for annotation, gel %d\n",cur_gel_index); else c2fstr(a,strlen(a),tg_rec.lines.type.c,(size_t)4); if ( (a = assoc(n,gel_an_comment)) == NULL) tg_rec.lines.comment = 0; else tg_rec.lines.comment = bap_write_comment(&io,a); bap_insert_tag(&io,cur_gel_index,tg_rec); } } cur_gel_index++; free(sq_line); } void xdap_late_write_contig_data(List *l) { char *a; bap_rl_file_rec rl_line; long index; index = io.max_gels-cur_contig_index; if ( (a = assoc(l,contig_left_end)) == NULL) crash("No left end for contig %d\n", cur_contig_index); else rl_line.clines.left_end = atoi(a); if ( (a = assoc(l,contig_right_end)) == NULL) crash("No right end for contig %d\n",cur_contig_index); else rl_line.clines.right_end = atoi(a); if ( (a = assoc(l,contig_length)) == NULL) crash("No length for contig %d\n",cur_contig_index); else rl_line.clines.length = atoi(a); bap_write_rl(&io,index,&rl_line); cur_contig_index++; }