C 8-4-92 removed minimum block length changing option in dbauto C 29.05.91 IMPLEMENTED REMOVE GEL READING C 21-8-91 Added routines to find internal overlaps C 2-9-91 Fixed bug in copytg C 8-11-91 fixed bugs in "find internal joins" C 24-02-92 fixed some bugs courtesy of Kozlowski's AIX port C 23-Jun-92 COPYCC - params to READCC and WRITCC in wrong order C C SUBROUTINE TO ENTER NEW GEL SEQUENCES INTO DATA BASE. C IT READS IN AN ARCHIVE VERSION AND WRITES OUT A WORKING VERSION. C IT ALSO SETS UP ANY RELATIONSHIPS WITH OTHER DATA IN THE DATABASE C BOTH BY POSITION IN A CONTIG AND POINTERS TO LEFT AND RIGHT C NEIGHBOURS. SUBROUTINE AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +GEL,NAMARC,X,ITYPE,ISENSE,SEQC2,ITOTPC, +IDIM,IDC,NCONTC,LINCON,IFAIL,IDBSIZ,KBOUT,IDEVR,IDEVW,IDEVN, +IDEVT,IDEVC,IDEVG,MAXGEL) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),X,Y INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL),NAMARC*(*) CHARACTER SEQC2(IDC) CHARACTER NAMARK*10 C WRITE(*,*)'X,ITYPE,ISENSE,IDIM,IDC' C WRITE(*,*)X,ITYPE,ISENSE,IDIM,IDC C SET FAIL FLAG IFAIL=0 C WRITE(KBOUT,1000) C1000 FORMAT(' TRYING TO ENTER NEW GEL READING INTO DATABASE') C IS THERE SPACE? IF((IDBSIZ-(NGELS+NCONTS)).GT.2)GO TO 5 C FULL WRITE(KBOUT,1999)IDBSIZ 1999 FORMAT(' Database full, current size=',I6,' Extend with copy') IFAIL=7 RETURN 5 CONTINUE C NEED TO CHECK TO SEE IF GEL ALREADY IN DB C LOOK THRU ARC FILE DO 10 J=1,NGELS C READ(IDEVN,REC=J)NAMARK CALL READN(IDEVN,J,NAMARK) IF(NAMARK.NE.NAMARC(1:10))GO TO 10 C FOUND WRITE(KBOUT,1013)J 1013 FORMAT(' New gel already in database with number',I6, +' Entry aborted') IFAIL=6 RETURN 10 CONTINUE C INCREMENT NUMBER OF GELS NGELS=NGELS+1 C SET LENGTH THIS GEL LNGTHG(NGELS)=IDIM*ISENSE C WRITE NAME OF ARCHIVE TO LIST OF ARCHIVES C NAMPRO,ARC C WRITE(IDEVN,REC=NGELS)NAMARC(1:10) NAMARK=NAMARC(1:10) CALL WRITEN(IDEVN,NGELS,NAMARK) WRITE(KBOUT,1003)NGELS 1003 FORMAT(' This gel reading has been given the number ',I6) C WRITE GEL TO WORKING VERSION CALL WRITEW(IDEVW,NGELS,GEL,MAXGEL) IF(IDEVT.GT.0) CALL ENTRD(IDEVG,IDEVT,IDEVC,NAMARC,NGELS,IOK) C CREATE TAGS FOR THIS NASTY CALL TAGGEL(NGELS,LNGTHG(NGELS),GEL) C SET UP RELATIONSHIPS C DOES THIS GEL OVERLAP? IF(ITYPE.NE.0)GO TO 100 C C DOES NOT OVERLAP SO IT STARTS A CONTIG OF ITS OWN C SET LEFT AND RIGHT POINTERS TO ZERO,RELPG TO 1 LNBR(NGELS)=0 RNBR(NGELS)=0 RELPG(NGELS)=1 C WRITE NEW GEL LINE CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) C WRITE(IDEVR,REC=NGELS)RELPG(NGELS),LNGTHG(NGELS),LNBR(NGELS), C 1RNBR(NGELS) C C SET CONTIG POINTERS AND GENERAL VALUES C INCREMENT NUMBER OF CONTIGS NCONTS=NCONTS+1 C POINTER TO THIS CONTIG N=IDBSIZ-NCONTS C POINTER TO LEFT GEL THIS CONTIG LNBR(N)=NGELS C POINTER TO RIGHT GEL THIS CONTIG RNBR(N)=NGELS C LENGTH OF CONTIG RELPG(N)=IDIM C WRITE CONTIG DESCRIPTOR CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) C WRITE DB DESCRIPTOR CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) C WRITE(IDEVR,REC=IDBSIZ)NGELS,NCONTS RETURN C 100 CONTINUE C C C DOES OVERLAP 150 CONTINUE C C LEFT END OR RIGHT OVERLAP? IF(ITYPE.EQ.1)GO TO 400 C RIGHT END OR INTERNAL OVERLAP C 160 CONTINUE C NEED TO SEARCH THRU THIS CONTIG TO FIND LEFT AND RIGHT C NEIGHBOURS FOR THIS NEW GEL C LINE NUMBER OF LEFT END OF CONTIG N=NCONTC C LOOK THRU UNTIL CURRENT IS >= THEN IT MUST BE THE PREVIOUS ONE 200 CONTINUE IF(RELPG(N).GT.X)GO TO 250 C IS THIS THE LAST GEL IN CONTIG? IF(RNBR(N).EQ.0)GO TO 350 C NO SO LOOK AT NEXT N=RNBR(N) GO TO 200 250 CONTINUE C GEL LIES BETWEEN N AND LNBR(N) C NEED TO EDIT DB HERE IF(ITOTPC.GT.0)CALL ABEDIN(RELPG,LNGTHG,LNBR,RNBR, 1NGELS,NCONTS, 2GEL,LINCON,X,SEQC2,ITOTPC,IDC,IDBSIZ,KBOUT,IDEVR,IDEVW, +MAXGEL) C C C SET POINTERS IN NEW GEL LNBR(NGELS)=LNBR(N) RNBR(NGELS)=N RELPG(NGELS)=X C WRITE NEW GEL LINE CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) C WRITE(IDEVR,REC=NGELS)RELPG(NGELS),LNGTHG(NGELS),LNBR(NGELS), C 1RNBR(NGELS) C SET POINTERS IN LEFT AND RIGHT NEIGHBOURS K=LNBR(N) RNBR(K)=NGELS C RNBR(LNBR(N))=NGELS C WRITE LEFT AND RIGHT NEIGHBOURS CALL WRITER(IDEVR,K,RELPG(K),LNGTHG(K), +LNBR(K),RNBR(K)) C WRITE(IDEVR,REC=K)RELPG(K),LNGTHG(K),LNBR(K),RNBR(K) LNBR(N)=NGELS CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) C WRITE NGELS NCONTS CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) C WRITE(IDEVR,REC=IDBSIZ)NGELS,NCONTS C HAVE WE INCREASED LENGTH OF CONTIG? C ITS LINE NUMBER IS LINCON C NEED TO UPDATE IDIM IN CASE OF EDITS IDIM=ABS(LNGTHG(NGELS)) Y=X+IDIM-1 IF(Y.LE.RELPG(LINCON))RETURN RELPG(LINCON)=Y C WRITE NEW CONTIG LINE CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) C WRITE(IDEVR,REC=LINCON)RELPG(LINCON),LNGTHG(LINCON),LNBR(LINCON), C 1RNBR(LINCON) RETURN 350 CONTINUE C MUST BE A RIGHT END OVERLAP C NEED TO EDIT DB HERE IF(ITOTPC.GT.0)CALL ABEDIN(RELPG,LNGTHG,LNBR,RNBR, 1NGELS,NCONTS, 2GEL,LINCON,X,SEQC2,ITOTPC,IDC,IDBSIZ,KBOUT,IDEVR,IDEVW, +MAXGEL) C C C SET POINTERS FOR NEW GEL LNBR(NGELS)=N RNBR(NGELS)=0 RELPG(NGELS)=X C WRITE NEW GEL LINE CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) C WRITE(IDEVR,REC=NGELS) C 1RELPG(NGELS),LNGTHG(NGELS),LNBR(NGELS),RNBR(NGELS) C OLD RIGHT END RNBR(N)=NGELS C WRITE NEW RIGHT LINE CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) C C RESET RIGHT NAME IN CONTIG C ITS LINE NUMBER IS LINCON RNBR(LINCON)=NGELS C HAVE WE INCREASED LENGTH OF CONTIG? C NEED TO UPDATE LENGTH OF GEL IN CASE OF EDITS IDIM=ABS(LNGTHG(NGELS)) Y=X+IDIM-1 RELPG(LINCON)=MAX(RELPG(LINCON),Y) C WRITE HERE C WRITE CONTIG DESCRIPTOR CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) C WRITE(IDEVR,REC=LINCON)RELPG(LINCON),LNGTHG(LINCON),LNBR(LINCON) C 1,RNBR(LINCON) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) C WRITE(IDEVR,REC=IDBSIZ)NGELS,NCONTS RETURN C 400 CONTINUE C C ADDING TO LEFT END 410 CONTINUE C NEED TO EDIT DB HERE IF(ITOTPC.GT.0)CALL ABEDIN(RELPG,LNGTHG,LNBR,RNBR, 1NGELS,NCONTS, 2GEL,LINCON,1,SEQC2,ITOTPC,IDC,IDBSIZ,KBOUT,IDEVR,IDEVW, +MAXGEL) C 420 CONTINUE C SET POINTERS IN NEW GEL RELPG(NGELS)=1 RNBR(NGELS)=NCONTC LNBR(NGELS)=0 C WRITE NEW GEL LINE CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) C WRITE(IDEVR,REC=NGELS)RELPG(NGELS),LNGTHG(NGELS),LNBR(NGELS), C 1RNBR(NGELS) C SET POINTERS IN OLD LEFT END LNBR(NCONTC)=NGELS RELPG(NCONTC)=X C WRITE NEW LEFT END CALL WRITER(IDEVR,NCONTC,RELPG(NCONTC),LNGTHG(NCONTC), +LNBR(NCONTC),RNBR(NCONTC)) C WRITE(IDEVR,REC=NCONTC)RELPG(NCONTC),LNGTHG(NCONTC),LNBR(NCONTC) C 1,RNBR(NCONTC) C NEW LENGTH OF CONTIG RELPG(LINCON)=RELPG(LINCON)+X-1 C MAY HAVE JUST ADDED A GEL LONGER THAN CONTIG IDIM=ABS(LNGTHG(NGELS)) Y=IDIM IF(Y.GT.RELPG(LINCON))RELPG(LINCON)=Y C NEW NAME OF LEFT END OF CONTIG LNBR(LINCON)=NGELS C WRITE CONTIG DESCRIPTOR CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) C WRITE(IDEVR,REC=LINCON)RELPG(LINCON),LNGTHG(LINCON),LNBR(LINCON) C 1,RNBR(LINCON) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) C WRITE(IDEVR,REC=IDBSIZ)NGELS,NCONTS C NOW GO THRU AND CHANGE ALL RELATIVE POSITIONS N=NCONTC 440 CONTINUE IF(RNBR(N).EQ.0)RETURN N=RNBR(N) RELPG(N)=RELPG(N)+X-1 C WRITE NEW LINE CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) GO TO 440 END C SUBROUTINE COPYRD(IDEVRD,IDEV,NGELS,NEWSIZ,IOK) C CHARACTER MTYPE*4,NAMFIL*12 C DO 10 I = 1,NGELS C CALL READRD(IDEVRD,I,LENR,LCUT,LENW,MTYPE,NAMFIL) C CALL WRITRD(IDEV,I,LENR,LCUT,LENW,MTYPE,NAMFIL) C 10 CONTINUE C CALL WRITRD(IDEV,NEWSIZ,LENR,LCUT,LENW,MTYPE,NAMFIL) C IOK = 0 C END SUBROUTINE COPYTG(IDEVT,IDEV,IOK,IDBSIZ,NEWSIZ,NGELS) C Read tag details IDIFF = NEWSIZ - IDBSIZ CALL READTG(IDEVT,IDBSIZ,ICNT,LLEN,LCOM,LTYPE,NEXT) IF (NEXT.NE.0) NEXT = NEXT + IDIFF CALL WRITTG(IDEV,NEWSIZ,ICNT+IDIFF,LLEN,LCOM,LTYPE,NEXT) C Copy headers for each gels DO 10 I = 1,NGELS CALL READTG(IDEVT,I,LPOS,LLEN,LCOM,LTYPE,NEXT) IF (NEXT.NE.0) NEXT = NEXT + IDIFF CALL WRITTG(IDEV,I,LPOS,LLEN,LCOM,LTYPE,NEXT) 10 CONTINUE C Copy rest of tags DO 20 I = IDBSIZ+1, ICNT CALL READTG(IDEVT,I,LPOS,LLEN,LCOM,LTYPE,NEXT) IF (NEXT.NE.0) NEXT = NEXT + IDIFF CALL WRITTG(IDEV,I+IDIFF,LPOS,LLEN,LCOM,LTYPE,NEXT) 20 CONTINUE IOK = 0 END SUBROUTINE COPYCC(IDEVC,IDEV,IOK) C COMMENT_LENGTH CHARACTER NOTE*40 CALL READCC(IDEVC,1,ICNT,NEXT,NOTE) CALL WRITCC(IDEV,1,ICNT,NEXT,NOTE) DO 10 I = 2,ICNT CALL READCC(IDEVC,I,ICNT,NEXT,NOTE) CALL WRITCC(IDEV,I,ICNT,NEXT,NOTE) 10 CONTINUE IOK = 0 END SUBROUTINE DBAUTO(RELPG,LNGTHG,LNBR,RNBR,MAXDB,IDBSIZ, +NGELS,NCONTS,MAXGEL, +TEMP3,WORDP,WORDN,LPOWRC,POSNS,GELN, +SEQ1,MAXSEQ,SEQ2,SEQ3,SEQ4,SEQ5,SEQC2,SEQG2,MATCH, +MAXGLM,MAXGL2,CHRSIZ,ECHRSZ,LENGTH, +SAV1,SAV2,SAV3,MAXSAV,CENDS,NENDS,MAXCON,CONST, +KBIN,KBOUT,IDEV1,IDEV2,IDEV3,IDEV4,IDEV7,IDEV8,IDEV,IDEVT,IDEVC, +IHELPS,IHELPE,HELPF,IDEVH,NAMARC,NAMPRO,FILE, +PERCD,IOPEN,IDM,SEQG3,SEQC3,IOK) INTEGER CHRSIZ,ECHRSZ INTEGER RELPG(MAXDB),PL(2),PR(2),RMOST INTEGER LNGTHG(MAXDB),LNBR(MAXDB),RNBR(MAXDB) INTEGER JOINT(2),ITOTPC(2),ITOTPG(2),IDIM22(2),IDOUT(2) INTEGER LINCON(2),LLINO(2),ITYPE(2),IFAIL(2) INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2) INTEGER LREG,RREG,X,ANS,ANSJOK INTEGER TEMP3(ECHRSZ,MAXGL2),CONST(LENGTH) INTEGER POSNS(MAXSEQ),WORDP(LPOWRC),WORDN(LPOWRC),GELN(MAXGLM) INTEGER CENDS(MAXCON),NENDS(MAXCON) CHARACTER SEQ3(MAXGLM),SEQC2(MAXGLM,2),SEQG2(MAXGLM,2) CHARACTER SEQ1(MAXSEQ),SEQ2(MAXGLM),MATCH(MAXGLM),SEQ4(MAXGLM) INTEGER SAV1(MAXSAV),SAV2(MAXSAV),SAV3(MAXSAV) CHARACTER NAMARC*(*),NAMPRO*(*),FILE*(*) CHARACTER GET,SEQ5(MAXGLM),HELPF*(*),SEQG3(MAXGLM),SEQC3(MAXGLM) PARAMETER (MAXPRM = 32) CHARACTER PROMPT(3)*(MAXPRM) SAVE GET DATA GET/'>'/ WRITE(KBOUT,*)' Automatic sequence assembler' IFAIL(1) = 0 IEMPTY=0 IF(NGELS.LT.1)IEMPTY=1 CALL DBCHEK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +TEMP3,IERR,KBOUT) IF(IERR.GT.1) RETURN CALL YESNO(IOKENT,'Permit entry', +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(IOKENT.LT.0) RETURN CALL YESNO(INF,'Use file of file names', +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(INF.LT.0) RETURN IF(INF.EQ.0) THEN FILE = ' ' CALL OPENF1(IDEV7,FILE,0,IOK,KBIN,KBOUT, + 'File of gel reading names', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN END IF IF(IOKENT.EQ.0) THEN FILE = ' ' CALL OPENF1(IDEV8,FILE,1,IOK,KBIN,KBOUT, + 'File for names of failures', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN END IF PROMPT(1) = 'Perform normal shotgun assembly' PROMPT(2) = 'Put all sequences in one contig' PROMPT(3) = 'Put all sequences in new contigs' IOPT = 1 CALL RADION('Select entry mode',PROMPT,3,IOPT, +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(IOPT.LT.1) RETURN IF(IOPT.EQ.1) THEN C parameters for normal assembly ANSJOK = 0 CALL YESNO(ANSJOK,'Permit joins', +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(ANSJOK.LT.0) RETURN MN = LENGTH*2 MX = MAXGLM + 1 MINMAT = MAX(15,MN) CALL GETINT(MN,MX,MINMAT, +'Minimum initial match', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MINMAT = IVAL MINSLI = 3 MN = 0 MX = 25 MAXPG = 8 CALL GETINT(MN,MX,MAXPG, +'Maximum pads per gel', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MAXPG = IVAL MN = 0 MX = 25 MAXPC = 8 CALL GETINT(MN,MX,MAXPC, +'Maximum pads per gel in contig', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MAXPC = IVAL IF(IOKENT.EQ.0) THEN RMN = 0. RMX = 15. PERMAX = 8. CALL GETRL(RMN,RMX,PERMAX, + 'Maximum percent mismatch after alignment', + VAL,KBIN,KBOUT, + IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN PERMAX = VAL END IF IDIM1=0 MAXOVR=MAXGEL-3*MAX(MAXPC,MAXPG) ANS=0 IF(IEMPTY.EQ.0) +CALL ACONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,MAXSEQ,SEQ2,IDBSIZ,IDIM1,ANS,KDUMM,KDUMM,KDUMM,TEMP3, +ECHRSZ,MAXGL2,KBOUT,IDEV2,IFAIL(1),MAXGEL,IDM,PERCD) END IF JGEL = 0 JNGEL = 0 JNJOIN = 0 IMATC = 0 IF(IFAIL(1).NE.0)GO TO 900 C C 1 CONTINUE C C IDIM2=MAXGEL IF(INF.EQ.1) THEN 3 CONTINUE MN = 0 CALL GTSTR('Gel reading name',' ',NAMARC,MN,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.3) RETURN IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 3 END IF ELSE READ(IDEV7,1002,END=900)NAMARC END IF 1002 FORMAT( A) DO 77 MM=1,80 MATCH(MM)=GET 77 CONTINUE WRITE(IDEV,1077)(MATCH(KK),KK=1,79) 1077 FORMAT(' ',79A1) JGEL = JGEL + 1 WRITE(IDEV,*)'Processing',JGEL,' in batch' 1007 FORMAT(' Gel reading name=',A) WRITE(IDEV,1007)NAMARC CALL OPENRS(IDEV4,NAMARC,IOK,LRECL,2) IF(IOK.NE.0)THEN IF(INF.EQ.1) RETURN WRITE(IDEV,*)' Error opening gel reading file' GO TO 1 END IF CALL ARRFIM(IDEV4,SEQ2,IDIM2,KBOUT) CLOSE(UNIT=IDEV4) WRITE(IDEV,1800)IDIM2 1800 FORMAT(' Gel reading length=',I6) IF(IOPT.NE.1) THEN CALL DBAUTP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ2,NAMARC,JOINT,ITYPE,ISENSE,SEQC2,ITOTPC, + IDIM2,IDOUT,LLINO,LINCON,IFAIL,IDBSIZ,MAXDB,IDEV, + IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4,MAXGEL,IMATC,IEMPTY,IOPT) IF(IFAIL(1).NE.0) GO TO 800 JNGEL = JNGEL + 1 GO TO 1 END IF IF(IDIM2.LT.MINMAT)THEN WRITE(IDEV,*) +' Reading shorter than minimum match, so not processed' GO TO 1 END IF CALL SQCOPY(SEQ2,SEQ3,IDIM2) IFCOMP=0 IMATC=0 IF(IEMPTY.EQ.0) +CALL AUTOCN(SEQ1,IDIM1,SEQ2,IDIM2,ILEFTS,ILC,IPOSC, +IPOSG,ISENSE,LLINO,IMATC,IFCOMP,MINMAT,POSNS,WORDP,WORDN, +CONST,LENGTH,LPOWRC,IDEV,MATCH,MAXGEL,MAXGLM,SEQ5,GELN, +SAV1,SAV2,SAV3,MAXSAV,CENDS,NENDS,MAXCON, +SEQG2,SEQC2,SEQ4,IDOUT,IDIM22,ITOTPG,ITOTPC,JOINT,IFAIL, +ITYPE,MAXPC,MAXPG,PERMAX,MINSLI,SEQG3,SEQC3,KFAIL) IF(IOKENT.NE.0) GO TO 1 C THIS RETURNS THE FOLLOWING: C ILEFTS POSITION IN CONSENSUS OF LEFT END OF MATCHING CONTIGS C ILC LENGTHS OF MATCHING CONTIGS C IPOSC POSITION OF MATCH RELATIVE TO CONTIG C IPOSG POSITION OF MATCH RELATIVE TO NEW GEL C ISENSE SENSE OF NEW GEL C LLINO LEFT GEL NUMBER IN MATCHING CONTIGS C IMATC THE NUMBER OF MATCHING CONTIGS (>2 IS ERROR!) C IFCOMP ERROR FLAG FOR COMPARISON (COMPARISON ARRAYS OVERFLOWED) IF(IFCOMP.NE.0)GO TO 800 CALL SQCOPY(SEQ3,SEQ2,IDIM2) IF(IMATC.GT.0)GO TO 200 C C NO OVERLAP NEW CONTIG C C ITYPE 0 = NO OVERLAP C ISENSE 1 = SAME SENSE AS ARCHIVE IF(IFAIL(1).NE.0) GO TO 800 ITYPE(1)=0 ISENSE(1)=1 IDOUT(1)=MAXGEL WRITE(IDEV,1015) 1015 FORMAT(' New gel reading does not overlap: start a new contig') CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ2,NAMARC,X,ITYPE,ISENSE,SEQC2(1,1),ITOTPC(1), +IDIM2,IDOUT(1),LLINO,LINCON,IFAIL,IDBSIZ,IDEV, +IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4,MAXGEL) IF(IFAIL(1).NE.0)GO TO 800 IEMPTY=0 IDIM1=IDIM1+1 IF((IDIM1+19+IDIM2).GT.MAXSEQ)THEN WRITE(IDEV,1021)MAXSEQ 1021 FORMAT(' Database maximum consensus length (',I6,') exceeded') GO TO 900 END IF CALL ADDTIT(SEQ1(IDIM1),NAMPRO,NGELS,IDIM1) CALL MSTLKL(SEQ2,IDIM2) CALL SQCOPY(SEQ2,SEQ1(IDIM1),IDIM2) IDIM1=IDIM1+IDIM2-1 JNGEL = JNGEL + 1 GO TO 1 C C C 200 CONTINUE C C C OVERLAP SO TRY TO ALIGN THE SEQUENCES C C DO 100 I=1,IMATC N=IDBSIZ-NCONTS DO 99 J=N,IDBSIZ-1 IF(LNBR(J).NE.LLINO(I))GO TO 99 LINCON(I)=J GO TO 100 99 CONTINUE WRITE(IDEV,10077)LLINO(I) 10077 FORMAT(' Contig line for contig',I6,' not found!') GO TO 800 100 CONTINUE C IF((IMATC.EQ.2).AND.(ANSJOK.EQ.0))GO TO 400 C C C SINGLE OVERLAP C C C WRITE(IDEV,1014)LLINO(1) 1014 FORMAT(' New gel reading overlaps contig',I6) IF(ITOTPG(1).GT.0) CALL CCTA(SEQG2(1,1),IDIM22(1)) CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQG2(1,1),NAMARC,JOINT(1),ITYPE(1),ISENSE(1), +SEQC2(1,1), +ITOTPC(1),IDIM22(1),IDOUT(1),LLINO(1),LINCON(1), +IFAIL(1),IDBSIZ,IDEV,IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4,MAXGEL) IF(IFAIL(1).NE.0)GO TO 800 CALL DELCON(SEQ1,ILEFTS(1),ILC(1),IDIM1) LREG=1 RREG=RELPG(LINCON(1)) IGELC=LNBR(LINCON(1)) ANS=1 CALL ACONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,MAXSEQ,SEQ2,IDBSIZ,IDIM1,ANS,IGELC,LREG,RREG,TEMP3, +ECHRSZ,MAXGL2,IDEV,IDEV2,IFAIL(1),MAXGEL,IDM,PERCD) IF(IFAIL(1).NE.0)GO TO 900 JNGEL = JNGEL + 1 IF(KFAIL.NE.0) GO TO 800 GO TO 1 C C C DOUBLE OVERLAP C C 400 CONTINUE WRITE(IDEV,1013)LLINO 1013 FORMAT(' Overlap between contigs',I6,' and',I6) IF(ANSJOK.NE.0)GO TO 800 IF(LLINO(1).EQ.LLINO(2))THEN WRITE(IDEV,*)' Trying to form loop in contig',LLINO(1) WRITE(IDEV,*)' Gel not entered' GO TO 800 END IF CALL AJOIN3(RELPG,IDBSIZ,LINCON,ITYPE,ISENSE,JOINT, +IDIM22,KLASS,IOVER,IDEV,PL,PR) IF(IOVER.GT.MAXOVR)THEN WRITE(IDEV,*)' Overlap too large: entry only' IFAIL(2)=1 GO TO 600 END IF C WHICH CONTIG IS LEFTMOST? LMOST=1 RMOST=2 IF(PL(1).GT.PL(2))THEN LMOST=2 RMOST=1 END IF C SAVE LENGTH OF RMOST CONTIG FOR DELETION STEP LATER ILCR=ILC(RMOST) IF(ITOTPG(LMOST).GT.0) CALL CCTA(SEQG2(1,LMOST),IDIM22(LMOST)) WRITE(IDEV,1012)LLINO(LMOST) 1012 FORMAT(' Entering the new gel reading into contig',I6) CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQG2(1,LMOST),NAMARC,JOINT(LMOST),ITYPE(LMOST), +ISENSE(LMOST),SEQC2(1,LMOST),ITOTPC(LMOST), +IDIM22(LMOST),IDOUT(LMOST),LLINO(LMOST),LINCON(LMOST), +IFAIL(LMOST),IDBSIZ,IDEV,IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4, +MAXGEL) IF(IFAIL(LMOST).NE.0)GO TO 800 JNGEL = JNGEL + 1 IF(ITYPE(LMOST).EQ.1)LLINO(LMOST)=NGELS IF(ILEFTS(LMOST).LT.ILEFTS(RMOST))THEN ILEFTS(RMOST)=ILEFTS(RMOST)-ILC(LMOST)-20 END IF CALL DELCON(SEQ1,ILEFTS(LMOST),ILC(LMOST),IDIM1) LREG=1 RREG=RELPG(LINCON(LMOST)) IGELC=LNBR(LINCON(LMOST)) ANS=1 CALL ACONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,MAXSEQ,SEQ2,IDBSIZ,IDIM1,ANS,IGELC,LREG,RREG,TEMP3, +ECHRSZ,MAXGL2,IDEV,IDEV2,IFAIL(1),MAXGEL,IDM,PERCD) IF(IFAIL(1).NE.0)GO TO 900 ILC(LMOST)=RREG ILEFTS(LMOST)=IDIM1-RREG+1 DO 500 I=1,2 IF(ISENSE(I).EQ.-1)THEN CALL CMPLMT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LINCON(I), + LLINO(I),SEQ2,IDBSIZ,IDEV,IDEV1,IDEV2,MAXGEL) CALL SQREV(SEQ1(ILEFTS(I)),ILC(I)) CALL SQCOM(SEQ1(ILEFTS(I)),ILC(I)) KT=IDIM1 CALL ADDTIT(SEQ1((ILEFTS(I)-20)),NAMPRO,LNBR(LINCON(I)),KT) END IF 500 CONTINUE C NEED TO KNOW POSITION OF OVERLAP RELATIVE TO CONTIG, TO CONSENSUS C WHICH BITS TO SEND TO ALIGNMENT ROUTINES C SET UP FOR ALINE (NOTE RMOST IS EQUIVALENT TO THE GEL READING AND C SO IS SLID ALONG THE LMOST CONTIG. THE SECTION SENT TO ALINE MUST C BE OF LENGTH < MAXGEL-2*MAX(MAXPC,MAXPG) C IT MUST START AT POSITION 1 IN THE RMOST CONTIG AND EXTEND IPOSC(LMOST)=PL(RMOST)+RELPG(NGELS)-1 ILCT = RELPG(LINCON(LMOST)) - RELPG(NGELS) - PL(RMOST) + 2 ILC(RMOST)=MIN(ILCT,ILC(RMOST)) IPOSC(RMOST)=1 IDOUT(LMOST)=MAXGEL IDOUT(RMOST)=MAXGEL IDSAV=MAXSAV C ON INPUT TO ALINE ILC(RMOST) CONTAINS THE OVERLAP LENGTH C ON OUTPUT IT CONTAINS THE LENGTH OF THE ALIGNED SECTION (IE INCLUDING C PADS) WRITE(IDEV,1009) 1009 FORMAT(' Trying to align the two contigs') CALL ALINE(SEQ1(ILEFTS(LMOST)),SEQ1(ILEFTS(RMOST)), +SEQC2(1,RMOST),SEQC2(1,LMOST),SAV1,SAV2,SAV3,IDSAV, +ILC(LMOST),ILC(RMOST),IDOUT(LMOST),IPOSC(LMOST),IPOSC(RMOST), +MINSLI,JOINT(LMOST),ITOTPC(LMOST),ITOTPC(RMOST),IFAIL(1), +ITYPE(1),MAXPC,MAXPC,PERMAX,IDEV,SEQ4,MAXGEL,Z) C SEQC2(1,LMOST) NOW CONTAINS THE ALIGNED SECTION OF THE LMOST CONTIG C SEQC2(1,RMOST) NOW CONTAINS THE ALIGNED SECTION OF THE RMOST CONTIG C ILC(RMOST) IS NOW THE LENGTH OF ALIGNED SECTION OF THE RMOST CONTIG C IDOUT(LMOST) IS NOW THE LENGTH OF ALIGNED SECTION OF THE LMOST CONTIG C JOINT(LMOST) IS THE POSITION OF THE JOIN RLETIVE TO THE LMOST CONTIG C ITYPE IS TYPE OF OVERLAP (-1 = RIGHT END OR INTERNAL, 1 = LEFT END) C NB SHOULD ALWAYS BE -1 C IF THIS HAS BEEN DONE OK WE CAN EDIT THE TWO CONTIGS THEN JOIN IF(IFAIL(1).NE.0)THEN WRITE(IDEV,*)' Failed to align the two overlapping contigs' GO TO 800 END IF IF(ITOTPC(LMOST).GT.0)THEN WRITE(IDEV,1017)LLINO(LMOST) 1017 FORMAT(' Editing contig',I6) CALL ABEDIN(RELPG,LNGTHG,LNBR,RNBR, + NGELS,NCONTS,SEQ3,LINCON(LMOST),JOINT(LMOST),SEQC2(1,LMOST), + ITOTPC(LMOST),IDOUT(LMOST),IDBSIZ,IDEV,IDEV1,IDEV2, + MAXGEL) END IF JOINT(RMOST)=1 IDOUT(RMOST)=ILC(RMOST) IF(ITOTPC(RMOST).GT.0)THEN WRITE(IDEV,1017)LLINO(RMOST) CALL ABEDIN(RELPG,LNGTHG,LNBR,RNBR, + NGELS,NCONTS,SEQ3,LINCON(RMOST),JOINT(RMOST),SEQC2(1,RMOST), + ITOTPC(RMOST),IDOUT(RMOST),IDBSIZ,IDEV,IDEV1,IDEV2, + MAXGEL) END IF ILC(RMOST)=ILCR LTL=LNBR(LINCON(LMOST)) LTR=LNBR(LINCON(RMOST)) WRITE(IDEV,1018)LNBR(LINCON(LMOST)),LNBR(LINCON(RMOST)) 1018 FORMAT(' Completing the join between contigs',I6,' and',I6) CALL AJOIN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +JOINT(LMOST),LTL,LTR,LINCON(LMOST),LINCON(RMOST),IDEV1) LLINO(1)=LTL IF(ILEFTS(LMOST).GT.ILEFTS(RMOST))THEN CALL DELCON(SEQ1,ILEFTS(LMOST),ILC(LMOST),IDIM1) CALL DELCON(SEQ1,ILEFTS(RMOST),ILC(RMOST),IDIM1) END IF IF(ILEFTS(RMOST).GE.ILEFTS(LMOST))THEN CALL DELCON(SEQ1,ILEFTS(RMOST),ILC(RMOST),IDIM1) CALL DELCON(SEQ1,ILEFTS(LMOST),ILC(LMOST),IDIM1) END IF LREG=1 RREG=JOINT(LMOST) IGELC=LLINO(1) ANS=1 CALL ACONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,MAXSEQ,SEQ2,IDBSIZ,IDIM1,ANS,IGELC,LREG,RREG,TEMP3, +ECHRSZ,MAXGL2,IDEV,IDEV2,IFAIL(1),MAXGEL,IDM,PERCD) IF(IFAIL(1).NE.0)GO TO 900 C CALL FMTDB(SEQ1,IDIM1,1,IDIM1,60,IDEV) JNJOIN = JNJOIN + 1 IF(KFAIL.NE.0) GO TO 800 GO TO 1 C C C JOINS THAT FAIL INITIAL ALIGNMENT CRITERIA C C 600 CONTINUE C COME HERE FOR JOINS THAT FAIL: WE MIGHT BE ABLE TO ENTER THE GEL C INTO A SINGLE CONTIG IGOOD=0 IF(IFAIL(1).EQ.0)IGOOD=1 IF(IFAIL(2).EQ.0)IGOOD=2 IF(IGOOD.EQ.0)GO TO 800 IF(ITOTPG(IGOOD).GT.0) CALL CCTA(SEQG2(1,IGOOD),IDIM22(IGOOD)) WRITE(IDEV,1012)LLINO(IGOOD) CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQG2(1,IGOOD),NAMARC,JOINT(IGOOD),ITYPE(IGOOD), +ISENSE(IGOOD),SEQC2(1,IGOOD),ITOTPC(IGOOD), +IDIM22(IGOOD),IDOUT(IGOOD),LLINO(IGOOD),LINCON(IGOOD), +IFAIL(IGOOD),IDBSIZ,IDEV,IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4, +MAXGEL) IF(IFAIL(IGOOD).NE.0)GO TO 800 JNGEL = JNGEL + 1 CALL DELCON(SEQ1,ILEFTS(IGOOD),ILC(IGOOD),IDIM1) ANS=1 IGELC=LNBR(LINCON(IGOOD)) LREG=1 RREG=RELPG(LINCON(IGOOD)) CALL ACONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,MAXSEQ,SEQ2,IDBSIZ,IDIM1,ANS,IGELC,LREG,RREG,TEMP3, +ECHRSZ,MAXGL2,IDEV,IDEV2,IFAIL(1),MAXGEL,IDM,PERCD) IF(IFAIL(1).NE.0)GO TO 900 WRITE(IDEV,1020)LLINO,LLINO(IGOOD) 1020 FORMAT(' Could not join contigs',I4,' and',I4,' but the gel', +' has been entered into contig',I4,/, +' If required do the join manually.') C C C ALL FAILURES C C C 800 CONTINUE WRITE(IDEV,10888) 10888 FORMAT(' Failed gel name written to error file') WRITE(IDEV8,1002)NAMARC GO TO 1 900 CONTINUE WRITE(KBOUT,*)'Batch finished' WRITE(KBOUT,*)JGEL,' sequences processed' WRITE(KBOUT,*)JNGEL,' sequences entered into database' WRITE(KBOUT,*)JNJOIN,' joins made' RETURN END SUBROUTINE DBAUTP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ2,NAMARC,JOINT,ITYPE,ISENSE,SEQC2,ITOTPC, +IDIM2,IDOUT,LLINO,LINCON,IFAIL,IDBSIZ,MAXDB,IDEV, +IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4,MAXGEL,IMATC,IEMPTY,IOPT) INTEGER RELPG(MAXDB) INTEGER LNGTHG(MAXDB),LNBR(MAXDB),RNBR(MAXDB) CHARACTER SEQ2(MAXGEL),SEQC2(MAXGEL) CHARACTER NAMARC*(*) C deals with entering all readings into contig 1 (IOPT=2) C or all readings into new contigs (IOPT=3) IF(IOPT.EQ.2) THEN IF(IMATC.EQ.0) THEN ITYPE=0 ISENSE=1 IDOUT=MAXGEL CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ2,NAMARC,JOINT,ITYPE,ISENSE,SEQC2,ITOTPC, + IDIM2,IDOUT,LLINO,LINCON,IFAIL,IDBSIZ,IDEV, + IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4,MAXGEL) IF(IFAIL.NE.0) RETURN IEMPTY=0 IMATC = 1 ELSE ITYPE= - 1 ISENSE=1 JOINT = 1 LLINO = 1 LINCON = IDBSIZ - 1 ITOTPC = 0 IDOUT=MAXGEL CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ2,NAMARC,JOINT,ITYPE,ISENSE,SEQC2,ITOTPC, + IDIM2,IDOUT,LLINO,LINCON,IFAIL,IDBSIZ,IDEV, + IDEV1,IDEV2,IDEV3,IDEVT,IDEVT,IDEV4,MAXGEL) IF(IFAIL.NE.0) RETURN END IF ELSE IF(IOPT.EQ.3) THEN ITYPE=0 ISENSE=1 IDOUT=MAXGEL CALL AENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ2,NAMARC,JOINT,ITYPE,ISENSE,SEQC2,ITOTPC, + IDIM2,IDOUT,LLINO,LINCON,IFAIL,IDBSIZ,IDEV, + IDEV1,IDEV2,IDEV3,IDEVT,IDEVC,IDEV4,MAXGEL) IF(IFAIL.NE.0) RETURN END IF END SUBROUTINE DBCOPY(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,GEL, +NAMPRO,IDEV,IDBSIZ,IERR,KBIN,KBOUT,IDEVR,IDEVW,IDEVN, +IDEVT,IDEVC, +IHELPS,IHELPE,FILEH,IDEVH,MAXGEL,MAXDB,IDM) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER NAMPRO*(*),NAMARC*10,GEL(MAXGEL) CHARACTER V2,V1 INTEGER IWORD,DELDB PARAMETER (IWORD=4) PARAMETER (MAXPRM = 21) CHARACTER PERR(2)*(MAXPRM) EXTERNAL DELDB IERR=1 1 CONTINUE L = 1 V1='1' CALL GTSTR('Make version',V1,V2,L,KBOUT,KBIN,INFLAG) CALL CCASE(V2,1) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 1 END IF IF(L.LT.1) V2 = V1 MN = NGELS + NCONTS + 1 MX = MAXDB NEWSIZ = IDBSIZ CALL GETINT(MN,MX,NEWSIZ, +'New database size', +IVAL,KBIN,KBOUT,IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN CALL BUSY(KBOUT) NEWSIZ = IVAL C WHERE SHOULD CHARS BE CHANGED ? LLL = INDEX(NAMPRO,'.') + 1 NAMPRO(LLL:)='RL'//V2 CALL OPENRS(IDEV,NAMPRO,IOK,4,3) IF(IOK.NE.0) THEN C problem opening file IF(IOK.EQ.2) THEN CALL ERROM(KBOUT,'File already exists') PERR(1) = 'Retype version number' PERR(2) = 'Replace database' IDO = 1 CALL RADION('Select action',PERR,2,IDO, + IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(IDO.LT.1) RETURN IF(IDO.EQ.1) GO TO 1 IF(IDO.EQ.2) THEN IOK = DELDB(NAMPRO,V2,IDEV,MAXGEL) IF(IOK.EQ.0) THEN LLL = INDEX(NAMPRO,'.') + 1 NAMPRO(LLL:)='RL'//V2 CALL OPENRS(IDEV,NAMPRO,IOK,4,3) IF(IOK.EQ.0) GO TO 2 ELSE CALL ERROM(KBOUT,'File delete failed') END IF END IF END IF GO TO 100 END IF 2 CONTINUE CALL WRITER(IDEV,NEWSIZ,NGELS,NCONTS,NGELS,NCONTS) DO 10 I=1,NGELS CALL WRITER(IDEV,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)) 10 CONTINUE M=NEWSIZ-NCONTS N=IDBSIZ-NCONTS DO 15 I=N,IDBSIZ-1 CALL WRITER(IDEV,M,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)) M=M+1 15 CONTINUE CLOSE(UNIT=IDEV) C DO SEQUENCES NAMPRO(LLL:)='SQ'//V2 IREC=MAXGEL/IWORD IF(MOD(MAXGEL,IWORD).NE.0)IREC=IREC+1 CALL OPENRS(IDEV,NAMPRO,IOK,IREC,3) IF(IOK.NE.0)GO TO 100 C C MAKE SURE FILE IS NEWSIZ LINES LONG! CALL WRITEW(IDEV,NEWSIZ,GEL,MAXGEL) DO 20 I=1,NGELS CALL READW(IDEVW,I,GEL,MAXGEL) CALL WRITEW(IDEV,I,GEL,MAXGEL) 20 CONTINUE CLOSE(UNIT=IDEV) C DO ARCHIVE NAMES NAMPRO(LLL:)='AR'//V2 CALL OPENRS(IDEV,NAMPRO,IOK,3,3) IF(IOK.NE.0)GO TO 100 DO 30 I=1,NGELS CALL READN(IDEVN,I,NAMARC) CALL WRITEN(IDEV,I,NAMARC) 30 CONTINUE C MAKE SURE FILE IS 1000 LINES LONG C ******** PROBLEMS HERE WRITING INTEGER TO NAMES FILE WRITE(IDEV,REC=MAXDB)NEWSIZ,MAXGEL,IDM CLOSE(UNIT=IDEV) C IF(IDEVRD.GT.0) THEN C NAMPRO(LLL:) = 'RD'//V2 C CALL OPENRS(IDEV,NAMPRO,IOK,7,3) C IF(IOK.NE.0) GO TO 100 C CALL COPYRD(IDEVRD,IDEV,NGELS,NEWSIZ,IOK) C IF(IOK.NE.0) GO TO 100 C END IF C DO TAGS + COMMENTS IF(IDEVT.GT.0.AND.IDEVC.GT.0) THEN NAMPRO(LLL:)='TG'//V2 CALL OPENRS(IDEV,NAMPRO,IOK,5,3) IF(IOK.NE.0)GO TO 100 CALL COPYTG(IDEVT,IDEV,IOK,IDBSIZ,NEWSIZ,NGELS) NAMPRO(LLL:)='CC'//V2 C COMMENT_LENGTH: 11 = (40 + long)/long CALL OPENRS(IDEV,NAMPRO,IOK,11,3) IF(IOK.NE.0)GO TO 100 CALL COPYCC(IDEVC,IDEV,IOK) ENDIF IERR=0 RETURN 100 CONTINUE WRITE(KBOUT,9999) 9999 FORMAT(' Error opening new database, copy aborted') RETURN END INTEGER FUNCTION DELDB(NAMPRO,VERSN,IDEV,MAXGEL) CHARACTER NAMPRO*(*),VERSN INTEGER DELF EXTERNAL DELF C C delete an xdap database C C assume relationships are 4 words, names are 3 and seqs are maxgel C all recls in BYTES C DELDB = 1 LLL = INDEX(NAMPRO,'.') + 1 NAMPRO(LLL:)='RL'//VERSN IF(DELF(NAMPRO,IDEV,16,4).NE.0) RETURN NAMPRO(LLL:)='AR'//VERSN IF(DELF(NAMPRO,IDEV,12,4).NE.0) RETURN NAMPRO(LLL:)='SQ'//VERSN IF(DELF(NAMPRO,IDEV,MAXGEL,4).NE.0) RETURN NAMPRO(LLL:)='CC'//VERSN C COMMENT_LENGTH: 11 = (40 + long)/long IF(DELF(NAMPRO,IDEV,44,4).NE.0) RETURN NAMPRO(LLL:)='TG'//VERSN IF(DELF(NAMPRO,IDEV,20,4).NE.0) RETURN DELDB = 0 END SUBROUTINE DBFIX(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +GEL,GEL2,IDBSIZ,KBIN,KBOUT,IDEVR,IDEVW,IDEVN, +IHELPS,IHELPE,IHELP1,IHELP2,FILEH,IDEVH,MAXGEL,IDEVT,IDEVC) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN C 12-12-90 Added function to change raw data parameter file C and changed menu routines accordingly INTEGER RELPG(IDBSIZ),X INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL) CHARACTER NAME*10,NEWNAM*10 INTEGER L,M,N CHARACTER GEL2(MAXGEL) PARAMETER (MAXPRM = 32) CHARACTER PROMPT(9)*(MAXPRM) INTEGER GCLIN,CHAINL EXTERNAL GCLIN,CHAINL WRITE(KBOUT,1000) 1000 FORMAT( +' Warning:', +' make a copy first, and check logical consistency after use') 10 CONTINUE C C SELECT OPTION C CALL BELL(1,KBOUT) C DBMENU now defunct for bap - so we use RADION instead C CALL DBMENU(4,NOPT,IHELPS,IHELPE,FILEH,IDEVH, C +KBIN,KBOUT) PROMPT(1) = 'Line change' PROMPT(2) = 'Edit gel reading' PROMPT(3) = 'Delete contig line' PROMPT(4) = 'Shift' PROMPT(5) = 'Move gel reading' PROMPT(6) = 'Rename gel reading' PROMPT(7) = 'Break a contig' PROMPT(8) = 'Remove a gel reading' PROMPT(9) = 'Alter raw data parameters' NOPT = 1 CALL RADION('Alter relationships', PROMPT, 9, NOPT, IHELPS, + IHELPE, FILEH, IDEVH, KBIN, KBOUT) IF(NOPT.LT.1)RETURN IF(NOPT.EQ.1)THEN C LINE CHANGE MN = 0 MX = IDBSIZ LNO = 0 CALL GETINT(MN,MX,LNO, + 'Number of line to change', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 IF(IVAL.EQ.0) GO TO 10 LNO = IVAL IF(LNO.EQ.IDBSIZ)GO TO 19 WRITE(KBOUT,*)'Current line' WRITE(KBOUT,1001)RELPG(LNO),LNGTHG(LNO),LNBR(LNO),RNBR(LNO) 1001 FORMAT(' ',4I6) MN = 0 MX = 99999 X = RELPG(LNO) CALL GETINT(MN,MX,X, + 'Relative position', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 X = IVAL MN = -MAXGEL MX = 99999 L = LNGTHG(LNO) CALL GETINT(MN,MX,L, + 'Length', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 L = IVAL MN = 0 MX = IDBSIZ M = LNBR(LNO) CALL GETINT(MN,MX,M, + 'Left neighbour', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 M = IVAL MN = 0 MX = IDBSIZ N = RNBR(LNO) CALL GETINT(MN,MX,N, + 'Right neighbour', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN N = IVAL CALL WRITER(IDEVR,LNO,X,L,M,N) RELPG(LNO)=X LNGTHG(LNO)=L LNBR(LNO)=M RNBR(LNO)=N GO TO 10 19 CONTINUE C NCONTS NGELS LINES MN = 0 MX = IDBSIZ LL = NGELS CALL GETINT(MN,MX,LL, + 'Number of gel readings', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 LL = IVAL MN = 0 MX = IDBSIZ MM = NCONTS CALL GETINT(MN,MX,MM, + 'Number of contigs', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 MM = IVAL CALL WRITER(IDEVR,IDBSIZ,LL,MM,LL,MM) NGELS=LL NCONTS=MM GO TO 10 END IF C C IF(NOPT.EQ.4)THEN MN = 0 MX = NGELS LNO = 0 CALL GETINT(MN,MX,LNO, + 'Number of first gel reading to shift', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 IF(IVAL.LT.1) GO TO 10 LNO = IVAL I = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,LNO) IF(I.EQ.0)THEN WRITE(KBOUT,*) + 'Problem with this gel reading. Check logical consistency' WRITE(KBOUT,*)'of database. Shift not done' GO TO 10 END IF NCONTO = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,I) IF(NCONTO.EQ.0)THEN WRITE(KBOUT,*) + 'No contig line for this contig. Check logical' WRITE(KBOUT,*)'consistency of database. Shift not done' GO TO 10 END IF MN = 1 - RELPG(LNO) MX = RELPG(NCONTO) - RELPG(LNO) X = MN CALL GETINT(MN,MX,X, + 'Distance to shift', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN X = IVAL CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR, + IDBSIZ,LNO,NCONTO,X) WRITE(KBOUT,*)'Shift complete' GO TO 10 END IF C IF(NOPT.EQ.2)THEN MN = 0 MX = NGELS LNO = 0 CALL GETINT(MN,MX,LNO, + 'Number of gel reading to edit', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 IF(IVAL.LT.1) GO TO 10 LNO = IVAL IDIM=ABS(LNGTHG(LNO)) C ALLOW EDITING TO BE RELATIVE TO CONTIG POSITION ISTART=RELPG(LNO) CALL READW(IDEVW,LNO,GEL,MAXGEL) C @jkb@ C CALL EDITGL(GEL,GEL2,IDIM,KBIN,KBOUT,MAXGEL, C + IHELP1,IHELP2,FILEH,IDEVH,ISTART) CALL WRITEW(IDEVW,LNO,GEL,MAXGEL) LNGTHG(LNO)=SIGN(IDIM,LNGTHG(LNO)) CALL WRITER(IDEVR,LNO,RELPG(LNO),LNGTHG(LNO), + LNBR(LNO),RNBR(LNO)) GO TO 10 END IF IF(NOPT.EQ.3)THEN MN=IDBSIZ-NCONTS MX = IDBSIZ - 1 LNO = 0 CALL GETINT(MN,MX,LNO, + 'Number of contig line to delete', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 LNO = IVAL CALL REMCNL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + LNO,IDEVR) GO TO 10 END IF IF(NOPT.EQ.6)THEN MN = 0 MX = NGELS LNO = 0 CALL GETINT(MN,MX,LNO, + 'Number of gel reading to rename', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 IF(IVAL.LT.1) GO TO 10 LNO = IVAL CALL READN(IDEVN,LNO,NAME) L = 10 CALL GTSTR('name for gel reading', + NAME,NEWNAM,L,KBOUT,KBIN,INFLAG) IF(L.GT.0)CALL WRITEN(IDEVN,LNO,NEWNAM) GO TO 10 END IF IF(NOPT.EQ.5)THEN MN = 0 MX = NGELS IFROM = 0 CALL GETINT(MN,MX,IFROM, + 'Number of gel reading to move', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 IF(IVAL.LT.1) GO TO 10 IFROM = IVAL MN = 0 MX = NGELS ITO = 0 CALL GETINT(MN,MX,ITO, + 'New number for gel reading', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 IF(IVAL.LT.1) GO TO 10 ITO = IVAL CALL MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + GEL,IFROM,ITO,IDEVR,IDEVW,IDEVN,MAXGEL,KBOUT) GO TO 10 END IF IF(NOPT.EQ.7)THEN CALL BREAKC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,KBIN,KBOUT,IDEVR,IDEVW,IDEVN, + IHELPS,IHELPE,IHELP1,IHELP2,FILEH,IDEVH,IOK) GO TO 10 END IF IF(NOPT.EQ.8) THEN CALL REMGD(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + KBIN,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN, + IHELPS,IHELPE,FILEH,IDEVH,IOK) GO TO 10 END IF IF(NOPT.EQ.9) THEN CALL FIXRD(IDEVT,IDEVC,IDBSIZ,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH) GO TO 10 END IF GO TO 10 END SUBROUTINE REMGD(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +KBIN,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN, +IHELPS,IHELPE,HELPF,IDEVH,IOK) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER HELPF*(*),GEL(MAXGEL) INTEGER REMME,GCLIN,CHAINL EXTERNAL GCLIN,CHAINL C assumes db is logical consistent WRITE(KBOUT,*)'Remove reading from database' REMME = NGELS CALL GETINT(1,NGELS,REMME, +'Number of reading to remove', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN REMME = IVAL I = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,REMME) ICONT = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,I) IF(ICONT.EQ.0) THEN WRITE(KBOUT,*)'No contig line for this reading' IOK = 1 RETURN END IF CALL REMGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +REMME,ICONT,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN) END SUBROUTINE REMGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +REMME,ICONT,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN) C Routine to remove a reading from a database C Cases: 1 left end C 2 right end C 3 internal and dispensible C 4 internal and indispensible C if 1 change contig lnbr, contig length, lnbr of rnbr of remme, relpgs C if 2 change contig rnbr, contig length, rnbr of lnbr of remme C if 3 change contig length, lnbr of rnbr of remme rnbr of lnbr of remme C if 4 need to break contig, then as for 1 C if 1 and 2 then also remove contig line C for all cases move gel ngels to remme (if remme/=ngels) C and update line idbsiz C INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER REMME,CLEN LOGICAL LEFTE,RIGHTE,DISPEN CHARACTER GEL(MAXGEL) EXTERNAL CLEN LEFTE = .FALSE. RIGHTE = .FALSE. DISPEN = .FALSE. C C Left end ? C IF(LNBR(REMME).EQ.0) LEFTE = .TRUE. C C Right end ? C IF(RNBR(REMME).EQ.0) RIGHTE = .TRUE. C C If both true remove the contig line, then overwrite the gel C IF(LEFTE.AND.RIGHTE) THEN WRITE(KBOUT,*)'Removing reading and contig' IFROM = NGELS NGELS = NGELS - 1 CALL REMCNL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + ICONT,IDEVR) IF(REMME.NE.IFROM) THEN WRITE(KBOUT,*)'Renumbering reading',IFROM,' to',REMME CALL MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS, + NCONTS,IDBSIZ,GEL,IFROM,REMME,IDEVR,IDEVW,IDEVN,MAXGEL,KBOUT) END IF ELSE IF(LEFTE) THEN WRITE(KBOUT,*)'Removing reading from left end of contig' LNBR(ICONT) = RNBR(REMME) I = 1 - RELPG(RNBR(REMME)) WRITE(KBOUT,*)'Shifting readings in contig by distance=',I CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR, + IDBSIZ,RNBR(REMME),ICONT,I) I = LNBR(ICONT) LNBR(I) = 0 CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) IFROM = NGELS IF(REMME.NE.IFROM) THEN WRITE(KBOUT,*)'Renumbering reading',IFROM,' to',REMME CALL MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS, + NCONTS,IDBSIZ,GEL,IFROM,REMME,IDEVR,IDEVW,IDEVN,MAXGEL,KBOUT) END IF NGELS = NGELS - 1 CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) ELSE IF(RIGHTE) THEN WRITE(KBOUT,*)'Removing reading from right end of contig' RNBR(ICONT) = LNBR(REMME) I = RNBR(ICONT) RNBR(I) = 0 CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) RELPG(ICONT) = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,LNBR(ICONT)) CALL WRITER(IDEVR,ICONT,RELPG(ICONT),LNGTHG(ICONT), + LNBR(ICONT),RNBR(ICONT)) IFROM = NGELS IF(REMME.NE.IFROM) THEN WRITE(KBOUT,*)'Renumbering reading',IFROM,' to',REMME CALL MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS, + NCONTS,IDBSIZ,GEL,IFROM,REMME,IDEVR,IDEVW,IDEVN,MAXGEL,KBOUT) END IF NGELS = NGELS - 1 CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) ELSE C Is remme indispensible ? NSTART = RELPG(RNBR(REMME)) I = REMME 10 CONTINUE I = LNBR(I) IF(I.NE.0) THEN IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.NSTART) GO TO 10 DISPEN = .TRUE. END IF IF(DISPEN) THEN WRITE(KBOUT,*) + 'Removing dispensible reading from middle of contig' I = LNBR(REMME) RNBR(I) = RNBR(REMME) CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) I = RNBR(REMME) LNBR(I) = LNBR(REMME) CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) IFROM = NGELS IF(REMME.NE.IFROM) THEN WRITE(KBOUT,*)'Renumbering reading',IFROM,' to',REMME CALL MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS, + NCONTS,IDBSIZ,GEL,IFROM,REMME,IDEVR,IDEVW,IDEVN, + MAXGEL,KBOUT) END IF NGELS = NGELS - 1 RELPG(ICONT) = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,LNBR(ICONT)) CALL WRITER(IDEVR,ICONT,RELPG(ICONT),LNGTHG(ICONT), + LNBR(ICONT),RNBR(ICONT)) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) ELSE WRITE(KBOUT,*) + 'Removing indispensible reading from middle of contig' WRITE(KBOUT,*)'So breaking contig first' IR = REMME IL = LNBR(REMME) ILO = LNBR(ICONT) NCONTO = ICONT NCONTR = IDBSIZ - NCONTS - 1 CALL CBREAK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + KBOUT,IDEVR,IDEVW,IDEVN,IR,IL,ILO,NCONTO,NCONTR,IOK) IF(IOK.NE.0) RETURN WRITE(KBOUT,*)'Removing reading from left end of contig' ICONT = IDBSIZ - NCONTS LNBR(ICONT) = RNBR(REMME) I = 1 - RELPG(RNBR(REMME)) WRITE(KBOUT,*)'Shifting readings in contig by distance=',I CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR, + IDBSIZ,RNBR(REMME),ICONT,I) I = LNBR(ICONT) LNBR(I) = 0 CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) IFROM = NGELS IF(REMME.NE.IFROM) THEN WRITE(KBOUT,*)'Renumbering reading',IFROM,' to',REMME CALL MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS, + NCONTS,IDBSIZ,GEL,IFROM,REMME,IDEVR,IDEVW,IDEVN, + MAXGEL,KBOUT) END IF NGELS = NGELS - 1 CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) END IF END IF END SUBROUTINE REMCNL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +REMME,IDEVR) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER REMME C Routine to remove a contig line from a db C Loop deals with case of remove top contig C Move down all lines from above DO 10 I = REMME,IDBSIZ-NCONTS+1,-1 RELPG(I) = RELPG(I-1) LNGTHG(I) = LNGTHG(I-1) LNBR(I) = LNBR(I-1) RNBR(I) = RNBR(I-1) CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)) 10 CONTINUE NCONTS = NCONTS - 1 CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) END SUBROUTINE MOVGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +GEL,FROM,TO,IDEVR,IDEVW,IDEVN,MAXGEL,KBOUT) C Subroutine to move a gel from line from to line to C Extended 22-5-91 C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),FROM,TO INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER NAMGEL*10,GEL(MAXGEL) INTEGER GCLIN,CHAINL LOGICAL LEFTE,RIGHTE EXTERNAL GCLIN,CHAINL LEFTE = .FALSE. RIGHTE = .FALSE. C C left end ? C IF(LNBR(FROM).EQ.0) LEFTE = .TRUE. C C right end ? C IF(RNBR(FROM).EQ.0) RIGHTE = .TRUE. C C if both true remove the contig line, then overwrite the gel C IF(LEFTE.AND.RIGHTE) THEN NCONTO = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,FROM) IF(NCONTO.EQ.0)THEN WRITE(KBOUT,*) + 'This gel has no left neighbour but does not' WRITE(KBOUT,*)'appear in a contig line!' ELSE LNBR(NCONTO) = TO RNBR(NCONTO) = TO CALL WRITER(IDEVR,NCONTO,RELPG(NCONTO),LNGTHG(NCONTO), + LNBR(NCONTO),RNBR(NCONTO)) END IF ELSE IF(LEFTE) THEN NCONTO = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,FROM) IF(NCONTO.EQ.0)THEN WRITE(KBOUT,*) + 'This gel has no left neighbour but does not' WRITE(KBOUT,*)'appear in a contig line!' ELSE LNBR(NCONTO) = TO CALL WRITER(IDEVR,NCONTO,RELPG(NCONTO),LNGTHG(NCONTO), + LNBR(NCONTO),RNBR(NCONTO)) END IF ELSE IF(RIGHTE) THEN I = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,FROM) NCONTO = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,I) IF(NCONTO.EQ.0)THEN WRITE(KBOUT,*) + 'This gel has no right neighbour and does not' WRITE(KBOUT,*)'appear in a contig!' ELSE IF(RNBR(NCONTO).NE.FROM)THEN WRITE(KBOUT,*) + 'This gel has no right neighbour but does not' WRITE(KBOUT,*)'appear in a contig line!' ELSE RNBR(NCONTO) = TO CALL WRITER(IDEVR,NCONTO,RELPG(NCONTO),LNGTHG(NCONTO), + LNBR(NCONTO),RNBR(NCONTO)) END IF END IF END IF RELPG(TO)=RELPG(FROM) LNGTHG(TO)=LNGTHG(FROM) LNBR(TO)=LNBR(FROM) RNBR(TO)=RNBR(FROM) CALL READW(IDEVW,FROM,GEL,MAXGEL) CALL WRITEW(IDEVW,TO,GEL,MAXGEL) CALL READN(IDEVN,FROM,NAMGEL) CALL WRITEN(IDEVN,TO,NAMGEL) CALL WRITER(IDEVR,TO,RELPG(TO),LNGTHG(TO), +LNBR(TO),RNBR(TO)) C Do neighbours IF(LNBR(FROM).NE.0) THEN I=LNBR(FROM) RNBR(I)=TO CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) END IF IF(RNBR(FROM).NE.0) THEN I=RNBR(FROM) LNBR(I)=TO CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) END IF CALL MOVTAG(FROM,TO) END SUBROUTINE DBOPEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,NAMPRO,GEL, +IDBSIS,IDBSIZ,IERR,KBIN,KBOUT, +IDEVR,IDEVW,IDEVN,IDEVT,IDEVC, +MAXGEL,MAXGLM,LLINO, +IDM,IHELPS,IHELPE,HELPF,IDEVH) C AUTHOR: RODGER STADEN CHARACTER GEL(MAXGLM) INTEGER RELPG(IDBSIS) INTEGER LNGTHG(IDBSIS),LNBR(IDBSIS),RNBR(IDBSIS) CHARACTER NAMPRO*(*),COPYNO*4,HELPF*(*) INTEGER IWORD,ANS PARAMETER (IWORD=4) C NOTE THIS IS THE MACHINES WORD LENGTH IE HOW MANY CHARS PER WORD CALL FILLI(RELPG,IDBSIS,0) CALL FILLI(LNGTHG,IDBSIS,0) CALL FILLI(LNBR,IDBSIS,0) CALL FILLI(RNBR,IDBSIS,0) NAMPRO(1:)=' ' IERR=1 1 CONTINUE L = 0 CALL GTSTR('Project name',' ',NAMPRO,L,KBOUT,KBIN,INFLAG) IF(L.LT.1) RETURN LL = L CALL CCASE(NAMPRO,1) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) GO TO 1 END IF L = 1 CALL GTSTR('Version','0',COPYNO,L,KBOUT,KBIN,INFLAG) CALL CCASE(COPYNO,1) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) GO TO 1 END IF IF(L.LT.1) COPYNO(1:1) = '0' C GET .ARN FIRST AS IT CONTAINS LENGTH OF OTHER FILES NAMPRO(LL+1:LL+3)='.AR' NAMPRO(LL+4:LL+4)=COPYNO(1:1) CALL OPENRS(IDEVN,NAMPRO,IOK,3,4) IF(IOK.NE.0)GO TO 100 C ******* PROBLEM HERE READING RECORD 1000 AND INTEGER FROM IDEVN!!! READ(IDEVN,REC=IDBSIS,ERR=50)IDBSIZ,MAXGEL,IDM IF(MAXGEL.LT.512)GO TO 50 IF(MAXGEL.GT.MAXGLM)GO TO 50 IF(MOD(MAXGEL,512).NE.0)GO TO 50 20 CONTINUE IF((IDM.NE.5).AND.(IDM.NE.26))GO TO 51 21 CONTINUE NAMPRO(LL+2:LL+3)='RL' CALL OPENRS(IDEVR,NAMPRO,IOK,4,4) IF(IOK.NE.0)GO TO 100 NAMPRO(LL+2:LL+3)='SQ' C DEFINE RECORD LENGTH IN TERMS OF NUMBER OF CHARS PER WORD (4 ON VAX) C AND MAXGEL SIZE IREC=MAXGEL/IWORD IF(MOD(MAXGEL,IWORD).NE.0)IREC=IREC+1 CALL OPENRS(IDEVW,NAMPRO,IOK,IREC,4) IF(IOK.NE.0)GO TO 100 C NAMPRO(LL+2:LL+3) = 'RD' C CALL OPENRS(IDEVRD,NAMPRO,IOK,7,4) C IF(IOK.NE.0) IDEVRD = -9 NAMPRO(LL+2:LL+3) = 'TG' CALL OPENRS(IDEVT,NAMPRO,IOK,5,4) IF(IOK.NE.0) IDEVT = -1 NAMPRO(LL+2:LL+3) = 'CC' C COMMENT_LENGTH: 11 = (40 + long)/long CALL OPENRS(IDEVC,NAMPRO,IOK,11,4) IF(IOK.NE.0) IDEVC = -1 C READ A LINE FOR LUCK CALL READW(IDEVW,1,GEL,MAXGEL) CALL READR(IDEVR,IDBSIZ,NGELS,NCONTS,IDUM1,IDUM2) WRITE(KBOUT,10011)NGELS,NCONTS,IDBSIZ,MAXGEL 10011 FORMAT(' Number of gel readings=',I3,' Number of contigs=',I3,/, +' Database size=',I4,' Maximum gel reading length=',I4) LLINO = 0 IF(NGELS.LT.1)GO TO 5 DO 3 I=1,NGELS CALL READR(IDEVR,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)) 3 CONTINUE N=IDBSIZ-NCONTS MXT = 0 DO 4 I=N,IDBSIZ-1 CALL READR(IDEVR,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)) IF(RELPG(I).GT.MXT) THEN MXT = RELPG(I) LLINO = LNBR(I) END IF 4 CONTINUE 5 CONTINUE IERR=0 NAMPRO(LL+2:LL+2) = COPYNO(1:1) RETURN 100 CONTINUE WRITE(KBOUT,9999) 9999 FORMAT(' Error encountered opening database files') NAMPRO(LL+2:LL+2) = COPYNO(1:1) RETURN 50 CONTINUE WRITE(KBOUT,2000)MAXGLM 2000 FORMAT(' Error reading maximum gel reading length',/, +' The maximum gel reading length must be a multiple', +' of 512.',/,' E.G. 512, 1024, 1536, 2048, etc up to',I6) MN = 512 MX = MAXGLM MAXIN1 = MN CALL GETINT(MN,MX,MAXIN1, +'Maximum gel reading length', +IVAL,KBIN,KBOUT,IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN MAXGEL = IVAL IF(MOD(MAXGEL,MAXIN1).NE.0)GO TO 50 C PROBLEM WRITING TO RECORD IDBSIZ WITH NO SUBROUTINE CALL!!!!!!!!!! WRITE(IDEVN,REC=IDBSIS)IDBSIZ,MAXGEL GO TO 20 51 CONTINUE WRITE(KBOUT,2003) 2003 FORMAT(' Error reading protein or dna label') CALL YESNO(ANS,'DNA', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0) RETURN IF(ANS.EQ.0)IDM = 5 IF(ANS.EQ.1)IDM = 26 C PROBLEM WRITING TO RECORD IDBSIZ WITH NO SUBROUTINE CALL!!!!!!!!!! WRITE(IDEVN,REC=IDBSIS)IDBSIZ,MAXGEL,IDM GO TO 21 END SUBROUTINE DBSTAR(NAMPRO,GEL,IDBSIS,IDBSIZ,KBIN,KBOUT, +IDEV1,IDEV2,IDEV3,IDEVT,IDEVC, +IERR,IHELPS,IHELPE,IDEVH,FILEH, +MAXGEL,MAXGLM,IDM) CHARACTER GEL(MAXGLM),FILEH*(*) CHARACTER NAMPRO*(*) INTEGER IWORD PARAMETER (IWORD=4) IERR=1 3 CONTINUE MN = 0 CALL GTSTR('New project name',' ',NAMPRO,MN,KBOUT,KBIN,INFLAG) IF(MN.LT.1) RETURN LL = MIN(12,MN) CALL CCASE(NAMPRO,1) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 3 END IF MN = 10 MX = IDBSIS IDBSIZ = 50 CALL GETINT(MN,MX,IDBSIZ, +'Database size', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN IDBSIZ = IVAL 5 CONTINUE MN = 512 MX = MAXGLM MAXIN1 = MIN(512,MAXGEL) CALL GETINT(MN,MX,MAXIN1, +'Maximum gel reading length', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN MAXGEL = IVAL IF(MOD(IVAL,MAXIN1).NE.0) THEN MAXGEL = 512 + (IVAL/512)*512 WRITE(KBOUT,*)'Maximum set to',MAXGEL END IF CALL YESNO(IDM,'Database is for DNA', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(IDM.LT.0) RETURN IF(IDM.EQ.0)IDM = 5 IF(IDM.EQ.1)IDM = 26 NAMPRO(LL+1:LL+4)='.RL0' CALL OPENRS(IDEV1,NAMPRO,IOK,4,3) IF(IOK.NE.0)GO TO 100 C IRAW = 0 C IDEVRT = IDEVRD C IDEVRD = -9 C CALL YESNO(IRAW,'Create raw data pointer file', C +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) C IF(IRAW.LT.0) RETURN C IF(IRAW.EQ.0) THEN C NAMPRO(LL+1:LL+4)='.RD0' C CALL OPENRS(IDEVRT,NAMPRO,IOK,7,3) C IF(IOK.EQ.0) IDEVRD = IDEVRT C END IF WRITE(KBOUT,1004) 1004 FORMAT(' Writing new database files') J = 0 K = 0 N = 0 M = 0 DO 10 I=1,IDBSIZ CALL WRITER(IDEV1,I,J,K,M,N) 10 CONTINUE NAMPRO(LL+2:LL+3)='SQ' IREC=MAXGEL/IWORD IF(MOD(MAXGEL,IWORD).NE.0)IREC=IREC+1 CALL OPENRS(IDEV2,NAMPRO,IOK,IREC,3) IF(IOK.NE.0)GO TO 100 CALL FILLC(GEL,MAXGEL,' ') CALL WRITEW(IDEV2,IDBSIZ,GEL,MAXGEL) NAMPRO(LL+2:LL+3)='AR' CALL OPENRS(IDEV3,NAMPRO,IOK,3,3) IF(IOK.NE.0)GO TO 100 C NOTE HERE IS A WRITE TO NAMES FILE NOT USING A SUBROUTINE CALL WRITE(IDEV3,REC=IDBSIS)IDBSIZ,MAXGEL,IDM C IF(IDEVRD.GT.0) THEN C NAMFIL = ' ' C MTYPE = ' ' C I = 0 C J = 0 C K = 0 C CALL WRITRD(IDEVRD,IDBSIZ,I,J,K,MTYPE,NAMFIL) C ENDIF C CREATE TAG FILES (TAGS AND COMMENTS) IF (.TRUE.) THEN NAMPRO(LL+2:LL+3)='TG' CALL OPENRS(IDEVT,NAMPRO,IOK,5,3) IF(IOK.NE.0)GO TO 100 CALL WRITTG(IDEVT,IDBSIZ,IDBSIZ,0,0,0,0) NAMPRO(LL+2:LL+3)='CC' C COMMENT_LENGTH: 11 = (40 + long)/long CALL OPENRS(IDEVC,NAMPRO,IOK,11,3) IF(IOK.NE.0)GO TO 100 CALL WRITCC(IDEVC,1,1,0,' ') ENDIF WRITE(KBOUT,1003)NAMPRO(1:LL),IDBSIZ 1003 FORMAT(' Database ',A,' version 0, size',I5, +' successfully started') C WRITE(KBOUT,1006)NAMPRO(1:LL) 1006 FORMAT( +' Note the 3 database files are named ',A,'.AR0, .SQ0, .RL0') IERR=0 NAMPRO(LL+2:LL+2) = '0' RETURN 100 CONTINUE WRITE(KBOUT,9999) 9999 FORMAT(' Error when trying to open database,', +' database not started') RETURN END SUBROUTINE ENTER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +GEL,GEL2,IDBSIZ,IDEV,KBIN,KBOUT,IDEVR,IDEVW,IDEVN,IDEVG, +IDEVT,IDEVC,LINLEN,PERCD,NAMARC, +HELPS1,HELPE1,HELPS2,HELPE2,FILEH,IDEVH,MAXGEL,IDM,NCONTC) CHARACTER FILEH*(*) INTEGER HELPS1,HELPS2,HELPE1,HELPE2 C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER X,Y,ANS INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL),NAMARC*(*) CHARACTER NAMARK*10 CHARACTER GEL2(MAXGEL) C WRITE(KBOUT,1000) C1000 FORMAT( C +' Enter a new gel reading into the database.',//, C +' Note that before a gel reading is entered into the',/, C +' database it must be compared with the database to ',/, C +' search for overlaps using a function such as auto assemble',/, C +' The replies to all the questions that follow are given',/, C +' by these searches',//) IF((IDBSIZ-(NGELS+NCONTS)).GT.2)GO TO 5 WRITE(KBOUT,1999)IDBSIZ 1999 FORMAT(' Current database size=',I5,' extend with copy') RETURN 5 CONTINUE C WRITE(KBOUT,1018) C1018 FORMAT( C +' We need the name of the file containing the gel reading') CALL OPENF1(IDEVG,NAMARC,0,IOK,KBIN,KBOUT, +'File name of gel reading to enter', +HELPS1,HELPE1,FILEH,IDEVH) IF(IOK.NE.0)RETURN IF(NGELS.GT.0) THEN DO 10 J=1,NGELS CALL READN(IDEVN,J,NAMARK) IF(NAMARC(1:10).NE.NAMARK)GO TO 10 WRITE(KBOUT,1013)J 1013 FORMAT( ' Gel reading already in database (number,',I6, + '), entry aborted') RETURN 10 CONTINUE END IF IDIM=MAXGEL CALL ARRFIM(IDEVG,GEL,IDIM,KBOUT) CLOSE(UNIT=IDEVG) NGELS=NGELS+1 LNGTHG(NGELS)=IDIM NAMARK=NAMARC(1:10) CALL WRITEN(IDEVN,NGELS,NAMARK) WRITE(KBOUT,1003)NGELS 1003 FORMAT( ' This gel reading has been given the number ',I6) IF(IDEVT.GT.0) CALL ENTRD(IDEVG,IDEVRD,NAMARC,NGELS,IOK) ANS=0 IF(NGELS.GT.1)THEN C WRITE(KBOUT,1017) C1017 FORMAT( C +' We need to know if the gel reading overlaps data already in', C +' the database') CALL YESNO(ANS,'New gel overlaps a contig', + HELPS1,HELPE1,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0) THEN NGELS = NGELS - 1 RETURN END IF IF(ANS.EQ.0)GO TO 100 END IF C C DOES NOT OVERLAP SO IT STARTS A CONTIG OF ITS OWN LNBR(NGELS)=0 RNBR(NGELS)=0 RELPG(NGELS)=1 CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) NCONTS=NCONTS+1 N=IDBSIZ-NCONTS LNBR(N)=NGELS RNBR(N)=NGELS RELPG(N)=(IDIM) CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) CALL WRITEW(IDEVW,NGELS,GEL,MAXGEL) RETURN C 100 CONTINUE C WRITE(KBOUT,1019) C1019 FORMAT( C +' We need to know if the new gel reading overlaps in the',/, C +' normal or complementary sense') C COMPLEMENT GEL IF REQUIRED CALL YESNO(ANS,'Gel overlaps in normal sense', +HELPS1,HELPE1,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0) THEN NGELS = NGELS - 1 RETURN END IF IF(ANS.EQ.1) THEN CALL SQREV(GEL,IDIM) CALL SQCOM(GEL,IDIM) LNGTHG(NGELS)=-1*LNGTHG(NGELS) END IF C WRITE(KBOUT,1020) C1020 FORMAT( C +' We need to know which contig the new gel reading overlaps') CALL GETLN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LINCON,NCONTC, +IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +HELPS1,HELPE1,FILEH,IDEVH) IF(IERR.NE.0) THEN NGELS=NGELS-1 RETURN END IF C WRITE(KBOUT,1014) C1014 FORMAT( C +' We need information about the position of the overlap.',/, C +' First there are two types: those that extend the contig',/, C +' leftwards and those that start internally.') CALL YESNO(ANS,'Left end of gel reading is inside contig', + HELPS1,HELPE1,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0)THEN NGELS=NGELS-1 RETURN END IF IF(ANS.EQ.1)GO TO 400 C RIGHT END OR INTERNAL OVERLAP C C WRITE(KBOUT,1015) C1015 FORMAT(' Now we need to know the exact position of the overlap') 160 CONTINUE MN = 1 MX = RELPG(LINCON) X = 0 CALL GETINT(MN,MX,X, +'Position in contig of left end of gel reading', +IVAL,KBIN,KBOUT, +HELPS1,HELPE1,FILEH,IDEVH,IOK) IF(IOK.NE.0) THEN NGELS = NGELS - 1 RETURN END IF X = IVAL IF(X.EQ.0)GO TO 160 N=NCONTC 200 CONTINUE IF(RELPG(N).GT.X)GO TO 250 IF(RNBR(N).EQ.0)GO TO 350 N=RNBR(N) GO TO 200 250 CONTINUE C GEL LIES BETWEEN N AND LNBR(N) C DISPLAY JOINT NOPT = 1 C @jkb@ C CALL BEDIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, C +GEL,GEL2,LINCON,NCONTC,NOPT,X,IDBSIZ,IDEV,KBIN,KBOUT, C +IDEVR,IDEVW,IDEVN,LINLEN,PERCD, C +HELPS1,HELPE1,HELPS2,HELPE2,FILEH,IDEVH,MAXGEL,IDM) C CHECK ON RESULT IF(NOPT.EQ.1)GO TO 160 IF(NOPT.EQ.3)GO TO 260 C COCKUP NGELS=NGELS-1 RETURN 260 CONTINUE CALL WRITEW(IDEVW,NGELS,GEL,MAXGEL) LNBR(NGELS)=LNBR(N) RNBR(NGELS)=N RELPG(NGELS)=X CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) RNBR(LNBR(N))=NGELS K=LNBR(N) CALL WRITER(IDEVR,K,RELPG(K),LNGTHG(K), +LNBR(K),RNBR(K)) LNBR(N)=NGELS CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) IDIM=ABS(LNGTHG(NGELS)) Y=X+(IDIM)-1 IF(Y.LE.RELPG(LINCON))RETURN RELPG(LINCON)=Y CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) RETURN 350 CONTINUE C MUST BE A RIGHT END OVERLAP C DSPLAY JOINT NOPT = 1 C @jkb@ C CALL BEDIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, C +GEL,GEL2,LINCON,NCONTC,NOPT,X,IDBSIZ,IDEV,KBIN,KBOUT, C +IDEVR,IDEVW,IDEVN,LINLEN,PERCD, C +HELPS1,HELPE1,HELPS2,HELPE2,FILEH,IDEVH,MAXGEL,IDM) IF(NOPT.EQ.1)GO TO 160 IF(NOPT.EQ.3)GO TO 360 C COCKUP NGELS=NGELS-1 RETURN 360 CONTINUE CALL WRITEW(IDEVW,NGELS,GEL,MAXGEL) LNBR(NGELS)=N RNBR(NGELS)=0 RELPG(NGELS)=X CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) RNBR(N)=NGELS CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) RNBR(LINCON)=NGELS IDIM=ABS(LNGTHG(NGELS)) Y=X+(IDIM)-1 RELPG(LINCON)=MAX(RELPG(LINCON),Y) CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) RETURN C 400 CONTINUE C C ADDING TO LEFT END C WRITE(KBOUT,1015) 410 CONTINUE MN = 1 MX = IDIM X = 0 CALL GETINT(MN,MX,X, +'Position in new gel reading of left end of contig', +IVAL,KBIN,KBOUT, +HELPS1,HELPE1,FILEH,IDEVH,IOK) IF(IOK.NE.0) THEN NGELS = NGELS - 1 RETURN END IF X = IVAL IF(X.EQ.0)GO TO 410 C SET FLAG TO SHOW LEFT END OVERLAP NOPT=2 C @jkb@ C CALL BEDIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, C +GEL,GEL2,LINCON,NCONTC,NOPT,X,IDBSIZ,IDEV,KBIN,KBOUT, C +IDEVR,IDEVW,IDEVN,LINLEN,PERCD, C +HELPS1,HELPE1,HELPS2,HELPE2,FILEH,IDEVH,MAXGEL,IDM) IF(NOPT.EQ.1)GO TO 400 IF(NOPT.EQ.3)GO TO 420 C COCKUP NGELS=NGELS-1 RETURN 420 CONTINUE CALL WRITEW(IDEVW,NGELS,GEL,MAXGEL) RELPG(NGELS)=1 RNBR(NGELS)=NCONTC LNBR(NGELS)=0 CALL WRITER(IDEVR,NGELS,RELPG(NGELS),LNGTHG(NGELS), +LNBR(NGELS),RNBR(NGELS)) LNBR(NCONTC)=NGELS RELPG(NCONTC)=X CALL WRITER(IDEVR,NCONTC,RELPG(NCONTC),LNGTHG(NCONTC), +LNBR(NCONTC),RNBR(NCONTC)) RELPG(LINCON)=RELPG(LINCON)+X-1 IDIM=ABS(LNGTHG(NGELS)) Y=IDIM IF(Y.GT.RELPG(LINCON))RELPG(LINCON)=Y LNBR(LINCON)=NGELS CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) N=NCONTC 440 CONTINUE IF(RNBR(N).EQ.0)RETURN N=RNBR(N) RELPG(N)=RELPG(N)+X-1 CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) GO TO 440 END C SUBROUTINE XENTRD(IDEVG,IDEVRD,NAMARC,NGEL,IOK) C CHARACTER NAMARC*(*),NAMFIL*12,MTYPE*4 CC Enters raw data info into database C IOK = 0 C IF(IDEVRD.GT.0) THEN C CALL OPENRS(IDEVG,NAMARC,IOK,LRECL,2) CC IF(IOK.EQ.0) THEN C CALL RRD(IDEVG,LENR,LCUT,LENW,MTYPE,NAMFIL) C CALL WRITRD(IDEVRD,NGEL,LENR,LCUT,LENW,MTYPE,NAMFIL) CC END IF C END IF C END SUBROUTINE FIXRD(IDEVT,IDEVC,IDBSIZ,KBIN,KBOUT, +IHELPS,IHELPE,FILEH,IDEVH) C FILE_NAME_LENGTH CHARACTER NAMFIL*18,NEWNAM*18,MTYPE*4,NEWMT*4,FILEH*(*) IF(IDEVRD.LT.0) THEN WRITE(KBOUT,*)'No raw data file!' RETURN END IF 10 CONTINUE C Change raw data record MN = 0 MX = IDBSIZ-1 LNO = 0 CALL GETINT(MN,MX,LNO, + 'Number of line to change', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN IF(IVAL.EQ.0) RETURN LNO = IVAL CALL READRD(IDEVT,IDEVC,LNO,LENR,LCUT,LENW,MTYPE,NAMFIL) WRITE(KBOUT,*)'Current line' WRITE(KBOUT,1001)LENR,LCUT,LENW,MTYPE,NAMFIL 1001 FORMAT(' ',3I6,' ',A,' ',A) MN = 1 MX = 9999 LX = LENR CALL GETINT(MN,MX,LX, + 'Length raw sequence', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN LX = IVAL MN = 1 MX = LX L = LCUT CALL GETINT(MN,MX,L, + 'Left cutoff', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) GO TO 10 L = IVAL MN = 1 MX = LX M = LENW CALL GETINT(MN,MX,M, + 'Length of original working sequence', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN M = IVAL 20 CONTINUE LNAM = 4 CALL GTSTR('Machine type', + MTYPE,NEWMT,LNAM,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 20 END IF IF(INFLAG.EQ.3) NEWMT = MTYPE 30 CONTINUE C FILE_NAME_LENGTH LNAM = 18 CALL GTSTR('Name for raw data file', + NAMFIL,NEWNAM,LNAM,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 30 END IF IF(INFLAG.EQ.3) NEWNAM = NAMFIL WRITE(KBOUT,1001)LX,L,M,NEWMT,NEWNAM CALL WRITRD(IDEVT,IDEVC,LNO,LX,L,M,NEWMT,NEWNAM) WRITE(KBOUT,*)'New record written to disk' GO TO 10 END C SUBROUTINE READRD(IDEVT,IDEVC,NGEL,LENR,LCUT,LENW,MTYPE,NAMFIL) C CHARACTER MTYPE*(*),NAMFIL*(*) C IF(IDEVT.GT.0)READ(IDEVRD,REC=NGEL)LENR,LCUT,LENW,MTYPE,NAMFIL C END C SUBROUTINE RRD(IDEVG,LENR,LCUT,LENW,MTYPE,NAMFIL) C CHARACTER LINE*80,NAMFIL*(*),MTYPE*(*) C Reads raw data info from reading file assuming it starts with ; C 1 CONTINUE C READ(IDEVG,1000,END=100,ERR=100)LINE C 1000 FORMAT(A) C IF(LINE(1:1).NE.';') GO TO 1 C READ(LINE,1001,ERR=100)LENR,LCUT,LENW,MTYPE,NAMFIL C 1001 FORMAT(1X,3I6,A,A) C RETURN C 100 CONTINUE C LENR = 0 C LCUT = 0 C LENW = 0 C MTYPE = ' ' C NAMFIL = ' ' C END C SUBROUTINE WRITRD(IDEVT,IDEVC,NGEL,LENR,LCUT,LENW,MTYPE,NAMFIL) C CHARACTER MTYPE*(*),NAMFIL*(*) C IF(IDEVT.GT.0)WRITE(IDEVRD,REC=NGEL)LENR,LCUT,LENW,MTYPE,NAMFIL C END SUBROUTINE READRD(IDEVT,IDEVC,NGEL,LENR,LCUT,LENW,MTYPE,NAMFIL) CHARACTER MTYPE*(*),NAMFIL*(*) C COMMENT_LENGTH CHARACTER NOTE*40 IF(IDEVT.GT.0)THEN CALL READTG(IDEVT,NGEL,LPOS,LLEN,LCOM,LTYPE,NEXT) CALL READCC(IDEVC,LCOM,ICNT,NEXT,NOTE) READ(NOTE,1001,ERR=100)LENR,LCUT,LENW,MTYPE,NAMFIL 1001 FORMAT(3I6,A,A) ENDIF RETURN 100 CONTINUE LENR = 0 LCUT = 0 LENW = 0 MTYPE = ' ' NAMFIL = ' ' END SUBROUTINE WRITRD(IDEVT,IDEVC,NGEL,LENR,LCUT,LENW,MTYPE,NAMFIL) CHARACTER MTYPE*(*),NAMFIL*(*) INTEGER FREECC C COMMENT_LENGTH CHARACTER NOTE*40 IF(IDEVT.GT.0)THEN CALL READTG(IDEVT,NGEL,LPOS,LLEN,LCOM,LTYPE,NEXT) IF(LCOM.EQ.0)THEN LCOM = FREECC(IDEVC) ENDIF WRITE(NOTE,1001,ERR=100)LENR,LCUT,LENW,MTYPE,NAMFIL 1001 FORMAT(3I6,A,A) NEXT = 0 CALL WRITCC(IDEVC,LCOM,ICNT,NEXT,NOTE) ENDIF RETURN 100 CONTINUE END SUBROUTINE READTG(IDEVT,I,LPOS,LLEN,LCOM,LTYPE,NEXT) IF(IDEVT.GT.0) THEN READ(IDEVT,REC=I)LPOS,LLEN,LCOM,LTYPE,NEXT ENDIF END SUBROUTINE WRITTG(IDEVT,I,LPOS,LLEN,LCOM,LTYPE,NEXT) IF (IDEVT.GT.0) THEN WRITE(IDEVT,REC=I)LPOS,LLEN,LCOM,LTYPE,NEXT ENDIF END SUBROUTINE READCC(IDEVC,I,ICNT,NEXT,NOTE) C COMMENT_LENGTH CHARACTER NOTE*40 C COMMENT_LENGTH - 4 CHARACTER DUMM*36 IF(IDEVC.GT.0)THEN READ(IDEVC,REC=1)NEXT,ICNT,DUMM IF(I.EQ.0.OR.I.GT.ICNT)THEN NEXT = 0 NOTE = ' ' ELSE READ(IDEVC,REC=I)NEXT,NOTE ENDIF ENDIF END SUBROUTINE WRITCC(IDEVC,I,ICNT,NEXT,NOTE) C COMMENT_LENGTH CHARACTER NOTE*40 C COMMENT_LENGTH - 4 CHARACTER DUMM*36 IF(IDEVC.GT.0)THEN IF(I.EQ.1) THEN WRITE(IDEVC,REC=1)NEXT,ICNT,DUMM ELSE READ(IDEVC,REC=1)IDUM,ICNT,DUMM IF(I.LE.ICNT) WRITE(IDEVC,REC=I)NEXT,NOTE ENDIF ENDIF END SUBROUTINE PADCON(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +GEL,LINCON,POSN,NC,IDBSIZ,IDEVR,IDEVW,MAXGEL,KBOUT) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),POSN,X INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL) CHARACTER PAD SAVE PAD DATA PAD/'*'/ C NOW FIND FIRST CHAR THAT OVERLAPS REGION LLINO=LNBR(LINCON) 30 CONTINUE X=RELPG(LLINO)+ABS(LNGTHG(LLINO))-1 IF(X.GE.POSN)GO TO 40 C NOT IN REGION LLINO=RNBR(LLINO) GO TO 30 40 CONTINUE C NOW GET THIS GEL FROM DISK C READ(IDEVW,REC=LLINO)GEL CALL READW(IDEVW,LLINO,GEL,MAXGEL) C CALC POSN IN THIS GEL TO EDIT X=POSN-RELPG(LLINO)+1 K=X C MOVE THE DATA RIGHT M=ABS(LNGTHG(LLINO)) LNGTHG(LLINO)=LNGTHG(LLINO)+SIGN(NC,LNGTHG(LLINO)) C CHECK FOR OVER END OF ARRAY N=ABS(LNGTHG(LLINO)) IF(N.GT.MAXGEL)THEN WRITE(KBOUT,1000)LLINO 1000 FORMAT( +' Data pushed off end of gel',I4,' during padding') M=M-(N-MAXGEL) N=MAXGEL LNGTHG(LLINO)=SIGN(MAXGEL,LNGTHG(LLINO)) END IF J=M-K+1 DO 55 I=1,J GEL(N)=GEL(M) N=N-1 M=M-1 55 CONTINUE C PERFORM THE INSERTION DO 60 I=K,MIN(MAXGEL,K+NC-1) GEL(I)=PAD 60 CONTINUE C WRITE BACK TO DISK CALL WRITEW(IDEVW,LLINO,GEL,MAXGEL) C WRITE(IDEVW,REC=LLINO)GEL C WRITE NEW LINE CALL WRITER(IDEVR,LLINO,RELPG(LLINO),LNGTHG(LLINO), +LNBR(LLINO),RNBR(LLINO)) C WRITE(IDEVR,REC=LLINO) C 1RELPG(LLINO),LNGTHG(LLINO),LNBR(LLINO),RNBR(LLINO) C C NOW UPDATE TAG FILES ACCORDINGLY CALL PADTAG(LLINO,K,NC,LNGTHG(LLINO)) C 65 CONTINUE C NOW GET NEXT GEL LLINO=RNBR(LLINO) C LAST GEL? IF(LLINO.EQ.0)GO TO 70 C DOES IT HAVE DATA IN REGION? C IE DO RELPG AND RELPG+LNGTHG-1 LIE EITHER SIDE OF POSN? IF(RELPG(LLINO).GT.POSN)GO TO 70 X=RELPG(LLINO)+ABS(LNGTHG(LLINO))-1 IF(X.LT.POSN)GO TO 65 C WITHIN GO TO 40 70 CONTINUE C INSERTS FINISHED SO NEED TO INCREMENT ALL THOSE GELS TO RIGHT LLINO=LNBR(LINCON) 75 CONTINUE IF(RELPG(LLINO).GT.POSN)GO TO 80 76 CONTINUE LLINO=RNBR(LLINO) IF(LLINO.EQ.0)GO TO 90 GO TO 75 80 CONTINUE RELPG(LLINO)=RELPG(LLINO)+NC C WRITE NEW LINE CALL WRITER(IDEVR,LLINO,RELPG(LLINO),LNGTHG(LLINO), +LNBR(LLINO),RNBR(LLINO)) C WRITE(IDEVR,REC=LLINO) C 1RELPG(LLINO),LNGTHG(LLINO),LNBR(LLINO),RNBR(LLINO) GO TO 76 90 CONTINUE C NEED TO INCREMENT CONTIG LINE RELPG(LINCON)=RELPG(LINCON)+NC CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) C WRITE(IDEVR,REC=LINCON)RELPG(LINCON),LNGTHG(LINCON),LNBR(LINCON), C 1RNBR(LINCON) RETURN END SUBROUTINE AUTOJ(RELPG,LNGTHG,LNBR,RNBR,MAXDB,IDBSIZ, +NGELS,NCONTS,MAXGEL, +TEMP3,WORDP,WORDN,LPOWRC,POSNS,GELN, +SEQ1,MAXSEQ,SEQ2,SEQ3,SEQ4,SEQ5,SEQC2,SEQG2,MATCH, +MAXGLM,MAXGL2,CHRSIZ,ECHRSZ,LENGTH, +SAV1,SAV2,SAV3,MAXSAV,CENDS,NENDS,MAXCON,CONST, +KBIN,KBOUT,IDEV1,IDEV2,IDEV3,IDEV4,IDEV7,IDEV8,IDEV, +IHELPS,IHELPE,HELPF,IDEVH,NAMARC,NAMPRO,FILE, +PERCD,IOPEN,IDM,SEQG3,SEQC3,IOK) INTEGER CHRSIZ,ECHRSZ INTEGER RELPG(MAXDB) INTEGER LNGTHG(MAXDB),LNBR(MAXDB),RNBR(MAXDB) INTEGER JOINT(2),ITOTPC(2),ITOTPG(2),IDIM22(2),IDOUT(2) INTEGER LLINO(2),ITYPE(2),IFAIL(2) INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2) INTEGER ANS,WINDOW INTEGER TEMP3(ECHRSZ,MAXGL2),CONST(LENGTH) INTEGER POSNS(MAXSEQ),WORDP(LPOWRC),WORDN(LPOWRC),GELN(MAXGLM) INTEGER CENDS(MAXCON),NENDS(MAXCON) CHARACTER SEQ3(MAXGLM),SEQC2(MAXGLM,2),SEQG2(MAXGLM,2) CHARACTER SEQ1(MAXSEQ),SEQ2(MAXGLM),MATCH(MAXGLM),SEQ4(MAXGLM) INTEGER SAV1(MAXSAV),SAV2(MAXSAV),SAV3(MAXSAV) CHARACTER NAMARC*(*),NAMPRO*(*),FILE*(*) CHARACTER SEQ5(MAXGLM),HELPF*(*),SEQG3(MAXGLM),SEQC3(MAXGLM) CALL DBCHEK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + TEMP3,IERR,KBOUT) IF(IERR.GT.1) RETURN IFAIL(1) = 0 IF(NGELS.LT.1) RETURN MN = LENGTH*2 MX = MAXGLM + 1 MINMAT = MAX(15,MN) CALL GETINT(MN,MX,MINMAT, +'Minimum initial match', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MINMAT = IVAL MN = 2 MX = 5 MINSLI = MAX(3,MN) CALL GETINT(MN,MX,MINSLI, +'Minimum alignment block', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MINSLI = IVAL MN = 0 MX = 50 MAXPG = 8 CALL GETINT(MN,MX,MAXPG, +'Maximum pads per sequence', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MAXPG = IVAL MAXPC = IVAL RMN = 0. RMX = 100. PERMAX = 8. CALL GETRL(RMN,RMX,PERMAX, + 'Maximum percent mismatch after alignment', + VAL,KBIN,KBOUT, + IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN PERMAX = VAL MN = MINMAT MX = MAXGEL WINDOW = 100 CALL GETINT(MN,MX,WINDOW, +'Probe length', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN WINDOW = IVAL IOK = 1 I = 0 CALL YESNO(I,'Use clipped data', + IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(I.LT.0) RETURN IWING = 0 IF(I.EQ.0) THEN MN = 1 MX = MAXGEL IWING = 100 CALL GETINT(MN,MX,IWING, + 'Window size for good data scan', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN IWING = IVAL MN = 1 MX = MIN(100,IWING) C Note nbad must be compatible with nok2 NBAD = MIN(IWING,5) CALL GETINT(MN,MX,NBAD, + 'Maximum number of dashes in scan window', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN NBAD = IVAL END IF IDIM1=0 MAXOVR=MAXGEL-3*MAX(MAXPC,MAXPG) ANS=0 CALL JCONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,MAXSEQ,SEQ2,IDBSIZ,IDIM1,ANS,KDUMM,KDUMM,KDUMM,TEMP3, +ECHRSZ,MAXGL2,KBOUT,IDEV2,IFAIL(1),MAXGEL,IDM,PERCD,SEQ5, +CENDS,NENDS,MAXCON,IWING,NBAD) IDIM2=MAXGEL C C Note I am doing something horrible here to save space: C sending cends and nends to jcons to get the extension lengths C then swapping them with temp3 by sending temp to ajoin and C receiving them as cends and nends, and sending cends nends C and receiving them as iladd,iradd CALL AUTOJN(SEQ1,IDIM1,SEQ2,IDIM2,ILEFTS,ILC,IPOSC, +IPOSG,ISENSE,LLINO,IMATC,IFCOMP,MINMAT,POSNS,WORDP,WORDN, +CONST,LENGTH,LPOWRC,IDEV,MATCH,MAXGEL,MAXGLM,SEQ5,GELN, +SAV1,SAV2,SAV3,MAXSAV,TEMP3,TEMP3(MAXCON+1,1),MAXCON, +SEQG2,SEQC2,SEQ4,IDOUT,IDIM22,ITOTPG,ITOTPC,JOINT,IFAIL, +ITYPE,MAXPC,MAXPG,PERMAX,MINSLI,SEQG3,SEQC3,KFAIL, +WINDOW,CENDS,NENDS,RELPG,LNBR,IDBSIZ,NCONTS) END SUBROUTINE AUTOJN(SEQ1,IDIMIN,GEL,IDIMGI,ILEFTS,ILC,IPOSC, +IPOSG,ISENSE,LLINO,IMATC,IFCOMP,MINMAT,POSNS,WORDP,WORDN, +CONST,LENGTH,LPOWRC,KBOUT,MATCH,MAXGEL,MAXGLM,GELCOP,GELN, +SAVPS,SAVPG,SAVL,MAXSAV,CENDS,NENDS,MAXCON, +SEQG2,SEQC2,SEQ4,IDOUT,IDIM22,ITOTPG,ITOTPC,JOINT,IFAIL, +ITYPE,MAXPC,MAXPG,PERMAX,MINSLI,SEQG3,SEQC3,KFAIL, +WINDOW,ILADD,IRADD,RELPG,LNBR,IDBSIZ,NCONTS) C AUTHOR: RODGER STADEN INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2) INTEGER POSNS(IDIMIN),GELN(MAXGLM),WORDP(LPOWRC),SAVPS(MAXSAV) INTEGER SAVPG(MAXSAV),SAVL(MAXSAV) INTEGER WORDN(LPOWRC),RELPG(IDBSIZ),LNBR(IDBSIZ) CHARACTER GELCOP(MAXGLM),MATCH(MAXGLM) INTEGER CENDS(MAXCON),NENDS(MAXCON),ILADD(MAXCON),IRADD(MAXCON) INTEGER CONST(LENGTH) CHARACTER SEQ1(IDIMIN),GEL(MAXGLM) CHARACTER SEQG2(MAXGLM,2),SEQC2(MAXGLM,2),SEQ4(MAXGLM) INTEGER IDOUT(2),IDIM22(2),ITOTPG(2),ITOTPC(2),JOINT(2) INTEGER IFAIL(2),ITYPE(2) PARAMETER (MAXC = 10) CHARACTER SEQG3(MAXGLM),SEQC3(MAXGLM) INTEGER JLEFTS(MAXC),JLC(MAXC),JPOSC(MAXC),JPOSG(MAXC),MCON(MAXC) INTEGER JSENSE(MAXC),JLLINO(MAXC),WINDOW IFAIL(1) = 1 IFAIL(2) = 1 KFAIL = 0 IDCEND=MAXCON CALL FNDCON(SEQ1,IDIMIN,CENDS,NENDS,IDCEND,MAXCON,KBOUT) C find possible missed joins C we have consensus in seq1 in order first contig,second contig etc C compare the ends in reverse order, simultaneously shortening the consensus IDIM = IDIMIN DO 200 JCON = IDCEND,2,-1 IDIM = CENDS(JCON) - 1 CALL ENCO(SEQ1,IDIM,POSNS,CONST,LENGTH) CALL ENCONA(POSNS,IDIM,WORDP,WORDN,LPOWRC,LENGTH) JS = CENDS(JCON) + 20 JE = CENDS(JCON+1) - 1 IEND = 1 IDIMG = MIN(WINDOW,JE-JS+1) C check for case where contig is shorter than probe (window) C in which case only compare the left hand end IF(JE-JS+1.LE.WINDOW) IEND = 2 C WRITE(*,*)JCON,JS,JE,IDIMG 1 CONTINUE IMATC = 0 IF(IEND.EQ.1) THEN CALL SQCOPY(SEQ1(JE-IDIMG+1),GEL,IDIMG) ELSE CALL SQCOPY(SEQ1(JS),GEL,IDIMG) END IF CALL SQCOPY(GEL,GELCOP,IDIMG) ISTRAN=1 2 CONTINUE CALL ENCO(GEL,IDIMG,GELN,CONST,LENGTH) IDSAV=MAXSAV CALL CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LPOWRC, +SAVPG,SAVPS,SAVL, +IDSAV,SEQ1,GEL,MINMAT,IFCOMP,KBOUT) IF(IFCOMP.NE.0) THEN CALL ERROM(KBOUT,'Error in CFGEL') RETURN END IF IF(IDSAV.NE.0)THEN CALL ADISM5(IDIM,IDIMG,SAVPS,SAVPG,IDSAV,CENDS,NENDS, + IDCEND,MAXCON,JLEFTS,JLC,JPOSC,JPOSG,JSENSE,JLLINO, + IMATC,ISTRAN,MAXC,KBOUT,MCON) END IF ISTRAN=ISTRAN+1 IF(ISTRAN.EQ.2) THEN CALL SQCOPY(GELCOP,GEL,IDIMG) CALL SQREV(GEL,IDIMG) CALL SQCOM(GEL,IDIMG) GO TO 2 END IF CALL SQCOPY(GELCOP,GEL,IDIMG) KSENSE = 0 C WRITE(KBOUT,*)'Total matches found',IMATC IF(IMATC.NE.0) THEN JMATC = 0 DO 100 I = 1,IMATC IF(JSENSE(I).EQ.-1) THEN IF(KSENSE.EQ.0) THEN CALL SQREV(GEL,IDIMG) CALL SQCOM(GEL,IDIMG) KSENSE = 1 END IF END IF JDIM22 = IDIMG JDOUT = MAXGEL IDSAV = MAXSAV C IF(IEND.EQ.1) WRITE(KBOUT,*)'Left end of Contig',NENDS(JCON) C IF(IEND.EQ.2) WRITE(KBOUT,*)'Right end of Contig',NENDS(JCON) C WRITE(KBOUT,*)'Trying to align with contig',JLLINO(I) CALL ALINEJ(SEQ1(JLEFTS(I)),GEL,SEQG3,SEQC3, + SAVPS,SAVPG,SAVL,IDSAV,JLC(I),JDIM22,JDOUT, + JPOSC(I),JPOSG(I),MINSLI,JJOINT,JTOTPC,JTOTPG, + JFAIL,JTYPE,MAXPC,MAXPG,PERMAX,KBOUT,SEQ4,MAXGEL,PERMS, + NENDS(JCON),IEND,JLLINO(I),JSENSE(I),ILADD,IRADD,MAXCON,MCON(I), + JCON,RELPG,LNBR,IDBSIZ,NCONTS,WINDOW) 100 CONTINUE END IF IEND = IEND + 1 IF(IEND.EQ.2) GO TO 1 C IDIM = CENDS(JCON)-1 200 CONTINUE END SUBROUTINE ALINEJ(SEQ1,SEQ2,SEQG2,SEQC2,ISAV1,ISAV2,ISAV3, +IDSAV,IDC,IDIM2,IDOUT,IC1,IG1,MINSLI,JOINT, +ITOTPC,ITOTPG,IFAIL,ITYPE,MAXPC,MAXPG,PERMAX,KBOUT,SEQ3,MAXGEL, +PERCM,JCONN,IEND,NCON,JSENSE,ILADD,IRADD,MAXCON,MCON,JCON, +RELPG,LNBR,IDBSIZ,NCONTS,WINDOW) C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDC),SEQ2(IDIM2),SEQG2(IDOUT),SEQC2(IDOUT) CHARACTER SEQ3(MAXGEL) INTEGER ISAV1(IDSAV),ISAV2(IDSAV),ISAV3(IDSAV) INTEGER ILADD(MAXCON),IRADD(MAXCON) INTEGER RELPG(IDBSIZ),LNBR(IDBSIZ),WINDOW MINSLT=MINSLI C SAVE SEQ2 CALL SQCOPY(SEQ2,SEQ3,IDIM2) CALL MSTLKL(SEQ3,IDIM2) IFAIL=1 C FIND MATCHES IPP=IDSAV CALL SLIDER(SEQ1,IDC,SEQ3,IDIM2,IC1,IG1,MAXPG,MAXPC,MINSLT, +ISAV1,ISAV2,ISAV3,IPP) IF(IPP.GT.IDSAV)RETURN IF(IPP.LT.1)RETURN CALL REMOVL(ISAV2,ISAV3,ISAV1,IPP) CALL BUB3AS(ISAV2,ISAV3,ISAV1,IPP) C DO TOPOLOGICAL CHECK CALL TPCHEK(ISAV2,ISAV3,ISAV1,IPP) IFAIL=0 CALL LINEUP(SEQ2,SEQ1,SEQG2,SEQC2,IDC,IDIM2,IDOUT,ISAV3,ISAV2, +ISAV1,IPP,ITOTPC,ITOTPG,JOINT,ITYPE,KBOUT,MAXGEL,IFAIL) IF(IFAIL.NE.0)RETURN C IDIM2 IS NOW LENGTH OF ALIGNED GEL C WRITE(KBOUT,1052)ITOTPC,ITOTPG C1052 FORMAT(' Padding in contig= ',I4,' and in gel= ',I4) CALL JALIGN(SEQC2,SEQG2,SEQ3,MAXGEL,IDOUT,IDIM2,JOINT, +ITYPE,PERCM,KBOUT,IFAIL,PERMAX,JCONN,IEND,NCON,JSENSE, +ILADD,IRADD,MAXCON,MCON,JCON,RELPG,LNBR,IDBSIZ,NCONTS,WINDOW) C ARE ALL CHECKS OK? IF(ITOTPC.GT.MAXPC)IFAIL=1 IF(ITOTPG.GT.MAXPG)IFAIL=1 IF(PERCM.GT.PERMAX)IFAIL=1 END C SUBROUTINE DALIGN C C COUNTS MISMATCHES AND DISPLAYS OVERLAP. SUBROUTINE JALIGN(SEQC2,SEQG2,SEQ3,MAXGEL,IDOUT,IDIM2, +JOINT,ITYPE,X,KBOUT,IFAIL,PERMAX,JCONN,IEND,NCON,JSENSE, +ILADD,IRADD,MAXCON,MCON,JCON,RELPG,LNBR,IDBSIZ,NCONTS,WINDOW) C AUTHOR: RODGER STADEN CHARACTER SEQC2(MAXGEL),SEQG2(MAXGEL),SEQ3(MAXGEL) CHARACTER PAD,DASH,STRAND,NAME1*6,NAME2*6 INTEGER ILADD(MAXCON),IRADD(MAXCON),RELPG(IDBSIZ),LNBR(IDBSIZ) INTEGER CLINNO,WINDOW EXTERNAL CLINNO SAVE PAD,DASH DATA PAD,DASH/',','-'/ C C where are the overlaps? C C C C if ITYPE is 1 the overlap starts within the reading at JOINT C else it starts at the left end of the reading at JOINT in the contig IENDG=1 IENDC=JOINT C ONLY LOOK AT OVERLAP WHICH IS FROM JOINT FOR LEFT TYPE JOIN IF(ITYPE.EQ.1)THEN IENDG=JOINT IENDC=1 END IF 100 CONTINUE C LENGTH OF OVERLAP? LG=IDIM2-IENDG+1 LO=MIN(IDOUT,LG) C SAVE RAW DATA CALL SQCOPY(SEQG2,SEQ3,IDIM2) CALL MSTLKL(SEQ3,IDIM2) X=FLOAT(LO) Y=X K=IENDG+LO-1 C POINT TO CONSENSUS J=0 C CHECK FOR OVERFLOW IF(K.GT.MAXGEL)THEN WRITE(KBOUT,1001) 1001 FORMAT( +' Matching region too long for routine dalign. Alignment aborted') IFAIL=1 RETURN END IF DO 200 I=IENDG,K J=J+1 IF(SEQC2(J).EQ.SEQ3(I))GO TO 200 C IF(SEQ3(I).EQ.DASH)GO TO 200 C IF(SEQC2(J).EQ.DASH)GO TO 200 C IF(SEQC2(J).EQ.PAD)GO TO 200 X=X-1. 200 CONTINUE X=(Y-X)*100./Y IF(X.LT.PERMAX) THEN IC = CLINNO(LNBR,IDBSIZ,NCONTS,JCONN) IF(IC.EQ.0) THEN IFAIL = 99 RETURN END IF IF(JSENSE.EQ.-1) THEN STRAND = '-' IF(IEND.EQ.1) THEN C C probe is complement of right hand end of contig. Give posns assuming C this contig is complemented. C IENDGT = IENDG - IRADD(JCON) C C Next line gives posns relative to original orientation C IENDGT = RELPG(IC) + IRADD(JCON) - IENDG + 1 ELSE C C probe is complement of left end of contig. Give posns assuming this C contig is going to be complemented. C IENDGT = RELPG(IC) - WINDOW + ILADD(JCON) + IENDG END IF ELSE STRAND = '+' IF(IEND.EQ.2) THEN C C probe is left hand end of contig in original sense C IENDGT = IENDG - ILADD(JCON) ELSE C C probe is right hand end of contig in original sense C IENDGT = RELPG(IC) + IRADD(JCON) - WINDOW + IENDG END IF END IF IENDCT = IENDC - ILADD(MCON) WRITE(KBOUT,*) + ' Possible join between contig ',JCONN,' in the ', + STRAND,' sense and contig ',NCON WRITE(KBOUT,1000)X 1000 FORMAT(' Percentage mismatch after alignment = ',F4.1) WRITE(NAME1,1002)JCONN WRITE(NAME2,1002)NCON 1002 FORMAT(I6) CALL FMT4LP(SEQC2(1),SEQG2(IENDG),LO,IENDCT,IENDGT,KBOUT, + NAME2,NAME1) END IF IFAIL=0 END SUBROUTINE ADISM5(IDIM,IDIMG,SAVPS,SAVPG,IDSAV, +CENDS,NENDS,IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE, +LLINO,IMATC,ISTRAN,MAXC,KBOUT,MCON) C AUTHOR: RODGER STADEN INTEGER ILEFTS(MAXC),ILC(MAXC),IPOSC(MAXC),IPOSG(MAXC) INTEGER ISENSE(MAXC),LLINO(MAXC),MCON(MAXC) INTEGER CENDS(MAXCON) INTEGER NENDS(MAXCON) INTEGER SAVPS(IDSAV),SAVPG(IDSAV) NEXTC=IDIM+1 CALL BUB2AS(SAVPS,SAVPG,IDSAV) IMATC=IMATC+1 CALL ADISM6(SAVPS(1),SAVPG(1),CENDS,NENDS,IDCEND,MAXCON, + ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,MAXC, + KBOUT,MCON) LEND=IDIMG-SAVPG(1)+SAVPS(1) DO 10 I=2,IDSAV IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10 IMATC=IMATC+1 CALL ADISM6(SAVPS(I),SAVPG(I),CENDS,NENDS,IDCEND,MAXCON, + ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,MAXC, + KBOUT,MCON) LEND=IDIMG-SAVPG(I)+SAVPS(I) 10 CONTINUE IMATC = MIN(IMATC,MAXC) END SUBROUTINE ADISM6(ISAVPS,SAVPG,CENDS,NENDS, +IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN, +NEXTC,MAXC,KBOUT,MCON) C AUTHOR: RODGER STADEN INTEGER ILEFTS(MAXC),ILC(MAXC),IPOSC(MAXC),IPOSG(MAXC) INTEGER ISENSE(MAXC),LLINO(MAXC),MCON(MAXC) INTEGER SAVPS,SAVPG,CENDS(MAXCON) INTEGER NENDS(MAXCON) SAVPS=ISAVPS-19 JJ=1 DO 5 J=2,IDCEND IF(SAVPS.GT.CENDS(J))GO TO 5 JJ=J-1 GO TO 6 5 CONTINUE JJ=IDCEND 6 CONTINUE C new bit need to save contig number for alinej SAVPS=SAVPS-1 LCL=SAVPS-CENDS(JJ) LCR=CENDS(JJ+1)-ISAVPS-1 NEXTC=CENDS(JJ+1)+20 IF(IMATC.LE.MAXC) THEN MCON(IMATC) = JJ ILEFTS(IMATC)=CENDS(JJ)+20 ILC(IMATC)=LCL+LCR+1 IPOSC(IMATC)=LCL+1 IPOSG(IMATC)=SAVPG LLINO(IMATC)=NENDS(JJ) ISENSE(IMATC)=1 IF(ISTRAN.EQ.2)ISENSE(IMATC)=-1 ELSE CALL ERROM(KBOUT,'Warning: too many overlaps') END IF END C JCONS SUBROUTINE JCONS(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,IDIM1,GEL,IDBSIZ,ISTART,ANS,LLINO,LREG,RREG,TEMP, +CHRSIZ,MAXGL2,KBOUT, +IDEVW,IFAIL,MAXGEL,IDM,PERCD,TGEL,ILADD,IRADD,MAXCON, +IWIN,NBAD) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),ANS,CHRSIZ,ILADD(MAXCON),IRADD(MAXCON) INTEGER LREG,RREG,X,Y,TEMP(CHRSIZ,MAXGL2) CHARACTER SEQ1(IDIM1) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL),TGEL(MAXGEL) CHARACTER NAMPRO*(*) CALL BUSY(KBOUT) C IF(IWIN.GT.0) PERC = REAL(NBAD)/REAL(IWIN) IEND = 1 IFAIL=0 N=IDBSIZ-NCONTS NCONS = 0 DO 110 I=N,IDBSIZ-1 J=LNBR(I) X=1 Y=RELPG(I) ISTART=ISTART+1 IF((ISTART+19+Y+2*MAXGEL).GT.IDIM1)THEN WRITE(KBOUT,1009)IDIM1 1009 FORMAT( + ' Maximum consensus length(',I6,') exceeded',/, + ' calculation aborted') IFAIL=1 RETURN END IF CALL ADDTIT(SEQ1(ISTART),NAMPRO,J,ISTART) NCONS = NCONS + 1 IDIN = 0 IF(IWIN.GT.0) THEN IDIN = MAXGEL CALL GETEX(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,I,1, + GEL,TGEL,IDIN,IWIN,NBAD) IF(IDIN.GT.0)CALL SQCOPY(GEL,SEQ1(ISTART),IDIN) END IF ILADD(NCONS) = IDIN ISTART = ISTART + IDIN CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ1(ISTART),Y,GEL,X,Y,J,IDBSIZ,TEMP,CHRSIZ,MAXGL2,IDEVW,MAXGEL, + IDM,PERCD) ISTART=ISTART+Y-1 IDIN = 0 IF(IWIN.GT.0) THEN IDIN = MAXGEL CALL GETEX(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,I,2, + GEL,TGEL,IDIN,IWIN,NBAD) IF(IDIN.GT.0)CALL SQCOPY(GEL,SEQ1(ISTART+1),IDIN) END IF IRADD(NCONS) = IDIN ISTART = ISTART + IDIN 110 CONTINUE END SUBROUTINE GETEX(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,ICONT,IEND, +GEL,GELT,ID,IWIN,NBAD) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ),RELPG(IDBSIZ) CHARACTER GEL(ID),GELT(ID) EXTERNAL NOK2 MAXGEL = ID IDT = 0 C routine to find a possible extension to a contig by looking in a tag file C contig ICONT end IEND = 1 =left 2=right C return data in GEL, of length ID C The worst aspect of this is that if we dont choose the very end reading C we dont know where it lies relative to the consensus. For now just assume C there are no length difference between the extension and the consensus C and just add it on the end IF(IEND.EQ.1) THEN LMOST = 1 IGEL = LNBR(ICONT) 10 CONTINUE IF(IGEL.EQ.0) GO TO 100 IF(RELPG(IGEL).GT.MAXGEL) GO TO 100 IF(LNGTHG(IGEL).LT.0) THEN ID = MAXGEL CALL GETEXT(IGEL,GELT,ID,IOK) IF(IOK.EQ.0) THEN C look for number of n's here and change id accordingly K = NOK2(GELT,ID,IWIN,NBAD) LT = MIN(LMOST,RELPG(IGEL)-K) IF(LT.LT.LMOST) THEN IS = RELPG(IGEL) N = K - IS + 1 CALL SQCOPY(GELT(IS),GEL,N) CALL SQREV(GEL,N) CALL SQCOM(GEL,N) IDT = N LMOST = LT END IF ELSE C WRITE(*,*)'COCKUP IN GETEXT, gel',IGEL END IF END IF IGEL = RNBR(IGEL) GO TO 10 ELSE IGEL = RNBR(ICONT) LMOST = RELPG(ICONT) IDC = RELPG(ICONT) 20 CONTINUE IF(IGEL.EQ.0) GO TO 100 IF(LMOST-RELPG(IGEL).GT.MAXGEL) GO TO 100 IF(LNGTHG(IGEL).GT.0) THEN ID = MAXGEL CALL GETEXT(IGEL,GELT,ID,IOK) IF(IOK.EQ.0) THEN K = NOK2(GELT,ID,IWIN,NBAD) LT = MAX(LMOST,RELPG(IGEL)+LNGTHG(IGEL)+K-1) IF(LT.GT.LMOST) THEN IS = RELPG(ICONT) - (RELPG(IGEL) + LNGTHG(IGEL)) + 2 N = K - IS + 1 CALL SQCOPY(GELT(IS),GEL,N) IDT = N LMOST = LT END IF ELSE C WRITE(*,*)'COCKUP IN GETEXT, GEL',IGEL END IF END IF IGEL = LNBR(IGEL) GO TO 20 END IF 100 CONTINUE ID = IDT END INTEGER FUNCTION NOK2(GEL,ID,IWIN,NBADIN) CHARACTER GEL(ID) PARAMETER (MAXPOS = 101) INTEGER POSNS(MAXPOS),R EXTERNAL KWRAP C count N's over a window of iwin, return position C when over NBAD C INIT NBAD = NBADIN + 1 I = 0 N = 0 NOK2 = ID L = 1 R = 0 IF(NBAD.GT.MAXPOS)THEN WRITE(*,*)'Scream: nok2 not happy' RETURN END IF 10 CONTINUE I = I + 1 IF(I.GT.ID) RETURN IF(GEL(I).EQ.'-') THEN N = N + 1 R = KWRAP(R,NBAD) POSNS(R) = I IF(N.GE.NBAD) THEN IF(POSNS(R)-POSNS(L)+1.LT.IWIN) THEN NOK2 = POSNS(L) RETURN END IF L = KWRAP(L,NBAD) END IF END IF GO TO 10 END INTEGER FUNCTION KWRAP(I,J) IT = I + 1 IF(IT.GT.J) IT = 1 KWRAP = IT END