C DBSYS ROUTINES COMMON TO PRE AND POST .RD PROGRAMS C C 25-8-92 NOTE at the end of the file are new versions of getln2 (getln3) C and gelid (gelidn) and a new routine nameno, that should C replace getln2 and gelid. C C Further sap routines are in dbsysold.f (pre .rd) and dbsysnew.f (post) C the split was made by rs 23-1-91 C 15-6-92 added fasta format output from consen C 15-4-92 added in all the speedup changes ive been making and made C screnv compatible C 13-4-92 changed autocn to use a new hashing routine encof and inite C 2-4-92 Added new dbauto related routines and changed gtconc C 2-4-92 Added filnam = ' ' and brought uptodate with dap C 4-5-90 Change to getreg to allow escape C 4-5-90 addition of graphics routines and changes to menus C 9-5-90 added default gel reading: many changes C 17-5-90 Fixed 3 bugs in screen editing: 1) rightjustified names C caused problems; 2) beginnings of sequences starting at far C right of lines where not seen; 3) lines with no numbers at C the end of a contig (i.e. with <10 chars) were flagged C as errors. Changes to ltype for 1, linlen for 2, dsplay for 3 C 9-7-90 removed menu routines C 20-8-90 changed gelid to add / to reading name because xsap did C not return the INFLAG = 3 for the default C 23-8-90 Changes to dbauto and autocn to deal with failures better C Plus addition of calls to BUSY C 9-11-90 Replaced call to radio with call to radion C 19-11-90 Changed max match length in dbauto to maxglm+1 (was 50) C 25-11-90 Very important bug fix in tpchek. Old versions could C duplicate bits of working versions. C 28-11-90 Modified slider to receive maxpg and maxpc and to allow exactly C the requested number of matches at each end of the two C sequences. C Added two new options to dbauto: all gels to new contigs, all C gels to contig 1; plus resurrected forbidding joins to allow C sequences to be entered only into the contig the overlap best. C Changed autocn to sort overlaps into order based on % mismatch C (previously it saved the best two in any order) C Minor change to dbstar C 3-1-91 Discovered bug in dbopen: incorrect call to getint when the C database is very old and needs values for the current format C 21-1-91 GELID allowed illegal gel numbers to be returned! Fixed it. C 22-1-91 Modified autocn, adism4,adism3 to give more info about C overlaps, and to allow 10 overlaps. Modified dbopen to C return version number, ditto dbstar C 23-1-91 Split into dbsyscommon, dbsysold, dbsysnew C 26-2-91 Improved overflow check in padcop C 28-7-91 added extra parameter to quality calc: mxgood is the maximum C reading length in which we have confidence, so only add this C many chars from the start of each reading. Also changed the C quality calc to make it the same as the consensus one. Made C all characters have nonzero score and made lowercase = 100 C 21-8-91 Changed arrfil to arrfim which does not display comments C 22-8-91 Added routine to find contig line number given left gel (CLINNO) C C C 12-11-91 BIG CHANGE: made database handle 99,999 readings and 16 char names C C C Also added routine to make aedit take strandedness into account C (SUMSS). C Also added fmt4lp which is used by find internal joins and C could be used to advantage by others that call fmt4ln. C 18-11-91 New routine GETLN2 with returns gel number specified C C enconn C routine to store positions of words in posns and first occurences C in wordp and number of occurences in wordn C each number is a value representing one of the le4 possible C words of length length made up of 4 characters C words in posns are numbers from 1 to 4**length SUBROUTINE ENCONN(POSNS,IDIM,WORDP,WORDN,LE4,LENGTH,START) C AUTHOR: RODGER STADEN INTEGER WORDP(LE4),POSNS(IDIM) INTEGER WORDN(LE4),START C number of words of length length IDIM1 = IDIM - (LENGTH-1) IF (START.EQ.1) THEN DO 10 I=1,LE4 WORDN(I) = 0 10 CONTINUE END IF C loop for each word DO 100 I=START,IDIM1 N = POSNS(I) IF(N.NE.0) THEN NW = WORDN(N) C is their already an entry for this word? IF(NW.EQ.0) THEN C first entry, put in wordp WORDP(N) = I WORDN(N) = NW + 1 ELSE WORDN(N) = NW + 1 POSNS(I) = WORDP(N) WORDP(N) = I END IF END IF 100 CONTINUE END SUBROUTINE ENCOF(SEQ,IDSEQ,CONST,CSTART,LENGTH,POSNS) CHARACTER SEQ(IDSEQ) INTEGER CONST(LENGTH),CSTART,POSNS(IDSEQ),HASH INTEGER CTONUM,CONSTL EXTERNAL CTONUM C C new hashing routine. hash = k1 + k2 C C hash = k1.c1 + k2.c2 + ... + kn.cn - cstart C now c1=1, c2=4*c1, c3=4*c2,... C C find length bases in a row, then do first word base by base, C for rest only change what is necessary C DO 1 I=1,IDSEQ POSNS(I) = 0 1 CONTINUE CONSTL = CONST(LENGTH) LM1 = LENGTH - 1 IDSQML = IDSEQ - LENGTH IS = 1 I = 1 IP = 1 HASH = 0 10 CONTINUE C C end approaching ? C IF (IS.GT.IDSQML) RETURN C C at least a words length of characters left C 11 CONTINUE K = CTONUM(SEQ(I)) IF (K.EQ.5) THEN C C start a new word C IS = I + 1 I = IS IP = 1 HASH = 0 GO TO 10 END IF HASH = HASH + CONST(IP) * K IF (IP.NE.LENGTH) THEN I = I + 1 IP = IP + 1 GO TO 11 END IF C C word finished C C save the hash value and the C 20 CONTINUE POSNS(IS) = HASH + CSTART C K1 = CONST(1) * CTONUM(SEQ(IS)) note const(1) = 1 K1 = CTONUM(SEQ(IS)) K2 = (HASH - K1) / 4 IS = IS + 1 IF (IS.GT.IDSQML) RETURN K = CTONUM(SEQ(IS+LM1)) IF (K.EQ.5) THEN IS = IS + 1 I = IS IP = 1 HASH = 0 GO TO 10 END IF C HASH = K2 + K * CONST(LENGTH) note this is a constant constant HASH = K2 + K * CONSTL GO TO 20 END SUBROUTINE INITE(CONST,CSTART,LENGTH) INTEGER CONST(LENGTH),CSTART CSTART = 1 DO 1 I=1,LENGTH C WRITE(*,*)I CONST(I) = 4**(I-1) CSTART = CSTART - CONST(I) 1 CONTINUE END C SUBROUTINE TO READ CHARACTER DATA FROM IDEV, REMOVE SPACES, FILL C ARRAY AND RETURN NUMBER OF ELEMENTS USED. ANY LINES STARTING WITH C A ; ARE TREATED AS COMMENTS SUBROUTINE ARRFIM(IDEV,SEQNCE,J,KBOUT) C 14-8-91 Added err= option to read, and set length to 0 if error found C AUTHOR: RODGER STADEN CHARACTER TEMP(80),SEQNCE(J) CHARACTER SPACE,ENDCHR,TITCHR SAVE ENDCHR,SPACE,TITCHR DATA ENDCHR/'@'/ DATA SPACE/' '/ DATA TITCHR/';'/ IDMX=J J=0 1 CONTINUE READ(IDEV,1001,END=30,ERR=40)TEMP 1001 FORMAT(80A1) IF(TEMP(1).EQ.TITCHR)THEN C WRITE(KBOUT,1003)(TEMP(K),K=2,80) C1003 FORMAT(' ',79A1) GO TO 1 END IF 10 CONTINUE DO 20 I=1,80 IF(TEMP(I).NE.SPACE)THEN IF(TEMP(I).EQ.ENDCHR)RETURN IF(J.EQ.IDMX)THEN WRITE(KBOUT,1002)IDMX 1002 FORMAT( + ' Too much data. Maximum possible', + ' =',I6,', input stopped there') RETURN END IF J=J+1 SEQNCE(J)=TEMP(I) END IF 20 CONTINUE GO TO 1 30 CONTINUE RETURN 40 CONTINUE CALL ERROM(KBOUT,'Error reading file') J = 0 END C ABEDIN C C ROUTINE TO EDIT THE DB USING A PADDED SEQ C HAVE AN ARRAY SEQC2 LENGTH IDC OF PADDED SECTION OF CONTIG LINCON C THE LEFT END OF THE PADDED CONTIG STARTS AT X C THERE ARE ITOTPC PADS TO MAKE C SUBROUTINE ABEDIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +GEL,LINCON,X,SEQC2,ITOTPC,IDC,IDBSIZ,KBOUT,IDEVR,IDEVW, +MAXGEL) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),X,POSN INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER SEQC2(IDC),GEL(MAXGEL),P SAVE P DATA P/','/ C C POINT TO CONTIG POSN=X-1 C POINT TO SEQC2 IAT=0 C COUNT PADS DONE IDONE=0 C LOOP FOR ALL SEQC2 DO 100 J=1,IDC POSN=POSN+1 IAT=IAT+1 IPAD=0 C IS THIS A PADDING CHAR? IF(SEQC2(IAT).NE.P)GO TO 100 50 CONTINUE C COUNT PADS IPAD=IPAD+1 IAT=IAT+1 IF(SEQC2(IAT).EQ.P)GO TO 50 C END OF THIS STRETCH OF PADS,DO INSERT C HAVE IPAD INSERTS TO MAKE AT POSN CALL PADCON(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +GEL,LINCON,POSN,IPAD,IDBSIZ,IDEVR,IDEVW,MAXGEL,KBOUT) C MOVE POINTER TO CONTIG POSN=POSN+IPAD C COUNT PADS DONE IDONE=IDONE+IPAD C ANY MORE TO DO? IF(IDONE.EQ.ITOTPC)GO TO 101 100 CONTINUE C ERROR SHOULD HAVE DONE ALL PADS WRITE(KBOUT,1000) 1000 FORMAT(' Problem: some pads were not done!') 101 CONTINUE END SUBROUTINE ADDTIT(SEQ1,NAMPRO,NGELS,IDIM1) C AUTHOR: RODGER STADEN CHARACTER SEQ1(20),NAMPRO*(*) CHARACTER NUMS(10) C Set maximum number of digits in reading number PARAMETER (MAXGD = 5) SAVE NUMS DATA NUMS/'0','1','2','3','4','5','6','7','8','9'/ CALL FILLC(SEQ1(2),18,'-') SEQ1(1) = '<' SEQ1(20) = '>' IEND = INDEX(NAMPRO,'.') N=NGELS K=IEND+MAXGD DO 10 J=1,MAXGD N=MOD(N,10)+1 NAMPRO(K:K)=NUMS(N) N=NGELS/(10**J) K=K-1 10 CONTINUE K = 18-IEND K=K/2 DO 20 I=1,IEND+MAXGD SEQ1(K)=NAMPRO(I:I) K=K+1 20 CONTINUE IDIM1=IDIM1+20 END SUBROUTINE ADISM1(SEQ,IDIM,GEL,IDIMG,SAVPS,SAVPG,IDSAV, +CENDS,NENDS,IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE, +LLINO,IMATC, +ISTRAN,KBOUT,MATCH) C AUTHOR: RODGER STADEN C NEW PARMS INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2) CCCCCCCCCCCC INTEGER CENDS(MAXCON) INTEGER NENDS(MAXCON) INTEGER SAVPS(IDSAV),SAVPG(IDSAV) CHARACTER SEQ(IDIM),GEL(IDIMG),MATCH(IDIMG) C C EDITED 07-02-83 TO ALLOW FOR CASE WHERE A GEL OVERLAPS ADJACENT C CONTIGS WITHIN THE LENGTH OF THE GEL. USE PARM THAT CONTAINS C THE POSITION OF THE LEFT END OF THE NEXT CONTIG. SET TO VERY HIGH C VALUE TO START NEXTC=IDIM+1 C SORT THE MATCHING WORDS INTO ASCENDING ORDER ON POSITION IN SEQ CALL BUB2AS(SAVPS,SAVPG,IDSAV) C LOOK FOR SEPERATE MATCHES LEND=IDIMG-SAVPG(1)+SAVPS(1) C COUNT NUMBER OF MATCHING CONTIGS IMATC=IMATC+1 CALL ADISM2(SEQ,IDIM,GEL,IDIMG,SAVPS(1), 1SAVPG(1),CENDS,NENDS,IDCEND,MAXCON, 1ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,KBOUT, 2MATCH) DO 10 I=2,IDSAV IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10 C NEW MATCH, DISPLAY IT C COUNT NUMBER OF MATCHING CONTIGS IMATC=IMATC+1 CALL ADISM2(SEQ,IDIM,GEL,IDIMG,SAVPS(I), 1SAVPG(I),CENDS,NENDS,IDCEND,MAXCON, 1ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,KBOUT, 2MATCH) C C RESET LEND LEND=IDIMG-SAVPG(I)+SAVPS(I) 10 CONTINUE RETURN END C C ADISM2 C ROUTINE TO DISPLAY MATCHES SUBROUTINE ADISM2(SEQ,IDIM1,GEL,IDIMG,ISAVPS,SAVPG,CENDS,NENDS, +IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN, +NEXTC,KBOUT,MATCH) C AUTHOR: RODGER STADEN C NEW PARMS INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2) CCCCCCCCCCC CHARACTER SEQ(IDIM1),GEL(IDIMG),MATCH(IDIMG) INTEGER SAVPS,SAVPG,CENDS(MAXCON) INTEGER NENDS(MAXCON) C EDITED 07-02-83 FOR NEXTC. SEE ADISM1. C DELETE 20 FROM END OF CONSENSUS MATCH SAVPS=ISAVPS-19 C FIND CONTIG CONSENSUS ENDS JJ=1 DO 5 J=2,IDCEND IF(SAVPS.GT.CENDS(J))GO TO 5 C GONE PAST SO LAST IS THE ONE JJ=J-1 GO TO 6 5 CONTINUE JJ=IDCEND 6 CONTINUE C SUBTRACT 1 FROM END SAVPS=SAVPS-1 C LENGTH FROM MATCH TO LEFT OF CONTIG LCL=SAVPS-CENDS(JJ) C RIGHT LCR=CENDS(JJ+1)-ISAVPS-1 C LEFT GEL LGL=SAVPG-1 LGR=IDIMG-SAVPG C NEED MIN OF EACH PAIR LL=MIN(LCL,LGL) LR=MIN(LCR,LGR) C LENGTH OF OVERLAP LM=LR+LL+1 C DISPLAY STARTS ICL=ISAVPS-LL IGL=SAVPG-LL WRITE(KBOUT,1000)NENDS(JJ) 1000 FORMAT(' Match found with contig number =',I6) CALL SQMTCH(SEQ(ICL),GEL(IGL),MATCH,LM) L=ICL-CENDS(JJ)-19 CALL FMT4LN(SEQ(ICL),GEL(IGL),MATCH,LM,L,IGL,KBOUT) C UPDATE END OF NEXT CONTIG NEXTC=CENDS(JJ+1)+20 IF(IMATC.GT.2)RETURN ILEFTS(IMATC)=CENDS(JJ)+20 ILC(IMATC)=LCL+LCR+1 IPOSC(IMATC)=LCL+1 IPOSG(IMATC)=SAVPG LLINO(IMATC)=NENDS(JJ) ISENSE(IMATC)=1 IF(ISTRAN.EQ.2)ISENSE(IMATC)=-1 RETURN END SUBROUTINE ADISM3(ISAVPS,SAVPG,CENDS,NENDS, +IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN, +NEXTC,MAXC,KBOUT) C AUTHOR: RODGER STADEN INTEGER ILEFTS(MAXC),ILC(MAXC),IPOSC(MAXC),IPOSG(MAXC) INTEGER ISENSE(MAXC),LLINO(MAXC) INTEGER SAVPS,SAVPG,CENDS(MAXCON) INTEGER NENDS(MAXCON) SAVPS=ISAVPS-19 JJ=1 DO 5 J=2,IDCEND IF(SAVPS.GT.CENDS(J))GO TO 5 JJ=J-1 GO TO 6 5 CONTINUE JJ=IDCEND 6 CONTINUE SAVPS=SAVPS-1 LCL=SAVPS-CENDS(JJ) LCR=CENDS(JJ+1)-ISAVPS-1 NEXTC=CENDS(JJ+1)+20 IF(IMATC.LE.MAXC) THEN ILEFTS(IMATC)=CENDS(JJ)+20 ILC(IMATC)=LCL+LCR+1 IPOSC(IMATC)=LCL+1 IPOSG(IMATC)=SAVPG LLINO(IMATC)=NENDS(JJ) ISENSE(IMATC)=1 IF(ISTRAN.EQ.2)ISENSE(IMATC)=-1 WRITE(KBOUT,1000)LLINO(IMATC),IPOSC(IMATC),ISTRAN, + IPOSG(IMATC) 1000 FORMAT + (' Contig',I5,' position',I6,' matches strand',I2, + ' at position',I5) ELSE CALL ERROM(KBOUT,'Warning: too many overlaps') END IF END SUBROUTINE ADISM4(IDIM,IDIMG,SAVPS,SAVPG,IDSAV, +CENDS,NENDS,IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE, +LLINO,IMATC,ISTRAN,MAXC,KBOUT) C AUTHOR: RODGER STADEN INTEGER ILEFTS(MAXC),ILC(MAXC),IPOSC(MAXC),IPOSG(MAXC) INTEGER ISENSE(MAXC),LLINO(MAXC) INTEGER CENDS(MAXCON) INTEGER NENDS(MAXCON) INTEGER SAVPS(IDSAV),SAVPG(IDSAV) NEXTC=IDIM+1 CALL BUB2AS(SAVPS,SAVPG,IDSAV) IMATC=IMATC+1 CALL ADISM3(SAVPS(1),SAVPG(1),CENDS,NENDS,IDCEND,MAXCON, + ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,MAXC, + KBOUT) LEND=IDIMG-SAVPG(1)+SAVPS(1) DO 10 I=2,IDSAV IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10 IMATC=IMATC+1 CALL ADISM3(SAVPS(I),SAVPG(I),CENDS,NENDS,IDCEND,MAXCON, + ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,MAXC, + KBOUT) LEND=IDIMG-SAVPG(I)+SAVPS(I) 10 CONTINUE IMATC = MIN(IMATC,MAXC) RETURN END SUBROUTINE AEDIT(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT, +GEL,MAXGEL,CON,IDC,IDEVW,IDEVR,LREG,RREG,KBOUT) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL),CON(IDC) INTEGER RREG,PC,PCA,PG NG = LGEL PG = RELPG(NG) PC = LREG NTT = 0 NCT = 0 NDT = 0 10 CONTINUE C WRITE(*,*)'GEL',NG CALL READW(IDEVW,NG,GEL,MAXGEL) LG = ABS(LNGTHG(NG)) IF(PC.LT.LREG) PC = LREG PCA = PC - LREG + 1 IG = PC - PG + 1 LC = MIN(LG,RREG-PC+1) C WRITE(*,*)'PC,PG,IG,LG,PCA,LC',PC,PG,IG,LG,PCA,LC CALL ET(GEL(IG),LG,CON(PCA),LC,NE) NTT = NTT + NE CALL EC(GEL(IG),LG,CON(PCA),LC,NE) NCT = NCT + NE CALL ED(GEL(IG),LG,CON(PCA),LC,ND) NDT = NDT + ND CALL WRITEW(IDEVW,NG,GEL,MAXGEL) IF(ND.GT.0) THEN K = LNGTHG(NG) LNGTHG(NG) = ABS(LNGTHG(NG)) - ND LNGTHG(NG) = SIGN(LNGTHG(NG),K) CALL WRITER(IDEVR,NG,RELPG(NG),LNGTHG(NG),LNBR(NG),RNBR(NG)) END IF IF(RNBR(NG).NE.0) THEN NG = RNBR(NG) PG = RELPG(NG) PC = PG IF(PG.LE.RREG) GO TO 10 END IF CALL EDR(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT, +CON,IDC,IDEVW,IDEVR,LREG) WRITE(KBOUT,1000)NTT 1000 FORMAT(' Number of transpositions=',I6) WRITE(KBOUT,1001)NCT 1001 FORMAT(' Number of changes =',I6) WRITE(KBOUT,1002)NDT 1002 FORMAT(' Number of deletions =',I6) END C AJOIN2 C COMPLETES JOIN AND RETURNS LENGTH OF NEW CONTIG IN LLINOR SUBROUTINE AJOIN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +RELX,LLINOL,LLINOR,LNCONL,LNCONR,IDEVR) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNBR(IDBSIZ),RNBR(IDBSIZ),LNGTHG(IDBSIZ) INTEGER RELX C RELX IS THE POSITION OF THE JOINT C LLINOL IS THE LEFT GEL NUMBER OF THE LEFT CONTIG C LLINOR IS THE LEFT GEL OF THE RIGHT CONTIG C LNCONL IS THE LEFT CONTIG LINE NUMBER C LNCONR IS THE RIGHT CONTIG LINE NUMBER C C ADJUST ALL RELATIVE POSITIONS IN RIGHT CONTIG N=LLINOR RELPG(N)=RELX 50 CONTINUE IF(RNBR(N).EQ.0)GO TO 60 N=RNBR(N) RELPG(N)=RELPG(N)+RELX-1 GO TO 50 60 CONTINUE C C FIX UP NEW GEL LINE FOR OLD LEFT OF RIGHT CONTIG LNBR(LLINOR)=RNBR(LNCONL) C FIX UP RIGHT GEL OF LEFT CONTIG N=RNBR(LNCONL) RNBR(N)=LLINOR C MERGE WILL SORT OUT THE CORRECT NEIGHBOURS C CALL MERGE(RELPG,LNGTHG,LNBR,RNBR,LNCONL,IDBSIZ) C MERGE DOES NOT WRITE TO DISK N=LNBR(LNCONL) 65 CONTINUE C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N),LNBR(N),RNBR(N)) N=RNBR(N) IF(N.NE.0)GO TO 65 C CONTIG LINES X=RELPG(LNCONR)+RELX-1 C LENGTH MAY NOT HAVE INCREASED! IF(X.GT.RELPG(LNCONL))RELPG(LNCONL)=X C SAVE LENGTH OF NEW CONTIG RELX=RELPG(LNCONL) C WRITE(IDEVR,REC=LNCONL)RELPG(LNCONL),LNGTHG(LNCONL),LNBR(LNCONL), C 1RNBR(LNCONL) CALL WRITER(IDEVR,LNCONL,RELPG(LNCONL),LNGTHG(LNCONL), +LNBR(LNCONL),RNBR(LNCONL)) C C NOW MOVE ALL DATA DOWN TO DELETE OLD RIGHT END N=IDBSIZ-NCONTS M=LNCONR-N IF(M.EQ.0)GO TO 80 K=LNCONR J=LNCONR-1 DO 70 I=1,M RELPG(K)=RELPG(J) LNGTHG(K)=LNGTHG(J) LNBR(K)=LNBR(J) RNBR(K)=RNBR(J) C WRITE(IDEVR,REC=K)RELPG(K),LNGTHG(K),LNBR(K),RNBR(K) CALL WRITER(IDEVR,K,RELPG(K),LNGTHG(K),LNBR(K),RNBR(K)) K=K-1 J=J-1 70 CONTINUE 80 CONTINUE NCONTS=NCONTS-1 C WRITE(IDEVR,REC=IDBSIZ)NGELS,NCONTS CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) 90 CONTINUE RETURN END C SUBROUTINE AJOIN3 SUBROUTINE AJOIN3(RELPG,IDBSIZ,LINCON,ITYPE,ISENSE,JOINT,IDIM22, +KLASS,IOVER,KBOUT,PL,PR) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),LINCON(2),IDIM22(2) INTEGER ITYPE(2),ISENSE(2),JOINT(2),PL(2),PR(2) C C CALC POSITIONS OF CONTIGS RELATIVE TO FIXED GEL DO 20 I=1,2 C R+ IF((ITYPE(I).NE.-1).OR.(ISENSE(I).NE.1))GO TO 11 PL(I)=-1*JOINT(I)+2 PR(I)=PL(I)+RELPG(LINCON(I))-1 GO TO 20 C L+ 11 CONTINUE IF((ITYPE(I).NE.1).OR.(ISENSE(I).NE.1))GO TO 12 PL(I)=JOINT(I) PR(I)=PL(I)+RELPG(LINCON(I))-1 GO TO 20 C R- 12 CONTINUE IF((ITYPE(I).NE.-1).OR.(ISENSE(I).NE.-1))GO TO 13 PR(I)=JOINT(I)+IDIM22(I)-1 PL(I)=PR(I)-RELPG(LINCON(I))+1 GO TO 20 C L- 13 CONTINUE PR(I)=IDIM22(I)-JOINT(I)+1 PL(I)=PR(I)-RELPG(LINCON(I))+1 20 CONTINUE C LENGTH OF OVERLAP IOVER=MIN(PR(1),PR(2))-MAX(PL(1),PL(2))+1 WRITE(KBOUT,1002)IOVER 1002 FORMAT(' Length of overlap between the contigs=',I6) C C CLASS NUMBER 1-16 KLASS=1 IF(ITYPE(1).EQ.1)KLASS=KLASS+8 IF(ISENSE(1).EQ.-1)KLASS=KLASS+4 IF(ITYPE(2).EQ.1)KLASS=KLASS+2 IF(ISENSE(2).EQ.-1)KLASS=KLASS+1 C WRITE(KBOUT,1001)KLASS C1001 FORMAT(' CLASS OF JOIN=',I6) RETURN END C ALINE C C ROUTINE TO LINE UP 2 SEQS. C IT SLIDES,REMOVES OVERLAPPING MATCHES, C SORTS MATCHES INTO ASCENDING ORDER, THEN DOES DOES A TOPOLOGICAL C CHECK, AND THEN PRODUCES 2 LINED UP SEQS WITH PADDING CHARS C VARIABLES C SEQ1 CONSENSUS C SEQ2 GEL ORIGINAL IN CORRECT ORIENTATION C SEQG2 ALIGNED GEL C SEQC2 ALIGNED CONSENSUS C SEQ3 SAVED GEL RAW DATA C ISAV1,2,3 STORE MATCHES AND POSITIONS C IDSAV NUMBER ISAV'S C IDC LENGTH OF INPUT SEQ1 C IDIM2 LENGTH OF INPUT SEQ2 C IDOUT LENGTH OF OUTPUT ALIGNED SEQ1 C IDIM2 LENGTH OF SEQ2 ON OUTPUT AFTER ALIGNMENT C MINSLI MIN MATCH FOR SLIDING C IFAIL FLAG TO SHOW IF ALIGNMENT FAILED DUE TO TOO C MANY MISMATCHES OR TOPOLIGICAL CHECK OR TOO MANY OR TOO MANY C PADDING CHARS. 1=FAIL,0=PASS C SUBROUTINE ALINE(SEQ1,SEQ2,SEQG2,SEQC2,ISAV1,ISAV2,ISAV3, +IDSAV,IDC,IDIM2,IDOUT,IC1,IG1,MINSLI,JOINT, +ITOTPC,ITOTPG,IFAIL,ITYPE,MAXPC,MAXPG,PERMAX,KBOUT,SEQ3,MAXGEL, +PERCM,LENO,ISHOW) C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDC),SEQ2(IDIM2),SEQG2(IDOUT),SEQC2(IDOUT) CHARACTER SEQ3(MAXGEL) INTEGER ISAV1(IDSAV),ISAV2(IDSAV),ISAV3(IDSAV) MINSLT=MINSLI C SAVE SEQ2 CALL SQCOPY(SEQ2,SEQ3,IDIM2) CALL MSTLKL(SEQ3,IDIM2) IFAIL=1 C FIND MATCHES IPP=IDSAV CALL SLIDER(SEQ1,IDC,SEQ3,IDIM2,IC1,IG1,MAXPG,MAXPC,MINSLT, +ISAV1,ISAV2,ISAV3,IPP) IF(IPP.GT.IDSAV)RETURN IF(IPP.LT.1)RETURN CALL REMOVL(ISAV2,ISAV3,ISAV1,IPP) CALL BUB3AS(ISAV2,ISAV3,ISAV1,IPP) C DO TOPOLOGICAL CHECK CALL TPCHEK(ISAV2,ISAV3,ISAV1,IPP) C C added next routine 27-2-93 C CALL UPCHEK(ISAV2,ISAV3,ISAV1,IPP) CALL LINEUP(SEQ2,SEQ1,SEQG2,SEQC2,IDC,IDIM2,IDOUT,ISAV3,ISAV2, +ISAV1,IPP,ITOTPC,ITOTPG,JOINT,ITYPE,KBOUT,MAXGEL,IFAIL) IF(ITOTPC.GT.MAXPC)IFAIL=1 IF(ITOTPG.GT.MAXPG)IFAIL=1 IF(IFAIL.NE.0)RETURN C IDIM2 IS NOW LENGTH OF ALIGNED GEL CALL DALIGN(SEQC2,SEQG2,SEQ3,MAXGEL,IDOUT,IDIM2,JOINT, +ITYPE,PERCM,KBOUT,IFAIL,LENO,PERMAX,ISHOW) IF(IFAIL.NE.0)RETURN IF(ISHOW.EQ.1) THEN WRITE(KBOUT,1052)PERCM,ITOTPC,ITOTPG 1052 FORMAT(' Percent mismatch=',F4.1,', pads in contig=',I3, + ', pads in gel=',I3) END IF END SUBROUTINE ARCSER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,KBIN,KBOUT,IDEVN, +IHELPS,IHELPE,FILEH,IDEVH) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN C SEARCHES FOR ARCHIVE NAMES INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER NAME1*16,NAME2*16 10 CONTINUE L = 0 CALL GTSTR('Archive name',' ',NAME1,L,KBOUT,KBIN,INFLAG) IF(L.EQ.0) RETURN CALL CCASE(NAME1,1) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.3) RETURN IF(NAME1(1:1).EQ.' ') RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 10 END IF DO 100 I=1,NGELS CALL READN(IDEVN,I,NAME2) IF(NAME1.EQ.NAME2) THEN WRITE(KBOUT,1003)NAME2,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I) 1003 FORMAT(' ',A,2X,I4,2X,I6,2X,I6,2X,I6,2X,I6/) GO TO 10 END IF 100 CONTINUE WRITE(KBOUT,1004)NAME1 1004 FORMAT(' ',A,' Not in database') GO TO 10 END SUBROUTINE AUTOCN(SEQ1,IDIM,GEL,IDIMG,ILEFTS,ILC,IPOSC, +IPOSG,ISENSE,LLINO,IMATC,IFCOMP,MINMAT,POSNS,WORDP,WORDN, +CONST,LENGTH,LPOWRC,KBOUT,MATCH,MAXGEL,MAXGLM,GELCOP,GELN, +SAVPS,SAVPG,SAVL,MAXSAV,CENDS,NENDS,MAXCON, +SEQG2,SEQC2,SEQ4,IDOUT,IDIM22,ITOTPG,ITOTPC,JOINT,IFAIL, +ITYPE,MAXPC,MAXPG,PERMAX,MINSLI,SEQG3,SEQC3,KFAIL,CSTART, +JOBC,PERMIS,LENO,ISHOW) C AUTHOR: RODGER STADEN C changed 29-11-90 to make first in list of alignments the best INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2) INTEGER POSNS(IDIM),GELN(MAXGLM),WORDP(LPOWRC),SAVPS(MAXSAV) INTEGER SAVPG(MAXSAV),SAVL(MAXSAV) INTEGER WORDN(LPOWRC) CHARACTER GELCOP(MAXGLM),MATCH(MAXGLM) INTEGER CENDS(MAXCON),NENDS(MAXCON) INTEGER CONST(LENGTH) CHARACTER SEQ1(IDIM),GEL(MAXGLM) C CHARACTER SEQG2(MAXGLM,2),SEQC2(MAXGLM,2),SEQ4(MAXGLM) INTEGER IDOUT(2),IDIM22(2),ITOTPG(2),ITOTPC(2),JOINT(2) INTEGER IFAIL(2),ITYPE(2) PARAMETER (MAXC = 100) CHARACTER SEQG3(MAXGLM),SEQC3(MAXGLM) INTEGER JLEFTS(MAXC),JLC(MAXC),JPOSC(MAXC),JPOSG(MAXC) INTEGER JSENSE(MAXC),JLLINO(MAXC),CSTART,START REAL PERMIS(2) C C jobc tells how to update the hash tables: C 0 means dont do anything because the consensus hasnt changed C 1 means add the last contig because a new one has been stuck on the end C 2 means do the whole consensus C IFAIL(1) = 1 IFAIL(2) = 1 KFAIL = 0 C 23-8-90 Need to deal with failures in a better way. Problem is C case where overlaps are found but fail to align. In future C signal them with new variable KFAIL which will be nonzero C if any alignment fails. C 29-11-90 Changed sorting of overlaps so that the best is first in the C list returned to caller. C SAVE GEL CALL SQCOPY(GEL,GELCOP,IDIMG) C COUNT NUMBER OF CONTIGS THAT MATCH IMATC=0 IDCEND=MAXCON CALL BUSY(KBOUT) CALL FNDCON(SEQ1,IDIM,CENDS,NENDS,IDCEND,MAXCON,KBOUT) IF (JOBC.NE.0) THEN START = 1 IF(JOBC.EQ.1) START = CENDS(IDCEND) CALL ENCOF(SEQ1(START),IDIM-START+1,CONST,CSTART,LENGTH, +POSNS(START)) CALL ENCONN(POSNS,IDIM,WORDP,WORDN,LPOWRC,LENGTH,START) END IF 1 CONTINUE ISTRAN=1 2 CONTINUE CALL MSTLKL(GEL,IDIMG) CALL ENCOF(GEL,IDIMG,CONST,CSTART,LENGTH,GELN) IDSAV=MAXSAV CALL CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LPOWRC, +SAVPG,SAVPS,SAVL, +IDSAV,SEQ1,GEL,MINMAT,IFCOMP,KBOUT) IF(IFCOMP.NE.0)RETURN IF(IDSAV.NE.0)THEN CALL ADISM4(IDIM,IDIMG,SAVPS,SAVPG,IDSAV,CENDS,NENDS, + IDCEND,MAXCON,JLEFTS,JLC,JPOSC,JPOSG,JSENSE,JLLINO, + IMATC,ISTRAN,MAXC,KBOUT) END IF ISTRAN=ISTRAN+1 IF(ISTRAN.EQ.2) THEN CALL SQCOPY(GELCOP,GEL,IDIMG) CALL SQREV(GEL,IDIMG) CALL SQCOM(GEL,IDIMG) GO TO 2 END IF CALL SQCOPY(GELCOP,GEL,IDIMG) KSENSE = 0 WRITE(KBOUT,*)'Total matches found',IMATC IF(IMATC.EQ.0) THEN IFAIL(1) = 0 RETURN END IF JMATC = 0 DO 100 I = 1,IMATC IF(JSENSE(I).EQ.-1) THEN IF(KSENSE.EQ.0) THEN CALL SQREV(GEL,IDIMG) CALL SQCOM(GEL,IDIMG) KSENSE = 1 END IF END IF JDIM22 = IDIMG JDOUT = MAXGEL IDSAV = MAXSAV WRITE(KBOUT,*)'Trying to align with contig',JLLINO(I) CALL ALINE(SEQ1(JLEFTS(I)),GEL,SEQG3,SEQC3, + SAVPS,SAVPG,SAVL,IDSAV,JLC(I),JDIM22,JDOUT, + JPOSC(I),JPOSG(I),MINSLI,JJOINT,JTOTPC,JTOTPG, + JFAIL,JTYPE,MAXPC,MAXPG,PERMAX,KBOUT,SEQ4,MAXGEL,PERMS,LENO, + ISHOW) IF(JFAIL.EQ.0) THEN JMATC = JMATC + 1 IF(JMATC.EQ.1) THEN C Save in elements 1 CALL COPYM(JLEFTS(I),ILEFTS(1),JLC(I),ILC(1), + JPOSC(I),IPOSC(1),JSENSE(I),ISENSE(1), + JLLINO(I),LLINO(1),JJOINT,JOINT(1),JTOTPC, + ITOTPC(1),JTOTPG,ITOTPG(1),JTYPE,ITYPE(1), + JDOUT,IDOUT(1),JDIM22,IDIM22(1), + SEQG3,SEQG2(1,1),SEQC3,SEQC2(1,1), + PERMS,PERMIS(1)) IFAIL(1) = 0 ELSE IF(JMATC.EQ.2) THEN IF(PERMS.LT.PERMIS(1)) THEN C Better match so save in elements 1, so copy 1 to 2 first CALL COPYM(ILEFTS(1),ILEFTS(2),ILC(1),ILC(2), + IPOSC(1),IPOSC(2),ISENSE(1),ISENSE(2), + LLINO(1),LLINO(2),JOINT(1),JOINT(2),ITOTPC(1), + ITOTPC(2),ITOTPG(1),ITOTPG(2),ITYPE(1),ITYPE(2), + IDOUT(1),IDOUT(2),IDIM22(1),IDIM22(2), + SEQG2(1,1),SEQG2(1,2),SEQC2(1,1),SEQC2(1,2), + PERMIS(1),PERMIS(2)) IFAIL(2) = 0 C Now save in 1 CALL COPYM(JLEFTS(I),ILEFTS(1),JLC(I),ILC(1), + JPOSC(I),IPOSC(1),JSENSE(I),ISENSE(1), + JLLINO(I),LLINO(1),JJOINT,JOINT(1),JTOTPC, + ITOTPC(1),JTOTPG,ITOTPG(1),JTYPE,ITYPE(1), + JDOUT,IDOUT(1),JDIM22,IDIM22(1), + SEQG3,SEQG2(1,1),SEQC3,SEQC2(1,1), + PERMS,PERMIS(1)) ELSE C Save in element 2 CALL COPYM(JLEFTS(I),ILEFTS(2),JLC(I),ILC(2), + JPOSC(I),IPOSC(2),JSENSE(I),ISENSE(2), + JLLINO(I),LLINO(2),JJOINT,JOINT(2),JTOTPC, + ITOTPC(2),JTOTPG,ITOTPG(2),JTYPE,ITYPE(2), + JDOUT,IDOUT(2),JDIM22,IDIM22(2), + SEQG3,SEQG2(1,2),SEQC3,SEQC2(1,2), + PERMS,PERMIS(2)) IFAIL(2) = 0 END IF ELSE IF(PERMS.LT.PERMIS(1)) THEN C Better match so save in elements 1, so copy 1 to 2 first CALL COPYM(ILEFTS(1),ILEFTS(2),ILC(1),ILC(2), + IPOSC(1),IPOSC(2),ISENSE(1),ISENSE(2), + LLINO(1),LLINO(2),JOINT(1),JOINT(2),ITOTPC(1), + ITOTPC(2),ITOTPG(1),ITOTPG(2),ITYPE(1),ITYPE(2), + IDOUT(1),IDOUT(2),IDIM22(1),IDIM22(2), + SEQG2(1,1),SEQG2(1,2),SEQC2(1,1),SEQC2(1,2), + PERMIS(1),PERMIS(2)) IFAIL(2) = 0 C Now save in 1 CALL COPYM(JLEFTS(I),ILEFTS(1),JLC(I),ILC(1), + JPOSC(I),IPOSC(1),JSENSE(I),ISENSE(1), + JLLINO(I),LLINO(1),JJOINT,JOINT(1),JTOTPC, + ITOTPC(1),JTOTPG,ITOTPG(1),JTYPE,ITYPE(1), + JDOUT,IDOUT(1),JDIM22,IDIM22(1), + SEQG3,SEQG2(1,1),SEQC3,SEQC2(1,1), + PERMS,PERMIS(1)) ELSE IF(PERMS.LT.PERMIS(2)) THEN C Save in element 2 CALL COPYM(JLEFTS(I),ILEFTS(2),JLC(I),ILC(2), + JPOSC(I),IPOSC(2),JSENSE(I),ISENSE(2), + JLLINO(I),LLINO(2),JJOINT,JOINT(2),JTOTPC, + ITOTPC(2),JTOTPG,ITOTPG(2),JTYPE,ITYPE(2), + JDOUT,IDOUT(2),JDIM22,IDIM22(2), + SEQG3,SEQG2(1,2),SEQC3,SEQC2(1,2), + PERMS,PERMIS(2)) END IF END IF ELSE KFAIL = 1 END IF 100 CONTINUE IMATC = MIN(2,JMATC) END SUBROUTINE BREAKC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,KBIN,KBOUT,IDEVR,IDEVW,IDEVN, +IHELPS,IHELPE,IHELP1,IHELP2,FILEH,IDEVH,IOK) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER CHAINL,GCLIN EXTERNAL CHAINL,GCLIN C ROUTINE TO BREAK A CONTIG INTO 2 C LEFT GEL OF NEW RIGHT CONTIG IS IR C RIGHT GEL OF NEW LEFT CONTIG IS IL C LEFT GEL OF OLD LEFT CONTIG IS ILO C CONTIG LINE OF OLD CONTIG IS NCONTO C CONTIG LINE OF NEW RIGHT CONTIG IS NCONTR C CONTIG LINE OF NEW LEFT CONTIG IS NCONTO C LENGTH OF OLD CONTIG IS LCONTO IOK = 1 NCONTR = IDBSIZ - NCONTS - 1 IF(NCONTR.LE.NGELS) THEN WRITE(KBOUT,*)'Insufficient space for new contig line.' WRITE(KBOUT,*)'Increase database size with copy' RETURN END IF 10 CONTINUE MN = 0 MX = NGELS IR = 0 CALL GETINT(MN,MX,IR, + 'Number of gel reading that will become a left end', + IVAL,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN IF(IVAL.LT.1) RETURN IR = IVAL IL = LNBR(IR) IF(IL.EQ.0)THEN WRITE(KBOUT,*)'Gel number',IR,' is already a left end' GO TO 10 END IF ILO = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,IR) IF(ILO.EQ.0)THEN WRITE(KBOUT,*) +'Problem with this contig. Check logical consistency' WRITE(KBOUT,*)'of database. Break not made' RETURN END IF NCONTO = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,ILO) IF(NCONTO.EQ.0)THEN WRITE(KBOUT,*)'No contig line for this contig. Check logical' WRITE(KBOUT,*)'consistency of database. Break not made' RETURN END IF LCONTO = RELPG(NCONTO) IF(LCONTO.LT.1)THEN WRITE(KBOUT,*)'Contig has zero length. Break not made' RETURN END IF CALL CBREAK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,KBOUT,IDEVR,IDEVW,IDEVN,IR,IL,ILO,NCONTO,NCONTR,IOK) END SUBROUTINE CBREAK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,KBOUT,IDEVR,IDEVW,IDEVN,IR,IL,ILO,NCONTO,NCONTR,IOK) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER CLEN EXTERNAL CLEN C ROUTINE TO BREAK A CONTIG INTO 2 C LEFT GEL OF NEW RIGHT CONTIG IS IR C RIGHT GEL OF NEW LEFT CONTIG IS IL C LEFT GEL OF OLD LEFT CONTIG IS ILO C CONTIG LINE OF OLD CONTIG IS NCONTO C CONTIG LINE OF NEW RIGHT CONTIG IS NCONTR C CONTIG LINE OF NEW LEFT CONTIG IS NCONTO C LENGTH OF OLD CONTIG IS LCONTO IOK = 1 NCONTS = NCONTS + 1 C WRITE LAST LINE OF DB WRITE(KBOUT,*)'Increasing number of contigs by 1' CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS) C MAKE NEW CONTIG A COPY OF OLD RELPG(NCONTR) = RELPG(NCONTO) LNGTHG(NCONTR) = LNGTHG(NCONTO) LNBR(NCONTR) = IR RNBR(NCONTR) = RNBR(NCONTO) WRITE(KBOUT,*)'Writing new right contig line' CALL WRITER(IDEVR,NCONTR,RELPG(NCONTR),LNGTHG(NCONTR), +LNBR(NCONTR),RNBR(NCONTR)) C NEED LENGTH FOR OLD LEFT CONTIG RNBR(IL) = 0 C L = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, C +IDBSIZ,IL) C Change 24/6/93 jkb L = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,ILO) IF(L.LT.1)THEN WRITE(KBOUT,*) + 'New left contig has zero length. Break not made' RETURN END IF RELPG(NCONTO) = L RNBR(NCONTO) = IL C DO CONTIG LINE FOR NEW LEFT CONTIG WRITE(KBOUT,*)'Writing new left contig line' WRITE(KBOUT,*)'New length=',RELPG(NCONTO) WRITE(KBOUT,*)'New right gel=',RNBR(NCONTO) CALL WRITER(IDEVR,NCONTO,RELPG(NCONTO),LNGTHG(NCONTO), +LNBR(NCONTO),RNBR(NCONTO)) C DO GEL LINE FOR RIGHT GEL OF NEW LEFT CONTIG WRITE(KBOUT,*)'Writing new right gel of left contig' WRITE(KBOUT,*)'Gel number=',IL CALL WRITER(IDEVR,IL,RELPG(IL),LNGTHG(IL), +LNBR(IL),RNBR(IL)) C DO GEL LINE FOR NEW RIGHT CONTIG LNBR(IR) = 0 WRITE(KBOUT,*)'Writing new left gel of right contig' WRITE(KBOUT,*)'Gel number=',IR CALL WRITER(IDEVR,IR,RELPG(IR),LNGTHG(IR), +LNBR(IR),RNBR(IR)) C NOW SHIFT I = 1 - RELPG(IR) WRITE(KBOUT,*)'Shifting gels in right contig by distance=',I CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR, +IDBSIZ,IR,NCONTR,I) WRITE(KBOUT,*)'Right contig length=',RELPG(NCONTR) WRITE(KBOUT,*)'Break completed' IOK = 0 END C BUBBL3 C SUBROUTINE TO SORT INTEGER ARRAY (LIST) INTO ASCENDING ORDER C SUBROUTINE BUBBL3(LIST,LISTEL,LISTAL,IDIM) C AUTHOR: RODGER STADEN INTEGER LIST(IDIM),LISTEL(IDIM),LISTAL(IDIM) C C SET POINTERS TO ZERO I=0 J=0 C 10 CONTINUE C C SET I=J IF WE HAVE JUST CORRECTLY POSITIONED AN ELEMENT IF(J.GT.I)I=J C C INCREMENT POINTER TO NEXT ELEMENT I=I+1 C TEST FOR END OF ARRAY IF(I.EQ.IDIM)RETURN C 20 CONTINUE C C COMPARE ADJACENT ELEMENTS IF(LIST(I).GE.LIST(I+1))GO TO 10 C C FIRST MOVE THIS ELEMENT? IF SO SET POINTER TO ITS INITIAL POSITION IF(J.LT.I)J=I C C EXCHANGE ADJACENT ELEMENTS ITEMP=LIST(I) LIST(I)=LIST(I+1) LIST(I+1)=ITEMP C ITEMP=LISTEL(I) LISTEL(I)=LISTEL(I+1) LISTEL(I+1)=ITEMP ITEMP=LISTAL(I) LISTAL(I)=LISTAL(I+1) LISTAL(I+1)=ITEMP C C C DECREMENT BACK THRU LIST WITH THIS ELEMENT IF(I.GT.1)I=I-1 C GO TO 20 END SUBROUTINE CCTA(SEQ,ID) CHARACTER SEQ(ID),COM,AS SAVE COM,AS DATA COM/','/,AS/'*'/ DO 10 I = 1,ID IF(SEQ(I).EQ.COM) SEQ(I) = AS 10 CONTINUE END C C CFGEL new version 15-4-92 C C ROUTINE TO COMPARE A STRING OF WORD NUMBERS FOR A GEL WITH A SERIES C OF ARRAYS REPRESENTING A CONSENSUS SEQUENCE. WE LOOK FOR OCCURENCES C OF PAIRS OF WORDS (EACH WORD IS LENGTH CHARS LONG AND SO TOTAL MATCH IS C 2*LENGTH CHARS LONG). THE ARRAYS SENT ARE OF SIZE 4**LENGTH (LE4) SUBROUTINE CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LE4, +SAVPG, +SAVPS,SAVL,IDSAV,SEQ,GEL,MINMAT,IFAIL,KBOUT) C AUTHOR: RODGER STADEN CHARACTER SEQ(IDIM),GEL(IDIMG) INTEGER POSNS(IDIM),WORDP(LE4),SAVPS(IDSAV),SAVPG(IDSAV) INTEGER GELN(IDIMG),SAVL(IDSAV) INTEGER WORDN(LE4) INTEGER W1,W2,PS1W1,PS1W2 INTEGER CTONUM EXTERNAL CTONUM C go thru the gel reading looking at words to see if they exist in the consensus IDSAVM=IDSAV IDSAV=0 C length of a pair of words is LX2 LX2=2*LENGTH C number of pairs of words in gel reading is NW NW=IDIMG-(LX2-1) C loop for each words start point DO 200 I=1,NW C IS THIS WORD A ZERO? W1=GELN(I) IF(W1.EQ.0)GO TO 200 C POINT TO NEXT WORD OF PAIR W2=GELN(I+LENGTH) IF(W2.EQ.0)GO TO 200 C DOES W1 EXIST IN SEQ? N1S1=WORDN(W1) IF(N1S1.EQ.0)GO TO 200 N2S1=WORDN(W2) IF(N2S1.EQ.0)GO TO 200 C BOTH EXIST, SO POINT TO THE FIRST + LENGTH PS1W1=WORDP(W1)+LENGTH C LOOP FOR ALL PAIRS C there are N1S1 occurrences of word 1 and N2S1 of word 2 in consensus C compare the positions of all pairs to see if they are LENGTH apart DO 50 J=1,N1S1 C POINT TO FIRST W2 BECAUSE IT IS IN WORDP NOT POSNS PS1W2=WORDP(W2) C LOOP FOR THESE DO 40 K=1,N2S1 C ARE THIS PAIR LENGTH APART? N=PS1W1-PS1W2 IF(N.NE.0)GO TO 20 C THEY ARE SO, IF REQUIRED LOOK FOR REST OF MATCH LMAT=LX2 C C new code C IPC = PS1W2 + LENGTH - 1 IPG = I + LX2 - 1 16 CONTINUE IF (LMAT.LT.MINMAT) THEN IPC = IPC + 1 IPG = IPG + 1 IF(IPG.GT.IDIMG)GO TO 20 IF(IPC.GT.IDIM)GO TO 20 IF(CTONUM(SEQ(IPC)).NE.CTONUM(GEL(IPG)))GO TO 20 LMAT=LMAT+1 GO TO 16 END IF C C match found, is it an extension of a previous one ? C C WRITE(*,*)I,PS1W1-LENGTH IF (IDSAV.GT.0) THEN IF (I-SAVPG(IDSAV).EQ.PS1W1-LENGTH-SAVPS(IDSAV)) GO TO 20 END IF IDSAV = IDSAV + 1 IF (IDSAV.GT.IDSAVM) THEN WRITE(KBOUT,1000)IDSAVM 1000 FORMAT(' More than ',I6,' matches. Search aborted') IFAIL = 1 RETURN END IF C WRITE(*,*)IDSAV SAVPG(IDSAV) = I SAVPS(IDSAV) = PS1W1 - LENGTH 20 CONTINUE C POINT TO NEXT W2 PS1W2=POSNS(PS1W2) 40 CONTINUE C ALL TRIED THIS PS1W1, TRY NEXT PS1W1=POSNS(PS1W1-LENGTH)+LENGTH 50 CONTINUE 200 CONTINUE IFAIL=0 RETURN END C C CFGEL old version (before 15-4-92) C C ROUTINE TO COMPARE A STRING OF WORD NUMBERS FOR A GEL WITH A SERIES C OF ARRAYS REPRESENTING A CONSENSUS SEQUENCE. WE LOOK FOR OCCURENCES C OF PAIRS OF WORDS (EACH WORD IS LENGTH CHARS LONG AND SO TOTAL MATCH IS C 2*LENGTH CHARS LONG). THE ARRAYS SENT ARE OF SIZE 4**LENGTH (LE4) SUBROUTINE CFGELO(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LE4, +SAVPG, +SAVPS,SAVL,IDSAV,SEQ,GEL,MINMAT,IFAIL,KBOUT) C AUTHOR: RODGER STADEN CHARACTER SEQ(IDIM),GEL(IDIMG) INTEGER POSNS(IDIM),WORDP(LE4),SAVPS(IDSAV),SAVPG(IDSAV) INTEGER GELN(IDIMG),SAVL(IDSAV) INTEGER WORDN(LE4) INTEGER W1,W2,PS1W1,PS1W2 INTEGER CTONUM EXTERNAL CTONUM IDSAVM=IDSAV IDSAV=0 C LENGTH OF PAIR OF WORDS LX2=2*LENGTH C NUMBER OF PAIRS OF WORDS OF LENGTH LENGTH IN GEL NW=IDIMG-(LX2-1) C LOOP FOR EACH START POINT DO 200 I=1,NW C IS THIS WORD A ZERO? W1=GELN(I) IF(W1.EQ.0)GO TO 200 C POINT TO NEXT WORD OF PAIR W2=GELN(I+LENGTH) IF(W2.EQ.0)GO TO 200 C DOES W1 EXIST IN SEQ? N1S1=WORDN(W1) IF(N1S1.EQ.0)GO TO 200 N2S1=WORDN(W2) IF(N2S1.EQ.0)GO TO 200 C BOTH EXIST, SO POINT TO THE FIRST + LENGTH PS1W1=WORDP(W1)+LENGTH C LOOP FOR ALL PAIRS DO 50 J=1,N1S1 C POINT TO FIRST W2 BECAUSE IT IS IN WORDP NOT POSNS PS1W2=WORDP(W2) C LOOP FOR THESE DO 40 K=1,N2S1 C ARE THIS PAIR LENGTH APART? N=PS1W1-PS1W2 IF(N.NE.0)GO TO 20 C THEY ARE SO, IF REQUIRED LOOK FOR REST OF MATCH LMAT=LX2 IF(MINMAT.EQ.LX2)GO TO 15 IPC=PS1W2+LENGTH IPG=I+LX2 16 CONTINUE IF(IPG.GT.IDIMG)GO TO 15 IF(IPC.GT.IDIM)GO TO 15 C IF(CTONUM(SEQ(IPC)).NE.CTONUM(GEL(IPG)))GO TO 15 LMAT=LMAT+1 IPC=IPC+1 IPG=IPG+1 GO TO 16 15 CONTINUE C IS MATCH LONG ENOUGH? IF(LMAT.LT.MINMAT)GO TO 20 IDSAV=IDSAV+1 IF(IDSAV.LE.IDSAVM)GO TO 18 WRITE(KBOUT,1000)IDSAVM 1000 FORMAT(' More than ',I6,' matches. Search aborted') IFAIL=1 RETURN 18 CONTINUE SAVL(IDSAV)=LMAT SAVPG(IDSAV)=I SAVPS(IDSAV)=PS1W1-LENGTH 20 CONTINUE C POINT TO NEXT W2 PS1W2=POSNS(PS1W2) 40 CONTINUE C ALL TRIED THIS PS1W1, TRY NEXT PS1W1=POSNS(PS1W1-LENGTH)+LENGTH 50 CONTINUE 200 CONTINUE IFAIL=0 RETURN END INTEGER FUNCTION CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,IIN) C AUTHOR: RODGER STADEN C RETURNS CONTIG LEFT GEL NUMBER OR ZERO FOR ERROR INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) I = IIN J = I CHAINL = 0 10 CONTINUE IF(I.NE.0)THEN J = I I = LNBR(I) IF(I.EQ.IIN)RETURN GO TO 10 END IF CHAINL = J END C C CHANGE C C ROUTINE TO EXCHANGE ALL THE CHARS IN A CHARACTER ARRAY USING C A PAIR OF LOOKUP ARRAYS SENT BY CALLING PROG C C SUBROUTINE CHANGE(SEQ,IDIM1,CHAR1,CHAR2,IDIM2,ELSE) C AUTHOR: RODGER STADEN CHARACTER SEQ(IDIM1) CHARACTER CHAR1(IDIM2),CHAR2(IDIM2) CHARACTER ELSE DO 100 I=1,IDIM1 C DO 50 J=1,IDIM2 C IF(SEQ(I).NE.CHAR1(J))GO TO 50 C MATCH SO EXCHANGE CHARS SEQ(I)=CHAR2(J) GO TO 100 50 CONTINUE SEQ(I)=ELSE 100 CONTINUE C RETURN END CHARACTER*1 FUNCTION CHARSL(I) CHARACTER C*6 SAVE C DATA C/'ctag*-'/ CHARSL = C(I:I) END CHARACTER*1 FUNCTION CHARSU(I) CHARACTER C*6 SAVE C DATA C/'CTAG*-'/ CHARSU = C(I:I) END INTEGER FUNCTION CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,IIN) C AUTHOR: RODGER STADEN C RETURNS CONTIG LEFT GEL NUMBER OR ZERO FOR ERROR INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) I = IIN CLEN= 0 LEN = 0 10 CONTINUE IF(I.NE.0)THEN LEN = MAX(LEN,(RELPG(I) + ABS(LNGTHG(I)) - 1)) I = RNBR(I) IF(I.EQ.IIN)RETURN GO TO 10 END IF CLEN = LEN END SUBROUTINE CLIST(GELNO1,LINNO1,IGEL1,GELNO2,LINNO2, +IGEL2,GELNOS,GELSTR,GELEND,IUNIQ1,IUNIQ,KBOUT,IOK) C AUTHOR: RODGER STADEN INTEGER GELNO1(IGEL1),GELNO2(IGEL2),GELNOS(IUNIQ1) INTEGER GELSTR(IUNIQ1),GELEND(IUNIQ1) INTEGER LINNO1(IGEL1),LINNO2(IGEL2) EXTERNAL INLIST C GELNOS === GEL NUMBERS (GELNOS) C GELSTR === GEL START LINES C GELEND === GEL END LINES C GELNO === GEL NUMBERS PER STRIP C LINNO === GEL LINE NUMBERS PER STRIP C IGEL === NUMBER OF GELS PER STRIP C LINENO === CURRENT LINE NUMBER C C WHICH GELS IN GELNO2 DO NOT APPEAR IN GELNO1 C IE HAVE STARTED IN GELNO2 DO 20 I=1,IGEL2 MATCH=INLIST(GELNO1,IGEL1,GELNO2(I)) IF(MATCH.EQ.0)THEN C NO MATCH SO NEW C PUT IN GELSTR IUNIQ=IUNIQ+1 GELNOS(IUNIQ)=GELNO2(I) GELSTR(IUNIQ)=LINNO2(I) END IF 20 CONTINUE C WHICH GELS IN GELNO1 DO NOT APPEAR IN GELNO2 C IE WHICH HAVE ENDED IN GELNO1 DO 10 I=1,IGEL1 MATCH=INLIST(GELNO2,IGEL2,GELNO1(I)) IF(MATCH.EQ.0)THEN C NO MATCH SO MUST HAVE ENDED C WHERE IS IT STORED IN GELNOS? MATCH=INLIST(GELNOS,IUNIQ,GELNO1(I)) IF(MATCH.NE.0)THEN GELEND(MATCH)=LINNO1(I) GO TO 10 END IF C ERROR WRITE(KBOUT,1000)GELNO1(I) 1000 FORMAT( ' Error: gel number ',I5, + ' expected but not found in list') IOK = 1 RETURN END IF 10 CONTINUE IOK = 0 RETURN END C C CMPLMT C C SUBROUTINE TO REVERSE AND COMPLEMENT GELS AND DATA BASE C THE POSITIONS OF THE RIGHT ENDS OF GELS ARE FIRST STORED C IN RELPG THEN WE DO A BUBBLE SORT ON THESE POSITIONS C UPDATING RELATIONSHIPS AS WE GO C ALSO SEQUENCES ARE COMPLEMENTED, SIGNS OF LENGTH ARE C MULTIPLIED BY -1 AND THE CONTIG LINE IS ALTERED SUBROUTINE CMPLMT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,GEL,IDBSIZ,KBOUT,IDEVR,IDEVW,MAXGEL) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL) INTEGER X C WRITE(KBOUT,1000)LLINO 1000 FORMAT( ' Complementing contig',I6) C CHAIN THRU AND PUT RIGHT ENDS IN RELPG N=LLINO 10 CONTINUE RELPG(N)=RELPG(N)+(ABS(LNGTHG(N)))-1 IF(RNBR(N).EQ.0)GO TO 20 N=RNBR(N) GO TO 10 20 CONTINUE C C NOW EFFECTIVELY BUBBLE SORT ON RELPG N=RNBR(LINCON) GO TO 22 21 CONTINUE N=NL IF(I1.GT.0)N=I2 22 CONTINUE NL=LNBR(N) IF(NL.EQ.0)GO TO 30 I1=0 23 CONTINUE IF(RELPG(N).GE.RELPG(NL))GO TO 21 C NOT IN CORRECT ORDER SO CHAIN ALONG UNTIL CORRECT,THEN COME C BACK TO THIS POINT AND CONTINUE C IF FIRST MOVE THIS LINE SET POINTER TO CURRENT POSITION IF(I1.EQ.0)I2=N I1=1 C C EXCHANGE NEIGHBOURS. CURRENTLY LOOKING AT N AND ITS LEFT C NBR, AND THE LEFT NBR IS FURTHER RIGHT THAN N C FIX UP POINTERS TO LEFT AND RIGHT OF THESE TWO M=LNBR(NL) IF(M.NE.0)RNBR(M)=N M=RNBR(N) IF(M.NE.0)LNBR(M)=NL LNBR(N)=LNBR(NL) LNBR(NL)=N RNBR(NL)=RNBR(N) RNBR(N)=NL C CHAIN BACK THRU LIST WITH THIS LINE N=RNBR(NL) IF(N.EQ.0)GO TO 21 C IE END MET GO TO 23 30 CONTINUE C FINISH WITH LEFT END IN N 40 CONTINUE C NOW REVERSE NBRS SO CHAIN BACK RIGHT NL=RNBR(N) IF(NL.EQ.0)GO TO 50 RNBR(N)=LNBR(N) LNBR(N)=NL N=NL GO TO 40 50 CONTINUE C NEED TO FIX UP NEW LEFT END RNBR(N)=LNBR(N) LNBR(N)=0 C ALL POINTERS FIXED NOW DO RELATIVE POSITION C FINISH WITH LEFT END IN N C SO CHAIN BACK RIGHT C SAVE RIGHT LINE NUMBER NL=N X=RELPG(N) 60 CONTINUE RELPG(N)=1+(-1*(RELPG(N)-X)) IF(RNBR(N).EQ.0)GO TO 70 N=RNBR(N) GO TO 60 70 CONTINUE C NOW FIX CONTIG LINE LNBR(LINCON)=NL RNBR(LINCON)=N C WRITE NEW CONTIG LINE CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON), +LNBR(LINCON),RNBR(LINCON)) C WRITE(IDEVR,REC=LINCON)RELPG(LINCON),LNGTHG(LINCON),LNBR(LINCON), C 1RNBR(LINCON) C NOW REVERSE AND COMPLEMENT GELS N=NL 80 CONTINUE C READ(IDEVW,REC=N)GEL CALL READW(IDEVW,N,GEL,MAXGEL) M=ABS(LNGTHG(N)) CALL SQREV(GEL,M) CALL SQCOM(GEL,M) CALL WRITEW(IDEVW,N,GEL,MAXGEL) C WRITE(IDEVW,REC=N)GEL C CHANGE SIGNS LNGTHG(N)=-1*LNGTHG(N) C WRITE NEW GEL LINE CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N), +LNBR(N),RNBR(N)) C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) C ANY MORE? N=RNBR(N) IF(N.NE.0)GO TO 80 C NO MORE RETURN END C CONSEN C CALCULATES A CONSENSUS USING THE RULES OUTLINED IN THE DOCUMENTATION C AND SUBROUTINE SUMMER C UNIT IDEV IS USED FOR OUTPUT SUBROUTINE CONSEN(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,IDIM1,GEL,IDBSIZ,TEMP,CHRSIZ,MAXGL2, +KBIN,KBOUT,IDEVW,IDEV,NAMCON, +IHELPS,IHELPE,FILEH,IDEVH,MAXGEL,IDM,PERCD,IDEVN,LLINO) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),ANS,CHRSIZ INTEGER LREG,RREG,X,Y,TEMP(CHRSIZ,MAXGL2) CHARACTER SEQ1(IDIM1) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL) CHARACTER NAMPRO*(*) CHARACTER NAMCON*(*) 100 CONTINUE ISTART=1 NAMCON = ' ' CALL OPENF1(IDEV,NAMCON,1,IOK,KBIN,KBOUT, +'Name for consensus file', +IHELPS,IHELPE,FILEH,IDEVH) IF(IOK.NE.0)RETURN CALL YESNO(ANS,'Make consensus for whole database', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0) RETURN IF(ANS.EQ.1)GO TO 150 N=IDBSIZ-NCONTS CALL BUSY(KBOUT) DO 110 I=N,IDBSIZ-1 J=LNBR(I) X=1 Y=RELPG(I) IF((ISTART+19+Y).GT.IDIM1)THEN WRITE(KBOUT,1009)IDIM1 1009 FORMAT( + ' Maximum consensus length(',I6,') exceeded,',/, + ' calculation aborted') RETURN END IF CALL ADDTIT(SEQ1(ISTART),NAMPRO,J,ISTART) CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ1(ISTART),Y,GEL,X,Y,J,IDBSIZ,TEMP,CHRSIZ,MAXGL2, + IDEVW,MAXGEL,IDM,PERCD) ISTART=ISTART+Y 110 CONTINUE ISTART=ISTART-1 CALL YESNO(ANS,'Staden format', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF (ANS.LT.0) RETURN IF(ANS.EQ.0) THEN CALL FMTDK(IDEV,SEQ1,ISTART) ELSE CALL WRITCF(IDEV,SEQ1,ISTART,NAMPRO,KBOUT,IOK) END IF RETURN 150 CONTINUE CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,NULGEL,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +'Contig identifier', +IHELPS,IHELPE,FILEH,IDEVH) IF(IERR.NE.0)GO TO 400 CALL GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +1,RELPG(LINCON),LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IERR) IF(IERR.NE.0)GO TO 400 IDIM2=RREG-LREG+1 IF((ISTART+19+IDIM2).GT.IDIM1)THEN WRITE(KBOUT,1009)IDIM1 RETURN END IF CALL BUSY(KBOUT) CALL ADDTIT(SEQ1(ISTART),NAMPRO,LLINO,ISTART) CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ1(ISTART),IDIM2,GEL,LREG,RREG,LLINO,IDBSIZ,TEMP, +CHRSIZ,MAXGL2,IDEVW,MAXGEL,IDM,PERCD) ISTART=ISTART+IDIM2 300 CONTINUE CALL YESNO(ANS,'Select another contig', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.EQ.0) GO TO 150 ISTART=ISTART-1 CALL YESNO(ANS,'Staden format', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF (ANS.LT.0) RETURN IF(ANS.EQ.0) THEN CALL FMTDK(IDEV,SEQ1,ISTART) ELSE CALL WRITCF(IDEV,SEQ1,ISTART,NAMPRO,KBOUT,IOK) END IF 400 CONTINUE CALL YESNO(ANS,'Make another consensus', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.EQ.0)GO TO 100 END SUBROUTINE WRITCF(IDEV,SEQ,IDSEQ,NAMPRO,KBOUT,IOK) CHARACTER SEQ(IDSEQ),TITLE*10,ENAME*10,NAMPRO*(*) CHARACTER NL PARAMETER (MAXDG = 5) EXTERNAL INDEXA C C write out in fasta format C also change -,* to N C currently name is project name for single contig, but left gel number C for multiple contigs, title is always left gel number C NC = 0 DO 1 I=1,IDSEQ IF (SEQ(I).EQ.'>') NC = NC + 1 1 CONTINUE NL = CHAR(10) ENAME = ' ' I = INDEX(NAMPRO,'.') ENAME(1:I-1) = NAMPRO(1:I-1) IAT = 1 10 CONTINUE IF (IAT.GT.IDSEQ) THEN CLOSE(UNIT=IDEV) IOK =0 RETURN END IF IF(SEQ(IAT).NE.'<') THEN CALL ERROM(KBOUT,'Missing header in consensus') IOK = 1 RETURN END IF INAMES = INDEXA(SEQ(IAT),20,'.') IF (INAMES.EQ.0) THEN CALL ERROM(KBOUT,'Missing dot in header') IOK = 1 RETURN END IF INAMES = IAT + INAMES INAMEE = INAMES + MAXDG - 1 TITLE = ' ' K = 0 DO 20 I=INAMES,INAMEE K = K + 1 TITLE(K:K) = SEQ(I) 20 CONTINUE IF (NC.GT.1) ENAME = TITLE IAT = IAT + 20 IDT = IDSEQ-IAT+2 IDSQ = INDEXA(SEQ(IAT),IDT,'<') IF (IDSQ.EQ.0) IDSQ = IDT IDSQ = IDSQ - 1 CALL SETCCS(SEQ(IAT),IDSQ) CALL WRITFF(IDEV,SEQ(IAT),IDSQ,ENAME,TITLE) IAT = IAT + IDSQ GO TO 10 END SUBROUTINE SETCCS(SEQ,IDSEQ) CHARACTER SEQ(IDSEQ),TO(5) INTEGER CTONUM EXTERNAL CTONUM SAVE TO DATA TO/'t','c','a','g','n'/ C C change chars in array seq of type found to type to C DO 10 I=1,IDSEQ K = CTONUM(SEQ(I)) SEQ(I) = TO(K) 10 CONTINUE END SUBROUTINE COPYM(JLEFTS,ILEFTS,JLC,ILC, +JPOSC,IPOSC,JSENSE,ISENSE,JLLINO,LLINO, +JJOINT,JOINT,JTOTPC,ITOTPC,JTOTPG,ITOTPG, +JTYPE,ITYPE,JDOUT,IDOUT,JDIM22,IDIM22, +SEQG3,SEQG2,SEQC3,SEQC2,PERMS,PERMIS) CHARACTER SEQG3(JDIM22),SEQG2(JDIM22),SEQC3(JDOUT),SEQC2(JDOUT) ILEFTS = JLEFTS ILC = JLC IPOSC = JPOSC ISENSE = JSENSE LLINO = JLLINO JOINT = JJOINT ITOTPC = JTOTPC ITOTPG = JTOTPG ITYPE = JTYPE IDOUT = JDOUT IDIM22 = JDIM22 CALL SQCOPY(SEQG3,SEQG2,JDIM22) CALL SQCOPY(SEQC3,SEQC2,JDOUT) PERMIS = PERMS END C SUBROUTINE DALIGN C C COUNTS MISMATCHES AND DISPLAYS OVERLAP. SUBROUTINE DALIGN(SEQC2,SEQG2,SEQ3,MAXGEL,IDOUT,IDIM2, +JOINT,ITYPE,X,KBOUT,IFAIL,LO,PERMAX,ISHOW) C AUTHOR: RODGER STADEN CHARACTER SEQC2(MAXGEL),SEQG2(MAXGEL),SEQ3(MAXGEL) CHARACTER PAD,DASH SAVE PAD,DASH DATA PAD,DASH/',','-'/ IFAIL = 1 IENDG=1 IENDC=JOINT C ONLY LOOK AT OVERLAP WHICH IS FROM JOINT FOR LEFT TYPE JOIN IF(ITYPE.EQ.1)THEN IENDG=JOINT IENDC=1 END IF 100 CONTINUE C LENGTH OF OVERLAP? LG=IDIM2-IENDG+1 LO=MIN(IDOUT,LG) C SAVE RAW DATA CALL SQCOPY(SEQG2,SEQ3,IDIM2) CALL MSTLKL(SEQ3,IDIM2) X=FLOAT(LO) Y=X K=IENDG+LO-1 C POINT TO CONSENSUS J=0 C CHECK FOR OVERFLOW IF(K.GT.MAXGEL)THEN CALL ERROM(KBOUT,'DALIGN: matching region too long') RETURN END IF DO 200 I=IENDG,K J=J+1 IF(SEQC2(J).EQ.SEQ3(I))GO TO 200 C IF(SEQ3(I).EQ.DASH)GO TO 200 C IF(SEQC2(J).EQ.DASH)GO TO 200 C IF(SEQC2(J).EQ.PAD)GO TO 200 X=X-1. 200 CONTINUE X=(Y-X)*100./Y IF (X.GT.PERMAX) RETURN IF (ISHOW.EQ.1) THEN WRITE(KBOUT,1002) 1002 FORMAT(' Best alignment found') CALL SQMTCH(SEQC2(1),SEQG2(IENDG),SEQ3,LO) CALL FMT4LN(SEQC2(1),SEQG2(IENDG),SEQ3,LO,IENDC,IENDG,KBOUT) END IF IFAIL=0 END SUBROUTINE DBCHEK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +TEMP,IERR,KBOUT) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER TEMP(IDBSIZ) EXTERNAL LCCHEK,NCCHEK C to check the logical consistency of a database C C 1. are all nbrs holding hands C 2. are all gels in exactly 1 contig C 3. are there loops in contigs C 4. do the gels designated left or right ends have outward neighbours C 5. are the relative positions in same order as hand holding C 6. are there gels of zero length C 7. are there contigs of length < 1 C 8. does the designated length of the contigs agree with the gel positions C 9. if i chain left thru a contig do i reach the gel designated as the left end C10.if i chain right thru a contig do i reach the gel designated as the right end C C return error code 2 for all errors except where only error is "gel not used" C for which we return 1 IERR=0 C hand holding OK? DO 100 I=1,NGELS K=LNBR(I) IF(K.EQ.0)GO TO 50 IF(RNBR(K).EQ.I)GO TO 50 WRITE(KBOUT,1000)I 1000 FORMAT(' Hand holding problem for gel reading',I6) WRITE(KBOUT,1001)I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I) WRITE(KBOUT,1001)K,RELPG(K),LNGTHG(K),LNBR(K),RNBR(K) 1001 FORMAT(' ',5I6) IERR=2 50 CONTINUE K=RNBR(I) IF(K.EQ.0)GO TO 100 IF(LNBR(K).EQ.I)GO TO 100 WRITE(KBOUT,1000)I WRITE(KBOUT,1001)I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I) WRITE(KBOUT,1001)K,RELPG(K),LNGTHG(K),LNBR(K),RNBR(K) IERR=2 100 CONTINUE C C are all gels in exactly 1 contig C CALL FILLI(TEMP,IDBSIZ,0) N=IDBSIZ-NCONTS C C count the number of times thru loop 320 to 300: if this exceeds the C database size a loop has been found C ICOUNT=0 DO 300 I=N,IDBSIZ-1 II=I K=LNBR(I) J=RNBR(I) IF((K.NE.0).AND.(J.NE.0))GO TO 310 C C This contig points to zero gel number as a left or right end C WRITE(KBOUT,1002)I 1002 FORMAT(' Contig',I4,' has gel numbers of zero') IERR=2 GO TO 290 310 CONTINUE IF((LNBR(K).EQ.0).AND.(RNBR(J).EQ.0))GO TO 290 C C These ends reads have outward neighbours C WRITE(KBOUT,1004)I 1004 FORMAT(' The end gels of contig',I4,' have outward neighbours') IERR=2 290 CONTINUE C C Does the contig have nonzero length? C IF(RELPG(I).GT.0)GO TO 320 WRITE(KBOUT,1010)I 1010 FORMAT(' The contig on line number',I4,' has zero length') IERR=2 320 CONTINUE TEMP(K)=TEMP(K)+1 ICOUNT=ICOUNT+1 IF(ICOUNT.GT.IDBSIZ)GO TO 601 K=RNBR(K) IF(K.NE.0)GO TO 320 300 CONTINUE DO 400 I=1,NGELS IF(TEMP(I).EQ.1)GO TO 390 IF(TEMP(I).EQ.0)GO TO 410 WRITE(KBOUT,1005)I,TEMP(I) 1005 FORMAT(' Gel number ',I6,' is used ',I6,' times') IERR=2 GO TO 400 390 CONTINUE C C does the gel have nonzero length (only check those used once) C IF(LNGTHG(I).NE.0)GO TO 400 WRITE(KBOUT,1011)I 1011 FORMAT(' Gel number',I6,' has zero length') IERR=2 GO TO 400 410 CONTINUE WRITE(KBOUT,1006)I 1006 FORMAT(' Gel number ',I6,' is not used') C C need to increase the error count (dont reset to lower value) C IF(IERR.LT.2)IERR=1 400 CONTINUE C C all relative positions ok? C N=IDBSIZ-NCONTS DO 500 I=N,IDBSIZ-1 K=LNBR(I) IF(K.EQ.0)GO TO 500 510 CONTINUE J=RNBR(K) IF(J.EQ.0)GO TO 500 IF(RELPG(K).GT.RELPG(J))GO TO 520 K=J GO TO 510 520 CONTINUE WRITE(KBOUT,1007)K,RELPG(K),J,RELPG(J) 1007 FORMAT(' Gel number',I6,' with position',I6, + ' is the left neighbour of', + /,' gel number',I6,' with position',I6) K=J IERR=2 GO TO 510 500 CONTINUE IOK = LCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ,KBOUT) IF (IOK.NE.0) IERR = 2 IOK = NCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ,KBOUT) IF (IOK.NE.0) IERR = 2 IF(IERR.EQ.0) WRITE(KBOUT,1013) 1013 FORMAT(' Database is logically consistent') RETURN 601 CONTINUE IERR=2 WRITE(KBOUT,1008)II 1008 FORMAT(' Loop in contig',I6,/, +' No further checking done but gel numbers follow') CALL FILLI(TEMP,IDBSIZ,0) K=LNBR(II) 710 CONTINUE TEMP(K)=TEMP(K)+1 WRITE(KBOUT,1009)K 1009 FORMAT(' ',I6) IF(TEMP(K).GT.1)RETURN K=RNBR(K) GO TO 710 END INTEGER FUNCTION LCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ, +KBOUT) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),CLEN INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) EXTERNAL CLEN LCCHEK = 0 DO 10 I=IDBSIZ-NCONTS,IDBSIZ-1 IL = LNBR(I) L1 = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,IL) L2 = RELPG(I) IF (L1.NE.L2) THEN WRITE(KBOUT,1000)I,L2,L1 1000 FORMAT( +' Contig line',I6,' records length',I6,' but actual length is',I6) LCCHEK = LCCHEK + 1 END IF 10 CONTINUE END INTEGER FUNCTION NCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ, +KBOUT) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),CHAINL,CHAINR INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) EXTERNAL CHAINL,CHAINR NCCHEK = 0 DO 10 I=IDBSIZ-NCONTS,IDBSIZ-1 IL = LNBR(I) L1 = CHAINR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,IL) L2 = RNBR(I) IF (L1.NE.L2) THEN WRITE(KBOUT,1000)I,L2,L1 1000 FORMAT( +' Contig line',I6,' records right neighbour as',I6, +' but left to right chaining gives',I6) NCCHEK = NCCHEK + 1 END IF IL = RNBR(I) L1 = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,IL) L2 = LNBR(I) IF (L1.NE.L2) THEN WRITE(KBOUT,1001)I,L2,L1 1001 FORMAT( +' Contig line',I6,' records left neighbour as',I6, +' but right to left chaining gives',I6) NCCHEK = NCCHEK + 1 END IF 10 CONTINUE END INTEGER FUNCTION CHAINR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,IIN) C AUTHOR: RODGER STADEN C RETURNS CONTIG RIGHT GEL NUMBER OR ZERO FOR ERROR INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) I = IIN J = I CHAINR = 0 10 CONTINUE IF(I.NE.0)THEN J = I I = RNBR(I) IF(I.EQ.IIN)RETURN GO TO 10 END IF CHAINR = J END C DBPRNT C PRINTS A DATABASE. IE ITS RELATIONSHIPS SUBROUTINE DBPRNT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +IDEV,KBIN,KBOUT,IDEVN,LLINO, +IHELPS,IHELPE,FILEH,IDEVH) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),LREG,RREG,ANS INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER NAMARC*16 IF(NGELS.GT.0)CALL DBSTAT(RELPG,LNGTHG,LNBR,RNBR,NGELS, +NCONTS,IDBSIZ,IDEV) WRITE(IDEV,10011)NGELS,NCONTS 10011 FORMAT(' Number of gel readings ',I6,' Number of contigs ',I6) 20 CONTINUE CALL YESNO(ANS,'Select contigs', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0) RETURN IF(ANS.EQ.0) GO TO 45 N=IDBSIZ-NCONTS 25 CONTINUE CALL YESNO(ANS,'Show gel readings in positional order', +IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) IF(ANS.LT.0) RETURN IF(ANS.EQ.0)GO TO 41 WRITE(IDEV,1009) 1009 FORMAT(' CONTIG LINES') WRITE(IDEV,1000) 1000 FORMAT( +' CONTIG LINE LENGTH ENDS'/ +' LEFT RIGHT') DO 30 I=N,IDBSIZ-1 WRITE(IDEV,1007)I,RELPG(I),LNBR(I),RNBR(I) 30 CONTINUE 1007 FORMAT( ' ',18X,I6,2X,I7,9X,I6,2X,I6) WRITE(IDEV,1008) 1008 FORMAT(' GEL LINES') WRITE(IDEV,1001) 1001 FORMAT( +' NAME NUMBER POSITION LENGTH NEIGHBOURS'/ +' LEFT RIGHT') DO 40 I=1,NGELS CALL READN(IDEVN,I,NAMARC) WRITE(IDEV,1006)NAMARC,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I) 1006 FORMAT( ' ',A,2X,I6,2X,I7,2X,I5,2X,I6,2X,I6) 40 CONTINUE RETURN C 41 CONTINUE C C SORTED DATA DO 43 I=N,IDBSIZ-1 WRITE(IDEV,1021) 1021 FORMAT( ) WRITE(IDEV,1000) WRITE(IDEV,1007)I,RELPG(I),LNBR(I),RNBR(I) J=LNBR(I) WRITE(IDEV,1001) 42 CONTINUE CALL READN(IDEVN,J,NAMARC) WRITE(IDEV,1006)NAMARC,J,RELPG(J),LNGTHG(J),LNBR(J),RNBR(J) J=RNBR(J) IF(J.NE.0)GO TO 42 43 CONTINUE RETURN 45 CONTINUE C SELECTED CONTIGS ONLY C C GET GEL NUMBER AND CONTIG NUMBER CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LINCON, +LLINO,NULGEL,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +'Contig identifier', +IHELPS,IHELPE,FILEH,IDEVH) IF(IERR.NE.0)RETURN WRITE(IDEV,1009) WRITE(IDEV,1000) WRITE(IDEV,1007)LINCON,RELPG(LINCON),LNBR(LINCON),RNBR(LINCON) CALL GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +1,RELPG(LINCON),LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IERR) IF(IERR.NE.0)RETURN WRITE(IDEV,1008) N=LLINO WRITE(IDEV,1001) 46 CONTINUE CALL READN(IDEVN,N,NAMARC) WRITE(IDEV,1006)NAMARC,N,RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) IF(RNBR(N).EQ.0)GO TO 48 N=RNBR(N) IF(RELPG(N).GT.RREG)GO TO 48 GO TO 46 48 CONTINUE GO TO 45 END SUBROUTINE DBSCAN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1, +IDIM1,GEL,IDBSIZ,TEMP3,ID1,CHRSIZ,MAXGL2,KBIN,KBOUT,IDEVW, +IDEV,LINLEN,PERCD, +IHELPS,IHELPE,FILEH,IDEVH,MAXGEL,LINOU1,LINOU2, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEVN, + LLINO,LINCON,LREG,RREG,MXGOOD) C 28-7-91 added extra parameter mxgood: the max length of read C we have confidence in CHARACTER FILEH*(*) PARAMETER (MAXPRM = 10) CHARACTER PROMPT(2)*(MAXPRM) C AUTHOR: RODGER STADEN INTEGER RREG, RELPG(IDBSIZ),CHRSIZ INTEGER LREG,TEMP3(ID1,CHRSIZ,MAXGL2),ANS INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL),LINOU1(MAXGEL),LINOU2(MAXGEL) CHARACTER SEQ1(IDIM1) CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,NULGEL,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +'Contig identifier', +IHELPS,IHELPE,FILEH,IDEVH) IF(IERR.NE.0) RETURN CALL GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +1,RELPG(LINCON),LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IERR) IF(IERR.NE.0) RETURN IDIM2=RREG-LREG+1 CALL SUMMAR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ1,IDIM1,GEL,LREG,RREG,LLINO,PERCD,IDBSIZ, +TEMP3,ID1,CHRSIZ,MAXGL2,IDEVW, +MAXGEL,LINOU1,LINOU2,MXGOOD) CALL DBSCSM(SEQ1(LREG),IDIM2,KBOUT) 160 CONTINUE ANS = 1 PROMPT(1) = 'List codes' PROMPT(2) = 'Plot codes' CALL RADION('Select results display mode',PROMPT,2,ANS, + IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(ANS.LT.1) RETURN IF(ANS.EQ.1) THEN CALL FMTDB(SEQ1,IDIM1,LREG,RREG,LINLEN,IDEV) RETURN ELSE CALL PLTQ(SEQ1(LREG),IDIM2, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) END IF END SUBROUTINE DBSCNP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1, +IDIM1,GEL,IDBSIZ,TEMP3,ID1,CHRSIZ,MAXGL2,IDEVW,LLINO, +PERCD,MAXGEL,LINOU1,LINOU2,LREG,RREG, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,MXGOOD) C AUTHOR: RODGER STADEN INTEGER RREG, RELPG(IDBSIZ),CHRSIZ INTEGER LREG,TEMP3(ID1,CHRSIZ,MAXGL2) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL),LINOU1(MAXGEL),LINOU2(MAXGEL) CHARACTER SEQ1(IDIM1) IDIM2=RREG-LREG+1 C 28-7-91 added extra parameter mxgood: the max length of read C we have confidence in CALL SUMMAR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ1,IDIM1,GEL,LREG,RREG,LLINO,PERCD,IDBSIZ, +TEMP3,ID1,CHRSIZ,MAXGL2,IDEVW, +MAXGEL,LINOU1,LINOU2,MXGOOD) CALL PLTQ(SEQ1(LREG),IDIM2, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) END SUBROUTINE DBSCSM(SEQ1,IDIM1,KBOUT) C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDIM1) CHARACTER CODES(5) REAL X(5) SAVE CODES DATA CODES/'0','1','2','3','4'/ DO 50 J=1,5 X(J)=0. 50 CONTINUE DO 100 I=1,IDIM1 DO 60 J=1,5 IF(SEQ1(I).NE.CODES(J))GO TO 60 X(J)=X(J)+1. GO TO 61 60 CONTINUE 61 CONTINUE 100 CONTINUE SUM=0. DO 130 J=1,5 SUM=SUM+X(J) 130 CONTINUE DO 140 J=1,5 IF(SUM.NE.0)X(J)=X(J)*100./SUM 140 CONTINUE WRITE(KBOUT,1001)X(1) 1001 FORMAT(' ',F6.2,'% OK on both strands and they agree(0)') WRITE(KBOUT,1002)X(2) 1002 FORMAT(' ',F6.2,'% OK on plus strand only(1)') WRITE(KBOUT,1003)X(3) 1003 FORMAT(' ',F6.2,'% OK on minus strand only(2)') WRITE(KBOUT,1004)X(4) 1004 FORMAT(' ',F6.2,'% Bad on both strands(3)') WRITE(KBOUT,1005)X(5) 1005 FORMAT(' ',F6.2,'% OK on both strands but they disagree(4)') RETURN END C DBSTAT SUBROUTINE DBSTAT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +KBOUT) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) N=IDBSIZ-NCONTS SUM=0. DO 20 I=N,IDBSIZ-1 SUM=SUM+RELPG(I) 20 CONTINUE AV=SUM/NCONTS WRITE(KBOUT,1020)SUM,AV 1020 FORMAT( ' Total contig length ',F10.0,' Average', +' length ',F10.1) SUMG=0. DO 30 I=1,NGELS SUMG=SUMG+FLOAT(ABS(LNGTHG(I))) 30 CONTINUE AV=SUMG/SUM WRITE(KBOUT,1021)SUMG 1021 FORMAT( ' Total characters in gel readings ',F10.0) WRITE(KBOUT,1022)AV 1022 FORMAT +( ' Average gel characters per consensus character ',F10.2) 99 CONTINUE RETURN END C DELCON C C DELETES CONTIG FROM CONSENSUS SEQUENCE SUBROUTINE DELCON(SEQ1,ILEFT,ILC,IDIM1) C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDIM1) C FIRST CHAR TO REPLACE I1=ILEFT-20 C FIRST CHAR TO MOVE I2=ILEFT+ILC C IS THIS RIGHTMOST CONTIG ANYWAY? IF(I2.GT.IDIM1)GO TO 10 C NUMBER TO MOVE ID=IDIM1-I2+1 C MOVE CALL SQCOPY(SEQ1(I2),SEQ1(I1),ID) C RESET LENGTH IDIM1=I1+ID-1 RETURN 10 CONTINUE C RIGHTMOST CONTIG SO DONT MOVE IDIM1=I1-1 C RETURN END SUBROUTINE DISMAT(SEQ,IDIM,GEL,IDIMG,SAVPS,SAVPG,IDSAV, +CENDS,NENDS,IDCEND,MAXCON,KBOUT,MATCH) C AUTHOR: RODGER STADEN INTEGER CENDS(MAXCON) INTEGER NENDS(MAXCON) INTEGER SAVPS(IDSAV),SAVPG(IDSAV) CHARACTER SEQ(IDIM),GEL(IDIMG),MATCH(IDIMG) C EDIT 07-02-83 TO CHECK FOR CASE WHEN GEL OVERLAPS ADJACENT C CONTIGS WITHIN THE LENGTH OF THE GEL! DONE BY HAVING A C PARAMETER THAT STORES THE POSITION OF THE LEFT END OF THE C NEXT CONTIG (IE THE ONE AFTER THE ONE THE CURRENT GEL OVERLAPS) C SET IT TO A VERY LARGE VALUE INITIALLY NEXTC=99999 C SORT THE MATCHING WORDS INTO ASCENDING ORDER ON POSITION IN SEQ CALL BUB2AS(SAVPS,SAVPG,IDSAV) C LOOK FOR SEPARATE MATCHES LEND=IDIMG-SAVPG(1)+SAVPS(1) C WRITE(KBOUT,1000)SAVPG(1),SAVPS(1) CALL DISMAU(SEQ,IDIM,GEL,IDIMG,SAVPS(1), +SAVPG(1),CENDS,NENDS,IDCEND,MAXCON, +NEXTC,KBOUT,MATCH) DO 10 I=2,IDSAV IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10 C NEW MATCH, DISPLAY IT C WRITE(KBOUT,1000)SAVPG(I),SAVPS(I) C1000 FORMAT(' ',2I6) CALL DISMAU(SEQ,IDIM,GEL,IDIMG,SAVPS(I), +SAVPG(I),CENDS,NENDS,IDCEND,MAXCON, +NEXTC,KBOUT,MATCH) C RESET LEND LEND=IDIMG-SAVPG(I)+SAVPS(I) 10 CONTINUE RETURN END C C DISMAU C ROUTINE TO DISPLAY MATCHES C EDITED 17-12-81 TO NOT SUBTRACT 1 FROM LCL AND LGR SUBROUTINE DISMAU(SEQ,IDIM1,GEL,IDIMG,ISAVPS,SAVPG,CENDS,NENDS, +IDCEND,MAXCON,NEXTC,KBOUT,MATCH) C AUTHOR: RODGER STADEN CHARACTER SEQ(IDIM1),GEL(IDIMG),MATCH(IDIMG) INTEGER SAVPS,SAVPG,CENDS(MAXCON) INTEGER NENDS(MAXCON) C EDITED 07-02-83 FOR NEXTC (SEE DISMAT) C DELETE 20 FROM END OF CONSENSUS MATCH SAVPS=ISAVPS-19 C FIND CONTIG CONSENSUS ENDS JJ=1 DO 5 J=2,IDCEND IF(SAVPS.GT.CENDS(J))GO TO 5 C GONE PAST SO LAST IS THE ONE JJ=J-1 GO TO 6 5 CONTINUE JJ=IDCEND 6 CONTINUE C SUBTRACT 1 FROM END SAVPS=SAVPS-1 C LENGTH FROM MATCH TO LEFT OF CONTIG LCL=SAVPS-CENDS(JJ) C RIGHT LCR=CENDS(JJ+1)-ISAVPS-1 C LEFT GEL LGL=SAVPG-1 LGR=IDIMG-SAVPG C NEED MIN OF EACH PAIR LL=MIN(LCL,LGL) LR=MIN(LCR,LGR) C LENGTH OF OVERLAP LM=LR+LL+1 C DISPLAY STARTS ICL=ISAVPS-LL IGL=SAVPG-LL WRITE(KBOUT,1000)NENDS(JJ) 1000 FORMAT(' Match found with vector number =',I6) CALL SQMTCH(SEQ(ICL),GEL(IGL),MATCH,LM) L=ICL-CENDS(JJ)-19 CALL FMT4LN(SEQ(ICL),GEL(IGL),MATCH,LM,L,IGL,KBOUT) C SAVE POSN OF END OF NEXT CONTIG NEXTC=CENDS(JJ+1)+20 RETURN END SUBROUTINE DSPLAY(RELPG,LNGTHG,LNBR,RNBR, +GEL,LLINOO,LINCON,LREG,RREG,GEL2,I1,IDIM,NOPT, +LLINOR,IDBSIZ,IDEV,KBOUT,IDEVW,IDEVN,LINLEN,PERCD, +MAXGEL,IDM) C AUTHOR: RODGER STADEN INTEGER CHRSIZ PARAMETER (CHRSIZ = 6) PARAMETER (IDC1 = CHRSIZ*100) INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER LREG,RREG,X,XLS2,XLS1,XRS2,XRS1,RREG2 CHARACTER MATCH(100) INTEGER CHARS(CHRSIZ,100),CHARS1(IDC1) CHARACTER NAMARC*16 CHARACTER GEL(MAXGEL) CHARACTER GEL2(MAXGEL) INTEGER RELPOS(10),RELPO2(10) INTEGER GELC INTEGER RP INTEGER LSEQNO,RSEQNO CHARACTER LINOUT(100) CHARACTER MUNOTP CHARACTER GTCONC EXTERNAL GTCONC EQUIVALENCE (CHARS1,CHARS) CALL FILLI(CHARS1,IDC1,0) C SET CONTIG NUMBER ICON=1 LLINO=LLINOO NLEN=LINLEN/10 LSEQNO=LREG X=LINLEN+LSEQNO-1 RSEQNO=MIN(RREG,X) C SET LEFT GEL NUMBER FOR RIGHT CONTIG LN2=LLINOR C FIRST GEL NO IS LLINOO C SET RREG FOR RIGHT CONTIG RREG2=IDIM C SET UP LSEQNO,RSEQNO FOR FOR NOPT=3 XLS2=I1 XRS2=RSEQNO-LSEQNO+XLS2 9 CONTINUE C IF RIGHT CONTIG SKIP NUMBER PRINTING IF(ICON.EQ.2)GO TO 8 C NEED TO KEEP LONGEST LINE LENGTH FOR OUTPUT OF CONSENSUS IE=0 C SETUP AND WRITE NUMBERS RELPOS(1)=LSEQNO+9 DO 5 I=2,NLEN RELPOS(I)=RELPOS(I-1)+10 5 CONTINUE WRITE(IDEV,1023) +(RELPOS(K),K=1,MIN(NLEN,MAX(1,(RSEQNO-LSEQNO+1)/10))) 1023 FORMAT( ' ',25X,10(I9,1X)) C SET CURRENT LINE NUMBER 8 CONTINUE GELC=LLINO 10 CONTINUE C IS LEFT END OF CURRENT GEL >RREG IF(RELPG(GELC).GT.RSEQNO)GO TO 200 C ALSO NEED TO KNOW IF RIGHT END ON THIS LINE (IF .LT. NO DATA C TO DISPLAY) X=RELPG(GELC)+ABS(LNGTHG(GELC))-1 IF(X.LT.LSEQNO)GO TO 190 CALL READW(IDEVW,GELC,GEL,MAXGEL) CALL FILLC(LINOUT,LINLEN,' ') CALL READN(IDEVN,GELC,NAMARC) C C NEED TO KNOW HOW MANY CHARS TO COPY OVER TO OUTPUT LINE C AND WHERE IN LINE TO PUT THEM C CURRENT LINE LEFT END IS LSEQNO,RIGHT END RSEQNO C SO LEFT START CHAR IS X=MAX(LSEQNO,RELPG(GELC)) C POSITION IN ARRAY LINE LP=X-LSEQNO+1 C RIGHT END CHAR IS X=RELPG(GELC)+ABS(LNGTHG(GELC))-1 X=MIN(RSEQNO,X) C POSITION IN ARRAY LINE RP=X-LSEQNO+1 C LOOK FOR LONGEST LINE IF(RP.GT.IE)IE=RP C NEED LEFT START IN GEL K=LSEQNO-RELPG(GELC)+1 IF(K.LT.1)K=1 NCOP=RP-LP+1 IF(NCOP.GT.0)CALL SQCOPY(GEL(K),LINOUT(LP),NCOP) N=LP+NCOP-1 II=K-1 IF(IDM.EQ.26)THEN DO 50 I = LP,N II = II + 1 CALL PCON1(GEL(II),CHARS(1,I)) 50 CONTINUE ELSE DO 70 I=LP,N II=II+1 JJ = INDEXS(GEL(II),JSCORE) CHARS(JJ,I) = CHARS(JJ,I) + JSCORE C CHARS(CHRSIZ,I) = CHARS(CHRSIZ,I) + JSCORE 70 CONTINUE END IF I=SIGN(GELC,LNGTHG(GELC)) WRITE(IDEV,1020)I,NAMARC,(LINOUT(K),K=1,RP) C1020 FORMAT( ' ',I4,2X,A,2X,100A1) 1020 FORMAT( ' ',I6,1X,A,1X,100A1) C 190 CONTINUE C NOW GET NEXT GEL TO RIGHT GELC=RNBR(GELC) IF(GELC.NE.0)GO TO 10 200 CONTINUE C CALC CONSENSUS AND WRITE IT IF(IDM.EQ.26)THEN DO 49 I = 1,LINLEN LINOUT(I) = MUNOTP(CHARS(1,I)) CHARS(1,I) = 0 49 CONTINUE ELSE DO 230 I=1,LINLEN LINOUT(I) = GTCONC(CHARS(1,I),CHRSIZ,PERCD) CALL FILLI(CHARS(1,I),CHRSIZ,0) 230 CONTINUE END IF WRITE(IDEV,1019)(LINOUT(K),K=1,IE) C IF REQUIRED WRITE COMPARISON GEL C WHICH OPTION IN OPERATION? IF(NOPT.EQ.2)GO TO 52 IF(NOPT.NE.3)GO TO 250 53 CONTINUE C ALREADY DONE THIS LINE CONTIG2? IF(ICON.EQ.2)GO TO 54 ICON=2 C NEED TO SAVE CONSENSUS FROM LEFT CONTIG CALL SQCOPY(LINOUT,GEL2,IE) C SAVE VALUES FROM LEFT CONTIG XLS1=LSEQNO XRS1=RSEQNO C SAVE CURRENT LEFT GEL NUMBER LN1=LLINO C SET UP VALUES FOR RIGHT CONTIG LSEQNO=XLS2 RSEQNO=XRS2 C SET LEFT GEL NUMBER LLINO=LN2 C GET NEXT GEL GO TO 150 54 CONTINUE C SAVE CURRENT LEFT GEL NUMBER LN2=LLINO C SET VALUES FOR RIGHT CONTIG NEXT PASS XLS2=XRS2+1 XRS2=XLS2+LINLEN-1 IF(XRS2.GT.RREG2)XRS2=RREG2 C SET UP VALUES FOR LEFT CONTIG LLINO=LN1 ICON=1 LSEQNO=XLS1 RSEQNO=XRS1 C SET DECREMENT FOR POINTER TO GEL2 MMM=I1-1 52 CONTINUE C1020 FORMAT( ' ',I4,2X,A,2X,100A1) 1017 FORMAT(' NEWGEL ',100A1) 1018 FORMAT(' MISMATCH ',100A1) 1019 FORMAT(' CONSENSUS ',100A1) 1022 FORMAT( ' ',26X,100A1) I2=I1+LINLEN-1 IF(I2.GT.IDIM)I2=IDIM IF(NOPT.EQ.2)WRITE(IDEV,1017)(GEL2(K),K=I1,I2) C SET DECREMENT IF(NOPT.EQ.2)MMM=0 55 CONTINUE CALL FILLC(MATCH,LINLEN,'*') K=0 DO 667 J=I1,I2 K=K+1 IF(GEL2(J-MMM).EQ.LINOUT(K))MATCH(K) = ' ' 667 CONTINUE WRITE(IDEV,1018)(MATCH(K),K=1,IE) RELPO2(1)=(I1)+9 DO 240 I=2,NLEN RELPO2(I)=RELPO2(I-1)+10 240 CONTINUE WRITE(IDEV,1023)(RELPO2(K),K=1,NLEN) I1=I2+1 I2=I2+LINLEN IF(I2.GT.IDIM)I2=IDIM IF(I1.GT.I2)RETURN 250 CONTINUE C WRITE(IDEV,1021) 1021 FORMAT( ) C NEXT LINE LENGTH C NEXT LENGTH IS OLD RIGHT +1 LSEQNO=RSEQNO+1 C NEW RIGHT IS LEFT +LENGTH RSEQNO=LSEQNO+(LINLEN)-1 C ARE WE OVER END OF REGION IF(RSEQNO.GT.RREG)RSEQNO=RREG C HAVE WE FINISHED REGION COMPLETELY IF(RSEQNO.LT.LSEQNO) RETURN C NOT FINISHED SO NEED TO FIND CURRENT LEFT GEL NO C CURRENT LEFT GEL IS LLINO C 150 CONTINUE C NEED TO KNOW IF CURRENT LEFT GELS RIGHT END IS INSIDE REGION X=RELPG(LLINO)+ABS(LNGTHG(LLINO))-1 IF(X.GE.LSEQNO)GO TO 9 C LOOK AT NEXT GEL TO RIGHT LLINO=RNBR(LLINO) C MAY HAVE GONE OVER END OF CONTIG????? IF(LLINO.GT.0)GO TO 150 300 CONTINUE RETURN END SUBROUTINE EC(GEL,IDG,CON,IDC,K) CHARACTER GEL(IDG),CON(IDC),CHARSL EXTERNAL CHARSL,INDEXS PARAMETER (IDASH = 6) K = 0 DO 10 I = 1,MIN(IDC,IDG) JC = INDEXS(CON(I),J) IF(JC.NE.IDASH) THEN JG = INDEXS(GEL(I),J) IF(JG.NE.JC) THEN GEL(I) = CHARSL(JC) K = K + 1 END IF END IF 10 CONTINUE C WRITE(*,*)'NUMBER OF CHARS CORRECTED=',K END SUBROUTINE ED(GEL,IDG,CON,IDC,K) CHARACTER GEL(IDG),CON(IDC),CHARSL EXTERNAL CHARSL,INDEXS K = 0 DO 10 I = MIN(IDC,IDG),1,-1 JC = INDEXS(CON(I),J) IF(JC.EQ.5) THEN IF(I.LT.IDG) CALL SQCOPY(GEL(I+1),GEL(I),IDG-I) K = K + 1 END IF 10 CONTINUE C WRITE(*,*)'NUMBER OF CHARS DELETED=',K END SUBROUTINE EDR(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT, +CON,IDC,IDEVW,IDEVR,LREG) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER CON(IDC) INTEGER CHNRP EXTERNAL CHNRP C CHANGE RELATIVE POSITIONS FOR AE ND = 0 DO 10 I = IDC,1,-1 IF(CON(I).EQ.'*') THEN ND = ND + 1 K = I + LREG - 1 J = CHNRP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT,K) IF(J.NE.0) THEN CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,IDUM,JDUM,IDEVR, + IDBSIZ,J,NCONT,-1) END IF END IF 10 CONTINUE C WRITE(*,*)' NUMBER OF DELETIONS=',ND END SUBROUTINE ET(GEL,IDG,CON,IDC,K) CHARACTER GEL(IDG),CON(IDC),CHARSL EXTERNAL CHARSL,INDEXS K = 0 DO 10 I = 2,MIN(IDC,IDG) JC = INDEXS(CON(I),J) IF(JC.NE.6) THEN JG = INDEXS(GEL(I),J) IF(JG.NE.JC) THEN JNG = INDEXS(GEL(I-1),J) JNC = INDEXS(CON(I-1),J) IF(JNC.NE.JNG) THEN IF((JNG.EQ.JC).AND.(JNC.EQ.JG)) THEN GEL(I) = CHARSL(JNG) GEL(I-1) = CHARSL(JG) K = K + 1 END IF END IF END IF END IF 10 CONTINUE C WRITE(*,*)' NUMBER OF CHARS TRANSPOSED=',K END SUBROUTINE FDEPTH(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,LENCON, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG,DEPTHP,DEPTHM,STRAND STRAND = 1 CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,LENCON,STRAND,DEPTHP) IF(DEPTHP.LT.0) RETURN STRAND = -1 CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,LENCON,STRAND,DEPTHM) IF(DEPTHM.LT.0) RETURN CALL PLTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, +MARGL,MARGR,MARGB, +MARGT,ISXMAX,ISYMAX,LGEL,LREG,RREG,DEPTHP,DEPTHM) END SUBROUTINE FDPTH(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,LENCON,STRAND,DEPTH) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG,DEPTH,STRAND EXTERNAL NCDEP C LREG = left contig position C RREG = right '' '' C LENCON = RREG-LREG+1 I = LGEL DEPTH = 0 5 CONTINUE IF(I.NE.0) THEN IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.LREG) THEN I = RNBR(I) GO TO 5 END IF ELSE DEPTH = -1 RETURN END IF C WRITE(*,*)'LGEL',LGEL 10 CONTINUE IF(I.NE.0)THEN IF(RELPG(I).LE.RREG) THEN IF(SIGN(1,LNGTHG(I)).EQ.STRAND) THEN K = RELPG(I) + ABS(LNGTHG(I)) -1 DEPTH = MAX(NCDEP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,I, + STRAND,K),DEPTH) END IF I = RNBR(I) GO TO 10 END IF END IF C WRITE(*,*)'DEPTH',DEPTH END C FIND C C SUBROUTINE TO FIND THE FIRST OCCURENCE OF A GIVEN STRING C IN A GIVEN ARRAY C SUBROUTINE FIND(SEQ,IDIM1,STRING,IDIM2,IMATCH) C AUTHOR: RODGER STADEN CHARACTER SEQ(IDIM1),STRING(IDIM2),DASH INTEGER PSEQ,PSTR SAVE DASH DATA DASH/'-'/ PSEQ=0 PSTR=1 IMATCH=0 C 100 CONTINUE C C PUT PSEQ TO WHERE THIS FAILED MATCH STARTED PSEQ=PSEQ+1-PSTR C 400 CONTINUE C PSTR=0 C 500 CONTINUE C C POINT TO NEXT SEQ CHAR PSEQ=PSEQ+1 C TEST FOR END IF(PSEQ.GT.IDIM1)GO TO 300 C POINT TO NEXT STRING CHAR PSTR=PSTR+1 C TEST FOR DASH IN STRING IF(STRING(PSTR).EQ.DASH)GO TO 450 C TEST FOR DASH IN SEQ IF(SEQ(PSEQ).EQ.DASH)GO TO 400 C TEST FOR MATCH IF(SEQ(PSEQ).NE.STRING(PSTR))GO TO 100 C 450 CONTINUE C C TEST FOR END OF STRING IE. WHOLE STRING MATCH IF(PSTR.LT.IDIM2)GO TO 500 C HAVE MATCH. GET POINTER TO WHERE IT STARTED IMATCH=PSEQ-IDIM2+1 C 300 CONTINUE RETURN END SUBROUTINE FMT4LP(SEQ1,SEQ2,IDIM,ISW,ISX,IDEV,NAME1,NAME2) C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDIM),SEQ2(IDIM),MATCH(60),NAME1*(*),NAME2*(*) INTEGER KL(6) ISXX=ISX ISWW=ISW IE=0 10 CONTINUE IS=IE+1 IE=IE+60 IF(IE.GT.IDIM)IE=IDIM N=IE-IS+1 N=1+(N-1)/10 C SET UP DECIMAL COUNTERS DO 50 J=1,N KL(J)=ISWW ISWW=ISWW+10 50 CONTINUE WRITE(IDEV,1001)(KL(K),K=1,N) WRITE(IDEV,1002)NAME1,(SEQ1(K),K=IS,IE) IL = IE - IS + 1 CALL SQMTCH(SEQ1(IS),SEQ2(IS),MATCH,IL) WRITE(IDEV,1003)(MATCH(K),K=1,IL) WRITE(IDEV,1002)NAME2,(SEQ2(K),K=IS,IE) 1002 FORMAT(2X,A,2X,6(10A1,1X)) 1003 FORMAT(10X,6(10A1,1X)) C SET UP DECIMAL COUNTERS DO 60 J=1,N KL(J)=ISXX ISXX=ISXX+10 60 CONTINUE WRITE(IDEV,1001)(KL(K),K=1,N) 1001 FORMAT( 5X,6(I6,5X)) IF(IE.LT.IDIM) GO TO 10 END SUBROUTINE FMTDB(SEQ1,IDIM,ISW,ISE,LINLEN,IDEV) C NOTE SAME AS FMTSEP! C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDIM) INTEGER KL(12) ISWW=ISW-1 IE=ISW-1 1 CONTINUE WRITE(IDEV,1003) 1003 FORMAT( ) C SET UP DECIMAL COUNTERS DO 50 J=1,LINLEN/10 ISWW=ISWW+10 KL(J)=ISWW 50 CONTINUE IS=IE+1 IE=IE+LINLEN IF(IE.GT.ISE)IE=ISE WRITE(IDEV,1001)(KL(KKK),KKK=1,MIN(IE-IS+1,LINLEN)/10) WRITE(IDEV,1002)(SEQ1(K),K=IS,IE) 1002 FORMAT( ' ',12(10A1,1X)) 1001 FORMAT( ' ',12(5X,I6)) IF(IE.EQ.ISE)RETURN GO TO 1 END SUBROUTINE FNDCON(SEQ,IDIM,CENDS,NENDS,IDCEND,MAXCON,KBOUT) C AUTHOR: RODGER STADEN C STORES THEIR POSITIONS IN CENDS AND THEIR LEFT LINE NUMBERS IN NENDS PARAMETER (MAXDG = 5) CHARACTER SEQ(IDIM),DC(MAXDG) INTEGER CENDS(MAXCON) INTEGER NENDS(MAXCON) EXTERNAL IFROMC,INDEXA IDCEND=0 DO 10 I=1,IDIM IF(SEQ(I).NE.'<')GO TO 10 IDCEND=IDCEND+1 C PUT POSITION OF LEFT END OF CONTIG IN CENDS CENDS(IDCEND)=I K = INDEXA(SEQ(I),20,'.') IF(K.EQ.0) THEN WRITE(KBOUT,*)'Error in contig title: no dot!' IDCEND = 0 RETURN END IF K = K + I C K=I+11 DO 5 J=1,MAXDG DC(J)=SEQ(K) K=K+1 5 CONTINUE NENDS(IDCEND)=IFROMC(DC,MAXDG,KBOUT) 10 CONTINUE C STORE POSITION OF LAST CHAR +1 TO SIMPLIFY DISPLAY ROUTINES CENDS(IDCEND+1)=IDIM+1 RETURN END INTEGER FUNCTION GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,IIN) C AUTHOR: RODGER STADEN C RETURNS CONTIG LINE NUMBER OR ZERO FOR ERROR INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) GCLIN = 0 N=IDBSIZ-NCONTS DO 10 J=N,IDBSIZ-1 IF(LNBR(J).EQ.IIN) THEN GCLIN = J RETURN END IF 10 CONTINUE END INTEGER FUNCTION GELID(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LLINO,IDBSIZ,KBIN,KBOUT,IDEVN, +IHELPS,IHELPE,FILEH,IDEVH,INFLAG) CHARACTER FILEH*(*) C AUTHOR: RODGER STADEN C SEARCHES FOR ARCHIVE NAMES INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER NAME1*17,NAME2*16,NAME3*17,NFLAG PARAMETER (NFLAG='/') NAME3 = ' ' IF(LLINO.NE.0) THEN NAME3(1:1) = NFLAG CALL READN(IDEVN,LLINO,NAME3(2:)) END IF GELID = 0 10 CONTINUE L = 0 IF(LLINO.NE.0) L = 17 CALL GTSTR('Contig identfier',NAME3, +NAME1,L,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.4) RETURN IF(INFLAG.EQ.3) THEN GELID = LLINO RETURN END IF IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 10 END IF IF(NAME1(1:1).EQ.NFLAG) THEN CALL CCASE(NAME1,1) DO 20 I=1,NGELS CALL READN(IDEVN,I,NAME2) CALL CCASE(NAME2,1) IF(NAME1(2:17).EQ.NAME2) THEN GELID = I RETURN END IF 20 CONTINUE WRITE(KBOUT,1004)NAME1(2:) 1004 FORMAT(' ',A,' is not in the database!') ELSE CALL RJST(NAME1) READ(NAME1,1001,ERR=10,END=10)GELID 1001 FORMAT(I17) IF((GELID.LT.1).OR.(GELID.GT.NGELS)) THEN CALL ERROM(KBOUT,'Illegal gel reading number') GO TO 10 END IF END IF END SUBROUTINE GELOUT(RELPG,LNGTHG,LNBR,RNBR,MAXDB,IDBSIZ,NGELS, +NCONTS,GEL,MAXGEL,IDEV3,IDEV4,IDEV5,IDEV1,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,FILNAM) INTEGER RELPG(MAXDB) INTEGER LNGTHG(MAXDB),LNBR(MAXDB),RNBR(MAXDB) CHARACTER GEL(MAXGEL) CHARACTER FILNAM*(*),HELPF*(*) CHARACTER NAMARC*16 FILNAM = ' ' CALL OPENF1(IDEV5,FILNAM,1,IOK,KBIN,KBOUT, +'File for names of extracted gel readings', +IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0)RETURN CALL YESNO(I,'Extract ends of contigs only', +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(I.LT.0) RETURN IF(I.EQ.0) GO TO 15 DO 10 I=1,NGELS L=ABS(LNGTHG(I)) IF(L.GT.0)THEN CALL READN(IDEV4,I,NAMARC) WRITE(KBOUT,1002)NAMARC 1002 FORMAT(' ',A) WRITE(IDEV5,1003)NAMARC 1003 FORMAT(A) FILNAM = NAMARC CALL OPENRS(IDEV1,FILNAM,IOK,LRECL,1) IF(IOK.NE.0) GO TO 100 CALL READW(IDEV3,I,GEL,MAXGEL) IF(LNGTHG(I).LT.0)THEN CALL SQREV(GEL,L) CALL SQCOM(GEL,L) END IF CALL FMTDKN(IDEV1,GEL,L) CLOSE(UNIT=IDEV1) END IF 10 CONTINUE RETURN 15 CONTINUE C NUMBER OF LINES TO PROCESS N=IDBSIZ-NCONTS DO 20 I=N,IDBSIZ-1 JL=LNBR(I) JR=RNBR(I) CALL READN(IDEV4,JL,NAMARC) WRITE(KBOUT,1002)NAMARC WRITE(IDEV5,1003)NAMARC FILNAM = NAMARC CALL OPENRS(IDEV1,NAMARC,IOK,LRECL,1) IF(IOK.NE.0) GO TO 100 CALL READW(IDEV3,JL,GEL,MAXGEL) L=ABS(LNGTHG(JL)) IF(LNGTHG(JL).LT.0)THEN CALL SQREV(GEL,L) CALL SQCOM(GEL,L) END IF CALL FMTDKN(IDEV1,GEL,L) CLOSE(UNIT=IDEV1) IF(JR.EQ.JL)GO TO 20 CALL READN(IDEV4,JR,NAMARC) WRITE(KBOUT,1002)NAMARC WRITE(IDEV5,1003)NAMARC CALL OPENRS(IDEV1,NAMARC,IOK,LRECL,1) IF(IOK.NE.0) GO TO 100 CALL READW(IDEV3,JR,GEL,MAXGEL) L=ABS(LNGTHG(JR)) IF(LNGTHG(JR).LT.0)THEN CALL SQREV(GEL,L) CALL SQCOM(GEL,L) END IF CALL FMTDKN(IDEV1,GEL,L) CLOSE(UNIT=IDEV1) 20 CONTINUE RETURN 100 CONTINUE WRITE(KBOUT,*)'Error opening file for extracted gel reading' RETURN END SUBROUTINE GETLN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +IHELPS,IHELPE,FILEH,IDEVH) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),GELID INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER FILEH*(*) EXTERNAL GELID IERR = 1 NCONTC = GELID(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LLINO, +IDBSIZ,KBIN,KBOUT,IDEVN, +IHELPS,IHELPE,FILEH,IDEVH,INFLAG) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.4) RETURN IF(NCONTC.EQ.0) RETURN IGELNO = NCONTC IF(LNBR(NCONTC).NE.0) THEN WRITE(KBOUT,1013)RELPG(NCONTC) 1013 FORMAT(' Position of this reading=',I6) 25 CONTINUE NCONTC = LNBR(NCONTC) IF(LNBR(NCONTC).NE.0) GO TO 25 WRITE(KBOUT,1014)NCONTC 1014 FORMAT( ' Number of leftmost reading this contig=',I6) END IF 30 CONTINUE N = IDBSIZ - NCONTS DO 20 J=N,IDBSIZ-1 IF(LNBR(J).EQ.NCONTC) THEN LINCON=J GO TO 21 END IF 20 CONTINUE WRITE(KBOUT,9999) 9999 FORMAT(' No contig line for this gel! Fix the database') RETURN 21 CONTINUE LLINO = NCONTC IERR = 0 END SUBROUTINE GETLN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +IHELPS,IHELPE,FILEH,IDEVH) CALL GETLN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, +IHELPS,IHELPE,FILEH,IDEVH) END SUBROUTINE GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LEFTMN,RIGHTM,LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT, + IHELPS,IHELPE,FILEH,IDEVH,IOK) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER LREG,RREG,RIGHTM CHARACTER FILEH*(*) 40 CONTINUE MN = LEFTMN MX = RIGHTM LREG = MN CALL GETINT(MN,MX,LREG, +'Start position in contig', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN LREG = IVAL MN = LREG MX = RIGHTM RREG = MX CALL GETINT(MN,MX,RREG, +'End position in contig', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,FILEH,IDEVH,IOK) IF(IOK.NE.0) RETURN RREG = IVAL C NOW FIND FIRST GEL THAT OVER LAPS 50 CONTINUE X=RELPG(LLINO)+(ABS(LNGTHG(LLINO)))-1 IF(X.GE.LREG)GO TO 60 C NOT IN REGION LLINO=RNBR(LLINO) GO TO 50 60 CONTINUE RETURN END SUBROUTINE GLEVEL(T,YF,YT,Y0,YP1,YP2,YM1,YM2) CHARACTER T IF(T.EQ.'0') THEN YF = Y0 YT = Y0 ELSE IF(T.EQ.'1') THEN YF = Y0 YT = YM1 ELSE IF(T.EQ.'2') THEN YF = Y0 YT = YP1 ELSE IF(T.EQ.'3') THEN YF = YP1 YT = YM1 ELSE IF(T.EQ.'4') THEN YF = YP2 YT = YM2 END IF END CHARACTER*1 FUNCTION GTCONC(COUNTS,IDM,CUT) INTEGER IDM INTEGER COUNTS(IDM) CHARACTER CHARSU EXTERNAL CHARSU C 30-3-92 made this routine sum counts GTCONC = '-' ISUM = 0 DO 5 I=1,IDM ISUM = ISUM + COUNTS(I) 5 CONTINUE IF(ISUM.EQ.0) RETURN Y = ISUM DO 10 I = 1,IDM - 1 X = REAL(COUNTS(I))/Y IF(X.GE.CUT) THEN GTCONC = CHARSU(I) RETURN END IF 10 CONTINUE END SUBROUTINE HIGHLT(GELSAV,NAMSAV,NUMSAV,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IDEV1,IDEV2, +FILNAM,IOK) CHARACTER LINEIN*132,CONSEN*100 CHARACTER GELNO*6,GEL*100,GELSAV*100,GELNAM*16 CHARACTER NAMSAV*16,NUMSAV*6 CHARACTER FILNAM*(*),HELPF*(*) DIMENSION GELSAV(50),NAMSAV(50),NUMSAV(50) CHARACTER PLUS*4,MINUS*4 EQUIVALENCE (LINEIN(2:2),GELNO),(LINEIN(9:9),GELNAM) EQUIVALENCE (LINEIN(26:26),GEL) EXTERNAL NOTIRL CALL OPENF1(IDEV1,FILNAM,0,IOK,KBIN,KBOUT, +'File containing contig display', +IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN FILNAM = ' ' CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT, +'File for problem display', +IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN C C FORMAT: C C12345678901234567890 10 20 30 ETC C 12 GELNAM0000 CAGACGCGCGCGCGCGCGGATATAGTCTCTCCGCTCT C 100 GELNAM0000 TGATACGCTCGCTCTCTCTCTCTCTCTCTTTC C AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA C C 70 80 ETC C 12 GELNAM0000 AAAAAAAAAAAAAAAAAAAAAAAAAAAA C C LIN = 1 CALL GTSTR('plus strand symbol',':',PLUS,LIN,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) GO TO 10 END IF IF(INFLAG.EQ.2) RETURN IF(LIN.EQ.0) PLUS = ':' LIN = 1 CALL GTSTR('minus strand symbol','.',MINUS,LIN,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) GO TO 10 END IF IF(INFLAG.EQ.2) RETURN IF(LIN.EQ.0) MINUS = '.' C COUNT LINE NUMBERS LINNO=0 10 CONTINUE C READ LINE OF NOS READ(IDEV1,1003,END=100)LINEIN LINNO=LINNO+1 1003 FORMAT(A) C WRITE IT OUT AGAIN WRITE(IDEV2,1003)LINEIN C ZERO GEL COUNT FOR THIS STRIP IGEL=0 20 CONTINUE C C READ A LINE, COULD BE 1 GEL, 2 CONSENSUS OR BLANK C LINEIN=' ' READ(IDEV1,1003,END=100)LINEIN LINNO=LINNO+1 C WHAT SORT OF LINE? ONLY A GEL WILL HAVE NON BLANK CHARS AT THE LEFT END IF(LINEIN(2:7).NE.' ')THEN C GEL LINE SO SAVE IGEL=IGEL+1 GELSAV(IGEL)=GEL NAMSAV(IGEL)=GELNAM NUMSAV(IGEL)=GELNO GO TO 20 END IF C MUST BE CONSENSUS CONSEN=GEL C PROCESS THIS STRIP OF GELS (IGEL OF THEM) DO 50 I=1,IGEL C WHERE DOES DATA START AND END? IFIRST=1 40 CONTINUE IF(GELSAV(I)(IFIRST:IFIRST).NE.' ')GO TO 45 IFIRST=IFIRST+1 IF(IFIRST.LE.100)GO TO 40 C ERROR --- NO DATA FOUND WRITE(KBOUT,1004)LINNO 1004 FORMAT(' Error on line',I6,' of file') RETURN 45 CONTINUE C NOW WHERE DOES IT END ILAST=NOTIRL(GELSAV(I),100,' ') C COMPARE WITH CONSENSUS READ(NUMSAV(I),1001,ERR=900)INTEG 1001 FORMAT(I6) IF(INTEG.GE.0)CALL IDTOD(CONSEN,GELSAV(I),IFIRST,ILAST,PLUS) IF(INTEG.LT.0)CALL IDTOD(CONSEN,GELSAV(I),IFIRST,ILAST,MINUS) WRITE(IDEV2,1008)NUMSAV(I),NAMSAV(I),GELSAV(I)(1:ILAST) 1008 FORMAT(' ',A,1X,A,1X,A) 50 CONTINUE WRITE(IDEV2,1009)CONSEN 1009 FORMAT(' ',24X,A) 1006 FORMAT( ) C READ A BLANK LINE READ(IDEV1,1003,END=100)LINEIN LINNO=LINNO+1 WRITE(IDEV2,1003)LINEIN C NO GO BACK FOR THE NEXT LINE OF NUMBERS GO TO 10 100 CONTINUE WRITE(KBOUT,1005) 1005 FORMAT(' Finished') RETURN 900 WRITE(KBOUT,*)'Error reading gel number' END SUBROUTINE IDPLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, +NCONTS,IX,IY,MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX, +DBTDUX,DBTDUY,NCONT,IGEL,IS) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER CHNRP1 EXTERNAL CWORLD,CHNRP1 YMAX = ISYMAX YMIN = 0. XMIN = 0. LENCON = 0 DO 10 I = IDBSIZ-NCONTS,IDBSIZ-1 LENCON = LENCON + RELPG(I) 10 CONTINUE XMAX = LENCON XX = CWORLD(IX,MARGL,MARGR,XMIN,XMAX) YX = CWORLD(IY,MARGB,MARGT,YMIN,YMAX) YINC = (YMAX-YMIN)/3. Y = 0. XF = XMIN N = 0 DO 20 I = IDBSIZ-NCONTS,IDBSIZ-1 N = N + 1 XT = XF + RELPG(I) Y = Y + YINC IF((XX.GT.XF).AND.(XX.LT.XT)) THEN IS = NINT(((XX-XF)/(XT-XF)) * RELPG(I)) JGEL = LNBR(I) IGEL = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,JGEL,IS) NCONT = I RETURN END IF XF = XT IF(N.EQ.2) THEN N = 0 Y = 0. END IF 20 CONTINUE IGEL = 0 NCONT = 0 END SUBROUTINE IDTOD(TOPLIN,GEL,IFIRST,ILAST,SYMBOL) CHARACTER TOPLIN*100,GEL*100,SYMBOL*4 DO 10 I=IFIRST,ILAST IF(GEL(I:I).EQ.TOPLIN(I:I))GEL(I:I)=SYMBOL(1:1) 10 CONTINUE END INTEGER FUNCTION INDEXS(C,S) PARAMETER (IDM = 29) CHARACTER C INTEGER POINTS(0:255),SCORES(IDM),IND(IDM),S COMMON /SHOTC/POINTS SAVE /SHOTC/ SAVE SCORES,IND DATA +IND/1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,6,6,6,6,6,6,1,2,3,4,5,5,6/ C DATA DUP/'CTAG1234DVBHKLMNRY5678ctag*,-'/ C changed 28-7-91 to give 10 to old zeroes and 100 to lowercase DATA SCORES/ +100,100,100,100, +75,75,75,75, +100,100,100,100, +100,100,100,100, +10,10,10,10,10,10, +100,100,100,100,100,100,10/ I = ICHAR(C) I = POINTS(I) S = SCORES(I) INDEXS = IND(I) END C ROUTINES TO CONTROL CHARACTER LOOKUP FOR SHOTGUN SEQUENCING SUBROUTINE INITS C AUTHOR RODGER STADEN INTEGER POINTS(0:255) PARAMETER (IDM = 29) CHARACTER DUP*29 COMMON /SHOTC/POINTS SAVE /SHOTC/ DATA DUP/'CTAG1234DVBHKLMNRY5678ctag*,-'/ C ICHAR RETURNS THE COLLATING SEQUENCE NUMBER C I WANT 1-4 FOR ACGT C acgt C 1234 C BDHV C KLMN C 5 FOR * C 6 FOR 5678- AND ELSE C THE ACTUAL VALUE RETURNED BY ICHAR IS NOT PORTABLE C SO I NEED TO INITIALIZE POINTR SO THAT THE CORRECT C ELEMENTS CONTAIN VALUES 1 - 6 C DO 30 I = 0,255 POINTS(I) = IDM 30 CONTINUE DO 35 I = 1,IDM J = ICHAR(DUP(I:I)) POINTS(J) = I 35 CONTINUE END FUNCTION INLIST(LIST,IDLIST,ITEM) C AUTHOR: RODGER STADEN C SENT LIST LIST, AND ITEM ITEM. IF IN LIST RETURNS ELEMENT NUMBER, ELSE 0 INTEGER LIST(IDLIST) INLIST=0 DO 1 I=1,IDLIST IF(LIST(I).NE.ITEM)GO TO 1 INLIST=I RETURN 1 CONTINUE RETURN END SUBROUTINE IPLTC(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,STRAND, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,YMID,YINC,DEPTH,X,Y,KBOUT, +IGEL,IOK) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG,STRAND,DEPTH IOK = 1 XMIN = LREG XMAX = RREG YMAX = ISYMAX YMIN = 0. YINCO2 = STRAND*YINC/2. I = LGEL IGEL = 0 5 CONTINUE IF(I.NE.0) THEN IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.LREG) THEN I = RNBR(I) GO TO 5 END IF END IF N = 0 10 CONTINUE IF(I.NE.0)THEN IF(RELPG(I).LE.RREG) THEN IF(SIGN(1,LNGTHG(I)).EQ.STRAND) THEN XF = MAX(RELPG(I),LREG) XT = MIN(ABS(LNGTHG(I))+RELPG(I)-1,RREG) N = N + 1 IF(N.GT.DEPTH) N = 1 YF = YMID + N * YINC IF((X.GE.XF).AND.(X.LE.XT)) THEN IGEL = I IF((Y.GE.YF-YINCO2).AND.(Y.LE.YF+YINCO2)) THEN IOK = 0 RETURN END IF END IF END IF I = RNBR(I) GO TO 10 END IF END IF END C LINEUP C C TAKES 2 SEQS SET OF MATCHES AND PRODUCES LINED UP SEQS C FINDS IF WE HAVE A LEFT OVERLAP C RETURNS POSITION OF JOINT. THIS IS RELATIVE TO THE CONTIG C FOR MOST MATCHES BUT I RELATIVE TO THE GEL FOR A LEFT OVERLAP SUBROUTINE LINEUP(SEQG,SEQC,SEQG2,SEQC2,IDC,IDG,IDOUT, 1MATG,MATC,MATL,IP,ITOTPC,ITOTPG,JOINT,ITYPE,KBOUT,MAXGEL,IFAIL) C AUTHOR: RODGER STADEN CHARACTER SEQG(IDG),SEQC(IDC),SEQG2(IDOUT),SEQC2(IDOUT),PAD INTEGER MATG(IP),MATC(IP),MATL(IP) SAVE PAD DATA PAD/','/ IFAIL=0 C ZERO PADDING CHARS IN CONTIG (GEL DONE AT END BY DIFFERENCE C IN INPUT AND OUTPUT LENGTHS) ITOTPC=0 C FILL OUTPUT WITH PADDING DO 10 I=1,IDOUT SEQG2(I)=PAD SEQC2(I)=PAD 10 CONTINUE NMTCH=0 C SET INITIAL POINTERS TO OUTPUT C CONSENSUS IS1=1 C GEL IS2=1 C FIND DISTANCE FROM LEFT MATCH IN GEL TO LEFT OF GEL IG2=MATG(1)-1 IF(IG2.EQ.0)THEN C THE LEFT END OF THE GEL MATCHES SO THIS IS NOT A LEFT OVERLAP C SET TYPE ITYPE=-1 C SET JOINT JOINT=MATC(1) C SKIP NEXT SECTION GO TO 50 END IF C FIND DISTANCE FROM LEFT MATCH IN CONTIG TO LEFT OF CONTIG IC2=MATC(1)-1 C GET DISTANCE FROM FIRST MATCH IN CONTIG TO FIRST MATCH IN GEL. C IF THIS DISTANCE <0 THEN WE HAVE A LEFT OVERLAP IC1=IC2-IG2+1 IF(IC1.GT.0)THEN C THIS IS NOT A LEFT OVERLAP C SET TYPE ITYPE=-1 C SET LEFT END JOINT=IC1 C COPY THE GEL UPTO THE FIRST MATCH, INTO THE OUTPUT ARRAY C CHECK FOR OVERFLOW IF(IG2.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQG(1),SEQG2(1),IG2) C COPY THE CONTIG FOR THE SAME REGION IF(IG2.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQC(IC1),SEQC2(1),IG2) IS1=IS1+IG2 IS2=IS2+IG2 GO TO 50 END IF C MUST BE LEFT END OVERLAP C SET TYPE ITYPE=1 C SET POSITION OF JOINT RELATIVE TO GEL JOINT=ABS(IC1)+2 C COPY OVER THE GEL UPTO THE JOINT C CHECK FOR OVERFLOW IF(IG2.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQG(1),SEQG2(1),IG2) IS2=IS2+IG2 C WE MAY ALSO HAVE MISMATCHING C DATA AT THE JOIN SO DEAL WITH THAT NOW C IF IC2 >0 THE LEFT END OF THE CONTIG MATCHES THE GEL BUT OTHERWISE C WE HAVE SOME MISMATCHED DATA TO DEAL WITH - WE NEED TO TRANSFER C THE MISMATCHED REGION OF THE CONTIG TO THE OUTPUT ARRAY IF(IC2.GT.0)THEN IF(IC2.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQC(1),SEQC2(1),IC2) IS1=IS1+IC2 END IF C WHEN WE GET HERE WE HAVE SORTED OUT THE LEFT ENDS FOR LEFT OVERLAP C AND MISMATCHED LEFT ENDS, WE NOW DEAL WITH THE REST OF THE SEQUENCE C STARTING WITH THE FIRST BLOCK OF IDENTITY C C IG1 POSITION IN INPUT GEL C IS2 POSITION IN OUTPUT GEL C IC1 POSITION IN INPUT CONTIG C IS1 POSITION IN OUTPUT CONTIG C LG1 POSITION OF END OF CURRENT MATCH IN OUTPUT GEL C LC1 POSITION OF END OF CURRENT MATCH IN OUTPUT CONTIG C LG2 DISTANCE FROM CURRENT MATCH IN INPUT GEL TO NEXT MATCH C LC2 DISTANCE FROM CURRENT MATCH IN INPUT CONTIG TO NEXT MATCH C 50 CONTINUE C POINT TO NEXT MATCH NMTCH=NMTCH+1 C COPY NEXT MATCH IG1=MATG(NMTCH) IC1=MATC(NMTCH) L=MATL(NMTCH) C CHECK FOR OVERFLOW IF(IS2+L-1.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQG(IG1),SEQG2(IS2),L) C CHECK FOR OVERFLOW IF(IS1+L-1.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQC(IC1),SEQC2(IS1),L) C POINT TO NEXT OUTPUT POSITIONS IS1=IS1+L IS2=IS2+L C END OF CURRENT MATCH LG1=IG1+L LC1=IC1+L C ANY MORE MATCHES IF(NMTCH.EQ.IP)GO TO 500 K=NMTCH+1 LG2=MATG(K)-LG1 LC2=MATC(K)-LC1 C ANY DIFFERENCE IN LENGTH? IF SO WE HAVE TO PAD SO THEY BECOME THE SAME L5=ABS(LG2-LC2) C COUNT PADDING CHARS IN CONTIG IF(LG2.GT.LC2)ITOTPC=ITOTPC+L5 C IF DIFFERENCE INCREMENT SHORTER IF(LG2.GT.LC2)IS1=IS1+L5 C IF GEL NEEDS PADDING TRY TO PUT PADS NEXT TO DOUBLE CODES IF(LC2.GT.LG2)CALL PADCOP(SEQG,SEQG2, +LG1,MATG(K),L5,IS2,LG2,MAXGEL,IFAIL,KBOUT,SEQC,LC1) C CHECK FOR OVERFLOW IF(IFAIL.EQ.1)GO TO 700 C NOW COPY MISSMATCHED REGION C CHECK FOR OVERFLOW IF(IS2+LG2-1.GT.MAXGEL)GO TO 700 IF(LG2.GT.0)CALL SQCOPY(SEQG(LG1),SEQG2(IS2),LG2) C CHECK FOR OVERFLOW IF(IS1+LC2-1.GT.MAXGEL)GO TO 700 IF(LC2.GT.0)CALL SQCOPY(SEQC(LC1),SEQC2(IS1),LC2) C POINT TO NEXT OUTPUT POSITIONS IS1=IS1+LC2 IS2=IS2+LG2 C GET NEXT MATCH GO TO 50 500 CONTINUE C C FINISH RIGHT ENDS C ONLY COPY TO END OF GEL IN GEL AND TO THE SAME RELATIVE POSITION C IN THE CONTIG FOR DISPLAY PURPOSES AND FOR COUNTING MISMATCH C CURRENT ENDS AT LG1,LC1 C HOW FAR TO END OF GEL? C SET M M=0 L=IDG-LG1+1 IF(L.LT.1)GO TO 600 C CHECK FOR OVERFLOW IF(IS2+L-1.GT.MAXGEL)GO TO 700 CALL SQCOPY(SEQG(LG1),SEQG2(IS2),L) C NEED TO COPY TO END OF GEL IN CONTIG FOR DISPLAY C POINT TO POSN IN CONTIG LEVEL WITH END OF GEL M=LC1+L-1 C IS THIS OVER END OF CONTIG? IF(M.GT.IDC)M=IDC C NUMBER TO COPY M=M-LC1+1 C CHECK FOR OVERFLOW IF(IS1+M-1.GT.MAXGEL)GO TO 700 IF(M.GT.0)CALL SQCOPY(SEQC(LC1),SEQC2(IS1),M) 600 CONTINUE C COUNT PADDING IN GEL ITOTPG=IS2+L-1-IDG C SET NEW LENGTHS FOR RETURN TO CALLING ROUTINE IDOUT=IS1+M-1 IDG=IS2+L-1 IFAIL=0 RETURN 700 CONTINUE WRITE(KBOUT,1000) 1000 FORMAT(' Matching region too long for routine lineup,', +' alignment aborted') IFAIL=1 RETURN END SUBROUTINE LSTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LLINO, +RREG,IDEV,IDEVN,NAMARC) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG CHARACTER NAMARC*(*) N = LLINO WRITE(IDEV,1001) 10 CONTINUE CALL READN(IDEVN,N,NAMARC) WRITE(IDEV,1006)NAMARC,N,RELPG(N),LNGTHG(N),LNBR(N),RNBR(N) IF(RNBR(N).NE.0) THEN N = RNBR(N) IF(RELPG(N).LE.RREG) GO TO 10 END IF 1001 FORMAT( +' NAME NUMBER POSITION LENGTH NEIGHBOURS'/ +' LEFT RIGHT') 1006 FORMAT( ' ',A,2X,I6,2X,I7,2X,I5,2X,I6,2X,I6) C1006 FORMAT( ' ',A,2X,I4,2X,I7,2X,I5,2X,I6,2X,I6) END C12345678901234567890 C 710 720 730 740 750 C -1 HINW.004 CGTCAGACGCACGCTGGAAAA INTEGER FUNCTION LTYPE(LINE,LL,J1,J2,N,MAXDB,KBOUT) PARAMETER (MAXDG = 5) CHARACTER LINE*(*),NUM*(MAXDG),SPACE EXTERNAL NOTRL,NOTLR PARAMETER (SPACE= ' ') J1 = NOTLR(LINE,LL,SPACE) IF(J1.EQ.0) THEN C BLANK LINE LTYPE = 1 RETURN END IF IF(J1.GT.20) THEN C LINE OF NUMBERS LTYPE = 2 RETURN END IF IF(J1.GT.MAXDG+2) THEN C CONSENSUS LINE LTYPE = 3 RETURN END IF C SHOULD BE A SEQUENCE LINE J = INDEX(LINE(J1:),SPACE) NUM = SPACE NUM = LINE(J1:J1+J-2) CALL RJST(NUM) 1001 FORMAT(I6) READ(NUM,1001,ERR=10) N IF(N.GT.MAXDB-2) GO TO 10 C NUMBER ENDS AT J1+J-2 J1 = J1 + J - 1 C LOOK FOR BEGINNING OF NAME J = NOTLR(LINE(J1:),LL-J1+1,SPACE) N1 = J1 + J - 1 C LOOK FOR END OF NAME J = INDEX(LINE(N1:),SPACE) N2 = N1 + J - 2 C LOOK FOR BEGINNING OF SEQ J = NOTLR(LINE(N2+1:),LL-N2,SPACE) J1 = N2 + J LTYPE = 4 C LOOK FOR END OF SEQ J2 = NOTRL(LINE,LL,SPACE) IF(J2.GT.N2) RETURN 10 CONTINUE LTYPE = 0 END INTEGER FUNCTION LWRAPS(I,J) K = MOD(I,J) IF(K.EQ.0) K = J LWRAPS = K END C MERGE C C ROUTINE SENT CONTIG WHOSE GELS MAY BE OUT OF ORDER C REORDERS GELS ON POSITION OF LEFT ENDS AND SETS LEFT C GEL NUMBER FOR THE REORDERED CONTIG C SUBROUTINE MERGE(RELPG,LNGTHG,LNBR,RNBR,LINCON,IDBSIZ) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) C C START AT LEFT END N=LNBR(LINCON) GO TO 22 21 CONTINUE C SET POINTER TO NEXT GEL TO RIGHT IN LIST N=NR IF(I1.GT.0)N=I2 22 CONTINUE C SET POINTER TO NEXT GEL TO RIGHT NR=RNBR(N) IF(NR.EQ.0)GO TO 30 C HAVENT REACHED END YET I1=0 23 CONTINUE C ARE THESE 2 IN CORRECT ORDER IE N<=NR ? IF(RELPG(N).LE.RELPG(NR))GO TO 21 C NOT IN ORDER SO CHAIN LEFT UNTIL CORRECTLY POSITIONED C THEN COME BACK TO THIS POINT AND CONTINUE C IF FIRST MOVE SAVE POSITION IF(I1.EQ.0)I2=N I1=1 C EXCHANGE NEIGHBOURS M=RNBR(NR) IF(M.NE.0)LNBR(M)=N M=LNBR(N) IF(M.NE.0)RNBR(M)=NR RNBR(N)=RNBR(NR) RNBR(NR)=N LNBR(NR)=LNBR(N) LNBR(N)=NR C CHAIN BACK THRU LIST N=LNBR(NR) IF(N.EQ.0)GO TO 21 C END NOT REACHED GO TO 23 30 CONTINUE C ALL DONE POINTER AT RIGHT GEL RNBR(LINCON)=N RETURN END SUBROUTINE MINCOM(SEQ1,IDIM1,SEQ2,IDIM2,SAV1,SAV2,SAV3, +IP,MINM,KBOUT) C AUTHOR: RODGER STADEN C CHARACTER SEQ1(IDIM1),SEQ2(IDIM2) INTEGER SAV1(IP),SAV2(IP),SAV3(IP) C IP1=IP IP=0 C C SITUATION 1 NT1=IDIM2-MINM IES1=MINM-1 ISS2=NT1+1 C DO 100 I=1,NT1 C C POINT TO FIRST CHAR-1 OF SEQ2 ISS2=ISS2-1 C POINT TO LAST CHAR SEQ1 IES1=IES1+1 C N=0 C DO 200 J=1,IES1 C STORE POINTER JJ=J C C POINT TO SEQ2 K=ISS2+J C TEST FOR EQUALITY IF(SEQ1(J).NE.SEQ2(K))GO TO 220 C INCREMENT N N=N+1 GO TO 200 220 CONTINUE C TEST FOR SUFFICENTLY LARGE N IF(N.GE.MINM)CALL SAVIT(N,J,K,IP,SAV1,SAV2,SAV3,IP1) C TEST FOR OVERFLOW IF(IP.GT.IP1)GO TO 5000 C RESET N TO ZERO N=0 200 CONTINUE C C GOOD SCORE AT END? C NEED TO INCREMENT POINTERS AS SAVIT EXPECTS TO BE POINTING AT NEXT C MISMATCH JJ=JJ+1 KK=K+1 IF(N.GE.MINM)CALL SAVIT(N,JJ,KK,IP,SAV1,SAV2,SAV3,IP1) C TEST FOR OVERFLOW IF(IP.GT.IP1)GO TO 5000 C 100 CONTINUE C C C SITUATION 2 NT2=IDIM1-IDIM2+1 C DO 300 I=1,NT2 N=0 C DO 400 J=1,IDIM2 C SAVE POINTER JJ=J C C SET POINTER TO SEQ1 L=I+J-1 IF(SEQ1(L).NE.SEQ2(J))GO TO 420 N=N+1 GO TO 400 420 CONTINUE IF(N.GE.MINM)CALL SAVIT(N,L,J,IP,SAV1,SAV2,SAV3,IP1) C TEST FOR OVERFLOW IF(IP.GT.IP1)GO TO 5000 N=0 400 CONTINUE LL=L+1 JJ=JJ+1 IF(N.GE.MINM)CALL SAVIT(N,LL,JJ,IP,SAV1,SAV2,SAV3,IP1) C TEST FOR OVERFLOW IF(IP.GT.IP1)GO TO 5000 300 CONTINUE C C C SITUATION 3 ISS1=IDIM1-IDIM2 C DO 500 I=1,NT1 C C POINT TO FIRST CHAR SEQ1 K=ISS1+I IES2=IDIM2-I N=0 C DO 600 J=1,IES2 C SAVE POINTER JJ=J C C POINT TO SEQ1 L=K+J IF(SEQ1(L).NE.SEQ2(J))GO TO 620 N=N+1 GO TO 600 620 CONTINUE IF(N.GE.MINM)CALL SAVIT(N,L,J,IP,SAV1,SAV2,SAV3,IP1) C TEST FOR OVERFLOW IF(IP.GT.IP1)GO TO 5000 N=0 600 CONTINUE C LL=L+1 JJ=JJ+1 IF(N.GE.MINM)CALL SAVIT(N,LL,JJ,IP,SAV1,SAV2,SAV3,IP1) C TEST FOR OVERFLOW IF(IP.GT.IP1)GO TO 5000 500 CONTINUE C RETURN 5000 CONTINUE C OVERFLOW C WRITE(KBOUT,1000)IP1 1000 FORMAT(/' TOO MANY MATCHES. LIMIT = ',I6) RETURN END SUBROUTINE ML(PC,PG,L,N,J) INTEGER PC(N),PG(N),L(N) DO 10 I = J,N-1 PC(I) = PC(I+1) PG(I) = PG(I+1) L(I) = L(I+1) 10 CONTINUE END SUBROUTINE MSTLKL(SEQ,IDIM) C AUTHOR: RODGER STADEN CHARACTER SEQ(IDIM) CHARACTER CHARSU EXTERNAL CHARSU,INDEXS DO 100 I=1,IDIM J = INDEXS(SEQ(I),K) SEQ(I) = CHARSU(J) 100 CONTINUE END CHARACTER FUNCTION MUNOTP(IP) C AUTHOR RODGER STADEN CHARACTER PUP*26 SAVE PUP DATA PUP/'CSTPAGNDEQBZHRKMILVFYW-X? '/ MUNOTP = '-' IF((IP.GT.0).AND.(IP.LT.23))MUNOTP = PUP(IP:IP) END INTEGER FUNCTION NCDEP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,IGEL, +STRAND,RREG) INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG,STRAND NCDEP = 0 N = 0 I = IGEL 10 CONTINUE IF(I.NE.0) THEN IF(RELPG(I).LE.RREG) THEN IF(SIGN(1,LNGTHG(I)).EQ.STRAND) N = N + 1 I = RNBR(I) GO TO 10 END IF END IF NCDEP = N END SUBROUTINE PADCOP(SEQG,SEQG2,LG1,MG,L5,IS2,LG2,MAXGEL,IFAIL, +KBOUT,SEQC,IC1) C AUTHOR: RODGER STADEN PARAMETER (NDUBL = 4) CHARACTER SEQG(MAXGEL),SEQG2(MAXGEL),DUBBL(NDUBL),SEQC(MAXGEL) SAVE DUBBL DATA DUBBL/'D','B','V','H'/ JC1 = IC1 C Make seqg2 from seqg placing L5 padding chars before position MG C which is the start of the next block of identity. Try to put the C padding either in line with consensus pads, or next to double C codes. The positions in seqg are LG1 to MG-1. seqg2 needs to be long C enough to be extended from IS2 to IS2 + L5 -1 + MGM1-LG1 +1 C ie we add L5 pads, plus the chars between and including LG1 and MGM1 IDONE=0 C POINT TO END OF MISMATCH MGM1=MG-1 C MAY BE NO CHARS TO COPY IF(MGM1.LT.LG1)GO TO 111 C Next check added 26-2-91 MAXREQ = IS2 + L5 - 1 + MGM1 - LG1 + 1 IF((MGM1.GT.MAXGEL).OR.(MAXREQ.GT.MAXGEL)) THEN WRITE(KBOUT,1000) 1000 FORMAT(' Matching region too large for routine padcop,', + ' alignment aborted') IFAIL=1 RETURN END IF DO 110 J=LG1,MGM1 IF(IDONE.LT.L5) THEN IF((JC1.GT.0).AND.(JC1.LT.MAXGEL)) THEN IF(SEQC(JC1).EQ.'*') THEN IS2 = IS2 + 1 JC1 = JC1 + 1 IDONE = IDONE + 1 GO TO 109 END IF END IF DO 108 M=1,NDUBL IF(SEQG(J).EQ.DUBBL(M)) THEN IS2 = IS2 + 1 JC1 = JC1 + 1 IDONE = IDONE + 1 GO TO 109 END IF 108 CONTINUE 109 CONTINUE END IF SEQG2(IS2) = SEQG(J) IS2 = IS2 + 1 JC1 = JC1 + 1 110 CONTINUE 111 CONTINUE C ALL CHARS COPIED. ENOUGH PADDING? IF(IDONE.LT.L5)IS2=IS2+L5-IDONE C IS2 SHOULD NOW BE POINTING AT NEXT CHAR C ZERO LG2 TO SHOW CALLING ROUTINE COPYING DONE LG2=0 IFAIL=0 END SUBROUTINE PCON1(CHAR,CHRSUM) C AUTHOR RODGER STADEN C PART OF PROTEIN 'CONSENSUS' CALCULATION CHARACTER CHAR INTEGER CHRSUM INTEGER CTONUM EXTERNAL CTONUM K = CTONUM(CHAR) IF(K.NE.26)THEN IF(CHRSUM.EQ.0)THEN CHRSUM = K ELSE IF(K.NE.CHRSUM)CHRSUM = -1 END IF END IF END SUBROUTINE PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL, +NCONTS,MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) YMIN = 0. YMAX = ISYMAX XMIN = 0. LENCON = 0 DO 10 I = IDBSIZ-NCONTS,IDBSIZ-1 LENCON = LENCON + RELPG(I) 10 CONTINUE XMAX = LENCON YINC = (YMAX-YMIN)/3. Y = 0. XF = XMIN N = 0 CALL VECTOM CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) DO 20 I = IDBSIZ-NCONTS,IDBSIZ-1 N = N + 1 XT = XF + RELPG(I) Y = Y + YINC CALL LINE(XF,XT,Y,Y,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) IF((IGEL.NE.0).AND.(I.EQ.LINCON)) THEN XZ = XF + RELPG(IGEL) + ABS(LNGTHG(IGEL))/2 CALL LINE(XZ,XZ,YMAX,YMIN,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) END IF XF = XT IF(N.EQ.2) THEN N = 0 Y = 0. END IF 20 CONTINUE CALL VT100M END SUBROUTINE PLTC(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,STRAND, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,YMID,YINC,DEPTH) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG,STRAND,DEPTH XMIN = LREG XMAX = RREG YMAX = ISYMAX YMIN = 0. I = LGEL 5 CONTINUE IF(I.NE.0) THEN IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.LREG) THEN I = RNBR(I) GO TO 5 END IF END IF N = 0 10 CONTINUE IF(I.NE.0)THEN IF(RELPG(I).LE.RREG) THEN IF(SIGN(1,LNGTHG(I)).EQ.STRAND) THEN XF = MAX(RELPG(I),LREG) XT = MIN(ABS(LNGTHG(I))+RELPG(I)-1,RREG) N = N + 1 IF(N.GT.DEPTH) N = 1 YF = YMID + N * YINC CALL LINE(XF,XT,YF,YF,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) END IF I = RNBR(I) GO TO 10 END IF END IF END SUBROUTINE PLTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, +MARGL,MARGR,MARGB, +MARGT,ISXMAX,ISYMAX,LGEL,LREG,RREG,DEPTHP,DEPTHM) INTEGER DEPTHP,DEPTHM INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER RREG,STRAND C have window size margt starting at margb C depths depthp, depthm YMAX = ISYMAX YMIN = 0. XMIN = LREG XMAX = RREG RINC = YMAX / (DEPTHP + DEPTHM + 2) RMID =(DEPTHM+1) * RINC CALL VECTOM CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL LINE(XMIN,XMAX,RMID,RMID,XMAX,XMIN,YMAX,YMIN, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL TEXT(XMIN,RMID,'*',1,ISIZE,XMAX,XMIN,YMAX,YMIN, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL TEXT(XMAX,RMID,'*',1,ISIZE,XMAX,XMIN,YMAX,YMIN, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) STRAND = 1 YINC = RINC * STRAND CALL PLTC(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,STRAND, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,RMID,YINC,DEPTHP) STRAND = -1 YINC = RINC * STRAND CALL PLTC(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,LGEL,LREG,RREG,STRAND, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,RMID,YINC,DEPTHM) CALL VT100M END SUBROUTINE PLTQ(SEQ,IDIM2, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CHARACTER SEQ(IDIM2),T PARAMETER (Y0 = 0., + YP1 = 1., + YP2 = 2., + YM1 = -1., + YM2 = -2.) XMIN = 0. XMAX = IDIM2 YMIN = YM2 YMAX = YP2 CALL VECTOM CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL LINE(XIN,XMAX,Y0,Y0,XMAX,XMIN,YMAX,YMIN, +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) I = 1 10 CONTINUE XF = I T = SEQ(I) 20 CONTINUE IF(SEQ(I).NE.T) THEN CALL GLEVEL(T,YF,YT,Y0,YP1,YP2,YM1,YM2) XT = I - 1 CALL LINE(XF,XF,YF,YT,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL LINE(XF,XT,YT,YT,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL LINE(XT,XT,YF,YT,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) GO TO 10 END IF I = I + 1 IF(I.LT.IDIM2) GO TO 20 CALL GLEVEL(T,YF,YT,Y0,YP1,YP2,YM1,YM2) XT = I CALL LINE(XF,XF,YF,YT,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL LINE(XF,XT,YT,YT,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL LINE(XT,XT,YF,YT,XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX) CALL VT100M END SUBROUTINE READN(IDEVN,N,NAME) CHARACTER NAME*(*) READ(IDEVN,REC=N)NAME RETURN END SUBROUTINE READR(IDEVR,N,RELPG,LNGTHG,LNBR,RNBR) INTEGER RELPG,RNBR INTEGER SWAPBO EXTERNAL SWAPBO READ(IDEVR,REC=N+1)RELPG,LNGTHG,LNBR,RNBR RELPG = SWAPBO(RELPG) LNGTHG = SWAPBO(LNGTHG) LNBR = SWAPBO(LNBR) RNBR = SWAPBO(RNBR) RETURN END SUBROUTINE READW(IDEVW,N,GEL,MAXGEL) CHARACTER GEL(MAXGEL) READ(IDEVW,REC=N)GEL RETURN END SUBROUTINE REMOVL(MATC,MATG,MATL,IP) C AUTHOR: RODGER STADEN INTEGER MATC(IP),MATG(IP),MATL(IP) C C SET POINTER TO FIRST MATCH NMTCH=0 10 CONTINUE C POINT TO NEXT MATCH NMTCH=NMTCH+1 C SORT MATCHES ON LENGTH IPP=IP-NMTCH+1 CALL BUBBL3(MATL(NMTCH),MATG(NMTCH),MATC(NMTCH),IPP) C LOOK FOR END OF POSITIVES DO 20 I=NMTCH,IP J=I 20 IF(MATL(I).LT.1)GO TO 30 J=J+1 30 CONTINUE IP=J-1 C END OF POSITIVES AT IP IF(NMTCH.GE.IP)RETURN K1=MATC(NMTCH) K2=K1+MATL(NMTCH)-1 K3=MATG(NMTCH) K4=K3+MATL(NMTCH)-1 C POINT TO FIRST MATCH TO TEST K6=NMTCH+1 DO 200 I=K6,IP C DO CONSENSUS FIRST C OVERLAP? IF(MATC(I).GT.K2)GO TO 100 K5=MATC(I)+MATL(I)-1 IF(K5.LT.K1)GO TO 100 C DOES OVERLAP C WHICH END IF(K5.LE.K2)GO TO 80 C LENGTH TO REDUCE MATCH BY IS IDELT IDELT=K2-MATC(I)+1 C NEW LENGTH MATL(I)=MATL(I)-IDELT C MOVE LEFT ENDS MATC(I)=MATC(I)+IDELT MATG(I)=MATG(I)+IDELT GO TO 100 80 CONTINUE C LENGTH MATL(I)=K1-MATC(I) 100 CONTINUE C NOW LOOK FOR OVERLAPS WITH GEL C OVERLAP? IF(MATG(I).GT.K4)GO TO 200 K5=MATG(I)+MATL(I)-1 IF(K5.LT.K3)GO TO 200 C DOES OVERLAP C WHICH END? IF(K5.LE.K4)GO TO 180 C LENGTH TO REDUCE MATCH BY IS IDELT IDELT=K4-MATG(I)+1 C NEW LENGTH MATL(I)=MATL(I)-IDELT C MOVE LEFT ENDS MATC(I)=MATC(I)+IDELT MATG(I)=MATG(I)+IDELT GO TO 200 180 CONTINUE C LENGTH MATL(I)=K3-MATG(I) 200 CONTINUE GO TO 10 END C SAVIT C SUBROUTINE SAVIT(N,J,K,IP,S1,S2,S3,IP1) C AUTHOR: RODGER STADEN INTEGER S1(IP1),S2(IP1),S3(IP1) C IP=IP+1 C TEST FOR OVERFLOW IF(IP.GT.IP1)RETURN S1(IP)=N S2(IP)=J-N S3(IP)=K-N C RETURN END SUBROUTINE SCRENR(GEL,MAXGEL,STRING,NAME,FILNAM, +IDEV1,IDEV2,IDEV3,IDEV4,IDEV,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH) CHARACTER NAME*(*),FILNAM*(*),HELPF*(*) CHARACTER GEL(MAXGEL),STRING(60) INTEGER GNFFOF EXTERNAL GNFFOF CALL YESNO(INF,'Use file of file names', +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(INF.LT.0) RETURN IF(INF.EQ.0) THEN FILNAM = ' ' CALL OPENF1(IDEV1,FILNAM,0,IOK,KBIN,KBOUT, + 'File of gel reading names', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN FILNAM = ' ' CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT, + 'File for names of sequences that pass', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN END IF FILNAM = ' ' CALL OPENF1(IDEV3,FILNAM,0,IOK,KBIN,KBOUT, +'File name of recognition sequences', +IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN C JGEL = 0 IGEL = 0 1 CONTINUE IF(INF.EQ.1) THEN 31 CONTINUE MN = 0 CALL GTSTR('Gel reading name',' ',NAME,MN,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.3) RETURN IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 31 END IF ELSE IOK = GNFFOF(IDEV1,NAME) IF(IOK.EQ.1) GO TO 100 IF(IOK.NE.0) GO TO 1 END IF 1002 FORMAT(A) JGEL = JGEL + 1 WRITE(IDEV,*)'Processing', JGEL,' in batch' WRITE(IDEV,1003)NAME 1003 FORMAT(' Gel reading name ',A) IDIMG=MAXGEL CALL OPENRS(IDEV4,NAME,IOK,LRECL,2) IF(IOK.NE.0)THEN IF(INF.EQ.1) RETURN WRITE(KBOUT,*)' Error opening gel reading file' GO TO 1 END IF CALL ARRFIM(IDEV4,GEL,IDIMG,KBOUT) CLOSE(UNIT=IDEV4) 2 CONTINUE IF(IDIMG.LT.1)THEN WRITE(KBOUT,*)' Gel reading too short to compare' GO TO 1 END IF CALL MSTLKL(GEL,IDIMG) 3 CONTINUE READ(IDEV3,1005,END=6)STRING 1005 FORMAT(60A1) C FIND LENGTH OF STRING ASSUMING NO SPACES DO 4 I=1,60 II=I IF(STRING(I).EQ.' ')GO TO 5 4 CONTINUE 5 CONTINUE II=II-1 IF(II.GT.0)CALL FIND(GEL,IDIMG,STRING,II,JMATCH) IF(JMATCH.EQ.0)GO TO 3 C A MATCH WRITE(IDEV,1007)JMATCH,(STRING(K),K=1,II) 1007 FORMAT(' Match at',I6,' with ',60A1) REWIND IDEV3 GO TO 1 C NO MATCH SO SAVE 6 CONTINUE WRITE(IDEV2,1002)NAME IGEL = IGEL + 1 REWIND IDEV3 GO TO 1 100 CONTINUE WRITE(KBOUT,*)'Batch finished' WRITE(KBOUT,*)JGEL,' compared and ',IGEL,' passed' RETURN END SUBROUTINE SCRENV(MAXGEL, +WORDP,WORDN,LPOWRC,POSNS,GELN, +SEQ,MAXSEQ,GEL,GELCOP,MATCH, +LENGTH, +SAVPS,SAVPG,SAVL,MAXMAT,CENDS,NENDS,MAXCON,CONST, +KBIN,KBOUT,IDEV1,IDEV2,IDEV3,IDEV4,IDEV, +IHELPS,IHELPE,HELPF,IDEVH,FILNAM,NAME,IOK) INTEGER POSNS(MAXSEQ),GELN(MAXGEL),WORDP(LPOWRC),SAVPS(MAXMAT) INTEGER SAVPG(MAXMAT),SAVL(MAXMAT) INTEGER WORDN(LPOWRC) CHARACTER FILNAM*(*),NAME*(*),HELPF*(*) CHARACTER GELCOP(MAXGEL) INTEGER CENDS(MAXCON) INTEGER NENDS(MAXCON) INTEGER CONST(LENGTH),CSTART CHARACTER SEQ(MAXSEQ),GEL(MAXGEL),MATCH(MAXGEL) INTEGER GNFFOF EXTERNAL GNFFOF JGEL = 0 IGELS = 0 CALL YESNO(INF,'Use file of file names', +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(INF.LT.0) RETURN IF(INF.EQ.0) THEN FILNAM = ' ' CALL OPENF1(IDEV1,FILNAM,0,IOK,KBIN,KBOUT, + 'File of gel reading names', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN FILNAM = ' ' CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT, + 'File for names of gel readings that pass', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN END IF FILNAM = ' ' CALL OPENF1(IDEV4,FILNAM,0,IOK,KBIN,KBOUT, +'File name of vector sequence', +IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN IDIM = MAXSEQ CALL ARRFIM(IDEV4,SEQ,IDIM,KBOUT) CLOSE(UNIT=IDEV4) MN = LENGTH*2 MX = 50 MINMAT = MAX(15,MN) CALL GETINT(MN,MX,MINMAT, +'Minimum initial match', +IVAL,KBIN,KBOUT, +IHELPS,IHELPE,HELPF,IDEVH,IOK) IF(IOK.NE.0) RETURN MINMAT = IVAL IDCEND=MAXCON CALL FNDCON(SEQ,IDIM,CENDS,NENDS,IDCEND,MAXCON,KBOUT) C IS THE VECTOR SEQUENCE IN THE CORRECT FORMAT WITH A TITLE AT THE FRONT? IF(IDCEND.EQ.0)THEN CENDS(1) = -19 NENDS(1) = 1 CENDS(2) = IDIM + 1 IDCEND = 1 END IF C WRITE(KBOUT,9999) C9999 FORMAT(' VECTOR SEQUENCE REQUIRES A TITLE EG ', C 1' <---M13MP7.001----->') C RETURN C END IF CALL BUSY(KBOUT) C C init hashing routines C CALL INITE(CONST,CSTART,LENGTH) CALL ENCOF(SEQ,IDIM,CONST,CSTART,LENGTH,POSNS) CALL ENCONN(POSNS,IDIM,WORDP,WORDN,LPOWRC,LENGTH,1) C 1 CONTINUE IF(INF.EQ.1) THEN 3 CONTINUE MN = 0 CALL GTSTR('Gel reading name',' ',NAME,MN,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.3) RETURN IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 3 END IF ELSE IOK = GNFFOF(IDEV1,NAME) IF(IOK.EQ.1) GO TO 100 IF(IOK.NE.0) GO TO 1 END IF JGEL = JGEL + 1 WRITE(IDEV,*)'Processing',JGEL,' in batch' WRITE(IDEV,1003)NAME 1003 FORMAT(' Gel reading name ',A) IDIMG=MAXGEL CALL OPENRS(IDEV3,NAME,IOK,LRECL,2) IF(IOK.NE.0)THEN IF(INF.EQ.1) RETURN WRITE(IDEV,*)' Gel reading file not found' GO TO 1 END IF CALL ARRFIM(IDEV3,GEL,IDIMG,KBOUT) CLOSE(UNIT=IDEV3) C LONG ENOUGH ? IF(IDIMG.LT.MINMAT)THEN WRITE(IDEV,*)' Gel reading too short to compare' GO TO 1 END IF CALL SQCOPY(GEL,GELCOP,IDIMG) ISTRAN=1 IMATCH=0 2 CONTINUE CALL BUSY(KBOUT) CALL MSTLKL(GEL,IDIMG) CALL ENCO(GEL,IDIMG,GELN,CONST,LENGTH) WRITE(IDEV,1009)ISTRAN 1009 FORMAT(' Searching strand',I6) IDSAV=MAXMAT CALL CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LPOWRC, +SAVPG,SAVPS,SAVL, +IDSAV,SEQ,GELCOP,MINMAT,IFAIL,KBOUT) IF(IDSAV.GT.0) THEN IMATCH=1 CALL DISMAT(SEQ,IDIM,GELCOP,IDIMG,SAVPS,SAVPG,IDSAV, + CENDS,NENDS,IDCEND,MAXCON,IDEV,MATCH) END IF IF(ISTRAN.EQ.1) THEN CALL SQREV(GELCOP,IDIMG) CALL SQCOM(GELCOP,IDIMG) CALL SQCOPY(GELCOP,GEL,IDIMG) ISTRAN = 2 GO TO 2 END IF IF(IMATCH.EQ.0) THEN WRITE(IDEV2,1010)NAME IGELS = IGELS + 1 END IF GO TO 1 1010 FORMAT(A) 100 CONTINUE WRITE(KBOUT,*)'Batch finished' WRITE(KBOUT,*)JGEL,' compared and ',IGELS,' passed' RETURN END SUBROUTINE SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR, +IDBSIZ,IGN,NCONT,DIST) C AUTHOR: RODGER STADEN C SHIFTS PART OF A CONTIG FORM GEL IGN TO RIGHT END C CONTIG LINE NUMBER IF NCONT INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER DIST,CLEN EXTERNAL CLEN I = IGN 10 CONTINUE IF(I.NE.0)THEN RELPG(I) = RELPG(I) + DIST CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)) I = RNBR(I) GO TO 10 END IF C UPDATE CONTIG LENGTH L = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +IDBSIZ,IGN) RELPG(NCONT) = L CALL WRITER(IDEVR,NCONT,RELPG(NCONT),LNGTHG(NCONT), +LNBR(NCONT),RNBR(NCONT)) END SUBROUTINE SLIDER(SEQ1,IDC,SEQ2,IDIM2,MS1,MS2,MAXPG,MAXPC,MINSLI, +MATL,MATC,MATG,IP) C AUTHOR: RODGER STADEN CHARACTER SEQ1(IDC),SEQ2(IDIM2) INTEGER MATL(IP),MATC(IP),MATG(IP),P1S,P1,P2 IP1 = IP IP = 0 C LEFT END S2 RELATIVE S1 - MAX PADS -2 READY FOR LOOP P1S = MS1 - MS2 - MAXPC - 1 C TRY NSLIDE START POSNS FOR SEQ2 DO 100 I=1,MAXPG+MAXPC+1 C POINT TO SEQ1 START P1S = P1S + 1 C POINT TO CURRENT SEQ1 POSN P1 = P1S N = 0 C COMPARE WHOLE LENGTH OF SEQ2 (IF P1 WITHIN RANGE) DO 50 J=1,IDIM2 P2 = J P1 = P1 + 1 IF(P1.LT.1)GO TO 50 C OFF RIGHT END? IF SO MAY HAVE BEEN A MATCH IF(P1.GT.IDC)GO TO 40 IF(SEQ1(P1).EQ.SEQ2(P2))GO TO 45 40 CONTINUE IF(N.GE.MINSLI)CALL SAVIT(N,P1,P2,IP,MATL,MATC,MATG,IP1) N = 0 GO TO 50 45 CONTINUE N = N + 1 50 CONTINUE C GOOD SCORE AT END? NEED TO INCREMENT POINTERS FOR SAVIT P1 = P1 + 1 P2 = P2 + 1 IF(N.GE.MINSLI)CALL SAVIT(N,P1,P2,IP,MATL,MATC,MATG,IP1) 100 CONTINUE END SUBROUTINE SUBS(SEQ,IDIMS,FROM,TO) CHARACTER SEQ(IDIMS),FROM,TO C AUTHOR RODGER STADEN DO 10 I = 1,IDIMS IF(SEQ(I).EQ.FROM) SEQ(I) = TO 10 CONTINUE END SUBROUTINE SUMMAR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ1,IDIM1,GEL,LREG,RREG,IGELC,PERCD,IDBSIZ,CHARS, +ID1,CHRSIZ,MAXGL2,IDEVW,MAXGEL,LINOU1,LINOU2,MXGOOD) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),CHRSIZ INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER SEQ1(IDIM1) CHARACTER GEL(MAXGEL) INTEGER LREG,RREG,LSEQNO,POSN,Y,POSN1 INTEGER GELC CHARACTER LINOU1(MAXGEL),LINOU2(MAXGEL),GTCONC INTEGER CHARS(CHRSIZ,ID1,MAXGL2) EXTERNAL INDEXS,LWRAPS,GTCONC C 28-7-91 added extra parameter: mxgood is the maximum reading C length for which we have confidence, so only the parts of C reads 1 to mxgood will be included in the calculation C SET INITIAL VALUES C hard to understand this very old code! rewrite it. C we have a summing array of twice the length of the longest sequence C posn is posn in contig of next to write, lseqno is current posn in contig C we write when lseqno-posn ge the length of the longest seq POSN=LREG GELC=IGELC LINLEN=MAXGEL LSEQNO=RELPG(GELC) IEND=0 DO 40 I=1,MAXGL2 DO 40 J=1,ID1 DO 40 K=1,CHRSIZ CHARS(K,J,I)=0 40 CONTINUE 50 CONTINUE ISS=1 IF(LNGTHG(GELC).LT.0)ISS=2 CALL READW(IDEVW,GELC,GEL,MAXGEL) C LOOP FOR RELEVANT ELEMENTS THIS GEL C only use mxgood characters from start of read C IF(ISS.EQ.1) THEN N = MIN(MXGOOD,ABS(LNGTHG(GELC))) IF(LSEQNO.LT.LREG)LSEQNO=LREG IS = LSEQNO-RELPG(GELC)+1 ELSE C LOOP FOR RELEVANT ELEMENTS THIS GEL C only use mxgood characters from start of read (right end for these) C IREND = RELPG(GELC) - LNGTHG(GELC) + 1 IF (MXGOOD.LT.MAXGEL) THEN LSEQNO = IREND - MXGOOD + 1 ELSE LSEQNO = RELPG(GELC) END IF LSEQNO = MAX(LSEQNO,LREG) IS = LSEQNO - RELPG(GELC) + 1 N = ABS(LNGTHG(GELC)) END IF DO 70 I=IS,N JJ = INDEXS(GEL(I),JSCORE) JJJ = LWRAPS(LSEQNO,MAXGL2) CHARS(JJ,ISS,JJJ) = CHARS(JJ,ISS,JJJ) + JSCORE LSEQNO = LSEQNO + 1 70 CONTINUE IF(RNBR(GELC).EQ.0)GO TO 200 GELC=RNBR(GELC) LSEQNO=RELPG(GELC) IF(LSEQNO.GT.RREG)GO TO 200 C ENOUGH TO OUTPUT? Y=LSEQNO-POSN IF(Y.GE.MAXGEL)GO TO 210 GO TO 50 200 CONTINUE C SET FLAG TO SHOW END REACHED IEND=1 LINLEN=MAXGEL Y=RREG-POSN IF(Y.LT.MAXGEL)LINLEN=Y+1 210 CONTINUE C SET POINTER TO SEQ1 POSN1=POSN-1 C PREPARE NEXT SECTION OF CHARS FOR OUTPUT DO 230 I=1,LINLEN JJJ = LWRAPS(POSN,MAXGL2) LINOU1(I) = GTCONC(CHARS(1,1,JJJ),CHRSIZ,PERCD) LINOU2(I) = GTCONC(CHARS(1,2,JJJ),CHRSIZ,PERCD) DO 250 J=1,CHRSIZ CHARS(J,1,JJJ)=0 CHARS(J,2,JJJ)=0 250 CONTINUE POSN=POSN+1 230 CONTINUE C C COMPARE STRANDS C DO 500 I=1,LINLEN C WRITE(*,*)I,LINOU1(I),LINOU2(I) POSN1=POSN1+1 IF(LINOU1(I).EQ.LINOU2(I)) THEN IF(LINOU1(I).EQ.'-') THEN SEQ1(POSN1) = '3' GO TO 500 END IF IF(LINOU1(I).EQ.'*') THEN SEQ1(POSN1) = '3' GO TO 500 END IF SEQ1(POSN1) = '0' ELSE IF((LINOU1(I).EQ.'*').AND.(LINOU2(I).EQ.'-')) THEN SEQ1(POSN1) = '3' GO TO 500 END IF IF((LINOU2(I).EQ.'*').AND.(LINOU1(I).EQ.'-')) THEN SEQ1(POSN1) = '3' GO TO 500 END IF IF((LINOU1(I).NE.'-').AND.(LINOU1(I).NE.'*')) THEN SEQ1(POSN1) = '1' IF((LINOU2(I).NE.'-').AND.(LINOU2(I).NE.'*')) + SEQ1(POSN1) = '4' GO TO 500 END IF IF((LINOU2(I).NE.'-').AND.(LINOU2(I).NE.'*')) THEN SEQ1(POSN1) = '2' IF((LINOU1(I).NE.'-').AND.(LINOU1(I).NE.'*')) + SEQ1(POSN1) = '4' GO TO 500 END IF END IF 500 CONTINUE IF(POSN.GT.RREG)RETURN IF((IEND.EQ.1).AND.(POSN.LE.RREG))GO TO 200 C ANY MORE MAXGEL CHAR LENGTHS TO OUTPUT Y=LSEQNO-POSN IF(Y.LT.MAXGEL)GO TO 50 C FINISHED COMPLETELY? GO TO 210 END C SUMMER C C SUBROUTINE TO PRODUCE A CONSENSUS FROM LINED UP GEL READINGS SUBROUTINE SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, 1SEQ1,IDIM1,GEL,LREG,RREG,IGELC,IDBSIZ,CHARS,CHRSIZ,MAXGL2, +IDEVW,MAXGEL,IDM,PERCD) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),CHRSIZ INTEGER LREG,RREG,LSEQNO,POSN,Y INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL) INTEGER GELC CHARACTER SEQ1(IDIM1) INTEGER CHARS(CHRSIZ,MAXGL2) CHARACTER MUNOTP,GTCONC EXTERNAL MUNOTP,INDEXS,GTCONC,LWRAPS C C SET INITIAL VALUES POSN=LREG GELC=IGELC LINLEN=MAXGEL LSEQNO=RELPG(GELC) IEND=0 IPSEQ1=0 C C ZERO ARRAY DO 40 I=1,MAXGL2 DO 40 J=1,CHRSIZ CHARS(J,I)=0 40 CONTINUE 50 CONTINUE CALL READW(IDEVW,GELC,GEL,MAXGEL) C LOOP FOR RELEVANT ELEMENTS THIS GEL N=ABS(LNGTHG(GELC)) IF(LSEQNO.LT.LREG)LSEQNO=LREG IS=(LSEQNO-RELPG(GELC))+1 **************************** IF(IDM.EQ.26)THEN DO 51 I = IS,N JJJ=(MOD(LSEQNO,MAXGL2)) IF(JJJ.EQ.0)JJJ=MAXGL2 CALL PCON1(GEL(I),CHARS(1,JJJ)) LSEQNO = LSEQNO + 1 51 CONTINUE ELSE **************************** DO 70 I=IS,N JJ = INDEXS(GEL(I),JSCORE) JJJ = LWRAPS(LSEQNO,MAXGL2) CHARS(JJ,JJJ) = CHARS(JJ,JJJ) + JSCORE LSEQNO = LSEQNO + 1 70 CONTINUE END IF C C LOOK AT NEXT GEL TO RIGHT IF(RNBR(GELC).EQ.0)GO TO 200 GELC=RNBR(GELC) C RESET LSEQNO LSEQNO=RELPG(GELC) C IS THIS OVER END? IF(LSEQNO.GT.RREG)GO TO 200 C ENOUGH TO OUTPUT? Y=LSEQNO-POSN IF(Y.GE.MAXGEL)GO TO 210 GO TO 50 200 CONTINUE C SET FLAG TO SHOW END REACHED IEND=1 C NEED TO SUM AND OUTPUT LINLEN=MAXGEL Y=RREG-POSN IF(Y.LT.MAXGEL)LINLEN=Y+1 210 CONTINUE C SUM NEXT SECTION OF CHARS IF(IDM.EQ.26)THEN DO 211 I = 1,LINLEN IPSEQ1 = IPSEQ1 + 1 SEQ1(IPSEQ1) = '-' JJJ = MOD(POSN,MAXGL2) IF(JJJ.EQ.0)JJJ = MAXGL2 SEQ1(IPSEQ1) = MUNOTP(CHARS(1,JJJ)) CHARS(1,JJJ) = 0 POSN = POSN + 1 211 CONTINUE ELSE DO 230 I=1,LINLEN IPSEQ1=IPSEQ1+1 ISUM=0 JJJ = LWRAPS(POSN,MAXGL2) SEQ1(IPSEQ1) = GTCONC(CHARS(1,JJJ),CHRSIZ,PERCD) CALL FILLI(CHARS(1,JJJ),CHRSIZ,0) POSN = POSN + 1 230 CONTINUE END IF C C C ANY MORE TO OUTPUT? IF(POSN.GT.RREG)RETURN IF((IEND.EQ.1).AND.(POSN.LE.RREG))GO TO 200 C ANY MORE MAXGLEL CHAR LENGTHS TO OUTPUT Y=LSEQNO-POSN IF(Y.LT.MAXGEL)GO TO 50 C FINISHED COMPLETELY? GO TO 210 END SUBROUTINE SUMSS(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +SEQ1,IDIM1,GEL,LREG,RREG,IGELC,PERCD,IDBSIZ,CHARS, +ID1,CHRSIZ,MAXGL2,IDEVW,MAXGEL,LINOU1,LINOU2) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),CHRSIZ INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER SEQ1(IDIM1) CHARACTER GEL(MAXGEL) INTEGER LREG,RREG,LSEQNO,POSN,Y,POSN1 INTEGER GELC CHARACTER LINOU1(MAXGEL),LINOU2(MAXGEL),GTCONC INTEGER CHARS(CHRSIZ,ID1,MAXGL2) EXTERNAL INDEXS,LWRAPS,GTCONC C C Routine to calculate a consensus. Only if the two strands agree C is a non dash character assigned. C POSN=LREG GELC=IGELC LINLEN=MAXGEL LSEQNO=RELPG(GELC) IEND=0 DO 40 I=1,MAXGL2 DO 40 J=1,ID1 DO 40 K=1,CHRSIZ CHARS(K,J,I)=0 40 CONTINUE 50 CONTINUE ISS=1 IF(LNGTHG(GELC).LT.0)ISS=2 CALL READW(IDEVW,GELC,GEL,MAXGEL) C LOOP FOR RELEVANT ELEMENTS THIS GEL N=ABS(LNGTHG(GELC)) IF(LSEQNO.LT.LREG)LSEQNO=LREG IS=LSEQNO-RELPG(GELC)+1 DO 70 I=IS,N JJ = INDEXS(GEL(I),JSCORE) JJJ = LWRAPS(LSEQNO,MAXGL2) CHARS(JJ,ISS,JJJ) = CHARS(JJ,ISS,JJJ) + JSCORE LSEQNO = LSEQNO + 1 70 CONTINUE IF(RNBR(GELC).EQ.0)GO TO 200 GELC=RNBR(GELC) LSEQNO=RELPG(GELC) IF(LSEQNO.GT.RREG)GO TO 200 C ENOUGH TO OUTPUT? Y=LSEQNO-POSN IF(Y.GE.MAXGEL)GO TO 210 GO TO 50 200 CONTINUE C SET FLAG TO SHOW END REACHED IEND=1 LINLEN=MAXGEL Y=RREG-POSN IF(Y.LT.MAXGEL)LINLEN=Y+1 210 CONTINUE C SET POINTER TO SEQ1 POSN1=POSN-1 C PREPARE NEXT SECTION OF CHARS FOR OUTPUT DO 230 I=1,LINLEN JJJ = LWRAPS(POSN,MAXGL2) LINOU1(I) = GTCONC(CHARS(1,1,JJJ),CHRSIZ,PERCD) LINOU2(I) = GTCONC(CHARS(1,2,JJJ),CHRSIZ,PERCD) DO 250 J=1,CHRSIZ CHARS(J,1,JJJ)=0 CHARS(J,2,JJJ)=0 250 CONTINUE POSN=POSN+1 230 CONTINUE C C Compare the strands. If they the same then set the consensus C accordingly, otherwise set it to - so no edits are made. C DO 500 I=1,LINLEN POSN1=POSN1+1 IF(LINOU1(I).EQ.LINOU2(I)) THEN SEQ1(POSN1) = LINOU1(I) ELSE SEQ1(POSN1) = '-' END IF 500 CONTINUE IF(POSN.GT.RREG)RETURN IF((IEND.EQ.1).AND.(POSN.LE.RREG))GO TO 200 C ANY MORE MAXGEL CHAR LENGTHS TO OUTPUT Y=LSEQNO-POSN IF(Y.LT.MAXGEL)GO TO 50 C FINISHED COMPLETELY? GO TO 210 END SUBROUTINE TPCHEK(PC,PG,L,N) INTEGER PC(N),PG(N),L(N) C AUTHOR RODGER STADEN C IF OVERLAPPING BLOCKS ARE FOUND REMOVE THE SHORTER ONE C THEN REMOVE LARGE GAPS AT ENDS (THOSE AS LARGE AS THE END BLOCK) K1 = 2 1 CONTINUE DO 10 I = K1,N J1 = I IF(PC(I).LE.PC(I-1)) GO TO 20 IF(PG(I).LE.PG(I-1)) GO TO 20 10 CONTINUE C REMOVE LARGE GAPS FROM ENDS C THIS RULE OF THUMB COULD BE CHANGED TO USE A DIFFERENCE C BETWEEN THE NUMBERS OF MISMATCHING CHARACTERS IF(N.GT.1) THEN K1 = PC(2) - PC(1) - L(1) J1 = PG(2) - PG(1) - L(1) IF(MAX(K1,J1).GT.L(1)) THEN CALL ML(PC,PG,L,N,1) N = N - 1 END IF IF(N.GT.1) THEN K1 = PC(N) - PC(N-1) - L(N-1) J1 = PG(N) - PG(N-1) - L(N-1) IF(MAX(K1,J1).GT.L(N)) THEN CALL ML(PC,PG,L,N,N) N = N - 1 END IF END IF END IF RETURN 20 CONTINUE IF(L(J1-1).GT.L(J1)) THEN CALL ML(PC,PG,L,N,J1) ELSE CALL ML(PC,PG,L,N,J1-1) END IF C Until 25-11-90 next line was k1=j1 but this does not deal with all C cases: when a line is deleted we must compare it with the previous C one before dealing with the rest, because it could be left of that C one as well! K1 = MAX(2,J1-1) N = N - 1 GO TO 1 END SUBROUTINE WRITEN(IDEVN,N,NAME) CHARACTER NAME*(*) WRITE(IDEVN,REC=N)NAME RETURN END SUBROUTINE WRITER(IDEVR,N,RELPG,LNGTHG,LNBR,RNBR) INTEGER RELPG,RNBR INTEGER SWAPBO EXTERNAL SWAPBO WRITE(IDEVR,REC=N+1)SWAPBO(RELPG),SWAPBO(LNGTHG), +SWAPBO(LNBR),SWAPBO(RNBR) RETURN END SUBROUTINE WRITEW(IDEVW,N,GEL,MAXGEL) CHARACTER GEL(MAXGEL) WRITE(IDEVW,REC=N)GEL RETURN END SUBROUTINE XHSAP(RELPG,LNGTHG,LNBR,RNBR, +IDBSIZ,NCONTS,LLINOI,LINCNI,LREG,RREG, +WINDOW,GWIND,LENCON,DEPTHP,DEPTHM, +MARGL,MARGR,MARGB,MARGT,MAXOPT,ISXMAX,ISYMAX,KBIN,IDEV, +KBOUT,GEL,GEL2,IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM, +SEQ1,IDIM1,NGELS,TEMP3,CHRSIZ,MAXGL2,LINOU1,LINOU2, +NOPT1,NOPT2,NOPT3, +IHELPS,IHELPE,HELPF,IDEVH,MXGOOD) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),WINDOW,CHRSIZ,GWIND INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER MARGB(MAXOPT),MARGT(MAXOPT) INTEGER RREG,DEPTHP,DEPTHM,STRAND,CHNRP1,HQN INTEGER TEMP3(2,CHRSIZ,MAXGL2) CHARACTER GEL(MAXGEL),GEL2(MAXGEL) CHARACTER TERM,TUPPER,NAMARC*16,HELPF*(*) CHARACTER SEQ1(IDIM1),LINOU1(MAXGEL),LINOU2(MAXGEL) EXTERNAL NOPWIN,CWORLD,TUPPER,CHNRP1,HQN C nopt1 = single contig C nopt2 = all contigs C nopt3 = scan 10 CONTINUE LLINO = LLINOI LINCON = LINCNI LOCLR = 0 LOCRR = 0 CALL BPAUSE(KBIN,KBOUT,IOK) CALL CLEARV CALL XHAIRR(ISXMAX,ISYMAX,IX,IY,TERM,DBTDUX,DBTDUY) CALL VT100M INFLAG = HQN(TERM) IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) GO TO 10 END IF IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.3) RETURN NOPT = NOPWIN(IY,MARGB,MARGT,MAXOPT) TERM = TUPPER(TERM) IF(NOPT.EQ.0) RETURN IF(NOPT.EQ.NOPT3) THEN IF(TERM.EQ.'S') THEN XMIN = LREG XMAX = RREG X = CWORLD(IX,MARGL,MARGR,XMIN,XMAX) LOCLR = MAX(LREG,NINT(X)-WINDOW) LOCRR = MIN(RREG,NINT(X)+WINDOW-1) IF(LOCLR.NE.0) THEN CALL DSPLAY(RELPG,LNGTHG,LNBR,RNBR, + GEL,LLINO,LINCON,LOCLR,LOCRR,GEL2,I1,I2,0,I, + IDBSIZ,IDEV,KBOUT, + IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM) GO TO 10 END IF END IF IF((TERM.EQ.'N').OR.(TERM.EQ.'Z').OR.(TERM.EQ.'I')) GO TO 10 END IF IF(NOPT.EQ.NOPT1) THEN STRAND = 1 CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,LENCON,STRAND,DEPTHP) IF(DEPTHP.LT.0) RETURN STRAND = -1 CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,LENCON,STRAND,DEPTHM) IF(DEPTHM.LT.0) RETURN YMAX = ISYMAX YMIN = 0. XMIN = LREG XMAX = RREG RINC = ISYMAX / (DEPTHP + DEPTHM + 2) RMID =(DEPTHM+1) * RINC X = CWORLD(IX,MARGL,MARGR,XMIN,XMAX) Y = CWORLD(IY,MARGB(NOPT),MARGT(NOPT),YMIN,YMAX) IF(TERM.EQ.'I') THEN STRAND = 1 YINC = RINC * STRAND CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,STRAND, + MARGL,MARGR,MARGB(NOPT),MARGT(NOPT), + ISXMAX,ISYMAX,RMID,YINC,DEPTHP,X,Y, + KBOUT,IGEL,ICLOSE) IF(ICLOSE.EQ.1) THEN STRAND = -1 YINC = RINC * STRAND CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,STRAND, + MARGL,MARGR,MARGB(NOPT),MARGT(NOPT), + ISXMAX,ISYMAX,RMID,YINC,DEPTHM,X,Y, + KBOUT,IGEL,ICLOSE) END IF IF(ICLOSE.EQ.1) GO TO 10 CALL READN(IDEV3,IGEL,NAMARC) WRITE(IDEV,1006)NAMARC,IGEL,RELPG(IGEL),LNGTHG(IGEL) 1006 FORMAT + ( ' Name ',A,' Number ',I6,' Rel. Posn. ',I7,' Length ',I5) GO TO 10 END IF IF(TERM.EQ.'Z') THEN STRAND = 1 YINC = RINC * STRAND CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,STRAND, + MARGL,MARGR,MARGB(NOPT),MARGT(NOPT), + ISXMAX,ISYMAX,RMID,YINC,DEPTHP,X,Y, + KBOUT,IGEL,ICLOSE) IF(ICLOSE.EQ.1) THEN STRAND = -1 YINC = RINC * STRAND CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,STRAND, + MARGL,MARGR,MARGB(NOPT),MARGT(NOPT), + ISXMAX,ISYMAX,RMID,YINC,DEPTHM,X,Y, + KBOUT,IGEL,ICLOSE) END IF IF(IGEL.EQ.0) GO TO 10 CALL CLEARG CALL PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL, + NCONTS,MARGL,MARGR,MARGB(NOPT2),MARGT(NOPT2),ISXMAX,ISYMAX) LREG = MAX(1,RELPG(IGEL)-GWIND) RREG = MIN(RELPG(LINCON),RELPG(IGEL)+GWIND) LLINO = LNBR(LINCON) LLINOI = LLINO LINCNI = LINCON LENCON = RREG - LREG + 1 CALL FDEPTH(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,LENCON, + MARGL,MARGR,MARGB(NOPT1),MARGT(NOPT1),ISXMAX,ISYMAX) GO TO 10 END IF IF(TERM.EQ.'S') THEN LOCLR = MAX(LREG,NINT(X)-WINDOW) LOCRR = MIN(RREG,NINT(X)+WINDOW-1) IF(LOCLR.NE.0) THEN CALL DSPLAY(RELPG,LNGTHG,LNBR,RNBR, + GEL,LLINO,LINCON,LOCLR,LOCRR,GEL2,I1,I2,0,I, + IDBSIZ,IDEV,KBOUT, + IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM) GO TO 10 END IF END IF IF(TERM.EQ.'N') THEN LOCLR = MAX(LREG,NINT(X)-WINDOW) LOCRR = MIN(RREG,NINT(X)+WINDOW-1) IGEL = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, + LLINO,LREG) IF(LOCLR.NE.0) THEN CALL LSTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,IGEL, + LOCRR,IDEV,IDEV3,NAMARC) END IF GO TO 10 END IF IF(TERM.EQ.'Q') THEN CALL DBSCNP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1, + IDIM1,GEL,IDBSIZ,TEMP3,2,CHRSIZ,MAXGL2,IDEV2,LLINO, + PERCD,MAXGEL,LINOU1,LINOU2,LREG,RREG, + MARGL,MARGR,MARGB(NOPT3),MARGT(NOPT3),ISXMAX,ISYMAX, + MXGOOD) GO TO 10 END IF END IF IF(NOPT.EQ.NOPT2) THEN CALL IDPLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, + NCONTS,IX,IY,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT), + ISXMAX,ISYMAX,DBTDUX,DBTDUY, + LINCON,IGEL,IS) IF(IGEL.EQ.0) RETURN IF(TERM.EQ.'Z') THEN CALL CLEARG CALL PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL, + NCONTS,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),ISXMAX,ISYMAX) LREG = 1 RREG = RELPG(LINCON) LLINO = LNBR(LINCON) LLINOI = LLINO LINCNI = LINCON LENCON = RREG - LREG + 1 CALL FDEPTH(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,LENCON, + MARGL,MARGR,MARGB(NOPT1),MARGT(NOPT1),ISXMAX,ISYMAX) GO TO 10 END IF IF(TERM.EQ.'Q') THEN CALL CLEARG CALL PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL, + NCONTS,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),ISXMAX,ISYMAX) LREG = 1 RREG = RELPG(LINCON) LLINO = LNBR(LINCON) LLINOI = LLINO LINCNI = LINCON LENCON = RREG - LREG + 1 CALL FDEPTH(RELPG,LNGTHG,LNBR,RNBR, + IDBSIZ,LLINO,LREG,RREG,LENCON, + MARGL,MARGR,MARGB(NOPT1),MARGT(NOPT1),ISXMAX,ISYMAX) CALL DBSCNP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1, + IDIM1,GEL,IDBSIZ,TEMP3,2,CHRSIZ,MAXGL2,IDEV2,LLINO, + PERCD,MAXGEL,LINOU1,LINOU2,LREG,RREG, + MARGL,MARGR,MARGB(NOPT3),MARGT(NOPT3),ISXMAX,ISYMAX, + MXGOOD) GO TO 10 END IF IF(TERM.EQ.'I') THEN CALL READN(IDEV3,IGEL,NAMARC) WRITE(IDEV,1006)NAMARC,IGEL,RELPG(IGEL),LNGTHG(IGEL) GO TO 10 END IF IF(TERM.EQ.'S') THEN LOCLR = MAX(1,IS-WINDOW) LOCRR = MIN(RELPG(LINCON),IS+WINDOW-1) LLINO = LNBR(LINCON) IF(LOCLR.NE.0) THEN CALL DSPLAY(RELPG,LNGTHG,LNBR,RNBR, + GEL,LLINO,LINCON,LOCLR,LOCRR,GEL2,I1,I2,0,I, + IDBSIZ,IDEV,KBOUT, + IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM) END IF GO TO 10 END IF IF(TERM.EQ.'N') THEN LOCLR = MAX(1,IS-WINDOW) LOCRR = MIN(RELPG(LINCON),IS+WINDOW-1) LLINO = LNBR(LINCON) IF(LOCLR.NE.0) THEN CALL LSTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,IGEL, + LOCRR,IDEV,IDEV3,NAMARC) END IF GO TO 10 END IF END IF END INTEGER FUNCTION CLINNO(LNBR,IDBSIZ,NCONTS,IIN) C AUTHOR: RODGER STADEN C RETURNS CONTIG LINE NUMBER OR ZERO FOR ERROR INTEGER LNBR(IDBSIZ) CLINNO = 0 N=IDBSIZ-NCONTS DO 10 J=N,IDBSIZ-1 IF(LNBR(J).EQ.IIN) THEN CLINNO = J RETURN END IF 10 CONTINUE END SUBROUTINE UPDCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,NGELS,NCONTS, +SEQ,MAXSEQ,IDIM1,CSTART,CLENO,LINCON,NAMPRO,SEQ2,TEMP3, +ECHRSZ,MAXGL2,KBOUT,IDEV,IDEV2,IFAIL,MAXGEL,IDM,PERCD) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) INTEGER CSTART,CLENO,S1,B1,ECHRSZ,RREG INTEGER TEMP3(ECHRSZ,MAXGL2) CHARACTER SEQ(MAXSEQ),SEQ2(MAXGEL) CHARACTER NAMPRO*(*) INTEGER CHNRP EXTERNAL CHNRP C cstart consensus start point (before new reading) C cleno consensus length (before new reading) C lincon element number of contig C s1 number of first reading to shift C b1 number of first base to shift (in overall consensus positioning) C C there are 2 tasks: 1. make space for the new and altered region C 2. calculate the new consensus and put it in the space C we do not have to make space if: C a. we are dealing with the last contig in the consensus and there are no C readings starting to the right of the new data C b. the contig has not been padded C C New code to update the consensus only for the region affected by the C new reading. Find the next reading to the right of the new one, which C the new one does not overlap (might not be one!). Make a consensus from C start of new reading to here. Prior to this make space for it by moving C the consensus right (only if the contig is longer (padding or extra data C at its ends). Let s1 be the first reading to shift. We shift from its C left end to the end of the contig - where is this in the overall consensus? C The distance of the left end of s1 to the right end of the contig is C unchanged. This means that the new relpg(s1) is the same distance from C the right end of the old consensus as the old relpg(s1) was from the right C end of the old consensus. So from this we can calculate the position of the C the first base to move. C Let L be the position in the overall consensus of the last base in this contig C L = cstart - cleno - 1 C Let D = distance to end of contig C D = RELPG(LINCON) - relpg(s1) + 1. C First base to shift B1 = L - D + 1 C Last base to shift is idim1 C Distance to move to right is relpg(lincon) - cleno ie the number of extra bases C make consensus from relpg(ngels) to relpg(s1) - 1 C put it at cstart + relpg(ngels) - 1 C C Potential problems: C 1) reading at right end of contig C the search for the first nonoverlapping read to the right will return 0 C shift al the next contig: ie cstart + cleno onwards C make consensus from relpg(ngels) to end of contig C put it at cstart + relpg(ngels) -1 C C 2) reading at left end of contig C shift whole contig ie cstart - 20 C add new title C shift consensus relpg(lincon) - cleno to the right C C 3) new reading contains contig - cases 1 and 2 combined C the search for the first nonoverlapping read to the right will return 0 C shift whole of next contig and make consensus from relpg(ngels) to end of C contig. C C 4) Might not be a next contig to shift C C get number of first reading to shift C S1 = CHNRP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,NGELS,NCONTS, +RELPG(NGELS)+ABS(LNGTHG(NGELS))-1) C WRITE(*,*)'S1',S1 C C is the altered region longer than the original: only then do we need to shift C C WRITE(*,*)'IDIM1',IDIM1 C WRITE(*,*)'RELPG(LINCON)',RELPG(LINCON) C WRITE(*,*)'CSTART,CLENO',CSTART,CLENO IF (RELPG(LINCON) - CLENO.GT.0) THEN C C it is longer so we probably need to shift C IF (S1.EQ.0) THEN C C no readings start to the right of the new data C IF (CSTART+CLENO-1.LT.IDIM1) THEN C C there are other contigs to the right C C WRITE(*,*)'CSTART,CLENO',CSTART,CLENO B1 = CSTART + CLENO C WRITE(*,*)'B1',B1 CALL MAKHCA(SEQ,MAXSEQ,B1,RELPG(LINCON)-CLENO,IDIM1) ELSE C C there are no contigs to the right and no readings start to the right of C the new one so nothing to shift C END IF ELSE C C there are readings starting to the right of the new one C C shift from start of next reading to right C L = CSTART + CLENO - 1 C WRITE(*,*)'CSTART,CLENO,L',CSTART,CLENO,L LD = RELPG(LINCON) - RELPG(S1) + 1 C WRITE(*,*)'LD',LD B1 = L - LD + 1 C WRITE(*,*)'B1',B1 CALL MAKHCA(SEQ,MAXSEQ,B1,RELPG(LINCON)-CLENO,IDIM1) END IF END IF C C now make new consensus (where do we put it, do we need C to give it a header, and what region do we make it for ? C in the simplest case make it for relpg(ngels) to relpg(s1) -1 C if s1=0 make it for relpg(ngels) to end of contig (relpg(lincon)) C we give it a header if it is at the left end of the contig ie lnbr(ngels)=0 C C we always start at the left end of the new reading C LREG = RELPG(NGELS) C C we end at the next reading to the right or the end of the contig C IF (S1.NE.0) THEN RREG = RELPG(S1) - 1 ELSE RREG = RELPG(LINCON) END IF C C where do we put the new consensus ? C B1 = CSTART + RELPG(NGELS) - 1 C WRITE(*,*)'LREG,RREG',LREG,RREG C WRITE(*,*)'B1',B1 C C do we need to add a title C IF (LNBR(NGELS).EQ.0) THEN B1 = CSTART - 20 C WRITE(*,*)'ADD NEW TIT AT',B1 CALL ADDTIT(SEQ(B1),NAMPRO,NGELS,B1) END IF IGELC = LNBR(LINCON) C C note aconsn will chain along until it find the first useful reading C JOB = 2 CALL ACONSN(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ,MAXSEQ,SEQ2,IDBSIZ,B1,JOB,IGELC,LREG,RREG,TEMP3, +ECHRSZ,MAXGL2,IDEV,IDEV2,IFAIL,MAXGEL,IDM,PERCD) IF(IFAIL.NE.0) THEN CALL ERROM(KBOUT,'Error calculating consensus') RETURN END IF C C before we leave we must make the overall consensus length correct C so add on the extra length (if any) which is the new length - old length C C WRITE(*,*)'OLD IDIM1',IDIM1 IDIM1 = IDIM1 + RELPG(LINCON) - CLENO IDIM2 = IDIM1 + RELPG(LINCON) - CLENO C WRITE(*,*)'NEW IDIM1/2',IDIM1 END SUBROUTINE MAKHCA(STRING,MAXAR,FROM,HSIZE,ASIZE) CHARACTER STRING(MAXAR) INTEGER FROM,HSIZE,ASIZE C C make a hole of size hsize in character array size asize C J = ASIZE + HSIZE DO 10 I=ASIZE,FROM,-1 STRING(J) = STRING(I) J = J - 1 10 CONTINUE END INTEGER FUNCTION CHNRP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT, +LREG) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) C C find first reading starting past lreg (0=none found) C I = LGEL CHNRP = 0 10 CONTINUE IF(I.NE.0) THEN IF(RELPG(I).LE.LREG) THEN I = RNBR(I) GO TO 10 END IF CHNRP = I RETURN END IF END INTEGER FUNCTION CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, +LGEL,LREG) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) C C find first reading with data covering or past lreg (0=none found) C I = LGEL CHNRP1 = 0 10 CONTINUE IF(I.NE.0) THEN IF(RELPG(I)+ABS(LNGTHG(I))-1.LT.LREG) THEN I = RNBR(I) GO TO 10 END IF CHNRP1 = I RETURN END IF END C ACONSN SUBROUTINE ACONSN(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS, +SEQ1,IDIM1,GEL,IDBSIZ,ISTART,JOB,LLINO,LREG,RREG,TEMP, +CHRSIZ,MAXGL2,KBOUT, +IDEVW,IFAIL,MAXGEL,IDM,PERCD) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),CHRSIZ INTEGER LREG,RREG,X,Y,TEMP(CHRSIZ,MAXGL2) CHARACTER SEQ1(IDIM1) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER GEL(MAXGEL) CHARACTER NAMPRO*(*) INTEGER CHNRP1 EXTERNAL CHNRP1 C C new consensus calculating routine (could replace acons if we check values of job) C C job = 0 do it for whole db C = 2 for selected contig only C = 1 for selected contig, adding a header C note jobs 0 and 1 update istart (it always points to end of overall C consensus), but job=2 does not C CALL BUSY(KBOUT) IFAIL=0 IF(JOB.EQ.1) THEN C C do it for a selected contig, adding title C ISTART=ISTART+1 IDIM11=RREG-LREG+1 IF((ISTART+19+IDIM11).GT.IDIM1)THEN WRITE(KBOUT,1009)IDIM1 IFAIL=1 RETURN END IF C C allow summer to be dumb, and find first relevant reading number C LLINO1 = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, + LLINO,LREG) IF(LLINO1.EQ.0) THEN CALL ERROM(KBOUT, + 'Error in ACONSN: no data found for consensus') IFAIL = 1 RETURN END IF CALL ADDTIT(SEQ1(ISTART),NAMPRO,LLINO,ISTART) CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ1(ISTART),IDIM11,GEL,LREG,RREG,LLINO1,IDBSIZ,TEMP, + CHRSIZ,MAXGL2, + IDEVW,MAXGEL,IDM,PERCD) ISTART=ISTART+IDIM11-1 RETURN END IF IF(JOB.EQ.2) THEN C C do it for a selected contig C IDIM11=RREG-LREG+1 IF((ISTART+IDIM11-1).GT.IDIM1)THEN WRITE(KBOUT,1009)IDIM1 IFAIL=1 RETURN END IF C C allow summer to be dumb, and find first relevant reading number C LLINO1 = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ, + LLINO,LREG) IF(LLINO1.EQ.0) THEN CALL ERROM(KBOUT, + 'Error in ACONSN: no data found for consensus') IFAIL = 1 RETURN END IF CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ1(ISTART),IDIM11,GEL,LREG,RREG,LLINO1,IDBSIZ,TEMP, + CHRSIZ,MAXGL2, + IDEVW,MAXGEL,IDM,PERCD) RETURN END IF C C do it for all contigs C N=IDBSIZ-NCONTS DO 110 I=N,IDBSIZ-1 J=LNBR(I) X=1 Y=RELPG(I) ISTART=ISTART+1 IF((ISTART+19+Y).GT.IDIM1)THEN WRITE(KBOUT,1009)IDIM1 1009 FORMAT( + ' Database maximum consensus length(',I6,') exceeded',/, + ' calculation aborted') IFAIL=1 RETURN END IF CALL ADDTIT(SEQ1(ISTART),NAMPRO,J,ISTART) CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + SEQ1(ISTART),Y,GEL,X,Y,J,IDBSIZ,TEMP,CHRSIZ,MAXGL2,IDEVW,MAXGEL, + IDM,PERCD) ISTART=ISTART+Y-1 110 CONTINUE END SUBROUTINE AERROR(IDEVS,IDEVF,NAME,IERR) CHARACTER NAME*(*) C C handle errors for assembly C C errors are: C 0 file not found C 1 read too short C 2 failed to align and not entered C 3 failed on entry C 4 failed to align but entered WRITE(IDEVF,1000)NAME(1:INDEX(NAME,' ')),IERR 1000 FORMAT(A,I2) CALL ERROM(IDEVS,'Failed reading written to error file') END SUBROUTINE SHFTLA(STRING,MAXAR,FROMS,TO,FROME) CHARACTER STRING(MAXAR) INTEGER FROMS,TO,FROME C C shift an array left from froms to to C J = TO DO 10 I=FROMS,FROME STRING(J) = STRING(I) J = J + 1 10 CONTINUE END SUBROUTINE GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LINCON,LLINO,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,PROMPT, +IHELPS,IHELPE,FILEH,IDEVH) C AUTHOR: RODGER STADEN INTEGER RELPG(IDBSIZ),GELIDN INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER FILEH*(*),PROMPT*(*) EXTERNAL GELIDN IERR = 1 NCONTC = GELIDN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LLINO, +IDBSIZ,KBIN,KBOUT,IDEVN,PROMPT, +IHELPS,IHELPE,FILEH,IDEVH,INFLAG) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.4) RETURN IF(NCONTC.EQ.0) RETURN IGELNO = NCONTC IF(LNBR(NCONTC).NE.0) THEN WRITE(KBOUT,1013)RELPG(NCONTC) 1013 FORMAT(' Position of this reading=',I6) 25 CONTINUE NCONTC = LNBR(NCONTC) IF(LNBR(NCONTC).NE.0) GO TO 25 WRITE(KBOUT,1014)NCONTC 1014 FORMAT( ' Number of leftmost reading this contig=',I6) END IF 30 CONTINUE N = IDBSIZ - NCONTS DO 20 J=N,IDBSIZ-1 IF(LNBR(J).EQ.NCONTC) THEN LINCON=J GO TO 21 END IF 20 CONTINUE CALL ERROM(KBOUT,'No contig line for this reading!') RETURN 21 CONTINUE LLINO = NCONTC IERR = 0 END INTEGER FUNCTION GELIDN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, +LLINO,IDBSIZ,KBIN,KBOUT,IDEVN,PROMPT, +IHELPS,IHELPE,FILEH,IDEVH,INFLAG) CHARACTER FILEH*(*),PROMPT*(*) C AUTHOR: RODGER STADEN C SEARCHES FOR ARCHIVE NAMES INTEGER RELPG(IDBSIZ) INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER NAME1*17,NAME3*17,NFLAG PARAMETER (NFLAG='/') EXTERNAL NAMENO NAME3 = ' ' IF(LLINO.NE.0) THEN NAME3(1:1) = NFLAG CALL READN(IDEVN,LLINO,NAME3(2:)) END IF GELIDN = 0 10 CONTINUE L = 0 IF(LLINO.NE.0) L = 17 CALL GTSTR(PROMPT,NAME3, +NAME1,L,KBOUT,KBIN,INFLAG) IF(INFLAG.EQ.2) RETURN IF(INFLAG.EQ.4) RETURN IF(INFLAG.EQ.3) THEN GELIDN = LLINO RETURN END IF IF(INFLAG.EQ.1) THEN CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT) GO TO 10 END IF IF(NAME1(1:1).EQ.NFLAG) THEN GELIDN = NAMENO(NAME1(2:),NGELS,IDEVN) IF (GELIDN.EQ.0) CALL ERROM(KBOUT,'Reading name not found') ELSE CALL RJST(NAME1) READ(NAME1,1001,ERR=10,END=10)GELIDN 1001 FORMAT(I17) IF((GELIDN.LT.1).OR.(GELIDN.GT.NGELS)) THEN CALL ERROM(KBOUT,'Illegal gel reading number') GO TO 10 END IF END IF END INTEGER FUNCTION NAMENO(NAME,NGELS,IDEVN) CHARACTER NAME*(*) CHARACTER*16 NAME1,NAME2 NAME1 = NAME CALL CCASE(NAME1,1) DO 10 I=1,NGELS CALL READN(IDEVN,I,NAME2) CALL CCASE(NAME2,1) IF (NAME1.EQ.NAME2) THEN NAMENO = I RETURN END IF 10 CONTINUE NAMENO = 0 END SUBROUTINE REMGBD(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +KBIN,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN,IDEV2,FILNAM, +IHELPS,IHELPE,HELPF,IDEVH,IOK) INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ) CHARACTER HELPF*(*),GEL(MAXGEL),NAMARC*16,FILNAM*(*) INTEGER REMME,GCLIN,CHAINL,GNFFOF PARAMETER (MAXPRM = 35) CHARACTER PROMPT(4)*(MAXPRM) EXTERNAL GCLIN,CHAINL,NAMENO,GNFFOF C assumes db is logical consistent FILNAM = ' ' PROMPT(1) = 'Define a region by reading names' PROMPT(2) = 'Use a file of reading names' PROMPT(3) = 'Move a reading to a separate contig' PROMPT(4) = 'Make a list of unattached readings' IOPT = 1 CALL RADION('Select list definition mode',PROMPT,4,IOPT, +IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT) IF(IOPT.LT.1) RETURN IF(IOPT.EQ.4) THEN C C here we find all contigs with single readings and write their C names to a file C CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT, + 'Name for file of reading names', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN DO 5 I=IDBSIZ-NCONTS,IDBSIZ-1 IF (LNBR(I).EQ.RNBR(I)) THEN IF (LNBR(I).NE.0) THEN CALL READN(IDEVN,LNBR(I),NAMARC) WRITE(IDEV2,1000)NAMARC END IF END IF 5 CONTINUE CLOSE(UNIT=IDEV2) RETURN END IF IF(IOPT.EQ.3) THEN C C here we start a new contig with the selected reading C C we get the reading number igelno and move a copy of it C to ngels+1. Then we use the remove reading routine to delete C the original copy and move the new one to fill the hole. The C reason for this convoluted route is that remgel cleans up C all the mess. We must write a new contig line and check the C orientation. C C IF(NGELS+3.GE.IDBSIZ-NCONTS) THEN CALL ERROM(KBOUT,'Insufficient space for new contig') RETURN END IF NGELST = NGELS + 1 NCONTT = NCONTS + 1 CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + LINCOL,LLINOL,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, + 'Reading to disconnect', + IHELPS,IHELPE,HELPF,IDEVH) IF (IERR.NE.0) RETURN CALL READW(IDEVW,IGELNO,GEL,MAXGEL) C C move reading info over end of gel list C CALL READN(IDEVN,IGELNO,NAMARC) CALL WRITEN(IDEVN,NGELST,NAMARC) LNBR(NGELST) = 0 RNBR(NGELST) = 0 C C leave orientation the same C LNGTHG(NGELST) = LNGTHG(IGELNO) RELPG(NGELST) = 1 CALL WRITER(IDEVR,NGELST,RELPG(NGELST),LNGTHG(NGELST), + LNBR(NGELST),RNBR(NGELST)) CALL WRITEW(IDEVW,NGELST,GEL,MAXGEL) CALL MOVTAG(IGELNO,NGELST) C C start a new contig C I = IDBSIZ - NCONTT LNBR(I) = NGELST RNBR(I) = NGELST LNGTHG(I) = 0 RELPG(I) = ABS(LNGTHG(NGELST)) CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I), + LNBR(I),RNBR(I)) NGELS = NGELST NCONTS = NCONTT CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,0,0) IGLNO = IGELNO CALL REMGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, + IGLNO,LINCOL,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN) RETURN END IF IF(IOPT.EQ.1) THEN 10 CONTINUE LLINOL = 0 LLINOR = 0 CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + LINCOL,LLINOL,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, + 'Leftmost reading', + IHELPS,IHELPE,HELPF,IDEVH) IF (IERR.NE.0) RETURN CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + LINCOR,LLINOR,JGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN, + 'Rightmost reading', + IHELPS,IHELPE,HELPF,IDEVH) IF (IERR.NE.0) RETURN IF (LLINOL.NE.LLINOR) THEN CALL ERROM(KBOUT, + 'For this mode readings must be in the same contig') GO TO 10 END IF IF (RELPG(IGELNO).GT.RELPG(JGELNO)) THEN CALL ERROM(KBOUT, + 'For this mode readings must be in left to right order') GO TO 10 END IF C C IGELNO is first read to remove, JGELNO the last C C we must make a list of reads because removal changes numbers C CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT, + 'Name for temporary file of reading names', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN C C write out their names C J = IGELNO 20 CONTINUE CALL READN(IDEVN,J,NAMARC) WRITE(IDEV2,1000)NAMARC WRITE(KBOUT,1001)NAMARC 1000 FORMAT(A) 1001 FORMAT(' ',A) IF (J.NE.JGELNO) THEN IF (J.NE.0) THEN J=RNBR(J) GO TO 20 END IF END IF CALL BPAUSE(KBIN,KBOUT,IOK) IF (IOK.NE.0) THEN CLOSE(UNIT=IDEV2) RETURN END IF REWIND(UNIT=IDEV2) ELSE IF(IOPT.EQ.2) THEN C C here we start from a file of file names C CALL OPENF1(IDEV2,FILNAM,0,IOK,KBIN,KBOUT, + 'Name of file of reading names', + IHELPS,IHELPE,HELPF,IDEVH) IF(IOK.NE.0) RETURN ELSE CALL ERROM(KBOUT,'How the hell did we get here?') RETURN END IF 30 CONTINUE IOK = GNFFOF(IDEV2,NAMARC) IF(IOK.EQ.1) GO TO 100 IF(IOK.NE.0) GO TO 30 REMME = NAMENO(NAMARC,NGELS,IDEVN) I = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS, + IDBSIZ,REMME) ICONT = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,I) IF(ICONT.EQ.0) THEN CALL ERROM(KBOUT,'No contig line for this reading') IOK = 1 GO TO 100 END IF CALL REMGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ, +REMME,ICONT,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN) GO TO 30 100 CONTINUE CLOSE(UNIT=IDEV2) END