staden-lg/src/bap/dbsyscommon.f

5740 lines
168 KiB
Fortran

C DBSYS ROUTINES COMMON TO PRE AND POST .RD PROGRAMS
C
C 25-8-92 NOTE at the end of the file are new versions of getln2 (getln3)
C and gelid (gelidn) and a new routine nameno, that should
C replace getln2 and gelid.
C
C Further sap routines are in dbsysold.f (pre .rd) and dbsysnew.f (post)
C the split was made by rs 23-1-91
C 15-6-92 added fasta format output from consen
C 15-4-92 added in all the speedup changes ive been making and made
C screnv compatible
C 13-4-92 changed autocn to use a new hashing routine encof and inite
C 2-4-92 Added new dbauto related routines and changed gtconc
C 2-4-92 Added filnam = ' ' and brought uptodate with dap
C 4-5-90 Change to getreg to allow escape
C 4-5-90 addition of graphics routines and changes to menus
C 9-5-90 added default gel reading: many changes
C 17-5-90 Fixed 3 bugs in screen editing: 1) rightjustified names
C caused problems; 2) beginnings of sequences starting at far
C right of lines where not seen; 3) lines with no numbers at
C the end of a contig (i.e. with <10 chars) were flagged
C as errors. Changes to ltype for 1, linlen for 2, dsplay for 3
C 9-7-90 removed menu routines
C 20-8-90 changed gelid to add / to reading name because xsap did
C not return the INFLAG = 3 for the default
C 23-8-90 Changes to dbauto and autocn to deal with failures better
C Plus addition of calls to BUSY
C 9-11-90 Replaced call to radio with call to radion
C 19-11-90 Changed max match length in dbauto to maxglm+1 (was 50)
C 25-11-90 Very important bug fix in tpchek. Old versions could
C duplicate bits of working versions.
C 28-11-90 Modified slider to receive maxpg and maxpc and to allow exactly
C the requested number of matches at each end of the two
C sequences.
C Added two new options to dbauto: all gels to new contigs, all
C gels to contig 1; plus resurrected forbidding joins to allow
C sequences to be entered only into the contig the overlap best.
C Changed autocn to sort overlaps into order based on % mismatch
C (previously it saved the best two in any order)
C Minor change to dbstar
C 3-1-91 Discovered bug in dbopen: incorrect call to getint when the
C database is very old and needs values for the current format
C 21-1-91 GELID allowed illegal gel numbers to be returned! Fixed it.
C 22-1-91 Modified autocn, adism4,adism3 to give more info about
C overlaps, and to allow 10 overlaps. Modified dbopen to
C return version number, ditto dbstar
C 23-1-91 Split into dbsyscommon, dbsysold, dbsysnew
C 26-2-91 Improved overflow check in padcop
C 28-7-91 added extra parameter to quality calc: mxgood is the maximum
C reading length in which we have confidence, so only add this
C many chars from the start of each reading. Also changed the
C quality calc to make it the same as the consensus one. Made
C all characters have nonzero score and made lowercase = 100
C 21-8-91 Changed arrfil to arrfim which does not display comments
C 22-8-91 Added routine to find contig line number given left gel (CLINNO)
C
C
C 12-11-91 BIG CHANGE: made database handle 99,999 readings and 16 char names
C
C
C Also added routine to make aedit take strandedness into account
C (SUMSS).
C Also added fmt4lp which is used by find internal joins and
C could be used to advantage by others that call fmt4ln.
C 18-11-91 New routine GETLN2 with returns gel number specified
C
C enconn
C routine to store positions of words in posns and first occurences
C in wordp and number of occurences in wordn
C each number is a value representing one of the le4 possible
C words of length length made up of 4 characters
C words in posns are numbers from 1 to 4**length
SUBROUTINE ENCONN(POSNS,IDIM,WORDP,WORDN,LE4,LENGTH,START)
C AUTHOR: RODGER STADEN
INTEGER WORDP(LE4),POSNS(IDIM)
INTEGER WORDN(LE4),START
C number of words of length length
IDIM1 = IDIM - (LENGTH-1)
IF (START.EQ.1) THEN
DO 10 I=1,LE4
WORDN(I) = 0
10 CONTINUE
END IF
C loop for each word
DO 100 I=START,IDIM1
N = POSNS(I)
IF(N.NE.0) THEN
NW = WORDN(N)
C is their already an entry for this word?
IF(NW.EQ.0) THEN
C first entry, put in wordp
WORDP(N) = I
WORDN(N) = NW + 1
ELSE
WORDN(N) = NW + 1
POSNS(I) = WORDP(N)
WORDP(N) = I
END IF
END IF
100 CONTINUE
END
SUBROUTINE ENCOF(SEQ,IDSEQ,CONST,CSTART,LENGTH,POSNS)
CHARACTER SEQ(IDSEQ)
INTEGER CONST(LENGTH),CSTART,POSNS(IDSEQ),HASH
INTEGER CTONUM,CONSTL
EXTERNAL CTONUM
C
C new hashing routine. hash = k1 + k2
C
C hash = k1.c1 + k2.c2 + ... + kn.cn - cstart
C now c1=1, c2=4*c1, c3=4*c2,...
C
C find length bases in a row, then do first word base by base,
C for rest only change what is necessary
C
DO 1 I=1,IDSEQ
POSNS(I) = 0
1 CONTINUE
CONSTL = CONST(LENGTH)
LM1 = LENGTH - 1
IDSQML = IDSEQ - LENGTH
IS = 1
I = 1
IP = 1
HASH = 0
10 CONTINUE
C
C end approaching ?
C
IF (IS.GT.IDSQML) RETURN
C
C at least a words length of characters left
C
11 CONTINUE
K = CTONUM(SEQ(I))
IF (K.EQ.5) THEN
C
C start a new word
C
IS = I + 1
I = IS
IP = 1
HASH = 0
GO TO 10
END IF
HASH = HASH + CONST(IP) * K
IF (IP.NE.LENGTH) THEN
I = I + 1
IP = IP + 1
GO TO 11
END IF
C
C word finished
C
C save the hash value and the
C
20 CONTINUE
POSNS(IS) = HASH + CSTART
C K1 = CONST(1) * CTONUM(SEQ(IS)) note const(1) = 1
K1 = CTONUM(SEQ(IS))
K2 = (HASH - K1) / 4
IS = IS + 1
IF (IS.GT.IDSQML) RETURN
K = CTONUM(SEQ(IS+LM1))
IF (K.EQ.5) THEN
IS = IS + 1
I = IS
IP = 1
HASH = 0
GO TO 10
END IF
C HASH = K2 + K * CONST(LENGTH) note this is a constant constant
HASH = K2 + K * CONSTL
GO TO 20
END
SUBROUTINE INITE(CONST,CSTART,LENGTH)
INTEGER CONST(LENGTH),CSTART
CSTART = 1
DO 1 I=1,LENGTH
C WRITE(*,*)I
CONST(I) = 4**(I-1)
CSTART = CSTART - CONST(I)
1 CONTINUE
END
C SUBROUTINE TO READ CHARACTER DATA FROM IDEV, REMOVE SPACES, FILL
C ARRAY AND RETURN NUMBER OF ELEMENTS USED. ANY LINES STARTING WITH
C A ; ARE TREATED AS COMMENTS
SUBROUTINE ARRFIM(IDEV,SEQNCE,J,KBOUT)
C 14-8-91 Added err= option to read, and set length to 0 if error found
C AUTHOR: RODGER STADEN
CHARACTER TEMP(80),SEQNCE(J)
CHARACTER SPACE,ENDCHR,TITCHR
SAVE ENDCHR,SPACE,TITCHR
DATA ENDCHR/'@'/
DATA SPACE/' '/
DATA TITCHR/';'/
IDMX=J
J=0
1 CONTINUE
READ(IDEV,1001,END=30,ERR=40)TEMP
1001 FORMAT(80A1)
IF(TEMP(1).EQ.TITCHR)THEN
C WRITE(KBOUT,1003)(TEMP(K),K=2,80)
C1003 FORMAT(' ',79A1)
GO TO 1
END IF
10 CONTINUE
DO 20 I=1,80
IF(TEMP(I).NE.SPACE)THEN
IF(TEMP(I).EQ.ENDCHR)RETURN
IF(J.EQ.IDMX)THEN
WRITE(KBOUT,1002)IDMX
1002 FORMAT(
+ ' Too much data. Maximum possible',
+ ' =',I6,', input stopped there')
RETURN
END IF
J=J+1
SEQNCE(J)=TEMP(I)
END IF
20 CONTINUE
GO TO 1
30 CONTINUE
RETURN
40 CONTINUE
CALL ERROM(KBOUT,'Error reading file')
J = 0
END
C ABEDIN
C
C ROUTINE TO EDIT THE DB USING A PADDED SEQ
C HAVE AN ARRAY SEQC2 LENGTH IDC OF PADDED SECTION OF CONTIG LINCON
C THE LEFT END OF THE PADDED CONTIG STARTS AT X
C THERE ARE ITOTPC PADS TO MAKE
C
SUBROUTINE ABEDIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+GEL,LINCON,X,SEQC2,ITOTPC,IDC,IDBSIZ,KBOUT,IDEVR,IDEVW,
+MAXGEL)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),X,POSN
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER SEQC2(IDC),GEL(MAXGEL),P
SAVE P
DATA P/','/
C
C POINT TO CONTIG
POSN=X-1
C POINT TO SEQC2
IAT=0
C COUNT PADS DONE
IDONE=0
C LOOP FOR ALL SEQC2
DO 100 J=1,IDC
POSN=POSN+1
IAT=IAT+1
IPAD=0
C IS THIS A PADDING CHAR?
IF(SEQC2(IAT).NE.P)GO TO 100
50 CONTINUE
C COUNT PADS
IPAD=IPAD+1
IAT=IAT+1
IF(SEQC2(IAT).EQ.P)GO TO 50
C END OF THIS STRETCH OF PADS,DO INSERT
C HAVE IPAD INSERTS TO MAKE AT POSN
CALL PADCON(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+GEL,LINCON,POSN,IPAD,IDBSIZ,IDEVR,IDEVW,MAXGEL,KBOUT)
C MOVE POINTER TO CONTIG
POSN=POSN+IPAD
C COUNT PADS DONE
IDONE=IDONE+IPAD
C ANY MORE TO DO?
IF(IDONE.EQ.ITOTPC)GO TO 101
100 CONTINUE
C ERROR SHOULD HAVE DONE ALL PADS
WRITE(KBOUT,1000)
1000 FORMAT(' Problem: some pads were not done!')
101 CONTINUE
END
SUBROUTINE ADDTIT(SEQ1,NAMPRO,NGELS,IDIM1)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(20),NAMPRO*(*)
CHARACTER NUMS(10)
C Set maximum number of digits in reading number
PARAMETER (MAXGD = 5)
SAVE NUMS
DATA NUMS/'0','1','2','3','4','5','6','7','8','9'/
CALL FILLC(SEQ1(2),18,'-')
SEQ1(1) = '<'
SEQ1(20) = '>'
IEND = INDEX(NAMPRO,'.')
N=NGELS
K=IEND+MAXGD
DO 10 J=1,MAXGD
N=MOD(N,10)+1
NAMPRO(K:K)=NUMS(N)
N=NGELS/(10**J)
K=K-1
10 CONTINUE
K = 18-IEND
K=K/2
DO 20 I=1,IEND+MAXGD
SEQ1(K)=NAMPRO(I:I)
K=K+1
20 CONTINUE
IDIM1=IDIM1+20
END
SUBROUTINE ADISM1(SEQ,IDIM,GEL,IDIMG,SAVPS,SAVPG,IDSAV,
+CENDS,NENDS,IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,
+LLINO,IMATC,
+ISTRAN,KBOUT,MATCH)
C AUTHOR: RODGER STADEN
C NEW PARMS
INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2)
CCCCCCCCCCCC
INTEGER CENDS(MAXCON)
INTEGER NENDS(MAXCON)
INTEGER SAVPS(IDSAV),SAVPG(IDSAV)
CHARACTER SEQ(IDIM),GEL(IDIMG),MATCH(IDIMG)
C
C EDITED 07-02-83 TO ALLOW FOR CASE WHERE A GEL OVERLAPS ADJACENT
C CONTIGS WITHIN THE LENGTH OF THE GEL. USE PARM THAT CONTAINS
C THE POSITION OF THE LEFT END OF THE NEXT CONTIG. SET TO VERY HIGH
C VALUE TO START
NEXTC=IDIM+1
C SORT THE MATCHING WORDS INTO ASCENDING ORDER ON POSITION IN SEQ
CALL BUB2AS(SAVPS,SAVPG,IDSAV)
C LOOK FOR SEPERATE MATCHES
LEND=IDIMG-SAVPG(1)+SAVPS(1)
C COUNT NUMBER OF MATCHING CONTIGS
IMATC=IMATC+1
CALL ADISM2(SEQ,IDIM,GEL,IDIMG,SAVPS(1),
1SAVPG(1),CENDS,NENDS,IDCEND,MAXCON,
1ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,KBOUT,
2MATCH)
DO 10 I=2,IDSAV
IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10
C NEW MATCH, DISPLAY IT
C COUNT NUMBER OF MATCHING CONTIGS
IMATC=IMATC+1
CALL ADISM2(SEQ,IDIM,GEL,IDIMG,SAVPS(I),
1SAVPG(I),CENDS,NENDS,IDCEND,MAXCON,
1ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,KBOUT,
2MATCH)
C
C RESET LEND
LEND=IDIMG-SAVPG(I)+SAVPS(I)
10 CONTINUE
RETURN
END
C
C ADISM2
C ROUTINE TO DISPLAY MATCHES
SUBROUTINE ADISM2(SEQ,IDIM1,GEL,IDIMG,ISAVPS,SAVPG,CENDS,NENDS,
+IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,
+NEXTC,KBOUT,MATCH)
C AUTHOR: RODGER STADEN
C NEW PARMS
INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2)
CCCCCCCCCCC
CHARACTER SEQ(IDIM1),GEL(IDIMG),MATCH(IDIMG)
INTEGER SAVPS,SAVPG,CENDS(MAXCON)
INTEGER NENDS(MAXCON)
C EDITED 07-02-83 FOR NEXTC. SEE ADISM1.
C DELETE 20 FROM END OF CONSENSUS MATCH
SAVPS=ISAVPS-19
C FIND CONTIG CONSENSUS ENDS
JJ=1
DO 5 J=2,IDCEND
IF(SAVPS.GT.CENDS(J))GO TO 5
C GONE PAST SO LAST IS THE ONE
JJ=J-1
GO TO 6
5 CONTINUE
JJ=IDCEND
6 CONTINUE
C SUBTRACT 1 FROM END
SAVPS=SAVPS-1
C LENGTH FROM MATCH TO LEFT OF CONTIG
LCL=SAVPS-CENDS(JJ)
C RIGHT
LCR=CENDS(JJ+1)-ISAVPS-1
C LEFT GEL
LGL=SAVPG-1
LGR=IDIMG-SAVPG
C NEED MIN OF EACH PAIR
LL=MIN(LCL,LGL)
LR=MIN(LCR,LGR)
C LENGTH OF OVERLAP
LM=LR+LL+1
C DISPLAY STARTS
ICL=ISAVPS-LL
IGL=SAVPG-LL
WRITE(KBOUT,1000)NENDS(JJ)
1000 FORMAT(' Match found with contig number =',I6)
CALL SQMTCH(SEQ(ICL),GEL(IGL),MATCH,LM)
L=ICL-CENDS(JJ)-19
CALL FMT4LN(SEQ(ICL),GEL(IGL),MATCH,LM,L,IGL,KBOUT)
C UPDATE END OF NEXT CONTIG
NEXTC=CENDS(JJ+1)+20
IF(IMATC.GT.2)RETURN
ILEFTS(IMATC)=CENDS(JJ)+20
ILC(IMATC)=LCL+LCR+1
IPOSC(IMATC)=LCL+1
IPOSG(IMATC)=SAVPG
LLINO(IMATC)=NENDS(JJ)
ISENSE(IMATC)=1
IF(ISTRAN.EQ.2)ISENSE(IMATC)=-1
RETURN
END
SUBROUTINE ADISM3(ISAVPS,SAVPG,CENDS,NENDS,
+IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,
+NEXTC,MAXC,KBOUT)
C AUTHOR: RODGER STADEN
INTEGER ILEFTS(MAXC),ILC(MAXC),IPOSC(MAXC),IPOSG(MAXC)
INTEGER ISENSE(MAXC),LLINO(MAXC)
INTEGER SAVPS,SAVPG,CENDS(MAXCON)
INTEGER NENDS(MAXCON)
SAVPS=ISAVPS-19
JJ=1
DO 5 J=2,IDCEND
IF(SAVPS.GT.CENDS(J))GO TO 5
JJ=J-1
GO TO 6
5 CONTINUE
JJ=IDCEND
6 CONTINUE
SAVPS=SAVPS-1
LCL=SAVPS-CENDS(JJ)
LCR=CENDS(JJ+1)-ISAVPS-1
NEXTC=CENDS(JJ+1)+20
IF(IMATC.LE.MAXC) THEN
ILEFTS(IMATC)=CENDS(JJ)+20
ILC(IMATC)=LCL+LCR+1
IPOSC(IMATC)=LCL+1
IPOSG(IMATC)=SAVPG
LLINO(IMATC)=NENDS(JJ)
ISENSE(IMATC)=1
IF(ISTRAN.EQ.2)ISENSE(IMATC)=-1
WRITE(KBOUT,1000)LLINO(IMATC),IPOSC(IMATC),ISTRAN,
+ IPOSG(IMATC)
1000 FORMAT
+ (' Contig',I5,' position',I6,' matches strand',I2,
+ ' at position',I5)
ELSE
CALL ERROM(KBOUT,'Warning: too many overlaps')
END IF
END
SUBROUTINE ADISM4(IDIM,IDIMG,SAVPS,SAVPG,IDSAV,
+CENDS,NENDS,IDCEND,MAXCON,ILEFTS,ILC,IPOSC,IPOSG,ISENSE,
+LLINO,IMATC,ISTRAN,MAXC,KBOUT)
C AUTHOR: RODGER STADEN
INTEGER ILEFTS(MAXC),ILC(MAXC),IPOSC(MAXC),IPOSG(MAXC)
INTEGER ISENSE(MAXC),LLINO(MAXC)
INTEGER CENDS(MAXCON)
INTEGER NENDS(MAXCON)
INTEGER SAVPS(IDSAV),SAVPG(IDSAV)
NEXTC=IDIM+1
CALL BUB2AS(SAVPS,SAVPG,IDSAV)
IMATC=IMATC+1
CALL ADISM3(SAVPS(1),SAVPG(1),CENDS,NENDS,IDCEND,MAXCON,
+ ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,MAXC,
+ KBOUT)
LEND=IDIMG-SAVPG(1)+SAVPS(1)
DO 10 I=2,IDSAV
IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10
IMATC=IMATC+1
CALL ADISM3(SAVPS(I),SAVPG(I),CENDS,NENDS,IDCEND,MAXCON,
+ ILEFTS,ILC,IPOSC,IPOSG,ISENSE,LLINO,IMATC,ISTRAN,NEXTC,MAXC,
+ KBOUT)
LEND=IDIMG-SAVPG(I)+SAVPS(I)
10 CONTINUE
IMATC = MIN(IMATC,MAXC)
RETURN
END
SUBROUTINE AEDIT(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT,
+GEL,MAXGEL,CON,IDC,IDEVW,IDEVR,LREG,RREG,KBOUT)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL),CON(IDC)
INTEGER RREG,PC,PCA,PG
NG = LGEL
PG = RELPG(NG)
PC = LREG
NTT = 0
NCT = 0
NDT = 0
10 CONTINUE
C WRITE(*,*)'GEL',NG
CALL READW(IDEVW,NG,GEL,MAXGEL)
LG = ABS(LNGTHG(NG))
IF(PC.LT.LREG) PC = LREG
PCA = PC - LREG + 1
IG = PC - PG + 1
LC = MIN(LG,RREG-PC+1)
C WRITE(*,*)'PC,PG,IG,LG,PCA,LC',PC,PG,IG,LG,PCA,LC
CALL ET(GEL(IG),LG,CON(PCA),LC,NE)
NTT = NTT + NE
CALL EC(GEL(IG),LG,CON(PCA),LC,NE)
NCT = NCT + NE
CALL ED(GEL(IG),LG,CON(PCA),LC,ND)
NDT = NDT + ND
CALL WRITEW(IDEVW,NG,GEL,MAXGEL)
IF(ND.GT.0) THEN
K = LNGTHG(NG)
LNGTHG(NG) = ABS(LNGTHG(NG)) - ND
LNGTHG(NG) = SIGN(LNGTHG(NG),K)
CALL WRITER(IDEVR,NG,RELPG(NG),LNGTHG(NG),LNBR(NG),RNBR(NG))
END IF
IF(RNBR(NG).NE.0) THEN
NG = RNBR(NG)
PG = RELPG(NG)
PC = PG
IF(PG.LE.RREG) GO TO 10
END IF
CALL EDR(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT,
+CON,IDC,IDEVW,IDEVR,LREG)
WRITE(KBOUT,1000)NTT
1000 FORMAT(' Number of transpositions=',I6)
WRITE(KBOUT,1001)NCT
1001 FORMAT(' Number of changes =',I6)
WRITE(KBOUT,1002)NDT
1002 FORMAT(' Number of deletions =',I6)
END
C AJOIN2
C COMPLETES JOIN AND RETURNS LENGTH OF NEW CONTIG IN LLINOR
SUBROUTINE AJOIN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+RELX,LLINOL,LLINOR,LNCONL,LNCONR,IDEVR)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNBR(IDBSIZ),RNBR(IDBSIZ),LNGTHG(IDBSIZ)
INTEGER RELX
C RELX IS THE POSITION OF THE JOINT
C LLINOL IS THE LEFT GEL NUMBER OF THE LEFT CONTIG
C LLINOR IS THE LEFT GEL OF THE RIGHT CONTIG
C LNCONL IS THE LEFT CONTIG LINE NUMBER
C LNCONR IS THE RIGHT CONTIG LINE NUMBER
C
C ADJUST ALL RELATIVE POSITIONS IN RIGHT CONTIG
N=LLINOR
RELPG(N)=RELX
50 CONTINUE
IF(RNBR(N).EQ.0)GO TO 60
N=RNBR(N)
RELPG(N)=RELPG(N)+RELX-1
GO TO 50
60 CONTINUE
C
C FIX UP NEW GEL LINE FOR OLD LEFT OF RIGHT CONTIG
LNBR(LLINOR)=RNBR(LNCONL)
C FIX UP RIGHT GEL OF LEFT CONTIG
N=RNBR(LNCONL)
RNBR(N)=LLINOR
C MERGE WILL SORT OUT THE CORRECT NEIGHBOURS
C
CALL MERGE(RELPG,LNGTHG,LNBR,RNBR,LNCONL,IDBSIZ)
C MERGE DOES NOT WRITE TO DISK
N=LNBR(LNCONL)
65 CONTINUE
C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N)
CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N),LNBR(N),RNBR(N))
N=RNBR(N)
IF(N.NE.0)GO TO 65
C CONTIG LINES
X=RELPG(LNCONR)+RELX-1
C LENGTH MAY NOT HAVE INCREASED!
IF(X.GT.RELPG(LNCONL))RELPG(LNCONL)=X
C SAVE LENGTH OF NEW CONTIG
RELX=RELPG(LNCONL)
C WRITE(IDEVR,REC=LNCONL)RELPG(LNCONL),LNGTHG(LNCONL),LNBR(LNCONL),
C 1RNBR(LNCONL)
CALL WRITER(IDEVR,LNCONL,RELPG(LNCONL),LNGTHG(LNCONL),
+LNBR(LNCONL),RNBR(LNCONL))
C
C NOW MOVE ALL DATA DOWN TO DELETE OLD RIGHT END
N=IDBSIZ-NCONTS
M=LNCONR-N
IF(M.EQ.0)GO TO 80
K=LNCONR
J=LNCONR-1
DO 70 I=1,M
RELPG(K)=RELPG(J)
LNGTHG(K)=LNGTHG(J)
LNBR(K)=LNBR(J)
RNBR(K)=RNBR(J)
C WRITE(IDEVR,REC=K)RELPG(K),LNGTHG(K),LNBR(K),RNBR(K)
CALL WRITER(IDEVR,K,RELPG(K),LNGTHG(K),LNBR(K),RNBR(K))
K=K-1
J=J-1
70 CONTINUE
80 CONTINUE
NCONTS=NCONTS-1
C WRITE(IDEVR,REC=IDBSIZ)NGELS,NCONTS
CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS)
90 CONTINUE
RETURN
END
C SUBROUTINE AJOIN3
SUBROUTINE AJOIN3(RELPG,IDBSIZ,LINCON,ITYPE,ISENSE,JOINT,IDIM22,
+KLASS,IOVER,KBOUT,PL,PR)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),LINCON(2),IDIM22(2)
INTEGER ITYPE(2),ISENSE(2),JOINT(2),PL(2),PR(2)
C
C CALC POSITIONS OF CONTIGS RELATIVE TO FIXED GEL
DO 20 I=1,2
C R+
IF((ITYPE(I).NE.-1).OR.(ISENSE(I).NE.1))GO TO 11
PL(I)=-1*JOINT(I)+2
PR(I)=PL(I)+RELPG(LINCON(I))-1
GO TO 20
C L+
11 CONTINUE
IF((ITYPE(I).NE.1).OR.(ISENSE(I).NE.1))GO TO 12
PL(I)=JOINT(I)
PR(I)=PL(I)+RELPG(LINCON(I))-1
GO TO 20
C R-
12 CONTINUE
IF((ITYPE(I).NE.-1).OR.(ISENSE(I).NE.-1))GO TO 13
PR(I)=JOINT(I)+IDIM22(I)-1
PL(I)=PR(I)-RELPG(LINCON(I))+1
GO TO 20
C L-
13 CONTINUE
PR(I)=IDIM22(I)-JOINT(I)+1
PL(I)=PR(I)-RELPG(LINCON(I))+1
20 CONTINUE
C LENGTH OF OVERLAP
IOVER=MIN(PR(1),PR(2))-MAX(PL(1),PL(2))+1
WRITE(KBOUT,1002)IOVER
1002 FORMAT(' Length of overlap between the contigs=',I6)
C
C CLASS NUMBER 1-16
KLASS=1
IF(ITYPE(1).EQ.1)KLASS=KLASS+8
IF(ISENSE(1).EQ.-1)KLASS=KLASS+4
IF(ITYPE(2).EQ.1)KLASS=KLASS+2
IF(ISENSE(2).EQ.-1)KLASS=KLASS+1
C WRITE(KBOUT,1001)KLASS
C1001 FORMAT(' CLASS OF JOIN=',I6)
RETURN
END
C ALINE
C
C ROUTINE TO LINE UP 2 SEQS.
C IT SLIDES,REMOVES OVERLAPPING MATCHES,
C SORTS MATCHES INTO ASCENDING ORDER, THEN DOES DOES A TOPOLOGICAL
C CHECK, AND THEN PRODUCES 2 LINED UP SEQS WITH PADDING CHARS
C VARIABLES
C SEQ1 CONSENSUS
C SEQ2 GEL ORIGINAL IN CORRECT ORIENTATION
C SEQG2 ALIGNED GEL
C SEQC2 ALIGNED CONSENSUS
C SEQ3 SAVED GEL RAW DATA
C ISAV1,2,3 STORE MATCHES AND POSITIONS
C IDSAV NUMBER ISAV'S
C IDC LENGTH OF INPUT SEQ1
C IDIM2 LENGTH OF INPUT SEQ2
C IDOUT LENGTH OF OUTPUT ALIGNED SEQ1
C IDIM2 LENGTH OF SEQ2 ON OUTPUT AFTER ALIGNMENT
C MINSLI MIN MATCH FOR SLIDING
C IFAIL FLAG TO SHOW IF ALIGNMENT FAILED DUE TO TOO
C MANY MISMATCHES OR TOPOLIGICAL CHECK OR TOO MANY OR TOO MANY
C PADDING CHARS. 1=FAIL,0=PASS
C
SUBROUTINE ALINE(SEQ1,SEQ2,SEQG2,SEQC2,ISAV1,ISAV2,ISAV3,
+IDSAV,IDC,IDIM2,IDOUT,IC1,IG1,MINSLI,JOINT,
+ITOTPC,ITOTPG,IFAIL,ITYPE,MAXPC,MAXPG,PERMAX,KBOUT,SEQ3,MAXGEL,
+PERCM,LENO,ISHOW)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDC),SEQ2(IDIM2),SEQG2(IDOUT),SEQC2(IDOUT)
CHARACTER SEQ3(MAXGEL)
INTEGER ISAV1(IDSAV),ISAV2(IDSAV),ISAV3(IDSAV)
MINSLT=MINSLI
C SAVE SEQ2
CALL SQCOPY(SEQ2,SEQ3,IDIM2)
CALL MSTLKL(SEQ3,IDIM2)
IFAIL=1
C FIND MATCHES
IPP=IDSAV
CALL SLIDER(SEQ1,IDC,SEQ3,IDIM2,IC1,IG1,MAXPG,MAXPC,MINSLT,
+ISAV1,ISAV2,ISAV3,IPP)
IF(IPP.GT.IDSAV)RETURN
IF(IPP.LT.1)RETURN
CALL REMOVL(ISAV2,ISAV3,ISAV1,IPP)
CALL BUB3AS(ISAV2,ISAV3,ISAV1,IPP)
C DO TOPOLOGICAL CHECK
CALL TPCHEK(ISAV2,ISAV3,ISAV1,IPP)
C
C added next routine 27-2-93
C
CALL UPCHEK(ISAV2,ISAV3,ISAV1,IPP)
CALL LINEUP(SEQ2,SEQ1,SEQG2,SEQC2,IDC,IDIM2,IDOUT,ISAV3,ISAV2,
+ISAV1,IPP,ITOTPC,ITOTPG,JOINT,ITYPE,KBOUT,MAXGEL,IFAIL)
IF(ITOTPC.GT.MAXPC)IFAIL=1
IF(ITOTPG.GT.MAXPG)IFAIL=1
IF(IFAIL.NE.0)RETURN
C IDIM2 IS NOW LENGTH OF ALIGNED GEL
CALL DALIGN(SEQC2,SEQG2,SEQ3,MAXGEL,IDOUT,IDIM2,JOINT,
+ITYPE,PERCM,KBOUT,IFAIL,LENO,PERMAX,ISHOW)
IF(IFAIL.NE.0)RETURN
IF(ISHOW.EQ.1) THEN
WRITE(KBOUT,1052)PERCM,ITOTPC,ITOTPG
1052 FORMAT(' Percent mismatch=',F4.1,', pads in contig=',I3,
+ ', pads in gel=',I3)
END IF
END
SUBROUTINE ARCSER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,KBIN,KBOUT,IDEVN,
+IHELPS,IHELPE,FILEH,IDEVH)
CHARACTER FILEH*(*)
C AUTHOR: RODGER STADEN
C SEARCHES FOR ARCHIVE NAMES
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER NAME1*16,NAME2*16
10 CONTINUE
L = 0
CALL GTSTR('Archive name',' ',NAME1,L,KBOUT,KBIN,INFLAG)
IF(L.EQ.0) RETURN
CALL CCASE(NAME1,1)
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.3) RETURN
IF(NAME1(1:1).EQ.' ') RETURN
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
DO 100 I=1,NGELS
CALL READN(IDEVN,I,NAME2)
IF(NAME1.EQ.NAME2) THEN
WRITE(KBOUT,1003)NAME2,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)
1003 FORMAT(' ',A,2X,I4,2X,I6,2X,I6,2X,I6,2X,I6/)
GO TO 10
END IF
100 CONTINUE
WRITE(KBOUT,1004)NAME1
1004 FORMAT(' ',A,' Not in database')
GO TO 10
END
SUBROUTINE AUTOCN(SEQ1,IDIM,GEL,IDIMG,ILEFTS,ILC,IPOSC,
+IPOSG,ISENSE,LLINO,IMATC,IFCOMP,MINMAT,POSNS,WORDP,WORDN,
+CONST,LENGTH,LPOWRC,KBOUT,MATCH,MAXGEL,MAXGLM,GELCOP,GELN,
+SAVPS,SAVPG,SAVL,MAXSAV,CENDS,NENDS,MAXCON,
+SEQG2,SEQC2,SEQ4,IDOUT,IDIM22,ITOTPG,ITOTPC,JOINT,IFAIL,
+ITYPE,MAXPC,MAXPG,PERMAX,MINSLI,SEQG3,SEQC3,KFAIL,CSTART,
+JOBC,PERMIS,LENO,ISHOW)
C AUTHOR: RODGER STADEN
C changed 29-11-90 to make first in list of alignments the best
INTEGER ILEFTS(2),ILC(2),IPOSC(2),IPOSG(2),ISENSE(2),LLINO(2)
INTEGER POSNS(IDIM),GELN(MAXGLM),WORDP(LPOWRC),SAVPS(MAXSAV)
INTEGER SAVPG(MAXSAV),SAVL(MAXSAV)
INTEGER WORDN(LPOWRC)
CHARACTER GELCOP(MAXGLM),MATCH(MAXGLM)
INTEGER CENDS(MAXCON),NENDS(MAXCON)
INTEGER CONST(LENGTH)
CHARACTER SEQ1(IDIM),GEL(MAXGLM)
C
CHARACTER SEQG2(MAXGLM,2),SEQC2(MAXGLM,2),SEQ4(MAXGLM)
INTEGER IDOUT(2),IDIM22(2),ITOTPG(2),ITOTPC(2),JOINT(2)
INTEGER IFAIL(2),ITYPE(2)
PARAMETER (MAXC = 100)
CHARACTER SEQG3(MAXGLM),SEQC3(MAXGLM)
INTEGER JLEFTS(MAXC),JLC(MAXC),JPOSC(MAXC),JPOSG(MAXC)
INTEGER JSENSE(MAXC),JLLINO(MAXC),CSTART,START
REAL PERMIS(2)
C
C jobc tells how to update the hash tables:
C 0 means dont do anything because the consensus hasnt changed
C 1 means add the last contig because a new one has been stuck on the end
C 2 means do the whole consensus
C
IFAIL(1) = 1
IFAIL(2) = 1
KFAIL = 0
C 23-8-90 Need to deal with failures in a better way. Problem is
C case where overlaps are found but fail to align. In future
C signal them with new variable KFAIL which will be nonzero
C if any alignment fails.
C 29-11-90 Changed sorting of overlaps so that the best is first in the
C list returned to caller.
C SAVE GEL
CALL SQCOPY(GEL,GELCOP,IDIMG)
C COUNT NUMBER OF CONTIGS THAT MATCH
IMATC=0
IDCEND=MAXCON
CALL BUSY(KBOUT)
CALL FNDCON(SEQ1,IDIM,CENDS,NENDS,IDCEND,MAXCON,KBOUT)
IF (JOBC.NE.0) THEN
START = 1
IF(JOBC.EQ.1) START = CENDS(IDCEND)
CALL ENCOF(SEQ1(START),IDIM-START+1,CONST,CSTART,LENGTH,
+POSNS(START))
CALL ENCONN(POSNS,IDIM,WORDP,WORDN,LPOWRC,LENGTH,START)
END IF
1 CONTINUE
ISTRAN=1
2 CONTINUE
CALL MSTLKL(GEL,IDIMG)
CALL ENCOF(GEL,IDIMG,CONST,CSTART,LENGTH,GELN)
IDSAV=MAXSAV
CALL CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LPOWRC,
+SAVPG,SAVPS,SAVL,
+IDSAV,SEQ1,GEL,MINMAT,IFCOMP,KBOUT)
IF(IFCOMP.NE.0)RETURN
IF(IDSAV.NE.0)THEN
CALL ADISM4(IDIM,IDIMG,SAVPS,SAVPG,IDSAV,CENDS,NENDS,
+ IDCEND,MAXCON,JLEFTS,JLC,JPOSC,JPOSG,JSENSE,JLLINO,
+ IMATC,ISTRAN,MAXC,KBOUT)
END IF
ISTRAN=ISTRAN+1
IF(ISTRAN.EQ.2) THEN
CALL SQCOPY(GELCOP,GEL,IDIMG)
CALL SQREV(GEL,IDIMG)
CALL SQCOM(GEL,IDIMG)
GO TO 2
END IF
CALL SQCOPY(GELCOP,GEL,IDIMG)
KSENSE = 0
WRITE(KBOUT,*)'Total matches found',IMATC
IF(IMATC.EQ.0) THEN
IFAIL(1) = 0
RETURN
END IF
JMATC = 0
DO 100 I = 1,IMATC
IF(JSENSE(I).EQ.-1) THEN
IF(KSENSE.EQ.0) THEN
CALL SQREV(GEL,IDIMG)
CALL SQCOM(GEL,IDIMG)
KSENSE = 1
END IF
END IF
JDIM22 = IDIMG
JDOUT = MAXGEL
IDSAV = MAXSAV
WRITE(KBOUT,*)'Trying to align with contig',JLLINO(I)
CALL ALINE(SEQ1(JLEFTS(I)),GEL,SEQG3,SEQC3,
+ SAVPS,SAVPG,SAVL,IDSAV,JLC(I),JDIM22,JDOUT,
+ JPOSC(I),JPOSG(I),MINSLI,JJOINT,JTOTPC,JTOTPG,
+ JFAIL,JTYPE,MAXPC,MAXPG,PERMAX,KBOUT,SEQ4,MAXGEL,PERMS,LENO,
+ ISHOW)
IF(JFAIL.EQ.0) THEN
JMATC = JMATC + 1
IF(JMATC.EQ.1) THEN
C Save in elements 1
CALL COPYM(JLEFTS(I),ILEFTS(1),JLC(I),ILC(1),
+ JPOSC(I),IPOSC(1),JSENSE(I),ISENSE(1),
+ JLLINO(I),LLINO(1),JJOINT,JOINT(1),JTOTPC,
+ ITOTPC(1),JTOTPG,ITOTPG(1),JTYPE,ITYPE(1),
+ JDOUT,IDOUT(1),JDIM22,IDIM22(1),
+ SEQG3,SEQG2(1,1),SEQC3,SEQC2(1,1),
+ PERMS,PERMIS(1))
IFAIL(1) = 0
ELSE IF(JMATC.EQ.2) THEN
IF(PERMS.LT.PERMIS(1)) THEN
C Better match so save in elements 1, so copy 1 to 2 first
CALL COPYM(ILEFTS(1),ILEFTS(2),ILC(1),ILC(2),
+ IPOSC(1),IPOSC(2),ISENSE(1),ISENSE(2),
+ LLINO(1),LLINO(2),JOINT(1),JOINT(2),ITOTPC(1),
+ ITOTPC(2),ITOTPG(1),ITOTPG(2),ITYPE(1),ITYPE(2),
+ IDOUT(1),IDOUT(2),IDIM22(1),IDIM22(2),
+ SEQG2(1,1),SEQG2(1,2),SEQC2(1,1),SEQC2(1,2),
+ PERMIS(1),PERMIS(2))
IFAIL(2) = 0
C Now save in 1
CALL COPYM(JLEFTS(I),ILEFTS(1),JLC(I),ILC(1),
+ JPOSC(I),IPOSC(1),JSENSE(I),ISENSE(1),
+ JLLINO(I),LLINO(1),JJOINT,JOINT(1),JTOTPC,
+ ITOTPC(1),JTOTPG,ITOTPG(1),JTYPE,ITYPE(1),
+ JDOUT,IDOUT(1),JDIM22,IDIM22(1),
+ SEQG3,SEQG2(1,1),SEQC3,SEQC2(1,1),
+ PERMS,PERMIS(1))
ELSE
C Save in element 2
CALL COPYM(JLEFTS(I),ILEFTS(2),JLC(I),ILC(2),
+ JPOSC(I),IPOSC(2),JSENSE(I),ISENSE(2),
+ JLLINO(I),LLINO(2),JJOINT,JOINT(2),JTOTPC,
+ ITOTPC(2),JTOTPG,ITOTPG(2),JTYPE,ITYPE(2),
+ JDOUT,IDOUT(2),JDIM22,IDIM22(2),
+ SEQG3,SEQG2(1,2),SEQC3,SEQC2(1,2),
+ PERMS,PERMIS(2))
IFAIL(2) = 0
END IF
ELSE
IF(PERMS.LT.PERMIS(1)) THEN
C Better match so save in elements 1, so copy 1 to 2 first
CALL COPYM(ILEFTS(1),ILEFTS(2),ILC(1),ILC(2),
+ IPOSC(1),IPOSC(2),ISENSE(1),ISENSE(2),
+ LLINO(1),LLINO(2),JOINT(1),JOINT(2),ITOTPC(1),
+ ITOTPC(2),ITOTPG(1),ITOTPG(2),ITYPE(1),ITYPE(2),
+ IDOUT(1),IDOUT(2),IDIM22(1),IDIM22(2),
+ SEQG2(1,1),SEQG2(1,2),SEQC2(1,1),SEQC2(1,2),
+ PERMIS(1),PERMIS(2))
IFAIL(2) = 0
C Now save in 1
CALL COPYM(JLEFTS(I),ILEFTS(1),JLC(I),ILC(1),
+ JPOSC(I),IPOSC(1),JSENSE(I),ISENSE(1),
+ JLLINO(I),LLINO(1),JJOINT,JOINT(1),JTOTPC,
+ ITOTPC(1),JTOTPG,ITOTPG(1),JTYPE,ITYPE(1),
+ JDOUT,IDOUT(1),JDIM22,IDIM22(1),
+ SEQG3,SEQG2(1,1),SEQC3,SEQC2(1,1),
+ PERMS,PERMIS(1))
ELSE IF(PERMS.LT.PERMIS(2)) THEN
C Save in element 2
CALL COPYM(JLEFTS(I),ILEFTS(2),JLC(I),ILC(2),
+ JPOSC(I),IPOSC(2),JSENSE(I),ISENSE(2),
+ JLLINO(I),LLINO(2),JJOINT,JOINT(2),JTOTPC,
+ ITOTPC(2),JTOTPG,ITOTPG(2),JTYPE,ITYPE(2),
+ JDOUT,IDOUT(2),JDIM22,IDIM22(2),
+ SEQG3,SEQG2(1,2),SEQC3,SEQC2(1,2),
+ PERMS,PERMIS(2))
END IF
END IF
ELSE
KFAIL = 1
END IF
100 CONTINUE
IMATC = MIN(2,JMATC)
END
SUBROUTINE BREAKC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,KBIN,KBOUT,IDEVR,IDEVW,IDEVN,
+IHELPS,IHELPE,IHELP1,IHELP2,FILEH,IDEVH,IOK)
CHARACTER FILEH*(*)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER CHAINL,GCLIN
EXTERNAL CHAINL,GCLIN
C ROUTINE TO BREAK A CONTIG INTO 2
C LEFT GEL OF NEW RIGHT CONTIG IS IR
C RIGHT GEL OF NEW LEFT CONTIG IS IL
C LEFT GEL OF OLD LEFT CONTIG IS ILO
C CONTIG LINE OF OLD CONTIG IS NCONTO
C CONTIG LINE OF NEW RIGHT CONTIG IS NCONTR
C CONTIG LINE OF NEW LEFT CONTIG IS NCONTO
C LENGTH OF OLD CONTIG IS LCONTO
IOK = 1
NCONTR = IDBSIZ - NCONTS - 1
IF(NCONTR.LE.NGELS) THEN
WRITE(KBOUT,*)'Insufficient space for new contig line.'
WRITE(KBOUT,*)'Increase database size with copy'
RETURN
END IF
10 CONTINUE
MN = 0
MX = NGELS
IR = 0
CALL GETINT(MN,MX,IR,
+ 'Number of gel reading that will become a left end',
+ IVAL,KBIN,KBOUT,
+ IHELPS,IHELPE,FILEH,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IF(IVAL.LT.1) RETURN
IR = IVAL
IL = LNBR(IR)
IF(IL.EQ.0)THEN
WRITE(KBOUT,*)'Gel number',IR,' is already a left end'
GO TO 10
END IF
ILO = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,IR)
IF(ILO.EQ.0)THEN
WRITE(KBOUT,*)
+'Problem with this contig. Check logical consistency'
WRITE(KBOUT,*)'of database. Break not made'
RETURN
END IF
NCONTO = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,ILO)
IF(NCONTO.EQ.0)THEN
WRITE(KBOUT,*)'No contig line for this contig. Check logical'
WRITE(KBOUT,*)'consistency of database. Break not made'
RETURN
END IF
LCONTO = RELPG(NCONTO)
IF(LCONTO.LT.1)THEN
WRITE(KBOUT,*)'Contig has zero length. Break not made'
RETURN
END IF
CALL CBREAK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,KBOUT,IDEVR,IDEVW,IDEVN,IR,IL,ILO,NCONTO,NCONTR,IOK)
END
SUBROUTINE CBREAK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,KBOUT,IDEVR,IDEVW,IDEVN,IR,IL,ILO,NCONTO,NCONTR,IOK)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER CLEN
EXTERNAL CLEN
C ROUTINE TO BREAK A CONTIG INTO 2
C LEFT GEL OF NEW RIGHT CONTIG IS IR
C RIGHT GEL OF NEW LEFT CONTIG IS IL
C LEFT GEL OF OLD LEFT CONTIG IS ILO
C CONTIG LINE OF OLD CONTIG IS NCONTO
C CONTIG LINE OF NEW RIGHT CONTIG IS NCONTR
C CONTIG LINE OF NEW LEFT CONTIG IS NCONTO
C LENGTH OF OLD CONTIG IS LCONTO
IOK = 1
NCONTS = NCONTS + 1
C WRITE LAST LINE OF DB
WRITE(KBOUT,*)'Increasing number of contigs by 1'
CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,NGELS,NCONTS)
C MAKE NEW CONTIG A COPY OF OLD
RELPG(NCONTR) = RELPG(NCONTO)
LNGTHG(NCONTR) = LNGTHG(NCONTO)
LNBR(NCONTR) = IR
RNBR(NCONTR) = RNBR(NCONTO)
WRITE(KBOUT,*)'Writing new right contig line'
CALL WRITER(IDEVR,NCONTR,RELPG(NCONTR),LNGTHG(NCONTR),
+LNBR(NCONTR),RNBR(NCONTR))
C NEED LENGTH FOR OLD LEFT CONTIG
RNBR(IL) = 0
L = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,ILO)
IF(L.LT.1)THEN
WRITE(KBOUT,*)
+ 'New left contig has zero length. Break not made'
RETURN
END IF
RELPG(NCONTO) = L
RNBR(NCONTO) = IL
C DO CONTIG LINE FOR NEW LEFT CONTIG
WRITE(KBOUT,*)'Writing new left contig line'
WRITE(KBOUT,*)'New length=',RELPG(NCONTO)
WRITE(KBOUT,*)'New right gel=',RNBR(NCONTO)
CALL WRITER(IDEVR,NCONTO,RELPG(NCONTO),LNGTHG(NCONTO),
+LNBR(NCONTO),RNBR(NCONTO))
C DO GEL LINE FOR RIGHT GEL OF NEW LEFT CONTIG
WRITE(KBOUT,*)'Writing new right gel of left contig'
WRITE(KBOUT,*)'Gel number=',IL
CALL WRITER(IDEVR,IL,RELPG(IL),LNGTHG(IL),
+LNBR(IL),RNBR(IL))
C DO GEL LINE FOR NEW RIGHT CONTIG
LNBR(IR) = 0
WRITE(KBOUT,*)'Writing new left gel of right contig'
WRITE(KBOUT,*)'Gel number=',IR
CALL WRITER(IDEVR,IR,RELPG(IR),LNGTHG(IR),
+LNBR(IR),RNBR(IR))
C NOW SHIFT
I = 1 - RELPG(IR)
WRITE(KBOUT,*)'Shifting gels in right contig by distance=',I
CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR,
+IDBSIZ,IR,NCONTR,I)
WRITE(KBOUT,*)'Right contig length=',RELPG(NCONTR)
WRITE(KBOUT,*)'Break completed'
IOK = 0
END
C BUBBL3
C SUBROUTINE TO SORT INTEGER ARRAY (LIST) INTO ASCENDING ORDER
C
SUBROUTINE BUBBL3(LIST,LISTEL,LISTAL,IDIM)
C AUTHOR: RODGER STADEN
INTEGER LIST(IDIM),LISTEL(IDIM),LISTAL(IDIM)
C
C SET POINTERS TO ZERO
I=0
J=0
C
10 CONTINUE
C
C SET I=J IF WE HAVE JUST CORRECTLY POSITIONED AN ELEMENT
IF(J.GT.I)I=J
C
C INCREMENT POINTER TO NEXT ELEMENT
I=I+1
C TEST FOR END OF ARRAY
IF(I.EQ.IDIM)RETURN
C
20 CONTINUE
C
C COMPARE ADJACENT ELEMENTS
IF(LIST(I).GE.LIST(I+1))GO TO 10
C
C FIRST MOVE THIS ELEMENT? IF SO SET POINTER TO ITS INITIAL POSITION
IF(J.LT.I)J=I
C
C EXCHANGE ADJACENT ELEMENTS
ITEMP=LIST(I)
LIST(I)=LIST(I+1)
LIST(I+1)=ITEMP
C
ITEMP=LISTEL(I)
LISTEL(I)=LISTEL(I+1)
LISTEL(I+1)=ITEMP
ITEMP=LISTAL(I)
LISTAL(I)=LISTAL(I+1)
LISTAL(I+1)=ITEMP
C
C
C DECREMENT BACK THRU LIST WITH THIS ELEMENT
IF(I.GT.1)I=I-1
C
GO TO 20
END
SUBROUTINE CCTA(SEQ,ID)
CHARACTER SEQ(ID),COM,AS
SAVE COM,AS
DATA COM/','/,AS/'*'/
DO 10 I = 1,ID
IF(SEQ(I).EQ.COM) SEQ(I) = AS
10 CONTINUE
END
C
C CFGEL new version 15-4-92
C
C ROUTINE TO COMPARE A STRING OF WORD NUMBERS FOR A GEL WITH A SERIES
C OF ARRAYS REPRESENTING A CONSENSUS SEQUENCE. WE LOOK FOR OCCURENCES
C OF PAIRS OF WORDS (EACH WORD IS LENGTH CHARS LONG AND SO TOTAL MATCH IS
C 2*LENGTH CHARS LONG). THE ARRAYS SENT ARE OF SIZE 4**LENGTH (LE4)
SUBROUTINE CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LE4,
+SAVPG,
+SAVPS,SAVL,IDSAV,SEQ,GEL,MINMAT,IFAIL,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),GEL(IDIMG)
INTEGER POSNS(IDIM),WORDP(LE4),SAVPS(IDSAV),SAVPG(IDSAV)
INTEGER GELN(IDIMG),SAVL(IDSAV)
INTEGER WORDN(LE4)
INTEGER W1,W2,PS1W1,PS1W2
INTEGER CTONUM
EXTERNAL CTONUM
C go thru the gel reading looking at words to see if they exist in the consensus
IDSAVM=IDSAV
IDSAV=0
C length of a pair of words is LX2
LX2=2*LENGTH
C number of pairs of words in gel reading is NW
NW=IDIMG-(LX2-1)
C loop for each words start point
DO 200 I=1,NW
C IS THIS WORD A ZERO?
W1=GELN(I)
IF(W1.EQ.0)GO TO 200
C POINT TO NEXT WORD OF PAIR
W2=GELN(I+LENGTH)
IF(W2.EQ.0)GO TO 200
C DOES W1 EXIST IN SEQ?
N1S1=WORDN(W1)
IF(N1S1.EQ.0)GO TO 200
N2S1=WORDN(W2)
IF(N2S1.EQ.0)GO TO 200
C BOTH EXIST, SO POINT TO THE FIRST + LENGTH
PS1W1=WORDP(W1)+LENGTH
C LOOP FOR ALL PAIRS
C there are N1S1 occurrences of word 1 and N2S1 of word 2 in consensus
C compare the positions of all pairs to see if they are LENGTH apart
DO 50 J=1,N1S1
C POINT TO FIRST W2 BECAUSE IT IS IN WORDP NOT POSNS
PS1W2=WORDP(W2)
C LOOP FOR THESE
DO 40 K=1,N2S1
C ARE THIS PAIR LENGTH APART?
N=PS1W1-PS1W2
IF(N.NE.0)GO TO 20
C THEY ARE SO, IF REQUIRED LOOK FOR REST OF MATCH
LMAT=LX2
C
C new code
C
IPC = PS1W2 + LENGTH - 1
IPG = I + LX2 - 1
16 CONTINUE
IF (LMAT.LT.MINMAT) THEN
IPC = IPC + 1
IPG = IPG + 1
IF(IPG.GT.IDIMG)GO TO 20
IF(IPC.GT.IDIM)GO TO 20
IF(CTONUM(SEQ(IPC)).NE.CTONUM(GEL(IPG)))GO TO 20
LMAT=LMAT+1
GO TO 16
END IF
C
C match found, is it an extension of a previous one ?
C
C WRITE(*,*)I,PS1W1-LENGTH
IF (IDSAV.GT.0) THEN
IF (I-SAVPG(IDSAV).EQ.PS1W1-LENGTH-SAVPS(IDSAV)) GO TO 20
END IF
IDSAV = IDSAV + 1
IF (IDSAV.GT.IDSAVM) THEN
WRITE(KBOUT,1000)IDSAVM
1000 FORMAT(' More than ',I6,' matches. Search aborted')
IFAIL = 1
RETURN
END IF
C WRITE(*,*)IDSAV
SAVPG(IDSAV) = I
SAVPS(IDSAV) = PS1W1 - LENGTH
20 CONTINUE
C POINT TO NEXT W2
PS1W2=POSNS(PS1W2)
40 CONTINUE
C ALL TRIED THIS PS1W1, TRY NEXT
PS1W1=POSNS(PS1W1-LENGTH)+LENGTH
50 CONTINUE
200 CONTINUE
IFAIL=0
RETURN
END
C
C CFGEL old version (before 15-4-92)
C
C ROUTINE TO COMPARE A STRING OF WORD NUMBERS FOR A GEL WITH A SERIES
C OF ARRAYS REPRESENTING A CONSENSUS SEQUENCE. WE LOOK FOR OCCURENCES
C OF PAIRS OF WORDS (EACH WORD IS LENGTH CHARS LONG AND SO TOTAL MATCH IS
C 2*LENGTH CHARS LONG). THE ARRAYS SENT ARE OF SIZE 4**LENGTH (LE4)
SUBROUTINE CFGELO(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LE4,
+SAVPG,
+SAVPS,SAVL,IDSAV,SEQ,GEL,MINMAT,IFAIL,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),GEL(IDIMG)
INTEGER POSNS(IDIM),WORDP(LE4),SAVPS(IDSAV),SAVPG(IDSAV)
INTEGER GELN(IDIMG),SAVL(IDSAV)
INTEGER WORDN(LE4)
INTEGER W1,W2,PS1W1,PS1W2
INTEGER CTONUM
EXTERNAL CTONUM
IDSAVM=IDSAV
IDSAV=0
C LENGTH OF PAIR OF WORDS
LX2=2*LENGTH
C NUMBER OF PAIRS OF WORDS OF LENGTH LENGTH IN GEL
NW=IDIMG-(LX2-1)
C LOOP FOR EACH START POINT
DO 200 I=1,NW
C IS THIS WORD A ZERO?
W1=GELN(I)
IF(W1.EQ.0)GO TO 200
C POINT TO NEXT WORD OF PAIR
W2=GELN(I+LENGTH)
IF(W2.EQ.0)GO TO 200
C DOES W1 EXIST IN SEQ?
N1S1=WORDN(W1)
IF(N1S1.EQ.0)GO TO 200
N2S1=WORDN(W2)
IF(N2S1.EQ.0)GO TO 200
C BOTH EXIST, SO POINT TO THE FIRST + LENGTH
PS1W1=WORDP(W1)+LENGTH
C LOOP FOR ALL PAIRS
DO 50 J=1,N1S1
C POINT TO FIRST W2 BECAUSE IT IS IN WORDP NOT POSNS
PS1W2=WORDP(W2)
C LOOP FOR THESE
DO 40 K=1,N2S1
C ARE THIS PAIR LENGTH APART?
N=PS1W1-PS1W2
IF(N.NE.0)GO TO 20
C THEY ARE SO, IF REQUIRED LOOK FOR REST OF MATCH
LMAT=LX2
IF(MINMAT.EQ.LX2)GO TO 15
IPC=PS1W2+LENGTH
IPG=I+LX2
16 CONTINUE
IF(IPG.GT.IDIMG)GO TO 15
IF(IPC.GT.IDIM)GO TO 15
C
IF(CTONUM(SEQ(IPC)).NE.CTONUM(GEL(IPG)))GO TO 15
LMAT=LMAT+1
IPC=IPC+1
IPG=IPG+1
GO TO 16
15 CONTINUE
C IS MATCH LONG ENOUGH?
IF(LMAT.LT.MINMAT)GO TO 20
IDSAV=IDSAV+1
IF(IDSAV.LE.IDSAVM)GO TO 18
WRITE(KBOUT,1000)IDSAVM
1000 FORMAT(' More than ',I6,' matches. Search aborted')
IFAIL=1
RETURN
18 CONTINUE
SAVL(IDSAV)=LMAT
SAVPG(IDSAV)=I
SAVPS(IDSAV)=PS1W1-LENGTH
20 CONTINUE
C POINT TO NEXT W2
PS1W2=POSNS(PS1W2)
40 CONTINUE
C ALL TRIED THIS PS1W1, TRY NEXT
PS1W1=POSNS(PS1W1-LENGTH)+LENGTH
50 CONTINUE
200 CONTINUE
IFAIL=0
RETURN
END
INTEGER FUNCTION CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,IIN)
C AUTHOR: RODGER STADEN
C RETURNS CONTIG LEFT GEL NUMBER OR ZERO FOR ERROR
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
I = IIN
J = I
CHAINL = 0
10 CONTINUE
IF(I.NE.0)THEN
J = I
I = LNBR(I)
IF(I.EQ.IIN)RETURN
GO TO 10
END IF
CHAINL = J
END
C
C CHANGE
C
C ROUTINE TO EXCHANGE ALL THE CHARS IN A CHARACTER ARRAY USING
C A PAIR OF LOOKUP ARRAYS SENT BY CALLING PROG
C
C
SUBROUTINE CHANGE(SEQ,IDIM1,CHAR1,CHAR2,IDIM2,ELSE)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1)
CHARACTER CHAR1(IDIM2),CHAR2(IDIM2)
CHARACTER ELSE
DO 100 I=1,IDIM1
C
DO 50 J=1,IDIM2
C
IF(SEQ(I).NE.CHAR1(J))GO TO 50
C MATCH SO EXCHANGE CHARS
SEQ(I)=CHAR2(J)
GO TO 100
50 CONTINUE
SEQ(I)=ELSE
100 CONTINUE
C
RETURN
END
CHARACTER*1 FUNCTION CHARSL(I)
CHARACTER C*6
SAVE C
DATA C/'ctag*-'/
CHARSL = C(I:I)
END
CHARACTER*1 FUNCTION CHARSU(I)
CHARACTER C*6
SAVE C
DATA C/'CTAG*-'/
CHARSU = C(I:I)
END
INTEGER FUNCTION CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,IIN)
C AUTHOR: RODGER STADEN
C RETURNS CONTIG LEFT GEL NUMBER OR ZERO FOR ERROR
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
I = IIN
CLEN= 0
LEN = 0
10 CONTINUE
IF(I.NE.0)THEN
LEN = MAX(LEN,(RELPG(I) + ABS(LNGTHG(I)) - 1))
I = RNBR(I)
IF(I.EQ.IIN)RETURN
GO TO 10
END IF
CLEN = LEN
END
SUBROUTINE CLIST(GELNO1,LINNO1,IGEL1,GELNO2,LINNO2,
+IGEL2,GELNOS,GELSTR,GELEND,IUNIQ1,IUNIQ,KBOUT,IOK)
C AUTHOR: RODGER STADEN
INTEGER GELNO1(IGEL1),GELNO2(IGEL2),GELNOS(IUNIQ1)
INTEGER GELSTR(IUNIQ1),GELEND(IUNIQ1)
INTEGER LINNO1(IGEL1),LINNO2(IGEL2)
EXTERNAL INLIST
C GELNOS === GEL NUMBERS (GELNOS)
C GELSTR === GEL START LINES
C GELEND === GEL END LINES
C GELNO === GEL NUMBERS PER STRIP
C LINNO === GEL LINE NUMBERS PER STRIP
C IGEL === NUMBER OF GELS PER STRIP
C LINENO === CURRENT LINE NUMBER
C
C WHICH GELS IN GELNO2 DO NOT APPEAR IN GELNO1
C IE HAVE STARTED IN GELNO2
DO 20 I=1,IGEL2
MATCH=INLIST(GELNO1,IGEL1,GELNO2(I))
IF(MATCH.EQ.0)THEN
C NO MATCH SO NEW
C PUT IN GELSTR
IUNIQ=IUNIQ+1
GELNOS(IUNIQ)=GELNO2(I)
GELSTR(IUNIQ)=LINNO2(I)
END IF
20 CONTINUE
C WHICH GELS IN GELNO1 DO NOT APPEAR IN GELNO2
C IE WHICH HAVE ENDED IN GELNO1
DO 10 I=1,IGEL1
MATCH=INLIST(GELNO2,IGEL2,GELNO1(I))
IF(MATCH.EQ.0)THEN
C NO MATCH SO MUST HAVE ENDED
C WHERE IS IT STORED IN GELNOS?
MATCH=INLIST(GELNOS,IUNIQ,GELNO1(I))
IF(MATCH.NE.0)THEN
GELEND(MATCH)=LINNO1(I)
GO TO 10
END IF
C ERROR
WRITE(KBOUT,1000)GELNO1(I)
1000 FORMAT( ' Error: gel number ',I5,
+ ' expected but not found in list')
IOK = 1
RETURN
END IF
10 CONTINUE
IOK = 0
RETURN
END
C
C CMPLMT
C
C SUBROUTINE TO REVERSE AND COMPLEMENT GELS AND DATA BASE
C THE POSITIONS OF THE RIGHT ENDS OF GELS ARE FIRST STORED
C IN RELPG THEN WE DO A BUBBLE SORT ON THESE POSITIONS
C UPDATING RELATIONSHIPS AS WE GO
C ALSO SEQUENCES ARE COMPLEMENTED, SIGNS OF LENGTH ARE
C MULTIPLIED BY -1 AND THE CONTIG LINE IS ALTERED
SUBROUTINE CMPLMT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,GEL,IDBSIZ,KBOUT,IDEVR,IDEVW,MAXGEL)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL)
INTEGER X
C
WRITE(KBOUT,1000)LLINO
1000 FORMAT( ' Complementing contig',I6)
C CHAIN THRU AND PUT RIGHT ENDS IN RELPG
N=LLINO
10 CONTINUE
RELPG(N)=RELPG(N)+(ABS(LNGTHG(N)))-1
IF(RNBR(N).EQ.0)GO TO 20
N=RNBR(N)
GO TO 10
20 CONTINUE
C
C NOW EFFECTIVELY BUBBLE SORT ON RELPG
N=RNBR(LINCON)
GO TO 22
21 CONTINUE
N=NL
IF(I1.GT.0)N=I2
22 CONTINUE
NL=LNBR(N)
IF(NL.EQ.0)GO TO 30
I1=0
23 CONTINUE
IF(RELPG(N).GE.RELPG(NL))GO TO 21
C NOT IN CORRECT ORDER SO CHAIN ALONG UNTIL CORRECT,THEN COME
C BACK TO THIS POINT AND CONTINUE
C IF FIRST MOVE THIS LINE SET POINTER TO CURRENT POSITION
IF(I1.EQ.0)I2=N
I1=1
C
C EXCHANGE NEIGHBOURS. CURRENTLY LOOKING AT N AND ITS LEFT
C NBR, AND THE LEFT NBR IS FURTHER RIGHT THAN N
C FIX UP POINTERS TO LEFT AND RIGHT OF THESE TWO
M=LNBR(NL)
IF(M.NE.0)RNBR(M)=N
M=RNBR(N)
IF(M.NE.0)LNBR(M)=NL
LNBR(N)=LNBR(NL)
LNBR(NL)=N
RNBR(NL)=RNBR(N)
RNBR(N)=NL
C CHAIN BACK THRU LIST WITH THIS LINE
N=RNBR(NL)
IF(N.EQ.0)GO TO 21
C IE END MET
GO TO 23
30 CONTINUE
C FINISH WITH LEFT END IN N
40 CONTINUE
C NOW REVERSE NBRS SO CHAIN BACK RIGHT
NL=RNBR(N)
IF(NL.EQ.0)GO TO 50
RNBR(N)=LNBR(N)
LNBR(N)=NL
N=NL
GO TO 40
50 CONTINUE
C NEED TO FIX UP NEW LEFT END
RNBR(N)=LNBR(N)
LNBR(N)=0
C ALL POINTERS FIXED NOW DO RELATIVE POSITION
C FINISH WITH LEFT END IN N
C SO CHAIN BACK RIGHT
C SAVE RIGHT LINE NUMBER
NL=N
X=RELPG(N)
60 CONTINUE
RELPG(N)=1+(-1*(RELPG(N)-X))
IF(RNBR(N).EQ.0)GO TO 70
N=RNBR(N)
GO TO 60
70 CONTINUE
C NOW FIX CONTIG LINE
LNBR(LINCON)=NL
RNBR(LINCON)=N
C WRITE NEW CONTIG LINE
CALL WRITER(IDEVR,LINCON,RELPG(LINCON),LNGTHG(LINCON),
+LNBR(LINCON),RNBR(LINCON))
C WRITE(IDEVR,REC=LINCON)RELPG(LINCON),LNGTHG(LINCON),LNBR(LINCON),
C 1RNBR(LINCON)
C NOW REVERSE AND COMPLEMENT GELS
N=NL
80 CONTINUE
C READ(IDEVW,REC=N)GEL
CALL READW(IDEVW,N,GEL,MAXGEL)
M=ABS(LNGTHG(N))
CALL SQREV(GEL,M)
CALL SQCOM(GEL,M)
CALL WRITEW(IDEVW,N,GEL,MAXGEL)
C WRITE(IDEVW,REC=N)GEL
C CHANGE SIGNS
LNGTHG(N)=-1*LNGTHG(N)
C WRITE NEW GEL LINE
CALL WRITER(IDEVR,N,RELPG(N),LNGTHG(N),
+LNBR(N),RNBR(N))
C WRITE(IDEVR,REC=N)RELPG(N),LNGTHG(N),LNBR(N),RNBR(N)
C ANY MORE?
N=RNBR(N)
IF(N.NE.0)GO TO 80
C NO MORE
RETURN
END
C CONSEN
C CALCULATES A CONSENSUS USING THE RULES OUTLINED IN THE DOCUMENTATION
C AND SUBROUTINE SUMMER
C UNIT IDEV IS USED FOR OUTPUT
SUBROUTINE CONSEN(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS,
+SEQ1,IDIM1,GEL,IDBSIZ,TEMP,CHRSIZ,MAXGL2,
+KBIN,KBOUT,IDEVW,IDEV,NAMCON,
+IHELPS,IHELPE,FILEH,IDEVH,MAXGEL,IDM,PERCD,IDEVN,LLINO)
CHARACTER FILEH*(*)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),ANS,CHRSIZ
INTEGER LREG,RREG,X,Y,TEMP(CHRSIZ,MAXGL2)
CHARACTER SEQ1(IDIM1)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL)
CHARACTER NAMPRO*(*)
CHARACTER NAMCON*(*)
100 CONTINUE
ISTART=1
NAMCON = ' '
CALL OPENF1(IDEV,NAMCON,1,IOK,KBIN,KBOUT,
+'Name for consensus file',
+IHELPS,IHELPE,FILEH,IDEVH)
IF(IOK.NE.0)RETURN
CALL YESNO(ANS,'Make consensus for whole database',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF(ANS.LT.0) RETURN
IF(ANS.EQ.1)GO TO 150
N=IDBSIZ-NCONTS
CALL BUSY(KBOUT)
DO 110 I=N,IDBSIZ-1
J=LNBR(I)
X=1
Y=RELPG(I)
IF((ISTART+19+Y).GT.IDIM1)THEN
WRITE(KBOUT,1009)IDIM1
1009 FORMAT(
+ ' Maximum consensus length(',I6,') exceeded,',/,
+ ' calculation aborted')
RETURN
END IF
CALL ADDTIT(SEQ1(ISTART),NAMPRO,J,ISTART)
CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ SEQ1(ISTART),Y,GEL,X,Y,J,IDBSIZ,TEMP,CHRSIZ,MAXGL2,
+ IDEVW,MAXGEL,IDM,PERCD)
ISTART=ISTART+Y
110 CONTINUE
ISTART=ISTART-1
CALL YESNO(ANS,'Staden format',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF (ANS.LT.0) RETURN
IF(ANS.EQ.0) THEN
CALL FMTDK(IDEV,SEQ1,ISTART)
ELSE
CALL WRITCF(IDEV,SEQ1,ISTART,NAMPRO,KBOUT,IOK)
END IF
RETURN
150 CONTINUE
CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,NULGEL,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+'Contig identifier',
+IHELPS,IHELPE,FILEH,IDEVH)
IF(IERR.NE.0)GO TO 400
CALL GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+1,RELPG(LINCON),LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT,
+ IHELPS,IHELPE,FILEH,IDEVH,IERR)
IF(IERR.NE.0)GO TO 400
IDIM2=RREG-LREG+1
IF((ISTART+19+IDIM2).GT.IDIM1)THEN
WRITE(KBOUT,1009)IDIM1
RETURN
END IF
CALL BUSY(KBOUT)
CALL ADDTIT(SEQ1(ISTART),NAMPRO,LLINO,ISTART)
CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+SEQ1(ISTART),IDIM2,GEL,LREG,RREG,LLINO,IDBSIZ,TEMP,
+CHRSIZ,MAXGL2,IDEVW,MAXGEL,IDM,PERCD)
ISTART=ISTART+IDIM2
300 CONTINUE
CALL YESNO(ANS,'Select another contig',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF(ANS.EQ.0) GO TO 150
ISTART=ISTART-1
CALL YESNO(ANS,'Staden format',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF (ANS.LT.0) RETURN
IF(ANS.EQ.0) THEN
CALL FMTDK(IDEV,SEQ1,ISTART)
ELSE
CALL WRITCF(IDEV,SEQ1,ISTART,NAMPRO,KBOUT,IOK)
END IF
400 CONTINUE
CALL YESNO(ANS,'Make another consensus',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF(ANS.EQ.0)GO TO 100
END
SUBROUTINE WRITCF(IDEV,SEQ,IDSEQ,NAMPRO,KBOUT,IOK)
CHARACTER SEQ(IDSEQ),TITLE*10,ENAME*10,NAMPRO*(*)
CHARACTER NL
PARAMETER (MAXDG = 5)
EXTERNAL INDEXA
C
C write out in fasta format
C also change -,* to N
C currently name is project name for single contig, but left gel number
C for multiple contigs, title is always left gel number
C
NC = 0
DO 1 I=1,IDSEQ
IF (SEQ(I).EQ.'>') NC = NC + 1
1 CONTINUE
NL = CHAR(10)
ENAME = ' '
I = INDEX(NAMPRO,'.')
ENAME(1:I-1) = NAMPRO(1:I-1)
IAT = 1
10 CONTINUE
IF (IAT.GT.IDSEQ) THEN
CLOSE(UNIT=IDEV)
IOK =0
RETURN
END IF
IF(SEQ(IAT).NE.'<') THEN
CALL ERROM(KBOUT,'Missing header in consensus')
IOK = 1
RETURN
END IF
INAMES = INDEXA(SEQ(IAT),20,'.')
IF (INAMES.EQ.0) THEN
CALL ERROM(KBOUT,'Missing dot in header')
IOK = 1
RETURN
END IF
INAMES = IAT + INAMES
INAMEE = INAMES + MAXDG - 1
TITLE = ' '
K = 0
DO 20 I=INAMES,INAMEE
K = K + 1
TITLE(K:K) = SEQ(I)
20 CONTINUE
IF (NC.GT.1) ENAME = TITLE
IAT = IAT + 20
IDT = IDSEQ-IAT+2
IDSQ = INDEXA(SEQ(IAT),IDT,'<')
IF (IDSQ.EQ.0) IDSQ = IDT
IDSQ = IDSQ - 1
CALL SETCCS(SEQ(IAT),IDSQ)
CALL WRITFF(IDEV,SEQ(IAT),IDSQ,ENAME,TITLE)
IAT = IAT + IDSQ
GO TO 10
END
SUBROUTINE SETCCS(SEQ,IDSEQ)
CHARACTER SEQ(IDSEQ),TO(5)
INTEGER CTONUM
EXTERNAL CTONUM
SAVE TO
DATA TO/'t','c','a','g','n'/
C
C change chars in array seq of type found to type to
C
DO 10 I=1,IDSEQ
K = CTONUM(SEQ(I))
SEQ(I) = TO(K)
10 CONTINUE
END
SUBROUTINE COPYM(JLEFTS,ILEFTS,JLC,ILC,
+JPOSC,IPOSC,JSENSE,ISENSE,JLLINO,LLINO,
+JJOINT,JOINT,JTOTPC,ITOTPC,JTOTPG,ITOTPG,
+JTYPE,ITYPE,JDOUT,IDOUT,JDIM22,IDIM22,
+SEQG3,SEQG2,SEQC3,SEQC2,PERMS,PERMIS)
CHARACTER SEQG3(JDIM22),SEQG2(JDIM22),SEQC3(JDOUT),SEQC2(JDOUT)
ILEFTS = JLEFTS
ILC = JLC
IPOSC = JPOSC
ISENSE = JSENSE
LLINO = JLLINO
JOINT = JJOINT
ITOTPC = JTOTPC
ITOTPG = JTOTPG
ITYPE = JTYPE
IDOUT = JDOUT
IDIM22 = JDIM22
CALL SQCOPY(SEQG3,SEQG2,JDIM22)
CALL SQCOPY(SEQC3,SEQC2,JDOUT)
PERMIS = PERMS
END
C SUBROUTINE DALIGN
C
C COUNTS MISMATCHES AND DISPLAYS OVERLAP.
SUBROUTINE DALIGN(SEQC2,SEQG2,SEQ3,MAXGEL,IDOUT,IDIM2,
+JOINT,ITYPE,X,KBOUT,IFAIL,LO,PERMAX,ISHOW)
C AUTHOR: RODGER STADEN
CHARACTER SEQC2(MAXGEL),SEQG2(MAXGEL),SEQ3(MAXGEL)
CHARACTER PAD,DASH
SAVE PAD,DASH
DATA PAD,DASH/',','-'/
IFAIL = 1
IENDG=1
IENDC=JOINT
C ONLY LOOK AT OVERLAP WHICH IS FROM JOINT FOR LEFT TYPE JOIN
IF(ITYPE.EQ.1)THEN
IENDG=JOINT
IENDC=1
END IF
100 CONTINUE
C LENGTH OF OVERLAP?
LG=IDIM2-IENDG+1
LO=MIN(IDOUT,LG)
C SAVE RAW DATA
CALL SQCOPY(SEQG2,SEQ3,IDIM2)
CALL MSTLKL(SEQ3,IDIM2)
X=FLOAT(LO)
Y=X
K=IENDG+LO-1
C POINT TO CONSENSUS
J=0
C CHECK FOR OVERFLOW
IF(K.GT.MAXGEL)THEN
CALL ERROM(KBOUT,'DALIGN: matching region too long')
RETURN
END IF
DO 200 I=IENDG,K
J=J+1
IF(SEQC2(J).EQ.SEQ3(I))GO TO 200
C IF(SEQ3(I).EQ.DASH)GO TO 200
C IF(SEQC2(J).EQ.DASH)GO TO 200
C IF(SEQC2(J).EQ.PAD)GO TO 200
X=X-1.
200 CONTINUE
X=(Y-X)*100./Y
IF (X.GT.PERMAX) RETURN
IF (ISHOW.EQ.1) THEN
WRITE(KBOUT,1002)
1002 FORMAT(' Best alignment found')
CALL SQMTCH(SEQC2(1),SEQG2(IENDG),SEQ3,LO)
CALL FMT4LN(SEQC2(1),SEQG2(IENDG),SEQ3,LO,IENDC,IENDG,KBOUT)
END IF
IFAIL=0
END
SUBROUTINE DBCHEK(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+TEMP,IERR,KBOUT)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER TEMP(IDBSIZ)
EXTERNAL LCCHEK,NCCHEK
C to check the logical consistency of a database
C
C 1. are all nbrs holding hands
C 2. are all gels in exactly 1 contig
C 3. are there loops in contigs
C 4. do the gels designated left or right ends have outward neighbours
C 5. are the relative positions in same order as hand holding
C 6. are there gels of zero length
C 7. are there contigs of length < 1
C 8. does the designated length of the contigs agree with the gel positions
C 9. if i chain left thru a contig do i reach the gel designated as the left end
C10.if i chain right thru a contig do i reach the gel designated as the right end
C
C return error code 2 for all errors except where only error is "gel not used"
C for which we return 1
IERR=0
C hand holding OK?
DO 100 I=1,NGELS
K=LNBR(I)
IF(K.EQ.0)GO TO 50
IF(RNBR(K).EQ.I)GO TO 50
WRITE(KBOUT,1000)I
1000 FORMAT(' Hand holding problem for gel reading',I6)
WRITE(KBOUT,1001)I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)
WRITE(KBOUT,1001)K,RELPG(K),LNGTHG(K),LNBR(K),RNBR(K)
1001 FORMAT(' ',5I6)
IERR=2
50 CONTINUE
K=RNBR(I)
IF(K.EQ.0)GO TO 100
IF(LNBR(K).EQ.I)GO TO 100
WRITE(KBOUT,1000)I
WRITE(KBOUT,1001)I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)
WRITE(KBOUT,1001)K,RELPG(K),LNGTHG(K),LNBR(K),RNBR(K)
IERR=2
100 CONTINUE
C
C are all gels in exactly 1 contig
C
CALL FILLI(TEMP,IDBSIZ,0)
N=IDBSIZ-NCONTS
C
C count the number of times thru loop 320 to 300: if this exceeds the
C database size a loop has been found
C
ICOUNT=0
DO 300 I=N,IDBSIZ-1
II=I
K=LNBR(I)
J=RNBR(I)
IF((K.NE.0).AND.(J.NE.0))GO TO 310
C
C This contig points to zero gel number as a left or right end
C
WRITE(KBOUT,1002)I
1002 FORMAT(' Contig',I4,' has gel numbers of zero')
IERR=2
GO TO 290
310 CONTINUE
IF((LNBR(K).EQ.0).AND.(RNBR(J).EQ.0))GO TO 290
C
C These ends reads have outward neighbours
C
WRITE(KBOUT,1004)I
1004 FORMAT(' The end gels of contig',I4,' have outward neighbours')
IERR=2
290 CONTINUE
C
C Does the contig have nonzero length?
C
IF(RELPG(I).GT.0)GO TO 320
WRITE(KBOUT,1010)I
1010 FORMAT(' The contig on line number',I4,' has zero length')
IERR=2
320 CONTINUE
TEMP(K)=TEMP(K)+1
ICOUNT=ICOUNT+1
IF(ICOUNT.GT.IDBSIZ)GO TO 601
K=RNBR(K)
IF(K.NE.0)GO TO 320
300 CONTINUE
DO 400 I=1,NGELS
IF(TEMP(I).EQ.1)GO TO 390
IF(TEMP(I).EQ.0)GO TO 410
WRITE(KBOUT,1005)I,TEMP(I)
1005 FORMAT(' Gel number ',I6,' is used ',I6,' times')
IERR=2
GO TO 400
390 CONTINUE
C
C does the gel have nonzero length (only check those used once)
C
IF(LNGTHG(I).NE.0)GO TO 400
WRITE(KBOUT,1011)I
1011 FORMAT(' Gel number',I6,' has zero length')
IERR=2
GO TO 400
410 CONTINUE
WRITE(KBOUT,1006)I
1006 FORMAT(' Gel number ',I6,' is not used')
C
C need to increase the error count (dont reset to lower value)
C
IF(IERR.LT.2)IERR=1
400 CONTINUE
C
C all relative positions ok?
C
N=IDBSIZ-NCONTS
DO 500 I=N,IDBSIZ-1
K=LNBR(I)
IF(K.EQ.0)GO TO 500
510 CONTINUE
J=RNBR(K)
IF(J.EQ.0)GO TO 500
IF(RELPG(K).GT.RELPG(J))GO TO 520
K=J
GO TO 510
520 CONTINUE
WRITE(KBOUT,1007)K,RELPG(K),J,RELPG(J)
1007 FORMAT(' Gel number',I6,' with position',I6,
+ ' is the left neighbour of',
+ /,' gel number',I6,' with position',I6)
K=J
IERR=2
GO TO 510
500 CONTINUE
IOK = LCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ,KBOUT)
IF (IOK.NE.0) IERR = 2
IOK = NCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ,KBOUT)
IF (IOK.NE.0) IERR = 2
IF(IERR.EQ.0) WRITE(KBOUT,1013)
1013 FORMAT(' Database is logically consistent')
RETURN
601 CONTINUE
IERR=2
WRITE(KBOUT,1008)II
1008 FORMAT(' Loop in contig',I6,/,
+' No further checking done but gel numbers follow')
CALL FILLI(TEMP,IDBSIZ,0)
K=LNBR(II)
710 CONTINUE
TEMP(K)=TEMP(K)+1
WRITE(KBOUT,1009)K
1009 FORMAT(' ',I6)
IF(TEMP(K).GT.1)RETURN
K=RNBR(K)
GO TO 710
END
INTEGER FUNCTION LCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ,
+KBOUT)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),CLEN
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
EXTERNAL CLEN
LCCHEK = 0
DO 10 I=IDBSIZ-NCONTS,IDBSIZ-1
IL = LNBR(I)
L1 = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ IDBSIZ,IL)
L2 = RELPG(I)
IF (L1.NE.L2) THEN
WRITE(KBOUT,1000)I,L2,L1
1000 FORMAT(
+' Contig line',I6,' records length',I6,' but actual length is',I6)
LCCHEK = LCCHEK + 1
END IF
10 CONTINUE
END
INTEGER FUNCTION NCCHEK(RELPG,LNGTHG,LNBR,RNBR,NCONTS,IDBSIZ,
+KBOUT)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),CHAINL,CHAINR
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
EXTERNAL CHAINL,CHAINR
NCCHEK = 0
DO 10 I=IDBSIZ-NCONTS,IDBSIZ-1
IL = LNBR(I)
L1 = CHAINR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ IDBSIZ,IL)
L2 = RNBR(I)
IF (L1.NE.L2) THEN
WRITE(KBOUT,1000)I,L2,L1
1000 FORMAT(
+' Contig line',I6,' records right neighbour as',I6,
+' but left to right chaining gives',I6)
NCCHEK = NCCHEK + 1
END IF
IL = RNBR(I)
L1 = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ IDBSIZ,IL)
L2 = LNBR(I)
IF (L1.NE.L2) THEN
WRITE(KBOUT,1001)I,L2,L1
1001 FORMAT(
+' Contig line',I6,' records left neighbour as',I6,
+' but right to left chaining gives',I6)
NCCHEK = NCCHEK + 1
END IF
10 CONTINUE
END
INTEGER FUNCTION CHAINR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,IIN)
C AUTHOR: RODGER STADEN
C RETURNS CONTIG RIGHT GEL NUMBER OR ZERO FOR ERROR
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
I = IIN
J = I
CHAINR = 0
10 CONTINUE
IF(I.NE.0)THEN
J = I
I = RNBR(I)
IF(I.EQ.IIN)RETURN
GO TO 10
END IF
CHAINR = J
END
C DBPRNT
C PRINTS A DATABASE. IE ITS RELATIONSHIPS
SUBROUTINE DBPRNT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+IDEV,KBIN,KBOUT,IDEVN,LLINO,
+IHELPS,IHELPE,FILEH,IDEVH)
CHARACTER FILEH*(*)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),LREG,RREG,ANS
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER NAMARC*16
IF(NGELS.GT.0)CALL DBSTAT(RELPG,LNGTHG,LNBR,RNBR,NGELS,
+NCONTS,IDBSIZ,IDEV)
WRITE(IDEV,10011)NGELS,NCONTS
10011 FORMAT(' Number of gel readings ',I6,' Number of contigs ',I6)
20 CONTINUE
CALL YESNO(ANS,'Select contigs',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF(ANS.LT.0) RETURN
IF(ANS.EQ.0) GO TO 45
N=IDBSIZ-NCONTS
25 CONTINUE
CALL YESNO(ANS,'Show gel readings in positional order',
+IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
IF(ANS.LT.0) RETURN
IF(ANS.EQ.0)GO TO 41
WRITE(IDEV,1009)
1009 FORMAT(' CONTIG LINES')
WRITE(IDEV,1000)
1000 FORMAT(
+' CONTIG LINE LENGTH ENDS'/
+' LEFT RIGHT')
DO 30 I=N,IDBSIZ-1
WRITE(IDEV,1007)I,RELPG(I),LNBR(I),RNBR(I)
30 CONTINUE
1007 FORMAT( ' ',18X,I6,2X,I7,9X,I6,2X,I6)
WRITE(IDEV,1008)
1008 FORMAT(' GEL LINES')
WRITE(IDEV,1001)
1001 FORMAT(
+' NAME NUMBER POSITION LENGTH NEIGHBOURS'/
+' LEFT RIGHT')
DO 40 I=1,NGELS
CALL READN(IDEVN,I,NAMARC)
WRITE(IDEV,1006)NAMARC,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I)
1006 FORMAT( ' ',A,2X,I6,2X,I7,2X,I5,2X,I6,2X,I6)
40 CONTINUE
RETURN
C
41 CONTINUE
C
C SORTED DATA
DO 43 I=N,IDBSIZ-1
WRITE(IDEV,1021)
1021 FORMAT( )
WRITE(IDEV,1000)
WRITE(IDEV,1007)I,RELPG(I),LNBR(I),RNBR(I)
J=LNBR(I)
WRITE(IDEV,1001)
42 CONTINUE
CALL READN(IDEVN,J,NAMARC)
WRITE(IDEV,1006)NAMARC,J,RELPG(J),LNGTHG(J),LNBR(J),RNBR(J)
J=RNBR(J)
IF(J.NE.0)GO TO 42
43 CONTINUE
RETURN
45 CONTINUE
C SELECTED CONTIGS ONLY
C
C GET GEL NUMBER AND CONTIG NUMBER
CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LINCON,
+LLINO,NULGEL,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+'Contig identifier',
+IHELPS,IHELPE,FILEH,IDEVH)
IF(IERR.NE.0)RETURN
WRITE(IDEV,1009)
WRITE(IDEV,1000)
WRITE(IDEV,1007)LINCON,RELPG(LINCON),LNBR(LINCON),RNBR(LINCON)
CALL GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+1,RELPG(LINCON),LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT,
+ IHELPS,IHELPE,FILEH,IDEVH,IERR)
IF(IERR.NE.0)RETURN
WRITE(IDEV,1008)
N=LLINO
WRITE(IDEV,1001)
46 CONTINUE
CALL READN(IDEVN,N,NAMARC)
WRITE(IDEV,1006)NAMARC,N,RELPG(N),LNGTHG(N),LNBR(N),RNBR(N)
IF(RNBR(N).EQ.0)GO TO 48
N=RNBR(N)
IF(RELPG(N).GT.RREG)GO TO 48
GO TO 46
48 CONTINUE
GO TO 45
END
SUBROUTINE DBSCAN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1,
+IDIM1,GEL,IDBSIZ,TEMP3,ID1,CHRSIZ,MAXGL2,KBIN,KBOUT,IDEVW,
+IDEV,LINLEN,PERCD,
+IHELPS,IHELPE,FILEH,IDEVH,MAXGEL,LINOU1,LINOU2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEVN,
+ LLINO,LINCON,LREG,RREG,MXGOOD)
C 28-7-91 added extra parameter mxgood: the max length of read
C we have confidence in
CHARACTER FILEH*(*)
PARAMETER (MAXPRM = 10)
CHARACTER PROMPT(2)*(MAXPRM)
C AUTHOR: RODGER STADEN
INTEGER RREG, RELPG(IDBSIZ),CHRSIZ
INTEGER LREG,TEMP3(ID1,CHRSIZ,MAXGL2),ANS
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL),LINOU1(MAXGEL),LINOU2(MAXGEL)
CHARACTER SEQ1(IDIM1)
CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,NULGEL,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+'Contig identifier',
+IHELPS,IHELPE,FILEH,IDEVH)
IF(IERR.NE.0) RETURN
CALL GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+1,RELPG(LINCON),LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT,
+ IHELPS,IHELPE,FILEH,IDEVH,IERR)
IF(IERR.NE.0) RETURN
IDIM2=RREG-LREG+1
CALL SUMMAR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+SEQ1,IDIM1,GEL,LREG,RREG,LLINO,PERCD,IDBSIZ,
+TEMP3,ID1,CHRSIZ,MAXGL2,IDEVW,
+MAXGEL,LINOU1,LINOU2,MXGOOD)
CALL DBSCSM(SEQ1(LREG),IDIM2,KBOUT)
160 CONTINUE
ANS = 1
PROMPT(1) = 'List codes'
PROMPT(2) = 'Plot codes'
CALL RADION('Select results display mode',PROMPT,2,ANS,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANS.LT.1) RETURN
IF(ANS.EQ.1) THEN
CALL FMTDB(SEQ1,IDIM1,LREG,RREG,LINLEN,IDEV)
RETURN
ELSE
CALL PLTQ(SEQ1(LREG),IDIM2,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
END
SUBROUTINE DBSCNP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1,
+IDIM1,GEL,IDBSIZ,TEMP3,ID1,CHRSIZ,MAXGL2,IDEVW,LLINO,
+PERCD,MAXGEL,LINOU1,LINOU2,LREG,RREG,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,MXGOOD)
C AUTHOR: RODGER STADEN
INTEGER RREG, RELPG(IDBSIZ),CHRSIZ
INTEGER LREG,TEMP3(ID1,CHRSIZ,MAXGL2)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL),LINOU1(MAXGEL),LINOU2(MAXGEL)
CHARACTER SEQ1(IDIM1)
IDIM2=RREG-LREG+1
C 28-7-91 added extra parameter mxgood: the max length of read
C we have confidence in
CALL SUMMAR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+SEQ1,IDIM1,GEL,LREG,RREG,LLINO,PERCD,IDBSIZ,
+TEMP3,ID1,CHRSIZ,MAXGL2,IDEVW,
+MAXGEL,LINOU1,LINOU2,MXGOOD)
CALL PLTQ(SEQ1(LREG),IDIM2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END
SUBROUTINE DBSCSM(SEQ1,IDIM1,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDIM1)
CHARACTER CODES(5)
REAL X(5)
SAVE CODES
DATA CODES/'0','1','2','3','4'/
DO 50 J=1,5
X(J)=0.
50 CONTINUE
DO 100 I=1,IDIM1
DO 60 J=1,5
IF(SEQ1(I).NE.CODES(J))GO TO 60
X(J)=X(J)+1.
GO TO 61
60 CONTINUE
61 CONTINUE
100 CONTINUE
SUM=0.
DO 130 J=1,5
SUM=SUM+X(J)
130 CONTINUE
DO 140 J=1,5
IF(SUM.NE.0)X(J)=X(J)*100./SUM
140 CONTINUE
WRITE(KBOUT,1001)X(1)
1001 FORMAT(' ',F6.2,'% OK on both strands and they agree(0)')
WRITE(KBOUT,1002)X(2)
1002 FORMAT(' ',F6.2,'% OK on plus strand only(1)')
WRITE(KBOUT,1003)X(3)
1003 FORMAT(' ',F6.2,'% OK on minus strand only(2)')
WRITE(KBOUT,1004)X(4)
1004 FORMAT(' ',F6.2,'% Bad on both strands(3)')
WRITE(KBOUT,1005)X(5)
1005 FORMAT(' ',F6.2,'% OK on both strands but they disagree(4)')
RETURN
END
C DBSTAT
SUBROUTINE DBSTAT(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+KBOUT)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
N=IDBSIZ-NCONTS
SUM=0.
DO 20 I=N,IDBSIZ-1
SUM=SUM+RELPG(I)
20 CONTINUE
AV=SUM/NCONTS
WRITE(KBOUT,1020)SUM,AV
1020 FORMAT( ' Total contig length ',F10.0,' Average',
+' length ',F10.1)
SUMG=0.
DO 30 I=1,NGELS
SUMG=SUMG+FLOAT(ABS(LNGTHG(I)))
30 CONTINUE
AV=SUMG/SUM
WRITE(KBOUT,1021)SUMG
1021 FORMAT( ' Total characters in gel readings ',F10.0)
WRITE(KBOUT,1022)AV
1022 FORMAT
+( ' Average gel characters per consensus character ',F10.2)
99 CONTINUE
RETURN
END
C DELCON
C
C DELETES CONTIG FROM CONSENSUS SEQUENCE
SUBROUTINE DELCON(SEQ1,ILEFT,ILC,IDIM1)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDIM1)
C FIRST CHAR TO REPLACE
I1=ILEFT-20
C FIRST CHAR TO MOVE
I2=ILEFT+ILC
C IS THIS RIGHTMOST CONTIG ANYWAY?
IF(I2.GT.IDIM1)GO TO 10
C NUMBER TO MOVE
ID=IDIM1-I2+1
C MOVE
CALL SQCOPY(SEQ1(I2),SEQ1(I1),ID)
C RESET LENGTH
IDIM1=I1+ID-1
RETURN
10 CONTINUE
C RIGHTMOST CONTIG SO DONT MOVE
IDIM1=I1-1
C
RETURN
END
SUBROUTINE DISMAT(SEQ,IDIM,GEL,IDIMG,SAVPS,SAVPG,IDSAV,
+CENDS,NENDS,IDCEND,MAXCON,KBOUT,MATCH)
C AUTHOR: RODGER STADEN
INTEGER CENDS(MAXCON)
INTEGER NENDS(MAXCON)
INTEGER SAVPS(IDSAV),SAVPG(IDSAV)
CHARACTER SEQ(IDIM),GEL(IDIMG),MATCH(IDIMG)
C EDIT 07-02-83 TO CHECK FOR CASE WHEN GEL OVERLAPS ADJACENT
C CONTIGS WITHIN THE LENGTH OF THE GEL! DONE BY HAVING A
C PARAMETER THAT STORES THE POSITION OF THE LEFT END OF THE
C NEXT CONTIG (IE THE ONE AFTER THE ONE THE CURRENT GEL OVERLAPS)
C SET IT TO A VERY LARGE VALUE INITIALLY
NEXTC=99999
C SORT THE MATCHING WORDS INTO ASCENDING ORDER ON POSITION IN SEQ
CALL BUB2AS(SAVPS,SAVPG,IDSAV)
C LOOK FOR SEPARATE MATCHES
LEND=IDIMG-SAVPG(1)+SAVPS(1)
C WRITE(KBOUT,1000)SAVPG(1),SAVPS(1)
CALL DISMAU(SEQ,IDIM,GEL,IDIMG,SAVPS(1),
+SAVPG(1),CENDS,NENDS,IDCEND,MAXCON,
+NEXTC,KBOUT,MATCH)
DO 10 I=2,IDSAV
IF((SAVPS(I).LT.LEND).AND.(SAVPS(I).LT.NEXTC))GO TO 10
C NEW MATCH, DISPLAY IT
C WRITE(KBOUT,1000)SAVPG(I),SAVPS(I)
C1000 FORMAT(' ',2I6)
CALL DISMAU(SEQ,IDIM,GEL,IDIMG,SAVPS(I),
+SAVPG(I),CENDS,NENDS,IDCEND,MAXCON,
+NEXTC,KBOUT,MATCH)
C RESET LEND
LEND=IDIMG-SAVPG(I)+SAVPS(I)
10 CONTINUE
RETURN
END
C
C DISMAU
C ROUTINE TO DISPLAY MATCHES
C EDITED 17-12-81 TO NOT SUBTRACT 1 FROM LCL AND LGR
SUBROUTINE DISMAU(SEQ,IDIM1,GEL,IDIMG,ISAVPS,SAVPG,CENDS,NENDS,
+IDCEND,MAXCON,NEXTC,KBOUT,MATCH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),GEL(IDIMG),MATCH(IDIMG)
INTEGER SAVPS,SAVPG,CENDS(MAXCON)
INTEGER NENDS(MAXCON)
C EDITED 07-02-83 FOR NEXTC (SEE DISMAT)
C DELETE 20 FROM END OF CONSENSUS MATCH
SAVPS=ISAVPS-19
C FIND CONTIG CONSENSUS ENDS
JJ=1
DO 5 J=2,IDCEND
IF(SAVPS.GT.CENDS(J))GO TO 5
C GONE PAST SO LAST IS THE ONE
JJ=J-1
GO TO 6
5 CONTINUE
JJ=IDCEND
6 CONTINUE
C SUBTRACT 1 FROM END
SAVPS=SAVPS-1
C LENGTH FROM MATCH TO LEFT OF CONTIG
LCL=SAVPS-CENDS(JJ)
C RIGHT
LCR=CENDS(JJ+1)-ISAVPS-1
C LEFT GEL
LGL=SAVPG-1
LGR=IDIMG-SAVPG
C NEED MIN OF EACH PAIR
LL=MIN(LCL,LGL)
LR=MIN(LCR,LGR)
C LENGTH OF OVERLAP
LM=LR+LL+1
C DISPLAY STARTS
ICL=ISAVPS-LL
IGL=SAVPG-LL
WRITE(KBOUT,1000)NENDS(JJ)
1000 FORMAT(' Match found with vector number =',I6)
CALL SQMTCH(SEQ(ICL),GEL(IGL),MATCH,LM)
L=ICL-CENDS(JJ)-19
CALL FMT4LN(SEQ(ICL),GEL(IGL),MATCH,LM,L,IGL,KBOUT)
C SAVE POSN OF END OF NEXT CONTIG
NEXTC=CENDS(JJ+1)+20
RETURN
END
SUBROUTINE DSPLAY(RELPG,LNGTHG,LNBR,RNBR,
+GEL,LLINOO,LINCON,LREG,RREG,GEL2,I1,IDIM,NOPT,
+LLINOR,IDBSIZ,IDEV,KBOUT,IDEVW,IDEVN,LINLEN,PERCD,
+MAXGEL,IDM)
C AUTHOR: RODGER STADEN
INTEGER CHRSIZ
PARAMETER (CHRSIZ = 6)
PARAMETER (IDC1 = CHRSIZ*100)
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER LREG,RREG,X,XLS2,XLS1,XRS2,XRS1,RREG2
CHARACTER MATCH(100)
INTEGER CHARS(CHRSIZ,100),CHARS1(IDC1)
CHARACTER NAMARC*16
CHARACTER GEL(MAXGEL)
CHARACTER GEL2(MAXGEL)
INTEGER RELPOS(10),RELPO2(10)
INTEGER GELC
INTEGER RP
INTEGER LSEQNO,RSEQNO
CHARACTER LINOUT(100)
CHARACTER MUNOTP
CHARACTER GTCONC
EXTERNAL GTCONC
EQUIVALENCE (CHARS1,CHARS)
CALL FILLI(CHARS1,IDC1,0)
C SET CONTIG NUMBER
ICON=1
LLINO=LLINOO
NLEN=LINLEN/10
LSEQNO=LREG
X=LINLEN+LSEQNO-1
RSEQNO=MIN(RREG,X)
C SET LEFT GEL NUMBER FOR RIGHT CONTIG
LN2=LLINOR
C FIRST GEL NO IS LLINOO
C SET RREG FOR RIGHT CONTIG
RREG2=IDIM
C SET UP LSEQNO,RSEQNO FOR FOR NOPT=3
XLS2=I1
XRS2=RSEQNO-LSEQNO+XLS2
9 CONTINUE
C IF RIGHT CONTIG SKIP NUMBER PRINTING
IF(ICON.EQ.2)GO TO 8
C NEED TO KEEP LONGEST LINE LENGTH FOR OUTPUT OF CONSENSUS
IE=0
C SETUP AND WRITE NUMBERS
RELPOS(1)=LSEQNO+9
DO 5 I=2,NLEN
RELPOS(I)=RELPOS(I-1)+10
5 CONTINUE
WRITE(IDEV,1023)
+(RELPOS(K),K=1,MIN(NLEN,MAX(1,(RSEQNO-LSEQNO+1)/10)))
1023 FORMAT( ' ',25X,10(I9,1X))
C SET CURRENT LINE NUMBER
8 CONTINUE
GELC=LLINO
10 CONTINUE
C IS LEFT END OF CURRENT GEL >RREG
IF(RELPG(GELC).GT.RSEQNO)GO TO 200
C ALSO NEED TO KNOW IF RIGHT END ON THIS LINE (IF .LT. NO DATA
C TO DISPLAY)
X=RELPG(GELC)+ABS(LNGTHG(GELC))-1
IF(X.LT.LSEQNO)GO TO 190
CALL READW(IDEVW,GELC,GEL,MAXGEL)
CALL FILLC(LINOUT,LINLEN,' ')
CALL READN(IDEVN,GELC,NAMARC)
C
C NEED TO KNOW HOW MANY CHARS TO COPY OVER TO OUTPUT LINE
C AND WHERE IN LINE TO PUT THEM
C CURRENT LINE LEFT END IS LSEQNO,RIGHT END RSEQNO
C SO LEFT START CHAR IS
X=MAX(LSEQNO,RELPG(GELC))
C POSITION IN ARRAY LINE
LP=X-LSEQNO+1
C RIGHT END CHAR IS
X=RELPG(GELC)+ABS(LNGTHG(GELC))-1
X=MIN(RSEQNO,X)
C POSITION IN ARRAY LINE
RP=X-LSEQNO+1
C LOOK FOR LONGEST LINE
IF(RP.GT.IE)IE=RP
C NEED LEFT START IN GEL
K=LSEQNO-RELPG(GELC)+1
IF(K.LT.1)K=1
NCOP=RP-LP+1
IF(NCOP.GT.0)CALL SQCOPY(GEL(K),LINOUT(LP),NCOP)
N=LP+NCOP-1
II=K-1
IF(IDM.EQ.26)THEN
DO 50 I = LP,N
II = II + 1
CALL PCON1(GEL(II),CHARS(1,I))
50 CONTINUE
ELSE
DO 70 I=LP,N
II=II+1
JJ = INDEXS(GEL(II),JSCORE)
CHARS(JJ,I) = CHARS(JJ,I) + JSCORE
C CHARS(CHRSIZ,I) = CHARS(CHRSIZ,I) + JSCORE
70 CONTINUE
END IF
I=SIGN(GELC,LNGTHG(GELC))
WRITE(IDEV,1020)I,NAMARC,(LINOUT(K),K=1,RP)
C1020 FORMAT( ' ',I4,2X,A,2X,100A1)
1020 FORMAT( ' ',I6,1X,A,1X,100A1)
C
190 CONTINUE
C NOW GET NEXT GEL TO RIGHT
GELC=RNBR(GELC)
IF(GELC.NE.0)GO TO 10
200 CONTINUE
C CALC CONSENSUS AND WRITE IT
IF(IDM.EQ.26)THEN
DO 49 I = 1,LINLEN
LINOUT(I) = MUNOTP(CHARS(1,I))
CHARS(1,I) = 0
49 CONTINUE
ELSE
DO 230 I=1,LINLEN
LINOUT(I) = GTCONC(CHARS(1,I),CHRSIZ,PERCD)
CALL FILLI(CHARS(1,I),CHRSIZ,0)
230 CONTINUE
END IF
WRITE(IDEV,1019)(LINOUT(K),K=1,IE)
C IF REQUIRED WRITE COMPARISON GEL
C WHICH OPTION IN OPERATION?
IF(NOPT.EQ.2)GO TO 52
IF(NOPT.NE.3)GO TO 250
53 CONTINUE
C ALREADY DONE THIS LINE CONTIG2?
IF(ICON.EQ.2)GO TO 54
ICON=2
C NEED TO SAVE CONSENSUS FROM LEFT CONTIG
CALL SQCOPY(LINOUT,GEL2,IE)
C SAVE VALUES FROM LEFT CONTIG
XLS1=LSEQNO
XRS1=RSEQNO
C SAVE CURRENT LEFT GEL NUMBER
LN1=LLINO
C SET UP VALUES FOR RIGHT CONTIG
LSEQNO=XLS2
RSEQNO=XRS2
C SET LEFT GEL NUMBER
LLINO=LN2
C GET NEXT GEL
GO TO 150
54 CONTINUE
C SAVE CURRENT LEFT GEL NUMBER
LN2=LLINO
C SET VALUES FOR RIGHT CONTIG NEXT PASS
XLS2=XRS2+1
XRS2=XLS2+LINLEN-1
IF(XRS2.GT.RREG2)XRS2=RREG2
C SET UP VALUES FOR LEFT CONTIG
LLINO=LN1
ICON=1
LSEQNO=XLS1
RSEQNO=XRS1
C SET DECREMENT FOR POINTER TO GEL2
MMM=I1-1
52 CONTINUE
C1020 FORMAT( ' ',I4,2X,A,2X,100A1)
1017 FORMAT(' NEWGEL ',100A1)
1018 FORMAT(' MISMATCH ',100A1)
1019 FORMAT(' CONSENSUS ',100A1)
1022 FORMAT( ' ',26X,100A1)
I2=I1+LINLEN-1
IF(I2.GT.IDIM)I2=IDIM
IF(NOPT.EQ.2)WRITE(IDEV,1017)(GEL2(K),K=I1,I2)
C SET DECREMENT
IF(NOPT.EQ.2)MMM=0
55 CONTINUE
CALL FILLC(MATCH,LINLEN,'*')
K=0
DO 667 J=I1,I2
K=K+1
IF(GEL2(J-MMM).EQ.LINOUT(K))MATCH(K) = ' '
667 CONTINUE
WRITE(IDEV,1018)(MATCH(K),K=1,IE)
RELPO2(1)=(I1)+9
DO 240 I=2,NLEN
RELPO2(I)=RELPO2(I-1)+10
240 CONTINUE
WRITE(IDEV,1023)(RELPO2(K),K=1,NLEN)
I1=I2+1
I2=I2+LINLEN
IF(I2.GT.IDIM)I2=IDIM
IF(I1.GT.I2)RETURN
250 CONTINUE
C
WRITE(IDEV,1021)
1021 FORMAT( )
C NEXT LINE LENGTH
C NEXT LENGTH IS OLD RIGHT +1
LSEQNO=RSEQNO+1
C NEW RIGHT IS LEFT +LENGTH
RSEQNO=LSEQNO+(LINLEN)-1
C ARE WE OVER END OF REGION
IF(RSEQNO.GT.RREG)RSEQNO=RREG
C HAVE WE FINISHED REGION COMPLETELY
IF(RSEQNO.LT.LSEQNO) RETURN
C NOT FINISHED SO NEED TO FIND CURRENT LEFT GEL NO
C CURRENT LEFT GEL IS LLINO
C
150 CONTINUE
C NEED TO KNOW IF CURRENT LEFT GELS RIGHT END IS INSIDE REGION
X=RELPG(LLINO)+ABS(LNGTHG(LLINO))-1
IF(X.GE.LSEQNO)GO TO 9
C LOOK AT NEXT GEL TO RIGHT
LLINO=RNBR(LLINO)
C MAY HAVE GONE OVER END OF CONTIG?????
IF(LLINO.GT.0)GO TO 150
300 CONTINUE
RETURN
END
SUBROUTINE EC(GEL,IDG,CON,IDC,K)
CHARACTER GEL(IDG),CON(IDC),CHARSL
EXTERNAL CHARSL,INDEXS
PARAMETER (IDASH = 6)
K = 0
DO 10 I = 1,MIN(IDC,IDG)
JC = INDEXS(CON(I),J)
IF(JC.NE.IDASH) THEN
JG = INDEXS(GEL(I),J)
IF(JG.NE.JC) THEN
GEL(I) = CHARSL(JC)
K = K + 1
END IF
END IF
10 CONTINUE
C WRITE(*,*)'NUMBER OF CHARS CORRECTED=',K
END
SUBROUTINE ED(GEL,IDG,CON,IDC,K)
CHARACTER GEL(IDG),CON(IDC),CHARSL
EXTERNAL CHARSL,INDEXS
K = 0
DO 10 I = MIN(IDC,IDG),1,-1
JC = INDEXS(CON(I),J)
IF(JC.EQ.5) THEN
IF(I.LT.IDG) CALL SQCOPY(GEL(I+1),GEL(I),IDG-I)
K = K + 1
END IF
10 CONTINUE
C WRITE(*,*)'NUMBER OF CHARS DELETED=',K
END
SUBROUTINE EDR(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT,
+CON,IDC,IDEVW,IDEVR,LREG)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER CON(IDC)
INTEGER CHNRP
EXTERNAL CHNRP
C CHANGE RELATIVE POSITIONS FOR AE
ND = 0
DO 10 I = IDC,1,-1
IF(CON(I).EQ.'*') THEN
ND = ND + 1
K = I + LREG - 1
J = CHNRP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT,K)
IF(J.NE.0) THEN
CALL SHIFTC(RELPG,LNGTHG,LNBR,RNBR,IDUM,JDUM,IDEVR,
+ IDBSIZ,J,NCONT,-1)
END IF
END IF
10 CONTINUE
C WRITE(*,*)' NUMBER OF DELETIONS=',ND
END
SUBROUTINE ET(GEL,IDG,CON,IDC,K)
CHARACTER GEL(IDG),CON(IDC),CHARSL
EXTERNAL CHARSL,INDEXS
K = 0
DO 10 I = 2,MIN(IDC,IDG)
JC = INDEXS(CON(I),J)
IF(JC.NE.6) THEN
JG = INDEXS(GEL(I),J)
IF(JG.NE.JC) THEN
JNG = INDEXS(GEL(I-1),J)
JNC = INDEXS(CON(I-1),J)
IF(JNC.NE.JNG) THEN
IF((JNG.EQ.JC).AND.(JNC.EQ.JG)) THEN
GEL(I) = CHARSL(JNG)
GEL(I-1) = CHARSL(JG)
K = K + 1
END IF
END IF
END IF
END IF
10 CONTINUE
C WRITE(*,*)' NUMBER OF CHARS TRANSPOSED=',K
END
SUBROUTINE FDEPTH(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,LENCON,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG,DEPTHP,DEPTHM,STRAND
STRAND = 1
CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,LENCON,STRAND,DEPTHP)
IF(DEPTHP.LT.0) RETURN
STRAND = -1
CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,LENCON,STRAND,DEPTHM)
IF(DEPTHM.LT.0) RETURN
CALL PLTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+MARGL,MARGR,MARGB,
+MARGT,ISXMAX,ISYMAX,LGEL,LREG,RREG,DEPTHP,DEPTHM)
END
SUBROUTINE FDPTH(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,LENCON,STRAND,DEPTH)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG,DEPTH,STRAND
EXTERNAL NCDEP
C LREG = left contig position
C RREG = right '' ''
C LENCON = RREG-LREG+1
I = LGEL
DEPTH = 0
5 CONTINUE
IF(I.NE.0) THEN
IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.LREG) THEN
I = RNBR(I)
GO TO 5
END IF
ELSE
DEPTH = -1
RETURN
END IF
C WRITE(*,*)'LGEL',LGEL
10 CONTINUE
IF(I.NE.0)THEN
IF(RELPG(I).LE.RREG) THEN
IF(SIGN(1,LNGTHG(I)).EQ.STRAND) THEN
K = RELPG(I) + ABS(LNGTHG(I)) -1
DEPTH = MAX(NCDEP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,I,
+ STRAND,K),DEPTH)
END IF
I = RNBR(I)
GO TO 10
END IF
END IF
C WRITE(*,*)'DEPTH',DEPTH
END
C FIND
C
C SUBROUTINE TO FIND THE FIRST OCCURENCE OF A GIVEN STRING
C IN A GIVEN ARRAY
C
SUBROUTINE FIND(SEQ,IDIM1,STRING,IDIM2,IMATCH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2),DASH
INTEGER PSEQ,PSTR
SAVE DASH
DATA DASH/'-'/
PSEQ=0
PSTR=1
IMATCH=0
C
100 CONTINUE
C
C PUT PSEQ TO WHERE THIS FAILED MATCH STARTED
PSEQ=PSEQ+1-PSTR
C
400 CONTINUE
C
PSTR=0
C
500 CONTINUE
C
C POINT TO NEXT SEQ CHAR
PSEQ=PSEQ+1
C TEST FOR END
IF(PSEQ.GT.IDIM1)GO TO 300
C POINT TO NEXT STRING CHAR
PSTR=PSTR+1
C TEST FOR DASH IN STRING
IF(STRING(PSTR).EQ.DASH)GO TO 450
C TEST FOR DASH IN SEQ
IF(SEQ(PSEQ).EQ.DASH)GO TO 400
C TEST FOR MATCH
IF(SEQ(PSEQ).NE.STRING(PSTR))GO TO 100
C
450 CONTINUE
C
C TEST FOR END OF STRING IE. WHOLE STRING MATCH
IF(PSTR.LT.IDIM2)GO TO 500
C HAVE MATCH. GET POINTER TO WHERE IT STARTED
IMATCH=PSEQ-IDIM2+1
C
300 CONTINUE
RETURN
END
SUBROUTINE FMT4LP(SEQ1,SEQ2,IDIM,ISW,ISX,IDEV,NAME1,NAME2)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDIM),SEQ2(IDIM),MATCH(60),NAME1*(*),NAME2*(*)
INTEGER KL(6)
ISXX=ISX
ISWW=ISW
IE=0
10 CONTINUE
IS=IE+1
IE=IE+60
IF(IE.GT.IDIM)IE=IDIM
N=IE-IS+1
N=1+(N-1)/10
C SET UP DECIMAL COUNTERS
DO 50 J=1,N
KL(J)=ISWW
ISWW=ISWW+10
50 CONTINUE
WRITE(IDEV,1001)(KL(K),K=1,N)
WRITE(IDEV,1002)NAME1,(SEQ1(K),K=IS,IE)
IL = IE - IS + 1
CALL SQMTCH(SEQ1(IS),SEQ2(IS),MATCH,IL)
WRITE(IDEV,1003)(MATCH(K),K=1,IL)
WRITE(IDEV,1002)NAME2,(SEQ2(K),K=IS,IE)
1002 FORMAT(2X,A,2X,6(10A1,1X))
1003 FORMAT(10X,6(10A1,1X))
C SET UP DECIMAL COUNTERS
DO 60 J=1,N
KL(J)=ISXX
ISXX=ISXX+10
60 CONTINUE
WRITE(IDEV,1001)(KL(K),K=1,N)
1001 FORMAT( 5X,6(I6,5X))
IF(IE.LT.IDIM) GO TO 10
END
SUBROUTINE FMTDB(SEQ1,IDIM,ISW,ISE,LINLEN,IDEV)
C NOTE SAME AS FMTSEP!
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDIM)
INTEGER KL(12)
ISWW=ISW-1
IE=ISW-1
1 CONTINUE
WRITE(IDEV,1003)
1003 FORMAT( )
C SET UP DECIMAL COUNTERS
DO 50 J=1,LINLEN/10
ISWW=ISWW+10
KL(J)=ISWW
50 CONTINUE
IS=IE+1
IE=IE+LINLEN
IF(IE.GT.ISE)IE=ISE
WRITE(IDEV,1001)(KL(KKK),KKK=1,MIN(IE-IS+1,LINLEN)/10)
WRITE(IDEV,1002)(SEQ1(K),K=IS,IE)
1002 FORMAT( ' ',12(10A1,1X))
1001 FORMAT( ' ',12(5X,I6))
IF(IE.EQ.ISE)RETURN
GO TO 1
END
SUBROUTINE FNDCON(SEQ,IDIM,CENDS,NENDS,IDCEND,MAXCON,KBOUT)
C AUTHOR: RODGER STADEN
C STORES THEIR POSITIONS IN CENDS AND THEIR LEFT LINE NUMBERS IN NENDS
PARAMETER (MAXDG = 5)
CHARACTER SEQ(IDIM),DC(MAXDG)
INTEGER CENDS(MAXCON)
INTEGER NENDS(MAXCON)
EXTERNAL IFROMC,INDEXA
IDCEND=0
DO 10 I=1,IDIM
IF(SEQ(I).NE.'<')GO TO 10
IDCEND=IDCEND+1
C PUT POSITION OF LEFT END OF CONTIG IN CENDS
CENDS(IDCEND)=I
K = INDEXA(SEQ(I),20,'.')
IF(K.EQ.0) THEN
WRITE(KBOUT,*)'Error in contig title: no dot!'
IDCEND = 0
RETURN
END IF
K = K + I
C K=I+11
DO 5 J=1,MAXDG
DC(J)=SEQ(K)
K=K+1
5 CONTINUE
NENDS(IDCEND)=IFROMC(DC,MAXDG,KBOUT)
10 CONTINUE
C STORE POSITION OF LAST CHAR +1 TO SIMPLIFY DISPLAY ROUTINES
CENDS(IDCEND+1)=IDIM+1
RETURN
END
INTEGER FUNCTION GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,IIN)
C AUTHOR: RODGER STADEN
C RETURNS CONTIG LINE NUMBER OR ZERO FOR ERROR
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
GCLIN = 0
N=IDBSIZ-NCONTS
DO 10 J=N,IDBSIZ-1
IF(LNBR(J).EQ.IIN) THEN
GCLIN = J
RETURN
END IF
10 CONTINUE
END
INTEGER FUNCTION GELID(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LLINO,IDBSIZ,KBIN,KBOUT,IDEVN,
+IHELPS,IHELPE,FILEH,IDEVH,INFLAG)
CHARACTER FILEH*(*)
C AUTHOR: RODGER STADEN
C SEARCHES FOR ARCHIVE NAMES
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER NAME1*17,NAME2*16,NAME3*17,NFLAG
PARAMETER (NFLAG='/')
NAME3 = ' '
IF(LLINO.NE.0) THEN
NAME3(1:1) = NFLAG
CALL READN(IDEVN,LLINO,NAME3(2:))
END IF
GELID = 0
10 CONTINUE
L = 0
IF(LLINO.NE.0) L = 17
CALL GTSTR('Contig identfier',NAME3,
+NAME1,L,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.4) RETURN
IF(INFLAG.EQ.3) THEN
GELID = LLINO
RETURN
END IF
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(NAME1(1:1).EQ.NFLAG) THEN
CALL CCASE(NAME1,1)
DO 20 I=1,NGELS
CALL READN(IDEVN,I,NAME2)
CALL CCASE(NAME2,1)
IF(NAME1(2:17).EQ.NAME2) THEN
GELID = I
RETURN
END IF
20 CONTINUE
WRITE(KBOUT,1004)NAME1(2:)
1004 FORMAT(' ',A,' is not in the database!')
ELSE
CALL RJST(NAME1)
READ(NAME1,1001,ERR=10,END=10)GELID
1001 FORMAT(I17)
IF((GELID.LT.1).OR.(GELID.GT.NGELS)) THEN
CALL ERROM(KBOUT,'Illegal gel reading number')
GO TO 10
END IF
END IF
END
SUBROUTINE GELOUT(RELPG,LNGTHG,LNBR,RNBR,MAXDB,IDBSIZ,NGELS,
+NCONTS,GEL,MAXGEL,IDEV3,IDEV4,IDEV5,IDEV1,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,FILNAM)
INTEGER RELPG(MAXDB)
INTEGER LNGTHG(MAXDB),LNBR(MAXDB),RNBR(MAXDB)
CHARACTER GEL(MAXGEL)
CHARACTER FILNAM*(*),HELPF*(*)
CHARACTER NAMARC*16
FILNAM = ' '
CALL OPENF1(IDEV5,FILNAM,1,IOK,KBIN,KBOUT,
+'File for names of extracted gel readings',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
CALL YESNO(I,'Extract ends of contigs only',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(I.LT.0) RETURN
IF(I.EQ.0) GO TO 15
DO 10 I=1,NGELS
L=ABS(LNGTHG(I))
IF(L.GT.0)THEN
CALL READN(IDEV4,I,NAMARC)
WRITE(KBOUT,1002)NAMARC
1002 FORMAT(' ',A)
WRITE(IDEV5,1003)NAMARC
1003 FORMAT(A)
FILNAM = NAMARC
CALL OPENRS(IDEV1,FILNAM,IOK,LRECL,1)
IF(IOK.NE.0) GO TO 100
CALL READW(IDEV3,I,GEL,MAXGEL)
IF(LNGTHG(I).LT.0)THEN
CALL SQREV(GEL,L)
CALL SQCOM(GEL,L)
END IF
CALL FMTDKN(IDEV1,GEL,L)
CLOSE(UNIT=IDEV1)
END IF
10 CONTINUE
RETURN
15 CONTINUE
C NUMBER OF LINES TO PROCESS
N=IDBSIZ-NCONTS
DO 20 I=N,IDBSIZ-1
JL=LNBR(I)
JR=RNBR(I)
CALL READN(IDEV4,JL,NAMARC)
WRITE(KBOUT,1002)NAMARC
WRITE(IDEV5,1003)NAMARC
FILNAM = NAMARC
CALL OPENRS(IDEV1,NAMARC,IOK,LRECL,1)
IF(IOK.NE.0) GO TO 100
CALL READW(IDEV3,JL,GEL,MAXGEL)
L=ABS(LNGTHG(JL))
IF(LNGTHG(JL).LT.0)THEN
CALL SQREV(GEL,L)
CALL SQCOM(GEL,L)
END IF
CALL FMTDKN(IDEV1,GEL,L)
CLOSE(UNIT=IDEV1)
IF(JR.EQ.JL)GO TO 20
CALL READN(IDEV4,JR,NAMARC)
WRITE(KBOUT,1002)NAMARC
WRITE(IDEV5,1003)NAMARC
CALL OPENRS(IDEV1,NAMARC,IOK,LRECL,1)
IF(IOK.NE.0) GO TO 100
CALL READW(IDEV3,JR,GEL,MAXGEL)
L=ABS(LNGTHG(JR))
IF(LNGTHG(JR).LT.0)THEN
CALL SQREV(GEL,L)
CALL SQCOM(GEL,L)
END IF
CALL FMTDKN(IDEV1,GEL,L)
CLOSE(UNIT=IDEV1)
20 CONTINUE
RETURN
100 CONTINUE
WRITE(KBOUT,*)'Error opening file for extracted gel reading'
RETURN
END
SUBROUTINE GETLN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+IHELPS,IHELPE,FILEH,IDEVH)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),GELID
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER FILEH*(*)
EXTERNAL GELID
IERR = 1
NCONTC = GELID(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LLINO,
+IDBSIZ,KBIN,KBOUT,IDEVN,
+IHELPS,IHELPE,FILEH,IDEVH,INFLAG)
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.4) RETURN
IF(NCONTC.EQ.0) RETURN
IGELNO = NCONTC
IF(LNBR(NCONTC).NE.0) THEN
WRITE(KBOUT,1013)RELPG(NCONTC)
1013 FORMAT(' Position of this reading=',I6)
25 CONTINUE
NCONTC = LNBR(NCONTC)
IF(LNBR(NCONTC).NE.0) GO TO 25
WRITE(KBOUT,1014)NCONTC
1014 FORMAT( ' Number of leftmost reading this contig=',I6)
END IF
30 CONTINUE
N = IDBSIZ - NCONTS
DO 20 J=N,IDBSIZ-1
IF(LNBR(J).EQ.NCONTC) THEN
LINCON=J
GO TO 21
END IF
20 CONTINUE
WRITE(KBOUT,9999)
9999 FORMAT(' No contig line for this gel! Fix the database')
RETURN
21 CONTINUE
LLINO = NCONTC
IERR = 0
END
SUBROUTINE GETLN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+IHELPS,IHELPE,FILEH,IDEVH)
CALL GETLN2(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+IHELPS,IHELPE,FILEH,IDEVH)
END
SUBROUTINE GETREG(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LEFTMN,RIGHTM,LREG,RREG,LINCON,LLINO,IDBSIZ,KBIN,KBOUT,
+ IHELPS,IHELPE,FILEH,IDEVH,IOK)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER LREG,RREG,RIGHTM
CHARACTER FILEH*(*)
40 CONTINUE
MN = LEFTMN
MX = RIGHTM
LREG = MN
CALL GETINT(MN,MX,LREG,
+'Start position in contig',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,FILEH,IDEVH,IOK)
IF(IOK.NE.0) RETURN
LREG = IVAL
MN = LREG
MX = RIGHTM
RREG = MX
CALL GETINT(MN,MX,RREG,
+'End position in contig',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,FILEH,IDEVH,IOK)
IF(IOK.NE.0) RETURN
RREG = IVAL
C NOW FIND FIRST GEL THAT OVER LAPS
50 CONTINUE
X=RELPG(LLINO)+(ABS(LNGTHG(LLINO)))-1
IF(X.GE.LREG)GO TO 60
C NOT IN REGION
LLINO=RNBR(LLINO)
GO TO 50
60 CONTINUE
RETURN
END
SUBROUTINE GLEVEL(T,YF,YT,Y0,YP1,YP2,YM1,YM2)
CHARACTER T
IF(T.EQ.'0') THEN
YF = Y0
YT = Y0
ELSE IF(T.EQ.'1') THEN
YF = Y0
YT = YM1
ELSE IF(T.EQ.'2') THEN
YF = Y0
YT = YP1
ELSE IF(T.EQ.'3') THEN
YF = YP1
YT = YM1
ELSE IF(T.EQ.'4') THEN
YF = YP2
YT = YM2
END IF
END
CHARACTER*1 FUNCTION GTCONC(COUNTS,IDM,CUT)
INTEGER IDM
INTEGER COUNTS(IDM)
CHARACTER CHARSU
EXTERNAL CHARSU
C 30-3-92 made this routine sum counts
GTCONC = '-'
ISUM = 0
DO 5 I=1,IDM
ISUM = ISUM + COUNTS(I)
5 CONTINUE
IF(ISUM.EQ.0) RETURN
Y = ISUM
DO 10 I = 1,IDM - 1
X = REAL(COUNTS(I))/Y
IF(X.GE.CUT) THEN
GTCONC = CHARSU(I)
RETURN
END IF
10 CONTINUE
END
SUBROUTINE HIGHLT(GELSAV,NAMSAV,NUMSAV,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IDEV1,IDEV2,
+FILNAM,IOK)
CHARACTER LINEIN*132,CONSEN*100
CHARACTER GELNO*6,GEL*100,GELSAV*100,GELNAM*16
CHARACTER NAMSAV*16,NUMSAV*6
CHARACTER FILNAM*(*),HELPF*(*)
DIMENSION GELSAV(50),NAMSAV(50),NUMSAV(50)
CHARACTER PLUS*4,MINUS*4
EQUIVALENCE (LINEIN(2:2),GELNO),(LINEIN(9:9),GELNAM)
EQUIVALENCE (LINEIN(26:26),GEL)
EXTERNAL NOTIRL
CALL OPENF1(IDEV1,FILNAM,0,IOK,KBIN,KBOUT,
+'File containing contig display',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
FILNAM = ' '
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+'File for problem display',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
C
C FORMAT:
C
C12345678901234567890 10 20 30 ETC
C 12 GELNAM0000 CAGACGCGCGCGCGCGCGGATATAGTCTCTCCGCTCT
C 100 GELNAM0000 TGATACGCTCGCTCTCTCTCTCTCTCTCTTTC
C AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
C
C 70 80 ETC
C 12 GELNAM0000 AAAAAAAAAAAAAAAAAAAAAAAAAAAA
C
C
LIN = 1
CALL GTSTR('plus strand symbol',':',PLUS,LIN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) RETURN
IF(LIN.EQ.0) PLUS = ':'
LIN = 1
CALL GTSTR('minus strand symbol','.',MINUS,LIN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) RETURN
IF(LIN.EQ.0) MINUS = '.'
C COUNT LINE NUMBERS
LINNO=0
10 CONTINUE
C READ LINE OF NOS
READ(IDEV1,1003,END=100)LINEIN
LINNO=LINNO+1
1003 FORMAT(A)
C WRITE IT OUT AGAIN
WRITE(IDEV2,1003)LINEIN
C ZERO GEL COUNT FOR THIS STRIP
IGEL=0
20 CONTINUE
C
C READ A LINE, COULD BE 1 GEL, 2 CONSENSUS OR BLANK
C LINEIN=' '
READ(IDEV1,1003,END=100)LINEIN
LINNO=LINNO+1
C WHAT SORT OF LINE? ONLY A GEL WILL HAVE NON BLANK CHARS AT THE LEFT END
IF(LINEIN(2:7).NE.' ')THEN
C GEL LINE SO SAVE
IGEL=IGEL+1
GELSAV(IGEL)=GEL
NAMSAV(IGEL)=GELNAM
NUMSAV(IGEL)=GELNO
GO TO 20
END IF
C MUST BE CONSENSUS
CONSEN=GEL
C PROCESS THIS STRIP OF GELS (IGEL OF THEM)
DO 50 I=1,IGEL
C WHERE DOES DATA START AND END?
IFIRST=1
40 CONTINUE
IF(GELSAV(I)(IFIRST:IFIRST).NE.' ')GO TO 45
IFIRST=IFIRST+1
IF(IFIRST.LE.100)GO TO 40
C ERROR --- NO DATA FOUND
WRITE(KBOUT,1004)LINNO
1004 FORMAT(' Error on line',I6,' of file')
RETURN
45 CONTINUE
C NOW WHERE DOES IT END
ILAST=NOTIRL(GELSAV(I),100,' ')
C COMPARE WITH CONSENSUS
READ(NUMSAV(I),1001,ERR=900)INTEG
1001 FORMAT(I6)
IF(INTEG.GE.0)CALL IDTOD(CONSEN,GELSAV(I),IFIRST,ILAST,PLUS)
IF(INTEG.LT.0)CALL IDTOD(CONSEN,GELSAV(I),IFIRST,ILAST,MINUS)
WRITE(IDEV2,1008)NUMSAV(I),NAMSAV(I),GELSAV(I)(1:ILAST)
1008 FORMAT(' ',A,1X,A,1X,A)
50 CONTINUE
WRITE(IDEV2,1009)CONSEN
1009 FORMAT(' ',24X,A)
1006 FORMAT( )
C READ A BLANK LINE
READ(IDEV1,1003,END=100)LINEIN
LINNO=LINNO+1
WRITE(IDEV2,1003)LINEIN
C NO GO BACK FOR THE NEXT LINE OF NUMBERS
GO TO 10
100 CONTINUE
WRITE(KBOUT,1005)
1005 FORMAT(' Finished')
RETURN
900 WRITE(KBOUT,*)'Error reading gel number'
END
SUBROUTINE IDPLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+NCONTS,IX,IY,MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,
+DBTDUX,DBTDUY,NCONT,IGEL,IS)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER CHNRP1
EXTERNAL CWORLD,CHNRP1
YMAX = ISYMAX
YMIN = 0.
XMIN = 0.
LENCON = 0
DO 10 I = IDBSIZ-NCONTS,IDBSIZ-1
LENCON = LENCON + RELPG(I)
10 CONTINUE
XMAX = LENCON
XX = CWORLD(IX,MARGL,MARGR,XMIN,XMAX)
YX = CWORLD(IY,MARGB,MARGT,YMIN,YMAX)
YINC = (YMAX-YMIN)/3.
Y = 0.
XF = XMIN
N = 0
DO 20 I = IDBSIZ-NCONTS,IDBSIZ-1
N = N + 1
XT = XF + RELPG(I)
Y = Y + YINC
IF((XX.GT.XF).AND.(XX.LT.XT)) THEN
IS = NINT(((XX-XF)/(XT-XF)) * RELPG(I))
JGEL = LNBR(I)
IGEL = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,JGEL,IS)
NCONT = I
RETURN
END IF
XF = XT
IF(N.EQ.2) THEN
N = 0
Y = 0.
END IF
20 CONTINUE
IGEL = 0
NCONT = 0
END
SUBROUTINE IDTOD(TOPLIN,GEL,IFIRST,ILAST,SYMBOL)
CHARACTER TOPLIN*100,GEL*100,SYMBOL*4
DO 10 I=IFIRST,ILAST
IF(GEL(I:I).EQ.TOPLIN(I:I))GEL(I:I)=SYMBOL(1:1)
10 CONTINUE
END
INTEGER FUNCTION INDEXS(C,S)
PARAMETER (IDM = 29)
CHARACTER C
INTEGER POINTS(0:255),SCORES(IDM),IND(IDM),S
COMMON /SHOTC/POINTS
SAVE /SHOTC/
SAVE SCORES,IND
DATA
+IND/1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,6,6,6,6,6,6,1,2,3,4,5,5,6/
C DATA DUP/'CTAG1234DVBHKLMNRY5678ctag*,-'/
C changed 28-7-91 to give 10 to old zeroes and 100 to lowercase
DATA SCORES/
+100,100,100,100,
+75,75,75,75,
+100,100,100,100,
+100,100,100,100,
+10,10,10,10,10,10,
+100,100,100,100,100,100,10/
I = ICHAR(C)
I = POINTS(I)
S = SCORES(I)
INDEXS = IND(I)
END
C ROUTINES TO CONTROL CHARACTER LOOKUP FOR SHOTGUN SEQUENCING
SUBROUTINE INITS
C AUTHOR RODGER STADEN
INTEGER POINTS(0:255)
PARAMETER (IDM = 29)
CHARACTER DUP*29
COMMON /SHOTC/POINTS
SAVE /SHOTC/
DATA DUP/'CTAG1234DVBHKLMNRY5678ctag*,-'/
C ICHAR RETURNS THE COLLATING SEQUENCE NUMBER
C I WANT 1-4 FOR ACGT
C acgt
C 1234
C BDHV
C KLMN
C 5 FOR *
C 6 FOR 5678- AND ELSE
C THE ACTUAL VALUE RETURNED BY ICHAR IS NOT PORTABLE
C SO I NEED TO INITIALIZE POINTR SO THAT THE CORRECT
C ELEMENTS CONTAIN VALUES 1 - 6
C
DO 30 I = 0,255
POINTS(I) = IDM
30 CONTINUE
DO 35 I = 1,IDM
J = ICHAR(DUP(I:I))
POINTS(J) = I
35 CONTINUE
END
FUNCTION INLIST(LIST,IDLIST,ITEM)
C AUTHOR: RODGER STADEN
C SENT LIST LIST, AND ITEM ITEM. IF IN LIST RETURNS ELEMENT NUMBER, ELSE 0
INTEGER LIST(IDLIST)
INLIST=0
DO 1 I=1,IDLIST
IF(LIST(I).NE.ITEM)GO TO 1
INLIST=I
RETURN
1 CONTINUE
RETURN
END
SUBROUTINE IPLTC(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,STRAND,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,YMID,YINC,DEPTH,X,Y,KBOUT,
+IGEL,IOK)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG,STRAND,DEPTH
IOK = 1
XMIN = LREG
XMAX = RREG
YMAX = ISYMAX
YMIN = 0.
YINCO2 = STRAND*YINC/2.
I = LGEL
IGEL = 0
5 CONTINUE
IF(I.NE.0) THEN
IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.LREG) THEN
I = RNBR(I)
GO TO 5
END IF
END IF
N = 0
10 CONTINUE
IF(I.NE.0)THEN
IF(RELPG(I).LE.RREG) THEN
IF(SIGN(1,LNGTHG(I)).EQ.STRAND) THEN
XF = MAX(RELPG(I),LREG)
XT = MIN(ABS(LNGTHG(I))+RELPG(I)-1,RREG)
N = N + 1
IF(N.GT.DEPTH) N = 1
YF = YMID + N * YINC
IF((X.GE.XF).AND.(X.LE.XT)) THEN
IGEL = I
IF((Y.GE.YF-YINCO2).AND.(Y.LE.YF+YINCO2)) THEN
IOK = 0
RETURN
END IF
END IF
END IF
I = RNBR(I)
GO TO 10
END IF
END IF
END
C LINEUP
C
C TAKES 2 SEQS SET OF MATCHES AND PRODUCES LINED UP SEQS
C FINDS IF WE HAVE A LEFT OVERLAP
C RETURNS POSITION OF JOINT. THIS IS RELATIVE TO THE CONTIG
C FOR MOST MATCHES BUT I RELATIVE TO THE GEL FOR A LEFT OVERLAP
SUBROUTINE LINEUP(SEQG,SEQC,SEQG2,SEQC2,IDC,IDG,IDOUT,
1MATG,MATC,MATL,IP,ITOTPC,ITOTPG,JOINT,ITYPE,KBOUT,MAXGEL,IFAIL)
C AUTHOR: RODGER STADEN
CHARACTER SEQG(IDG),SEQC(IDC),SEQG2(IDOUT),SEQC2(IDOUT),PAD
INTEGER MATG(IP),MATC(IP),MATL(IP)
SAVE PAD
DATA PAD/','/
IFAIL=0
C ZERO PADDING CHARS IN CONTIG (GEL DONE AT END BY DIFFERENCE
C IN INPUT AND OUTPUT LENGTHS)
ITOTPC=0
C FILL OUTPUT WITH PADDING
DO 10 I=1,IDOUT
SEQG2(I)=PAD
SEQC2(I)=PAD
10 CONTINUE
NMTCH=0
C SET INITIAL POINTERS TO OUTPUT
C CONSENSUS
IS1=1
C GEL
IS2=1
C FIND DISTANCE FROM LEFT MATCH IN GEL TO LEFT OF GEL
IG2=MATG(1)-1
IF(IG2.EQ.0)THEN
C THE LEFT END OF THE GEL MATCHES SO THIS IS NOT A LEFT OVERLAP
C SET TYPE
ITYPE=-1
C SET JOINT
JOINT=MATC(1)
C SKIP NEXT SECTION
GO TO 50
END IF
C FIND DISTANCE FROM LEFT MATCH IN CONTIG TO LEFT OF CONTIG
IC2=MATC(1)-1
C GET DISTANCE FROM FIRST MATCH IN CONTIG TO FIRST MATCH IN GEL.
C IF THIS DISTANCE <0 THEN WE HAVE A LEFT OVERLAP
IC1=IC2-IG2+1
IF(IC1.GT.0)THEN
C THIS IS NOT A LEFT OVERLAP
C SET TYPE
ITYPE=-1
C SET LEFT END
JOINT=IC1
C COPY THE GEL UPTO THE FIRST MATCH, INTO THE OUTPUT ARRAY
C CHECK FOR OVERFLOW
IF(IG2.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQG(1),SEQG2(1),IG2)
C COPY THE CONTIG FOR THE SAME REGION
IF(IG2.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQC(IC1),SEQC2(1),IG2)
IS1=IS1+IG2
IS2=IS2+IG2
GO TO 50
END IF
C MUST BE LEFT END OVERLAP
C SET TYPE
ITYPE=1
C SET POSITION OF JOINT RELATIVE TO GEL
JOINT=ABS(IC1)+2
C COPY OVER THE GEL UPTO THE JOINT
C CHECK FOR OVERFLOW
IF(IG2.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQG(1),SEQG2(1),IG2)
IS2=IS2+IG2
C WE MAY ALSO HAVE MISMATCHING
C DATA AT THE JOIN SO DEAL WITH THAT NOW
C IF IC2 >0 THE LEFT END OF THE CONTIG MATCHES THE GEL BUT OTHERWISE
C WE HAVE SOME MISMATCHED DATA TO DEAL WITH - WE NEED TO TRANSFER
C THE MISMATCHED REGION OF THE CONTIG TO THE OUTPUT ARRAY
IF(IC2.GT.0)THEN
IF(IC2.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQC(1),SEQC2(1),IC2)
IS1=IS1+IC2
END IF
C WHEN WE GET HERE WE HAVE SORTED OUT THE LEFT ENDS FOR LEFT OVERLAP
C AND MISMATCHED LEFT ENDS, WE NOW DEAL WITH THE REST OF THE SEQUENCE
C STARTING WITH THE FIRST BLOCK OF IDENTITY
C
C IG1 POSITION IN INPUT GEL
C IS2 POSITION IN OUTPUT GEL
C IC1 POSITION IN INPUT CONTIG
C IS1 POSITION IN OUTPUT CONTIG
C LG1 POSITION OF END OF CURRENT MATCH IN OUTPUT GEL
C LC1 POSITION OF END OF CURRENT MATCH IN OUTPUT CONTIG
C LG2 DISTANCE FROM CURRENT MATCH IN INPUT GEL TO NEXT MATCH
C LC2 DISTANCE FROM CURRENT MATCH IN INPUT CONTIG TO NEXT MATCH
C
50 CONTINUE
C POINT TO NEXT MATCH
NMTCH=NMTCH+1
C COPY NEXT MATCH
IG1=MATG(NMTCH)
IC1=MATC(NMTCH)
L=MATL(NMTCH)
C CHECK FOR OVERFLOW
IF(IS2+L-1.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQG(IG1),SEQG2(IS2),L)
C CHECK FOR OVERFLOW
IF(IS1+L-1.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQC(IC1),SEQC2(IS1),L)
C POINT TO NEXT OUTPUT POSITIONS
IS1=IS1+L
IS2=IS2+L
C END OF CURRENT MATCH
LG1=IG1+L
LC1=IC1+L
C ANY MORE MATCHES
IF(NMTCH.EQ.IP)GO TO 500
K=NMTCH+1
LG2=MATG(K)-LG1
LC2=MATC(K)-LC1
C ANY DIFFERENCE IN LENGTH? IF SO WE HAVE TO PAD SO THEY BECOME THE SAME
L5=ABS(LG2-LC2)
C COUNT PADDING CHARS IN CONTIG
IF(LG2.GT.LC2)ITOTPC=ITOTPC+L5
C IF DIFFERENCE INCREMENT SHORTER
IF(LG2.GT.LC2)IS1=IS1+L5
C IF GEL NEEDS PADDING TRY TO PUT PADS NEXT TO DOUBLE CODES
IF(LC2.GT.LG2)CALL PADCOP(SEQG,SEQG2,
+LG1,MATG(K),L5,IS2,LG2,MAXGEL,IFAIL,KBOUT,SEQC,LC1)
C CHECK FOR OVERFLOW
IF(IFAIL.EQ.1)GO TO 700
C NOW COPY MISSMATCHED REGION
C CHECK FOR OVERFLOW
IF(IS2+LG2-1.GT.MAXGEL)GO TO 700
IF(LG2.GT.0)CALL SQCOPY(SEQG(LG1),SEQG2(IS2),LG2)
C CHECK FOR OVERFLOW
IF(IS1+LC2-1.GT.MAXGEL)GO TO 700
IF(LC2.GT.0)CALL SQCOPY(SEQC(LC1),SEQC2(IS1),LC2)
C POINT TO NEXT OUTPUT POSITIONS
IS1=IS1+LC2
IS2=IS2+LG2
C GET NEXT MATCH
GO TO 50
500 CONTINUE
C
C FINISH RIGHT ENDS
C ONLY COPY TO END OF GEL IN GEL AND TO THE SAME RELATIVE POSITION
C IN THE CONTIG FOR DISPLAY PURPOSES AND FOR COUNTING MISMATCH
C CURRENT ENDS AT LG1,LC1
C HOW FAR TO END OF GEL?
C SET M
M=0
L=IDG-LG1+1
IF(L.LT.1)GO TO 600
C CHECK FOR OVERFLOW
IF(IS2+L-1.GT.MAXGEL)GO TO 700
CALL SQCOPY(SEQG(LG1),SEQG2(IS2),L)
C NEED TO COPY TO END OF GEL IN CONTIG FOR DISPLAY
C POINT TO POSN IN CONTIG LEVEL WITH END OF GEL
M=LC1+L-1
C IS THIS OVER END OF CONTIG?
IF(M.GT.IDC)M=IDC
C NUMBER TO COPY
M=M-LC1+1
C CHECK FOR OVERFLOW
IF(IS1+M-1.GT.MAXGEL)GO TO 700
IF(M.GT.0)CALL SQCOPY(SEQC(LC1),SEQC2(IS1),M)
600 CONTINUE
C COUNT PADDING IN GEL
ITOTPG=IS2+L-1-IDG
C SET NEW LENGTHS FOR RETURN TO CALLING ROUTINE
IDOUT=IS1+M-1
IDG=IS2+L-1
IFAIL=0
RETURN
700 CONTINUE
WRITE(KBOUT,1000)
1000 FORMAT(' Matching region too long for routine lineup,',
+' alignment aborted')
IFAIL=1
RETURN
END
SUBROUTINE LSTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LLINO,
+RREG,IDEV,IDEVN,NAMARC)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG
CHARACTER NAMARC*(*)
N = LLINO
WRITE(IDEV,1001)
10 CONTINUE
CALL READN(IDEVN,N,NAMARC)
WRITE(IDEV,1006)NAMARC,N,RELPG(N),LNGTHG(N),LNBR(N),RNBR(N)
IF(RNBR(N).NE.0) THEN
N = RNBR(N)
IF(RELPG(N).LE.RREG) GO TO 10
END IF
1001 FORMAT(
+' NAME NUMBER POSITION LENGTH NEIGHBOURS'/
+' LEFT RIGHT')
1006 FORMAT( ' ',A,2X,I6,2X,I7,2X,I5,2X,I6,2X,I6)
C1006 FORMAT( ' ',A,2X,I4,2X,I7,2X,I5,2X,I6,2X,I6)
END
C12345678901234567890
C 710 720 730 740 750
C -1 HINW.004 CGTCAGACGCACGCTGGAAAA
INTEGER FUNCTION LTYPE(LINE,LL,J1,J2,N,MAXDB,KBOUT)
PARAMETER (MAXDG = 5)
CHARACTER LINE*(*),NUM*(MAXDG),SPACE
EXTERNAL NOTRL,NOTLR
PARAMETER (SPACE= ' ')
J1 = NOTLR(LINE,LL,SPACE)
IF(J1.EQ.0) THEN
C BLANK LINE
LTYPE = 1
RETURN
END IF
IF(J1.GT.20) THEN
C LINE OF NUMBERS
LTYPE = 2
RETURN
END IF
IF(J1.GT.MAXDG+2) THEN
C CONSENSUS LINE
LTYPE = 3
RETURN
END IF
C SHOULD BE A SEQUENCE LINE
J = INDEX(LINE(J1:),SPACE)
NUM = SPACE
NUM = LINE(J1:J1+J-2)
CALL RJST(NUM)
1001 FORMAT(I6)
READ(NUM,1001,ERR=10) N
IF(N.GT.MAXDB-2) GO TO 10
C NUMBER ENDS AT J1+J-2
J1 = J1 + J - 1
C LOOK FOR BEGINNING OF NAME
J = NOTLR(LINE(J1:),LL-J1+1,SPACE)
N1 = J1 + J - 1
C LOOK FOR END OF NAME
J = INDEX(LINE(N1:),SPACE)
N2 = N1 + J - 2
C LOOK FOR BEGINNING OF SEQ
J = NOTLR(LINE(N2+1:),LL-N2,SPACE)
J1 = N2 + J
LTYPE = 4
C LOOK FOR END OF SEQ
J2 = NOTRL(LINE,LL,SPACE)
IF(J2.GT.N2) RETURN
10 CONTINUE
LTYPE = 0
END
INTEGER FUNCTION LWRAPS(I,J)
K = MOD(I,J)
IF(K.EQ.0) K = J
LWRAPS = K
END
C MERGE
C
C ROUTINE SENT CONTIG WHOSE GELS MAY BE OUT OF ORDER
C REORDERS GELS ON POSITION OF LEFT ENDS AND SETS LEFT
C GEL NUMBER FOR THE REORDERED CONTIG
C
SUBROUTINE MERGE(RELPG,LNGTHG,LNBR,RNBR,LINCON,IDBSIZ)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
C
C START AT LEFT END
N=LNBR(LINCON)
GO TO 22
21 CONTINUE
C SET POINTER TO NEXT GEL TO RIGHT IN LIST
N=NR
IF(I1.GT.0)N=I2
22 CONTINUE
C SET POINTER TO NEXT GEL TO RIGHT
NR=RNBR(N)
IF(NR.EQ.0)GO TO 30
C HAVENT REACHED END YET
I1=0
23 CONTINUE
C ARE THESE 2 IN CORRECT ORDER IE N<=NR ?
IF(RELPG(N).LE.RELPG(NR))GO TO 21
C NOT IN ORDER SO CHAIN LEFT UNTIL CORRECTLY POSITIONED
C THEN COME BACK TO THIS POINT AND CONTINUE
C IF FIRST MOVE SAVE POSITION
IF(I1.EQ.0)I2=N
I1=1
C EXCHANGE NEIGHBOURS
M=RNBR(NR)
IF(M.NE.0)LNBR(M)=N
M=LNBR(N)
IF(M.NE.0)RNBR(M)=NR
RNBR(N)=RNBR(NR)
RNBR(NR)=N
LNBR(NR)=LNBR(N)
LNBR(N)=NR
C CHAIN BACK THRU LIST
N=LNBR(NR)
IF(N.EQ.0)GO TO 21
C END NOT REACHED
GO TO 23
30 CONTINUE
C ALL DONE POINTER AT RIGHT GEL
RNBR(LINCON)=N
RETURN
END
SUBROUTINE MINCOM(SEQ1,IDIM1,SEQ2,IDIM2,SAV1,SAV2,SAV3,
+IP,MINM,KBOUT)
C AUTHOR: RODGER STADEN
C
CHARACTER SEQ1(IDIM1),SEQ2(IDIM2)
INTEGER SAV1(IP),SAV2(IP),SAV3(IP)
C
IP1=IP
IP=0
C
C SITUATION 1
NT1=IDIM2-MINM
IES1=MINM-1
ISS2=NT1+1
C
DO 100 I=1,NT1
C
C POINT TO FIRST CHAR-1 OF SEQ2
ISS2=ISS2-1
C POINT TO LAST CHAR SEQ1
IES1=IES1+1
C
N=0
C
DO 200 J=1,IES1
C STORE POINTER
JJ=J
C
C POINT TO SEQ2
K=ISS2+J
C TEST FOR EQUALITY
IF(SEQ1(J).NE.SEQ2(K))GO TO 220
C INCREMENT N
N=N+1
GO TO 200
220 CONTINUE
C TEST FOR SUFFICENTLY LARGE N
IF(N.GE.MINM)CALL SAVIT(N,J,K,IP,SAV1,SAV2,SAV3,IP1)
C TEST FOR OVERFLOW
IF(IP.GT.IP1)GO TO 5000
C RESET N TO ZERO
N=0
200 CONTINUE
C
C GOOD SCORE AT END?
C NEED TO INCREMENT POINTERS AS SAVIT EXPECTS TO BE POINTING AT NEXT
C MISMATCH
JJ=JJ+1
KK=K+1
IF(N.GE.MINM)CALL SAVIT(N,JJ,KK,IP,SAV1,SAV2,SAV3,IP1)
C TEST FOR OVERFLOW
IF(IP.GT.IP1)GO TO 5000
C
100 CONTINUE
C
C
C SITUATION 2
NT2=IDIM1-IDIM2+1
C
DO 300 I=1,NT2
N=0
C
DO 400 J=1,IDIM2
C SAVE POINTER
JJ=J
C
C SET POINTER TO SEQ1
L=I+J-1
IF(SEQ1(L).NE.SEQ2(J))GO TO 420
N=N+1
GO TO 400
420 CONTINUE
IF(N.GE.MINM)CALL SAVIT(N,L,J,IP,SAV1,SAV2,SAV3,IP1)
C TEST FOR OVERFLOW
IF(IP.GT.IP1)GO TO 5000
N=0
400 CONTINUE
LL=L+1
JJ=JJ+1
IF(N.GE.MINM)CALL SAVIT(N,LL,JJ,IP,SAV1,SAV2,SAV3,IP1)
C TEST FOR OVERFLOW
IF(IP.GT.IP1)GO TO 5000
300 CONTINUE
C
C
C SITUATION 3
ISS1=IDIM1-IDIM2
C
DO 500 I=1,NT1
C
C POINT TO FIRST CHAR SEQ1
K=ISS1+I
IES2=IDIM2-I
N=0
C
DO 600 J=1,IES2
C SAVE POINTER
JJ=J
C
C POINT TO SEQ1
L=K+J
IF(SEQ1(L).NE.SEQ2(J))GO TO 620
N=N+1
GO TO 600
620 CONTINUE
IF(N.GE.MINM)CALL SAVIT(N,L,J,IP,SAV1,SAV2,SAV3,IP1)
C TEST FOR OVERFLOW
IF(IP.GT.IP1)GO TO 5000
N=0
600 CONTINUE
C
LL=L+1
JJ=JJ+1
IF(N.GE.MINM)CALL SAVIT(N,LL,JJ,IP,SAV1,SAV2,SAV3,IP1)
C TEST FOR OVERFLOW
IF(IP.GT.IP1)GO TO 5000
500 CONTINUE
C
RETURN
5000 CONTINUE
C OVERFLOW
C
WRITE(KBOUT,1000)IP1
1000 FORMAT(/' TOO MANY MATCHES. LIMIT = ',I6)
RETURN
END
SUBROUTINE ML(PC,PG,L,N,J)
INTEGER PC(N),PG(N),L(N)
DO 10 I = J,N-1
PC(I) = PC(I+1)
PG(I) = PG(I+1)
L(I) = L(I+1)
10 CONTINUE
END
SUBROUTINE MSTLKL(SEQ,IDIM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
CHARACTER CHARSU
EXTERNAL CHARSU,INDEXS
DO 100 I=1,IDIM
J = INDEXS(SEQ(I),K)
SEQ(I) = CHARSU(J)
100 CONTINUE
END
CHARACTER FUNCTION MUNOTP(IP)
C AUTHOR RODGER STADEN
CHARACTER PUP*26
SAVE PUP
DATA PUP/'CSTPAGNDEQBZHRKMILVFYW-X? '/
MUNOTP = '-'
IF((IP.GT.0).AND.(IP.LT.23))MUNOTP = PUP(IP:IP)
END
INTEGER FUNCTION NCDEP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,IGEL,
+STRAND,RREG)
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG,STRAND
NCDEP = 0
N = 0
I = IGEL
10 CONTINUE
IF(I.NE.0) THEN
IF(RELPG(I).LE.RREG) THEN
IF(SIGN(1,LNGTHG(I)).EQ.STRAND) N = N + 1
I = RNBR(I)
GO TO 10
END IF
END IF
NCDEP = N
END
SUBROUTINE PADCOP(SEQG,SEQG2,LG1,MG,L5,IS2,LG2,MAXGEL,IFAIL,
+KBOUT,SEQC,IC1)
C AUTHOR: RODGER STADEN
PARAMETER (NDUBL = 4)
CHARACTER SEQG(MAXGEL),SEQG2(MAXGEL),DUBBL(NDUBL),SEQC(MAXGEL)
SAVE DUBBL
DATA DUBBL/'D','B','V','H'/
JC1 = IC1
C Make seqg2 from seqg placing L5 padding chars before position MG
C which is the start of the next block of identity. Try to put the
C padding either in line with consensus pads, or next to double
C codes. The positions in seqg are LG1 to MG-1. seqg2 needs to be long
C enough to be extended from IS2 to IS2 + L5 -1 + MGM1-LG1 +1
C ie we add L5 pads, plus the chars between and including LG1 and MGM1
IDONE=0
C POINT TO END OF MISMATCH
MGM1=MG-1
C MAY BE NO CHARS TO COPY
IF(MGM1.LT.LG1)GO TO 111
C Next check added 26-2-91
MAXREQ = IS2 + L5 - 1 + MGM1 - LG1 + 1
IF((MGM1.GT.MAXGEL).OR.(MAXREQ.GT.MAXGEL)) THEN
WRITE(KBOUT,1000)
1000 FORMAT(' Matching region too large for routine padcop,',
+ ' alignment aborted')
IFAIL=1
RETURN
END IF
DO 110 J=LG1,MGM1
IF(IDONE.LT.L5) THEN
IF((JC1.GT.0).AND.(JC1.LT.MAXGEL)) THEN
IF(SEQC(JC1).EQ.'*') THEN
IS2 = IS2 + 1
JC1 = JC1 + 1
IDONE = IDONE + 1
GO TO 109
END IF
END IF
DO 108 M=1,NDUBL
IF(SEQG(J).EQ.DUBBL(M)) THEN
IS2 = IS2 + 1
JC1 = JC1 + 1
IDONE = IDONE + 1
GO TO 109
END IF
108 CONTINUE
109 CONTINUE
END IF
SEQG2(IS2) = SEQG(J)
IS2 = IS2 + 1
JC1 = JC1 + 1
110 CONTINUE
111 CONTINUE
C ALL CHARS COPIED. ENOUGH PADDING?
IF(IDONE.LT.L5)IS2=IS2+L5-IDONE
C IS2 SHOULD NOW BE POINTING AT NEXT CHAR
C ZERO LG2 TO SHOW CALLING ROUTINE COPYING DONE
LG2=0
IFAIL=0
END
SUBROUTINE PCON1(CHAR,CHRSUM)
C AUTHOR RODGER STADEN
C PART OF PROTEIN 'CONSENSUS' CALCULATION
CHARACTER CHAR
INTEGER CHRSUM
INTEGER CTONUM
EXTERNAL CTONUM
K = CTONUM(CHAR)
IF(K.NE.26)THEN
IF(CHRSUM.EQ.0)THEN
CHRSUM = K
ELSE
IF(K.NE.CHRSUM)CHRSUM = -1
END IF
END IF
END
SUBROUTINE PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL,
+NCONTS,MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
YMIN = 0.
YMAX = ISYMAX
XMIN = 0.
LENCON = 0
DO 10 I = IDBSIZ-NCONTS,IDBSIZ-1
LENCON = LENCON + RELPG(I)
10 CONTINUE
XMAX = LENCON
YINC = (YMAX-YMIN)/3.
Y = 0.
XF = XMIN
N = 0
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
DO 20 I = IDBSIZ-NCONTS,IDBSIZ-1
N = N + 1
XT = XF + RELPG(I)
Y = Y + YINC
CALL LINE(XF,XT,Y,Y,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
IF((IGEL.NE.0).AND.(I.EQ.LINCON)) THEN
XZ = XF + RELPG(IGEL) + ABS(LNGTHG(IGEL))/2
CALL LINE(XZ,XZ,YMAX,YMIN,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
XF = XT
IF(N.EQ.2) THEN
N = 0
Y = 0.
END IF
20 CONTINUE
CALL VT100M
END
SUBROUTINE PLTC(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,STRAND,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,YMID,YINC,DEPTH)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG,STRAND,DEPTH
XMIN = LREG
XMAX = RREG
YMAX = ISYMAX
YMIN = 0.
I = LGEL
5 CONTINUE
IF(I.NE.0) THEN
IF((RELPG(I)+ABS(LNGTHG(I))-1).LT.LREG) THEN
I = RNBR(I)
GO TO 5
END IF
END IF
N = 0
10 CONTINUE
IF(I.NE.0)THEN
IF(RELPG(I).LE.RREG) THEN
IF(SIGN(1,LNGTHG(I)).EQ.STRAND) THEN
XF = MAX(RELPG(I),LREG)
XT = MIN(ABS(LNGTHG(I))+RELPG(I)-1,RREG)
N = N + 1
IF(N.GT.DEPTH) N = 1
YF = YMID + N * YINC
CALL LINE(XF,XT,YF,YF,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
I = RNBR(I)
GO TO 10
END IF
END IF
END
SUBROUTINE PLTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+MARGL,MARGR,MARGB,
+MARGT,ISXMAX,ISYMAX,LGEL,LREG,RREG,DEPTHP,DEPTHM)
INTEGER DEPTHP,DEPTHM
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER RREG,STRAND
C have window size margt starting at margb
C depths depthp, depthm
YMAX = ISYMAX
YMIN = 0.
XMIN = LREG
XMAX = RREG
RINC = YMAX / (DEPTHP + DEPTHM + 2)
RMID =(DEPTHM+1) * RINC
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX,RMID,RMID,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL TEXT(XMIN,RMID,'*',1,ISIZE,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL TEXT(XMAX,RMID,'*',1,ISIZE,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
STRAND = 1
YINC = RINC * STRAND
CALL PLTC(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,STRAND,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,RMID,YINC,DEPTHP)
STRAND = -1
YINC = RINC * STRAND
CALL PLTC(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,LGEL,LREG,RREG,STRAND,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,RMID,YINC,DEPTHM)
CALL VT100M
END
SUBROUTINE PLTQ(SEQ,IDIM2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CHARACTER SEQ(IDIM2),T
PARAMETER (Y0 = 0.,
+ YP1 = 1.,
+ YP2 = 2.,
+ YM1 = -1.,
+ YM2 = -2.)
XMIN = 0.
XMAX = IDIM2
YMIN = YM2
YMAX = YP2
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(XIN,XMAX,Y0,Y0,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
I = 1
10 CONTINUE
XF = I
T = SEQ(I)
20 CONTINUE
IF(SEQ(I).NE.T) THEN
CALL GLEVEL(T,YF,YT,Y0,YP1,YP2,YM1,YM2)
XT = I - 1
CALL LINE(XF,XF,YF,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(XF,XT,YT,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(XT,XT,YF,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
GO TO 10
END IF
I = I + 1
IF(I.LT.IDIM2) GO TO 20
CALL GLEVEL(T,YF,YT,Y0,YP1,YP2,YM1,YM2)
XT = I
CALL LINE(XF,XF,YF,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(XF,XT,YT,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(XT,XT,YF,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
END
SUBROUTINE READN(IDEVN,N,NAME)
CHARACTER NAME*(*)
READ(IDEVN,REC=N)NAME
RETURN
END
SUBROUTINE READR(IDEVR,N,RELPG,LNGTHG,LNBR,RNBR)
INTEGER RELPG,RNBR
INTEGER SWAPBO
EXTERNAL SWAPBO
READ(IDEVR,REC=N+1)RELPG,LNGTHG,LNBR,RNBR
RELPG = SWAPBO(RELPG)
LNGTHG = SWAPBO(LNGTHG)
LNBR = SWAPBO(LNBR)
RNBR = SWAPBO(RNBR)
RETURN
END
SUBROUTINE READW(IDEVW,N,GEL,MAXGEL)
CHARACTER GEL(MAXGEL)
READ(IDEVW,REC=N)GEL
RETURN
END
SUBROUTINE REMOVL(MATC,MATG,MATL,IP)
C AUTHOR: RODGER STADEN
INTEGER MATC(IP),MATG(IP),MATL(IP)
C
C SET POINTER TO FIRST MATCH
NMTCH=0
10 CONTINUE
C POINT TO NEXT MATCH
NMTCH=NMTCH+1
C SORT MATCHES ON LENGTH
IPP=IP-NMTCH+1
CALL BUBBL3(MATL(NMTCH),MATG(NMTCH),MATC(NMTCH),IPP)
C LOOK FOR END OF POSITIVES
DO 20 I=NMTCH,IP
J=I
20 IF(MATL(I).LT.1)GO TO 30
J=J+1
30 CONTINUE
IP=J-1
C END OF POSITIVES AT IP
IF(NMTCH.GE.IP)RETURN
K1=MATC(NMTCH)
K2=K1+MATL(NMTCH)-1
K3=MATG(NMTCH)
K4=K3+MATL(NMTCH)-1
C POINT TO FIRST MATCH TO TEST
K6=NMTCH+1
DO 200 I=K6,IP
C DO CONSENSUS FIRST
C OVERLAP?
IF(MATC(I).GT.K2)GO TO 100
K5=MATC(I)+MATL(I)-1
IF(K5.LT.K1)GO TO 100
C DOES OVERLAP
C WHICH END
IF(K5.LE.K2)GO TO 80
C LENGTH TO REDUCE MATCH BY IS IDELT
IDELT=K2-MATC(I)+1
C NEW LENGTH
MATL(I)=MATL(I)-IDELT
C MOVE LEFT ENDS
MATC(I)=MATC(I)+IDELT
MATG(I)=MATG(I)+IDELT
GO TO 100
80 CONTINUE
C LENGTH
MATL(I)=K1-MATC(I)
100 CONTINUE
C NOW LOOK FOR OVERLAPS WITH GEL
C OVERLAP?
IF(MATG(I).GT.K4)GO TO 200
K5=MATG(I)+MATL(I)-1
IF(K5.LT.K3)GO TO 200
C DOES OVERLAP
C WHICH END?
IF(K5.LE.K4)GO TO 180
C LENGTH TO REDUCE MATCH BY IS IDELT
IDELT=K4-MATG(I)+1
C NEW LENGTH
MATL(I)=MATL(I)-IDELT
C MOVE LEFT ENDS
MATC(I)=MATC(I)+IDELT
MATG(I)=MATG(I)+IDELT
GO TO 200
180 CONTINUE
C LENGTH
MATL(I)=K3-MATG(I)
200 CONTINUE
GO TO 10
END
C SAVIT
C
SUBROUTINE SAVIT(N,J,K,IP,S1,S2,S3,IP1)
C AUTHOR: RODGER STADEN
INTEGER S1(IP1),S2(IP1),S3(IP1)
C
IP=IP+1
C TEST FOR OVERFLOW
IF(IP.GT.IP1)RETURN
S1(IP)=N
S2(IP)=J-N
S3(IP)=K-N
C
RETURN
END
SUBROUTINE SCRENR(GEL,MAXGEL,STRING,NAME,FILNAM,
+IDEV1,IDEV2,IDEV3,IDEV4,IDEV,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER NAME*(*),FILNAM*(*),HELPF*(*)
CHARACTER GEL(MAXGEL),STRING(60)
INTEGER GNFFOF
EXTERNAL GNFFOF
CALL YESNO(INF,'Use file of file names',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(INF.LT.0) RETURN
IF(INF.EQ.0) THEN
FILNAM = ' '
CALL OPENF1(IDEV1,FILNAM,0,IOK,KBIN,KBOUT,
+ 'File of gel reading names',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
FILNAM = ' '
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+ 'File for names of sequences that pass',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
END IF
FILNAM = ' '
CALL OPENF1(IDEV3,FILNAM,0,IOK,KBIN,KBOUT,
+'File name of recognition sequences',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
C
JGEL = 0
IGEL = 0
1 CONTINUE
IF(INF.EQ.1) THEN
31 CONTINUE
MN = 0
CALL GTSTR('Gel reading name',' ',NAME,MN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.3) RETURN
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
GO TO 31
END IF
ELSE
IOK = GNFFOF(IDEV1,NAME)
IF(IOK.EQ.1) GO TO 100
IF(IOK.NE.0) GO TO 1
END IF
1002 FORMAT(A)
JGEL = JGEL + 1
WRITE(IDEV,*)'Processing', JGEL,' in batch'
WRITE(IDEV,1003)NAME
1003 FORMAT(' Gel reading name ',A)
IDIMG=MAXGEL
CALL OPENRS(IDEV4,NAME,IOK,LRECL,2)
IF(IOK.NE.0)THEN
IF(INF.EQ.1) RETURN
WRITE(KBOUT,*)' Error opening gel reading file'
GO TO 1
END IF
CALL ARRFIM(IDEV4,GEL,IDIMG,KBOUT)
CLOSE(UNIT=IDEV4)
2 CONTINUE
IF(IDIMG.LT.1)THEN
WRITE(KBOUT,*)' Gel reading too short to compare'
GO TO 1
END IF
CALL MSTLKL(GEL,IDIMG)
3 CONTINUE
READ(IDEV3,1005,END=6)STRING
1005 FORMAT(60A1)
C FIND LENGTH OF STRING ASSUMING NO SPACES
DO 4 I=1,60
II=I
IF(STRING(I).EQ.' ')GO TO 5
4 CONTINUE
5 CONTINUE
II=II-1
IF(II.GT.0)CALL FIND(GEL,IDIMG,STRING,II,JMATCH)
IF(JMATCH.EQ.0)GO TO 3
C A MATCH
WRITE(IDEV,1007)JMATCH,(STRING(K),K=1,II)
1007 FORMAT(' Match at',I6,' with ',60A1)
REWIND IDEV3
GO TO 1
C NO MATCH SO SAVE
6 CONTINUE
WRITE(IDEV2,1002)NAME
IGEL = IGEL + 1
REWIND IDEV3
GO TO 1
100 CONTINUE
WRITE(KBOUT,*)'Batch finished'
WRITE(KBOUT,*)JGEL,' compared and ',IGEL,' passed'
RETURN
END
SUBROUTINE SCRENV(MAXGEL,
+WORDP,WORDN,LPOWRC,POSNS,GELN,
+SEQ,MAXSEQ,GEL,GELCOP,MATCH,
+LENGTH,
+SAVPS,SAVPG,SAVL,MAXMAT,CENDS,NENDS,MAXCON,CONST,
+KBIN,KBOUT,IDEV1,IDEV2,IDEV3,IDEV4,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,FILNAM,NAME,IOK)
INTEGER POSNS(MAXSEQ),GELN(MAXGEL),WORDP(LPOWRC),SAVPS(MAXMAT)
INTEGER SAVPG(MAXMAT),SAVL(MAXMAT)
INTEGER WORDN(LPOWRC)
CHARACTER FILNAM*(*),NAME*(*),HELPF*(*)
CHARACTER GELCOP(MAXGEL)
INTEGER CENDS(MAXCON)
INTEGER NENDS(MAXCON)
INTEGER CONST(LENGTH),CSTART
CHARACTER SEQ(MAXSEQ),GEL(MAXGEL),MATCH(MAXGEL)
INTEGER GNFFOF
EXTERNAL GNFFOF
JGEL = 0
IGELS = 0
CALL YESNO(INF,'Use file of file names',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(INF.LT.0) RETURN
IF(INF.EQ.0) THEN
FILNAM = ' '
CALL OPENF1(IDEV1,FILNAM,0,IOK,KBIN,KBOUT,
+ 'File of gel reading names',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
FILNAM = ' '
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+ 'File for names of gel readings that pass',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
END IF
FILNAM = ' '
CALL OPENF1(IDEV4,FILNAM,0,IOK,KBIN,KBOUT,
+'File name of vector sequence',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
IDIM = MAXSEQ
CALL ARRFIM(IDEV4,SEQ,IDIM,KBOUT)
CLOSE(UNIT=IDEV4)
MN = LENGTH*2
MX = 50
MINMAT = MAX(15,MN)
CALL GETINT(MN,MX,MINMAT,
+'Minimum initial match',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINMAT = IVAL
IDCEND=MAXCON
CALL FNDCON(SEQ,IDIM,CENDS,NENDS,IDCEND,MAXCON,KBOUT)
C IS THE VECTOR SEQUENCE IN THE CORRECT FORMAT WITH A TITLE AT THE FRONT?
IF(IDCEND.EQ.0)THEN
CENDS(1) = -19
NENDS(1) = 1
CENDS(2) = IDIM + 1
IDCEND = 1
END IF
C WRITE(KBOUT,9999)
C9999 FORMAT(' VECTOR SEQUENCE REQUIRES A TITLE EG ',
C 1' <---M13MP7.001----->')
C RETURN
C END IF
CALL BUSY(KBOUT)
C
C init hashing routines
C
CALL INITE(CONST,CSTART,LENGTH)
CALL ENCOF(SEQ,IDIM,CONST,CSTART,LENGTH,POSNS)
CALL ENCONN(POSNS,IDIM,WORDP,WORDN,LPOWRC,LENGTH,1)
C
1 CONTINUE
IF(INF.EQ.1) THEN
3 CONTINUE
MN = 0
CALL GTSTR('Gel reading name',' ',NAME,MN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.3) RETURN
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
GO TO 3
END IF
ELSE
IOK = GNFFOF(IDEV1,NAME)
IF(IOK.EQ.1) GO TO 100
IF(IOK.NE.0) GO TO 1
END IF
JGEL = JGEL + 1
WRITE(IDEV,*)'Processing',JGEL,' in batch'
WRITE(IDEV,1003)NAME
1003 FORMAT(' Gel reading name ',A)
IDIMG=MAXGEL
CALL OPENRS(IDEV3,NAME,IOK,LRECL,2)
IF(IOK.NE.0)THEN
IF(INF.EQ.1) RETURN
WRITE(IDEV,*)' Gel reading file not found'
GO TO 1
END IF
CALL ARRFIM(IDEV3,GEL,IDIMG,KBOUT)
CLOSE(UNIT=IDEV3)
C LONG ENOUGH ?
IF(IDIMG.LT.MINMAT)THEN
WRITE(IDEV,*)' Gel reading too short to compare'
GO TO 1
END IF
CALL SQCOPY(GEL,GELCOP,IDIMG)
ISTRAN=1
IMATCH=0
2 CONTINUE
CALL BUSY(KBOUT)
CALL MSTLKL(GEL,IDIMG)
CALL ENCO(GEL,IDIMG,GELN,CONST,LENGTH)
WRITE(IDEV,1009)ISTRAN
1009 FORMAT(' Searching strand',I6)
IDSAV=MAXMAT
CALL CFGEL(GELN,IDIMG,POSNS,IDIM,WORDP,WORDN,LENGTH,LPOWRC,
+SAVPG,SAVPS,SAVL,
+IDSAV,SEQ,GELCOP,MINMAT,IFAIL,KBOUT)
IF(IDSAV.GT.0) THEN
IMATCH=1
CALL DISMAT(SEQ,IDIM,GELCOP,IDIMG,SAVPS,SAVPG,IDSAV,
+ CENDS,NENDS,IDCEND,MAXCON,IDEV,MATCH)
END IF
IF(ISTRAN.EQ.1) THEN
CALL SQREV(GELCOP,IDIMG)
CALL SQCOM(GELCOP,IDIMG)
CALL SQCOPY(GELCOP,GEL,IDIMG)
ISTRAN = 2
GO TO 2
END IF
IF(IMATCH.EQ.0) THEN
WRITE(IDEV2,1010)NAME
IGELS = IGELS + 1
END IF
GO TO 1
1010 FORMAT(A)
100 CONTINUE
WRITE(KBOUT,*)'Batch finished'
WRITE(KBOUT,*)JGEL,' compared and ',IGELS,' passed'
RETURN
END
SUBROUTINE SHIFTC(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDEVR,
+IDBSIZ,IGN,NCONT,DIST)
C AUTHOR: RODGER STADEN
C SHIFTS PART OF A CONTIG FORM GEL IGN TO RIGHT END
C CONTIG LINE NUMBER IF NCONT
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER DIST,CLEN
EXTERNAL CLEN
I = IGN
10 CONTINUE
IF(I.NE.0)THEN
RELPG(I) = RELPG(I) + DIST
CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I),LNBR(I),RNBR(I))
I = RNBR(I)
GO TO 10
END IF
C UPDATE CONTIG LENGTH
L = CLEN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+IDBSIZ,IGN)
RELPG(NCONT) = L
CALL WRITER(IDEVR,NCONT,RELPG(NCONT),LNGTHG(NCONT),
+LNBR(NCONT),RNBR(NCONT))
END
SUBROUTINE SLIDER(SEQ1,IDC,SEQ2,IDIM2,MS1,MS2,MAXPG,MAXPC,MINSLI,
+MATL,MATC,MATG,IP)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDC),SEQ2(IDIM2)
INTEGER MATL(IP),MATC(IP),MATG(IP),P1S,P1,P2
IP1 = IP
IP = 0
C LEFT END S2 RELATIVE S1 - MAX PADS -2 READY FOR LOOP
P1S = MS1 - MS2 - MAXPC - 1
C TRY NSLIDE START POSNS FOR SEQ2
DO 100 I=1,MAXPG+MAXPC+1
C POINT TO SEQ1 START
P1S = P1S + 1
C POINT TO CURRENT SEQ1 POSN
P1 = P1S
N = 0
C COMPARE WHOLE LENGTH OF SEQ2 (IF P1 WITHIN RANGE)
DO 50 J=1,IDIM2
P2 = J
P1 = P1 + 1
IF(P1.LT.1)GO TO 50
C OFF RIGHT END? IF SO MAY HAVE BEEN A MATCH
IF(P1.GT.IDC)GO TO 40
IF(SEQ1(P1).EQ.SEQ2(P2))GO TO 45
40 CONTINUE
IF(N.GE.MINSLI)CALL SAVIT(N,P1,P2,IP,MATL,MATC,MATG,IP1)
N = 0
GO TO 50
45 CONTINUE
N = N + 1
50 CONTINUE
C GOOD SCORE AT END? NEED TO INCREMENT POINTERS FOR SAVIT
P1 = P1 + 1
P2 = P2 + 1
IF(N.GE.MINSLI)CALL SAVIT(N,P1,P2,IP,MATL,MATC,MATG,IP1)
100 CONTINUE
END
SUBROUTINE SUBS(SEQ,IDIMS,FROM,TO)
CHARACTER SEQ(IDIMS),FROM,TO
C AUTHOR RODGER STADEN
DO 10 I = 1,IDIMS
IF(SEQ(I).EQ.FROM) SEQ(I) = TO
10 CONTINUE
END
SUBROUTINE SUMMAR(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+SEQ1,IDIM1,GEL,LREG,RREG,IGELC,PERCD,IDBSIZ,CHARS,
+ID1,CHRSIZ,MAXGL2,IDEVW,MAXGEL,LINOU1,LINOU2,MXGOOD)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),CHRSIZ
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER SEQ1(IDIM1)
CHARACTER GEL(MAXGEL)
INTEGER LREG,RREG,LSEQNO,POSN,Y,POSN1
INTEGER GELC
CHARACTER LINOU1(MAXGEL),LINOU2(MAXGEL),GTCONC
INTEGER CHARS(CHRSIZ,ID1,MAXGL2)
EXTERNAL INDEXS,LWRAPS,GTCONC
C 28-7-91 added extra parameter: mxgood is the maximum reading
C length for which we have confidence, so only the parts of
C reads 1 to mxgood will be included in the calculation
C SET INITIAL VALUES
C hard to understand this very old code! rewrite it.
C we have a summing array of twice the length of the longest sequence
C posn is posn in contig of next to write, lseqno is current posn in contig
C we write when lseqno-posn ge the length of the longest seq
POSN=LREG
GELC=IGELC
LINLEN=MAXGEL
LSEQNO=RELPG(GELC)
IEND=0
DO 40 I=1,MAXGL2
DO 40 J=1,ID1
DO 40 K=1,CHRSIZ
CHARS(K,J,I)=0
40 CONTINUE
50 CONTINUE
ISS=1
IF(LNGTHG(GELC).LT.0)ISS=2
CALL READW(IDEVW,GELC,GEL,MAXGEL)
C LOOP FOR RELEVANT ELEMENTS THIS GEL
C only use mxgood characters from start of read
C
IF(ISS.EQ.1) THEN
N = MIN(MXGOOD,ABS(LNGTHG(GELC)))
IF(LSEQNO.LT.LREG)LSEQNO=LREG
IS = LSEQNO-RELPG(GELC)+1
ELSE
C LOOP FOR RELEVANT ELEMENTS THIS GEL
C only use mxgood characters from start of read (right end for these)
C
IREND = RELPG(GELC) - LNGTHG(GELC) + 1
IF (MXGOOD.LT.MAXGEL) THEN
LSEQNO = IREND - MXGOOD + 1
ELSE
LSEQNO = RELPG(GELC)
END IF
LSEQNO = MAX(LSEQNO,LREG)
IS = LSEQNO - RELPG(GELC) + 1
N = ABS(LNGTHG(GELC))
END IF
DO 70 I=IS,N
JJ = INDEXS(GEL(I),JSCORE)
JJJ = LWRAPS(LSEQNO,MAXGL2)
CHARS(JJ,ISS,JJJ) = CHARS(JJ,ISS,JJJ) + JSCORE
LSEQNO = LSEQNO + 1
70 CONTINUE
IF(RNBR(GELC).EQ.0)GO TO 200
GELC=RNBR(GELC)
LSEQNO=RELPG(GELC)
IF(LSEQNO.GT.RREG)GO TO 200
C ENOUGH TO OUTPUT?
Y=LSEQNO-POSN
IF(Y.GE.MAXGEL)GO TO 210
GO TO 50
200 CONTINUE
C SET FLAG TO SHOW END REACHED
IEND=1
LINLEN=MAXGEL
Y=RREG-POSN
IF(Y.LT.MAXGEL)LINLEN=Y+1
210 CONTINUE
C SET POINTER TO SEQ1
POSN1=POSN-1
C PREPARE NEXT SECTION OF CHARS FOR OUTPUT
DO 230 I=1,LINLEN
JJJ = LWRAPS(POSN,MAXGL2)
LINOU1(I) = GTCONC(CHARS(1,1,JJJ),CHRSIZ,PERCD)
LINOU2(I) = GTCONC(CHARS(1,2,JJJ),CHRSIZ,PERCD)
DO 250 J=1,CHRSIZ
CHARS(J,1,JJJ)=0
CHARS(J,2,JJJ)=0
250 CONTINUE
POSN=POSN+1
230 CONTINUE
C
C COMPARE STRANDS
C
DO 500 I=1,LINLEN
C WRITE(*,*)I,LINOU1(I),LINOU2(I)
POSN1=POSN1+1
IF(LINOU1(I).EQ.LINOU2(I)) THEN
IF(LINOU1(I).EQ.'-') THEN
SEQ1(POSN1) = '3'
GO TO 500
END IF
IF(LINOU1(I).EQ.'*') THEN
SEQ1(POSN1) = '3'
GO TO 500
END IF
SEQ1(POSN1) = '0'
ELSE
IF((LINOU1(I).EQ.'*').AND.(LINOU2(I).EQ.'-')) THEN
SEQ1(POSN1) = '3'
GO TO 500
END IF
IF((LINOU2(I).EQ.'*').AND.(LINOU1(I).EQ.'-')) THEN
SEQ1(POSN1) = '3'
GO TO 500
END IF
IF((LINOU1(I).NE.'-').AND.(LINOU1(I).NE.'*')) THEN
SEQ1(POSN1) = '1'
IF((LINOU2(I).NE.'-').AND.(LINOU2(I).NE.'*'))
+ SEQ1(POSN1) = '4'
GO TO 500
END IF
IF((LINOU2(I).NE.'-').AND.(LINOU2(I).NE.'*')) THEN
SEQ1(POSN1) = '2'
IF((LINOU1(I).NE.'-').AND.(LINOU1(I).NE.'*'))
+ SEQ1(POSN1) = '4'
GO TO 500
END IF
END IF
500 CONTINUE
IF(POSN.GT.RREG)RETURN
IF((IEND.EQ.1).AND.(POSN.LE.RREG))GO TO 200
C ANY MORE MAXGEL CHAR LENGTHS TO OUTPUT
Y=LSEQNO-POSN
IF(Y.LT.MAXGEL)GO TO 50
C FINISHED COMPLETELY?
GO TO 210
END
C SUMMER
C
C SUBROUTINE TO PRODUCE A CONSENSUS FROM LINED UP GEL READINGS
SUBROUTINE SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
1SEQ1,IDIM1,GEL,LREG,RREG,IGELC,IDBSIZ,CHARS,CHRSIZ,MAXGL2,
+IDEVW,MAXGEL,IDM,PERCD)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),CHRSIZ
INTEGER LREG,RREG,LSEQNO,POSN,Y
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL)
INTEGER GELC
CHARACTER SEQ1(IDIM1)
INTEGER CHARS(CHRSIZ,MAXGL2)
CHARACTER MUNOTP,GTCONC
EXTERNAL MUNOTP,INDEXS,GTCONC,LWRAPS
C
C SET INITIAL VALUES
POSN=LREG
GELC=IGELC
LINLEN=MAXGEL
LSEQNO=RELPG(GELC)
IEND=0
IPSEQ1=0
C
C ZERO ARRAY
DO 40 I=1,MAXGL2
DO 40 J=1,CHRSIZ
CHARS(J,I)=0
40 CONTINUE
50 CONTINUE
CALL READW(IDEVW,GELC,GEL,MAXGEL)
C LOOP FOR RELEVANT ELEMENTS THIS GEL
N=ABS(LNGTHG(GELC))
IF(LSEQNO.LT.LREG)LSEQNO=LREG
IS=(LSEQNO-RELPG(GELC))+1
****************************
IF(IDM.EQ.26)THEN
DO 51 I = IS,N
JJJ=(MOD(LSEQNO,MAXGL2))
IF(JJJ.EQ.0)JJJ=MAXGL2
CALL PCON1(GEL(I),CHARS(1,JJJ))
LSEQNO = LSEQNO + 1
51 CONTINUE
ELSE
****************************
DO 70 I=IS,N
JJ = INDEXS(GEL(I),JSCORE)
JJJ = LWRAPS(LSEQNO,MAXGL2)
CHARS(JJ,JJJ) = CHARS(JJ,JJJ) + JSCORE
LSEQNO = LSEQNO + 1
70 CONTINUE
END IF
C
C LOOK AT NEXT GEL TO RIGHT
IF(RNBR(GELC).EQ.0)GO TO 200
GELC=RNBR(GELC)
C RESET LSEQNO
LSEQNO=RELPG(GELC)
C IS THIS OVER END?
IF(LSEQNO.GT.RREG)GO TO 200
C ENOUGH TO OUTPUT?
Y=LSEQNO-POSN
IF(Y.GE.MAXGEL)GO TO 210
GO TO 50
200 CONTINUE
C SET FLAG TO SHOW END REACHED
IEND=1
C NEED TO SUM AND OUTPUT
LINLEN=MAXGEL
Y=RREG-POSN
IF(Y.LT.MAXGEL)LINLEN=Y+1
210 CONTINUE
C SUM NEXT SECTION OF CHARS
IF(IDM.EQ.26)THEN
DO 211 I = 1,LINLEN
IPSEQ1 = IPSEQ1 + 1
SEQ1(IPSEQ1) = '-'
JJJ = MOD(POSN,MAXGL2)
IF(JJJ.EQ.0)JJJ = MAXGL2
SEQ1(IPSEQ1) = MUNOTP(CHARS(1,JJJ))
CHARS(1,JJJ) = 0
POSN = POSN + 1
211 CONTINUE
ELSE
DO 230 I=1,LINLEN
IPSEQ1=IPSEQ1+1
ISUM=0
JJJ = LWRAPS(POSN,MAXGL2)
SEQ1(IPSEQ1) = GTCONC(CHARS(1,JJJ),CHRSIZ,PERCD)
CALL FILLI(CHARS(1,JJJ),CHRSIZ,0)
POSN = POSN + 1
230 CONTINUE
END IF
C
C
C ANY MORE TO OUTPUT?
IF(POSN.GT.RREG)RETURN
IF((IEND.EQ.1).AND.(POSN.LE.RREG))GO TO 200
C ANY MORE MAXGLEL CHAR LENGTHS TO OUTPUT
Y=LSEQNO-POSN
IF(Y.LT.MAXGEL)GO TO 50
C FINISHED COMPLETELY?
GO TO 210
END
SUBROUTINE SUMSS(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+SEQ1,IDIM1,GEL,LREG,RREG,IGELC,PERCD,IDBSIZ,CHARS,
+ID1,CHRSIZ,MAXGL2,IDEVW,MAXGEL,LINOU1,LINOU2)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),CHRSIZ
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER SEQ1(IDIM1)
CHARACTER GEL(MAXGEL)
INTEGER LREG,RREG,LSEQNO,POSN,Y,POSN1
INTEGER GELC
CHARACTER LINOU1(MAXGEL),LINOU2(MAXGEL),GTCONC
INTEGER CHARS(CHRSIZ,ID1,MAXGL2)
EXTERNAL INDEXS,LWRAPS,GTCONC
C
C Routine to calculate a consensus. Only if the two strands agree
C is a non dash character assigned.
C
POSN=LREG
GELC=IGELC
LINLEN=MAXGEL
LSEQNO=RELPG(GELC)
IEND=0
DO 40 I=1,MAXGL2
DO 40 J=1,ID1
DO 40 K=1,CHRSIZ
CHARS(K,J,I)=0
40 CONTINUE
50 CONTINUE
ISS=1
IF(LNGTHG(GELC).LT.0)ISS=2
CALL READW(IDEVW,GELC,GEL,MAXGEL)
C LOOP FOR RELEVANT ELEMENTS THIS GEL
N=ABS(LNGTHG(GELC))
IF(LSEQNO.LT.LREG)LSEQNO=LREG
IS=LSEQNO-RELPG(GELC)+1
DO 70 I=IS,N
JJ = INDEXS(GEL(I),JSCORE)
JJJ = LWRAPS(LSEQNO,MAXGL2)
CHARS(JJ,ISS,JJJ) = CHARS(JJ,ISS,JJJ) + JSCORE
LSEQNO = LSEQNO + 1
70 CONTINUE
IF(RNBR(GELC).EQ.0)GO TO 200
GELC=RNBR(GELC)
LSEQNO=RELPG(GELC)
IF(LSEQNO.GT.RREG)GO TO 200
C ENOUGH TO OUTPUT?
Y=LSEQNO-POSN
IF(Y.GE.MAXGEL)GO TO 210
GO TO 50
200 CONTINUE
C SET FLAG TO SHOW END REACHED
IEND=1
LINLEN=MAXGEL
Y=RREG-POSN
IF(Y.LT.MAXGEL)LINLEN=Y+1
210 CONTINUE
C SET POINTER TO SEQ1
POSN1=POSN-1
C PREPARE NEXT SECTION OF CHARS FOR OUTPUT
DO 230 I=1,LINLEN
JJJ = LWRAPS(POSN,MAXGL2)
LINOU1(I) = GTCONC(CHARS(1,1,JJJ),CHRSIZ,PERCD)
LINOU2(I) = GTCONC(CHARS(1,2,JJJ),CHRSIZ,PERCD)
DO 250 J=1,CHRSIZ
CHARS(J,1,JJJ)=0
CHARS(J,2,JJJ)=0
250 CONTINUE
POSN=POSN+1
230 CONTINUE
C
C Compare the strands. If they the same then set the consensus
C accordingly, otherwise set it to - so no edits are made.
C
DO 500 I=1,LINLEN
POSN1=POSN1+1
IF(LINOU1(I).EQ.LINOU2(I)) THEN
SEQ1(POSN1) = LINOU1(I)
ELSE
SEQ1(POSN1) = '-'
END IF
500 CONTINUE
IF(POSN.GT.RREG)RETURN
IF((IEND.EQ.1).AND.(POSN.LE.RREG))GO TO 200
C ANY MORE MAXGEL CHAR LENGTHS TO OUTPUT
Y=LSEQNO-POSN
IF(Y.LT.MAXGEL)GO TO 50
C FINISHED COMPLETELY?
GO TO 210
END
SUBROUTINE TPCHEK(PC,PG,L,N)
INTEGER PC(N),PG(N),L(N)
C AUTHOR RODGER STADEN
C IF OVERLAPPING BLOCKS ARE FOUND REMOVE THE SHORTER ONE
C THEN REMOVE LARGE GAPS AT ENDS (THOSE AS LARGE AS THE END BLOCK)
K1 = 2
1 CONTINUE
DO 10 I = K1,N
J1 = I
IF(PC(I).LE.PC(I-1)) GO TO 20
IF(PG(I).LE.PG(I-1)) GO TO 20
10 CONTINUE
C REMOVE LARGE GAPS FROM ENDS
C THIS RULE OF THUMB COULD BE CHANGED TO USE A DIFFERENCE
C BETWEEN THE NUMBERS OF MISMATCHING CHARACTERS
IF(N.GT.1) THEN
K1 = PC(2) - PC(1) - L(1)
J1 = PG(2) - PG(1) - L(1)
IF(MAX(K1,J1).GT.L(1)) THEN
CALL ML(PC,PG,L,N,1)
N = N - 1
END IF
IF(N.GT.1) THEN
K1 = PC(N) - PC(N-1) - L(N-1)
J1 = PG(N) - PG(N-1) - L(N-1)
IF(MAX(K1,J1).GT.L(N)) THEN
CALL ML(PC,PG,L,N,N)
N = N - 1
END IF
END IF
END IF
RETURN
20 CONTINUE
IF(L(J1-1).GT.L(J1)) THEN
CALL ML(PC,PG,L,N,J1)
ELSE
CALL ML(PC,PG,L,N,J1-1)
END IF
C Until 25-11-90 next line was k1=j1 but this does not deal with all
C cases: when a line is deleted we must compare it with the previous
C one before dealing with the rest, because it could be left of that
C one as well!
K1 = MAX(2,J1-1)
N = N - 1
GO TO 1
END
SUBROUTINE WRITEN(IDEVN,N,NAME)
CHARACTER NAME*(*)
WRITE(IDEVN,REC=N)NAME
RETURN
END
SUBROUTINE WRITER(IDEVR,N,RELPG,LNGTHG,LNBR,RNBR)
INTEGER RELPG,RNBR
INTEGER SWAPBO
EXTERNAL SWAPBO
WRITE(IDEVR,REC=N+1)SWAPBO(RELPG),SWAPBO(LNGTHG),
+SWAPBO(LNBR),SWAPBO(RNBR)
RETURN
END
SUBROUTINE WRITEW(IDEVW,N,GEL,MAXGEL)
CHARACTER GEL(MAXGEL)
WRITE(IDEVW,REC=N)GEL
RETURN
END
SUBROUTINE XHSAP(RELPG,LNGTHG,LNBR,RNBR,
+IDBSIZ,NCONTS,LLINOI,LINCNI,LREG,RREG,
+WINDOW,GWIND,LENCON,DEPTHP,DEPTHM,
+MARGL,MARGR,MARGB,MARGT,MAXOPT,ISXMAX,ISYMAX,KBIN,IDEV,
+KBOUT,GEL,GEL2,IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM,
+SEQ1,IDIM1,NGELS,TEMP3,CHRSIZ,MAXGL2,LINOU1,LINOU2,
+NOPT1,NOPT2,NOPT3,
+IHELPS,IHELPE,HELPF,IDEVH,MXGOOD)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),WINDOW,CHRSIZ,GWIND
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER MARGB(MAXOPT),MARGT(MAXOPT)
INTEGER RREG,DEPTHP,DEPTHM,STRAND,CHNRP1,HQN
INTEGER TEMP3(2,CHRSIZ,MAXGL2)
CHARACTER GEL(MAXGEL),GEL2(MAXGEL)
CHARACTER TERM,TUPPER,NAMARC*16,HELPF*(*)
CHARACTER SEQ1(IDIM1),LINOU1(MAXGEL),LINOU2(MAXGEL)
EXTERNAL NOPWIN,CWORLD,TUPPER,CHNRP1,HQN
C nopt1 = single contig
C nopt2 = all contigs
C nopt3 = scan
10 CONTINUE
LLINO = LLINOI
LINCON = LINCNI
LOCLR = 0
LOCRR = 0
CALL BPAUSE(KBIN,KBOUT,IOK)
CALL CLEARV
CALL XHAIRR(ISXMAX,ISYMAX,IX,IY,TERM,DBTDUX,DBTDUY)
CALL VT100M
INFLAG = HQN(TERM)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.3) RETURN
NOPT = NOPWIN(IY,MARGB,MARGT,MAXOPT)
TERM = TUPPER(TERM)
IF(NOPT.EQ.0) RETURN
IF(NOPT.EQ.NOPT3) THEN
IF(TERM.EQ.'S') THEN
XMIN = LREG
XMAX = RREG
X = CWORLD(IX,MARGL,MARGR,XMIN,XMAX)
LOCLR = MAX(LREG,NINT(X)-WINDOW)
LOCRR = MIN(RREG,NINT(X)+WINDOW-1)
IF(LOCLR.NE.0) THEN
CALL DSPLAY(RELPG,LNGTHG,LNBR,RNBR,
+ GEL,LLINO,LINCON,LOCLR,LOCRR,GEL2,I1,I2,0,I,
+ IDBSIZ,IDEV,KBOUT,
+ IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM)
GO TO 10
END IF
END IF
IF((TERM.EQ.'N').OR.(TERM.EQ.'Z').OR.(TERM.EQ.'I')) GO TO 10
END IF
IF(NOPT.EQ.NOPT1) THEN
STRAND = 1
CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,LENCON,STRAND,DEPTHP)
IF(DEPTHP.LT.0) RETURN
STRAND = -1
CALL FDPTH(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,LENCON,STRAND,DEPTHM)
IF(DEPTHM.LT.0) RETURN
YMAX = ISYMAX
YMIN = 0.
XMIN = LREG
XMAX = RREG
RINC = ISYMAX / (DEPTHP + DEPTHM + 2)
RMID =(DEPTHM+1) * RINC
X = CWORLD(IX,MARGL,MARGR,XMIN,XMAX)
Y = CWORLD(IY,MARGB(NOPT),MARGT(NOPT),YMIN,YMAX)
IF(TERM.EQ.'I') THEN
STRAND = 1
YINC = RINC * STRAND
CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,STRAND,
+ MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),
+ ISXMAX,ISYMAX,RMID,YINC,DEPTHP,X,Y,
+ KBOUT,IGEL,ICLOSE)
IF(ICLOSE.EQ.1) THEN
STRAND = -1
YINC = RINC * STRAND
CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,STRAND,
+ MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),
+ ISXMAX,ISYMAX,RMID,YINC,DEPTHM,X,Y,
+ KBOUT,IGEL,ICLOSE)
END IF
IF(ICLOSE.EQ.1) GO TO 10
CALL READN(IDEV3,IGEL,NAMARC)
WRITE(IDEV,1006)NAMARC,IGEL,RELPG(IGEL),LNGTHG(IGEL)
1006 FORMAT
+ ( ' Name ',A,' Number ',I6,' Rel. Posn. ',I7,' Length ',I5)
GO TO 10
END IF
IF(TERM.EQ.'Z') THEN
STRAND = 1
YINC = RINC * STRAND
CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,STRAND,
+ MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),
+ ISXMAX,ISYMAX,RMID,YINC,DEPTHP,X,Y,
+ KBOUT,IGEL,ICLOSE)
IF(ICLOSE.EQ.1) THEN
STRAND = -1
YINC = RINC * STRAND
CALL IPLTC(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,STRAND,
+ MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),
+ ISXMAX,ISYMAX,RMID,YINC,DEPTHM,X,Y,
+ KBOUT,IGEL,ICLOSE)
END IF
IF(IGEL.EQ.0) GO TO 10
CALL CLEARG
CALL PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL,
+ NCONTS,MARGL,MARGR,MARGB(NOPT2),MARGT(NOPT2),ISXMAX,ISYMAX)
LREG = MAX(1,RELPG(IGEL)-GWIND)
RREG = MIN(RELPG(LINCON),RELPG(IGEL)+GWIND)
LLINO = LNBR(LINCON)
LLINOI = LLINO
LINCNI = LINCON
LENCON = RREG - LREG + 1
CALL FDEPTH(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,LENCON,
+ MARGL,MARGR,MARGB(NOPT1),MARGT(NOPT1),ISXMAX,ISYMAX)
GO TO 10
END IF
IF(TERM.EQ.'S') THEN
LOCLR = MAX(LREG,NINT(X)-WINDOW)
LOCRR = MIN(RREG,NINT(X)+WINDOW-1)
IF(LOCLR.NE.0) THEN
CALL DSPLAY(RELPG,LNGTHG,LNBR,RNBR,
+ GEL,LLINO,LINCON,LOCLR,LOCRR,GEL2,I1,I2,0,I,
+ IDBSIZ,IDEV,KBOUT,
+ IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM)
GO TO 10
END IF
END IF
IF(TERM.EQ.'N') THEN
LOCLR = MAX(LREG,NINT(X)-WINDOW)
LOCRR = MIN(RREG,NINT(X)+WINDOW-1)
IGEL = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+ LLINO,LREG)
IF(LOCLR.NE.0) THEN
CALL LSTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,IGEL,
+ LOCRR,IDEV,IDEV3,NAMARC)
END IF
GO TO 10
END IF
IF(TERM.EQ.'Q') THEN
CALL DBSCNP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1,
+ IDIM1,GEL,IDBSIZ,TEMP3,2,CHRSIZ,MAXGL2,IDEV2,LLINO,
+ PERCD,MAXGEL,LINOU1,LINOU2,LREG,RREG,
+ MARGL,MARGR,MARGB(NOPT3),MARGT(NOPT3),ISXMAX,ISYMAX,
+ MXGOOD)
GO TO 10
END IF
END IF
IF(NOPT.EQ.NOPT2) THEN
CALL IDPLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+ NCONTS,IX,IY,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),
+ ISXMAX,ISYMAX,DBTDUX,DBTDUY,
+ LINCON,IGEL,IS)
IF(IGEL.EQ.0) RETURN
IF(TERM.EQ.'Z') THEN
CALL CLEARG
CALL PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL,
+ NCONTS,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),ISXMAX,ISYMAX)
LREG = 1
RREG = RELPG(LINCON)
LLINO = LNBR(LINCON)
LLINOI = LLINO
LINCNI = LINCON
LENCON = RREG - LREG + 1
CALL FDEPTH(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,LENCON,
+ MARGL,MARGR,MARGB(NOPT1),MARGT(NOPT1),ISXMAX,ISYMAX)
GO TO 10
END IF
IF(TERM.EQ.'Q') THEN
CALL CLEARG
CALL PLC(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LINCON,IGEL,
+ NCONTS,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),ISXMAX,ISYMAX)
LREG = 1
RREG = RELPG(LINCON)
LLINO = LNBR(LINCON)
LLINOI = LLINO
LINCNI = LINCON
LENCON = RREG - LREG + 1
CALL FDEPTH(RELPG,LNGTHG,LNBR,RNBR,
+ IDBSIZ,LLINO,LREG,RREG,LENCON,
+ MARGL,MARGR,MARGB(NOPT1),MARGT(NOPT1),ISXMAX,ISYMAX)
CALL DBSCNP(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,SEQ1,
+ IDIM1,GEL,IDBSIZ,TEMP3,2,CHRSIZ,MAXGL2,IDEV2,LLINO,
+ PERCD,MAXGEL,LINOU1,LINOU2,LREG,RREG,
+ MARGL,MARGR,MARGB(NOPT3),MARGT(NOPT3),ISXMAX,ISYMAX,
+ MXGOOD)
GO TO 10
END IF
IF(TERM.EQ.'I') THEN
CALL READN(IDEV3,IGEL,NAMARC)
WRITE(IDEV,1006)NAMARC,IGEL,RELPG(IGEL),LNGTHG(IGEL)
GO TO 10
END IF
IF(TERM.EQ.'S') THEN
LOCLR = MAX(1,IS-WINDOW)
LOCRR = MIN(RELPG(LINCON),IS+WINDOW-1)
LLINO = LNBR(LINCON)
IF(LOCLR.NE.0) THEN
CALL DSPLAY(RELPG,LNGTHG,LNBR,RNBR,
+ GEL,LLINO,LINCON,LOCLR,LOCRR,GEL2,I1,I2,0,I,
+ IDBSIZ,IDEV,KBOUT,
+ IDEV2,IDEV3,LINLEN,PERCD,MAXGEL,IDM)
END IF
GO TO 10
END IF
IF(TERM.EQ.'N') THEN
LOCLR = MAX(1,IS-WINDOW)
LOCRR = MIN(RELPG(LINCON),IS+WINDOW-1)
LLINO = LNBR(LINCON)
IF(LOCLR.NE.0) THEN
CALL LSTCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,IGEL,
+ LOCRR,IDEV,IDEV3,NAMARC)
END IF
GO TO 10
END IF
END IF
END
INTEGER FUNCTION CLINNO(LNBR,IDBSIZ,NCONTS,IIN)
C AUTHOR: RODGER STADEN
C RETURNS CONTIG LINE NUMBER OR ZERO FOR ERROR
INTEGER LNBR(IDBSIZ)
CLINNO = 0
N=IDBSIZ-NCONTS
DO 10 J=N,IDBSIZ-1
IF(LNBR(J).EQ.IIN) THEN
CLINNO = J
RETURN
END IF
10 CONTINUE
END
SUBROUTINE UPDCON(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,NGELS,NCONTS,
+SEQ,MAXSEQ,IDIM1,CSTART,CLENO,LINCON,NAMPRO,SEQ2,TEMP3,
+ECHRSZ,MAXGL2,KBOUT,IDEV,IDEV2,IFAIL,MAXGEL,IDM,PERCD)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
INTEGER CSTART,CLENO,S1,B1,ECHRSZ,RREG
INTEGER TEMP3(ECHRSZ,MAXGL2)
CHARACTER SEQ(MAXSEQ),SEQ2(MAXGEL)
CHARACTER NAMPRO*(*)
INTEGER CHNRP
EXTERNAL CHNRP
C cstart consensus start point (before new reading)
C cleno consensus length (before new reading)
C lincon element number of contig
C s1 number of first reading to shift
C b1 number of first base to shift (in overall consensus positioning)
C
C there are 2 tasks: 1. make space for the new and altered region
C 2. calculate the new consensus and put it in the space
C we do not have to make space if:
C a. we are dealing with the last contig in the consensus and there are no
C readings starting to the right of the new data
C b. the contig has not been padded
C
C New code to update the consensus only for the region affected by the
C new reading. Find the next reading to the right of the new one, which
C the new one does not overlap (might not be one!). Make a consensus from
C start of new reading to here. Prior to this make space for it by moving
C the consensus right (only if the contig is longer (padding or extra data
C at its ends). Let s1 be the first reading to shift. We shift from its
C left end to the end of the contig - where is this in the overall consensus?
C The distance of the left end of s1 to the right end of the contig is
C unchanged. This means that the new relpg(s1) is the same distance from
C the right end of the old consensus as the old relpg(s1) was from the right
C end of the old consensus. So from this we can calculate the position of the
C the first base to move.
C Let L be the position in the overall consensus of the last base in this contig
C L = cstart - cleno - 1
C Let D = distance to end of contig
C D = RELPG(LINCON) - relpg(s1) + 1.
C First base to shift B1 = L - D + 1
C Last base to shift is idim1
C Distance to move to right is relpg(lincon) - cleno ie the number of extra bases
C make consensus from relpg(ngels) to relpg(s1) - 1
C put it at cstart + relpg(ngels) - 1
C
C Potential problems:
C 1) reading at right end of contig
C the search for the first nonoverlapping read to the right will return 0
C shift al the next contig: ie cstart + cleno onwards
C make consensus from relpg(ngels) to end of contig
C put it at cstart + relpg(ngels) -1
C
C 2) reading at left end of contig
C shift whole contig ie cstart - 20
C add new title
C shift consensus relpg(lincon) - cleno to the right
C
C 3) new reading contains contig - cases 1 and 2 combined
C the search for the first nonoverlapping read to the right will return 0
C shift whole of next contig and make consensus from relpg(ngels) to end of
C contig.
C
C 4) Might not be a next contig to shift
C
C get number of first reading to shift
C
S1 = CHNRP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,NGELS,NCONTS,
+RELPG(NGELS)+ABS(LNGTHG(NGELS))-1)
C WRITE(*,*)'S1',S1
C
C is the altered region longer than the original: only then do we need to shift
C
C WRITE(*,*)'IDIM1',IDIM1
C WRITE(*,*)'RELPG(LINCON)',RELPG(LINCON)
C WRITE(*,*)'CSTART,CLENO',CSTART,CLENO
IF (RELPG(LINCON) - CLENO.GT.0) THEN
C
C it is longer so we probably need to shift
C
IF (S1.EQ.0) THEN
C
C no readings start to the right of the new data
C
IF (CSTART+CLENO-1.LT.IDIM1) THEN
C
C there are other contigs to the right
C
C WRITE(*,*)'CSTART,CLENO',CSTART,CLENO
B1 = CSTART + CLENO
C WRITE(*,*)'B1',B1
CALL MAKHCA(SEQ,MAXSEQ,B1,RELPG(LINCON)-CLENO,IDIM1)
ELSE
C
C there are no contigs to the right and no readings start to the right of
C the new one so nothing to shift
C
END IF
ELSE
C
C there are readings starting to the right of the new one
C
C shift from start of next reading to right
C
L = CSTART + CLENO - 1
C WRITE(*,*)'CSTART,CLENO,L',CSTART,CLENO,L
LD = RELPG(LINCON) - RELPG(S1) + 1
C WRITE(*,*)'LD',LD
B1 = L - LD + 1
C WRITE(*,*)'B1',B1
CALL MAKHCA(SEQ,MAXSEQ,B1,RELPG(LINCON)-CLENO,IDIM1)
END IF
END IF
C
C now make new consensus (where do we put it, do we need
C to give it a header, and what region do we make it for ?
C in the simplest case make it for relpg(ngels) to relpg(s1) -1
C if s1=0 make it for relpg(ngels) to end of contig (relpg(lincon))
C we give it a header if it is at the left end of the contig ie lnbr(ngels)=0
C
C we always start at the left end of the new reading
C
LREG = RELPG(NGELS)
C
C we end at the next reading to the right or the end of the contig
C
IF (S1.NE.0) THEN
RREG = RELPG(S1) - 1
ELSE
RREG = RELPG(LINCON)
END IF
C
C where do we put the new consensus ?
C
B1 = CSTART + RELPG(NGELS) - 1
C WRITE(*,*)'LREG,RREG',LREG,RREG
C WRITE(*,*)'B1',B1
C
C do we need to add a title
C
IF (LNBR(NGELS).EQ.0) THEN
B1 = CSTART - 20
C WRITE(*,*)'ADD NEW TIT AT',B1
CALL ADDTIT(SEQ(B1),NAMPRO,NGELS,B1)
END IF
IGELC = LNBR(LINCON)
C
C note aconsn will chain along until it find the first useful reading
C
JOB = 2
CALL ACONSN(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS,
+SEQ,MAXSEQ,SEQ2,IDBSIZ,B1,JOB,IGELC,LREG,RREG,TEMP3,
+ECHRSZ,MAXGL2,IDEV,IDEV2,IFAIL,MAXGEL,IDM,PERCD)
IF(IFAIL.NE.0) THEN
CALL ERROM(KBOUT,'Error calculating consensus')
RETURN
END IF
C
C before we leave we must make the overall consensus length correct
C so add on the extra length (if any) which is the new length - old length
C
C WRITE(*,*)'OLD IDIM1',IDIM1
IDIM1 = IDIM1 + RELPG(LINCON) - CLENO
IDIM2 = IDIM1 + RELPG(LINCON) - CLENO
C WRITE(*,*)'NEW IDIM1/2',IDIM1
END
SUBROUTINE MAKHCA(STRING,MAXAR,FROM,HSIZE,ASIZE)
CHARACTER STRING(MAXAR)
INTEGER FROM,HSIZE,ASIZE
C
C make a hole of size hsize in character array size asize
C
J = ASIZE + HSIZE
DO 10 I=ASIZE,FROM,-1
STRING(J) = STRING(I)
J = J - 1
10 CONTINUE
END
INTEGER FUNCTION CHNRP(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,LGEL,NCONT,
+LREG)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
C
C find first reading starting past lreg (0=none found)
C
I = LGEL
CHNRP = 0
10 CONTINUE
IF(I.NE.0) THEN
IF(RELPG(I).LE.LREG) THEN
I = RNBR(I)
GO TO 10
END IF
CHNRP = I
RETURN
END IF
END
INTEGER FUNCTION CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+LGEL,LREG)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
C
C find first reading with data covering or past lreg (0=none found)
C
I = LGEL
CHNRP1 = 0
10 CONTINUE
IF(I.NE.0) THEN
IF(RELPG(I)+ABS(LNGTHG(I))-1.LT.LREG) THEN
I = RNBR(I)
GO TO 10
END IF
CHNRP1 = I
RETURN
END IF
END
C ACONSN
SUBROUTINE ACONSN(RELPG,LNGTHG,LNBR,RNBR,NAMPRO,NGELS,NCONTS,
+SEQ1,IDIM1,GEL,IDBSIZ,ISTART,JOB,LLINO,LREG,RREG,TEMP,
+CHRSIZ,MAXGL2,KBOUT,
+IDEVW,IFAIL,MAXGEL,IDM,PERCD)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),CHRSIZ
INTEGER LREG,RREG,X,Y,TEMP(CHRSIZ,MAXGL2)
CHARACTER SEQ1(IDIM1)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER GEL(MAXGEL)
CHARACTER NAMPRO*(*)
INTEGER CHNRP1
EXTERNAL CHNRP1
C
C new consensus calculating routine (could replace acons if we check values of job)
C
C job = 0 do it for whole db
C = 2 for selected contig only
C = 1 for selected contig, adding a header
C note jobs 0 and 1 update istart (it always points to end of overall
C consensus), but job=2 does not
C
CALL BUSY(KBOUT)
IFAIL=0
IF(JOB.EQ.1) THEN
C
C do it for a selected contig, adding title
C
ISTART=ISTART+1
IDIM11=RREG-LREG+1
IF((ISTART+19+IDIM11).GT.IDIM1)THEN
WRITE(KBOUT,1009)IDIM1
IFAIL=1
RETURN
END IF
C
C allow summer to be dumb, and find first relevant reading number
C
LLINO1 = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+ LLINO,LREG)
IF(LLINO1.EQ.0) THEN
CALL ERROM(KBOUT,
+ 'Error in ACONSN: no data found for consensus')
IFAIL = 1
RETURN
END IF
CALL ADDTIT(SEQ1(ISTART),NAMPRO,LLINO,ISTART)
CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ SEQ1(ISTART),IDIM11,GEL,LREG,RREG,LLINO1,IDBSIZ,TEMP,
+ CHRSIZ,MAXGL2,
+ IDEVW,MAXGEL,IDM,PERCD)
ISTART=ISTART+IDIM11-1
RETURN
END IF
IF(JOB.EQ.2) THEN
C
C do it for a selected contig
C
IDIM11=RREG-LREG+1
IF((ISTART+IDIM11-1).GT.IDIM1)THEN
WRITE(KBOUT,1009)IDIM1
IFAIL=1
RETURN
END IF
C
C allow summer to be dumb, and find first relevant reading number
C
LLINO1 = CHNRP1(RELPG,LNGTHG,LNBR,RNBR,IDBSIZ,
+ LLINO,LREG)
IF(LLINO1.EQ.0) THEN
CALL ERROM(KBOUT,
+ 'Error in ACONSN: no data found for consensus')
IFAIL = 1
RETURN
END IF
CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ SEQ1(ISTART),IDIM11,GEL,LREG,RREG,LLINO1,IDBSIZ,TEMP,
+ CHRSIZ,MAXGL2,
+ IDEVW,MAXGEL,IDM,PERCD)
RETURN
END IF
C
C do it for all contigs
C
N=IDBSIZ-NCONTS
DO 110 I=N,IDBSIZ-1
J=LNBR(I)
X=1
Y=RELPG(I)
ISTART=ISTART+1
IF((ISTART+19+Y).GT.IDIM1)THEN
WRITE(KBOUT,1009)IDIM1
1009 FORMAT(
+ ' Database maximum consensus length(',I6,') exceeded',/,
+ ' calculation aborted')
IFAIL=1
RETURN
END IF
CALL ADDTIT(SEQ1(ISTART),NAMPRO,J,ISTART)
CALL SUMMER(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ SEQ1(ISTART),Y,GEL,X,Y,J,IDBSIZ,TEMP,CHRSIZ,MAXGL2,IDEVW,MAXGEL,
+ IDM,PERCD)
ISTART=ISTART+Y-1
110 CONTINUE
END
SUBROUTINE AERROR(IDEVS,IDEVF,NAME,IERR)
CHARACTER NAME*(*)
C
C handle errors for assembly
C
C errors are:
C 0 file not found
C 1 read too short
C 2 failed to align and not entered
C 3 failed on entry
C 4 failed to align but entered
WRITE(IDEVF,1000)NAME(1:INDEX(NAME,' ')),IERR
1000 FORMAT(A,I2)
CALL ERROM(IDEVS,'Failed reading written to error file')
END
SUBROUTINE SHFTLA(STRING,MAXAR,FROMS,TO,FROME)
CHARACTER STRING(MAXAR)
INTEGER FROMS,TO,FROME
C
C shift an array left from froms to to
C
J = TO
DO 10 I=FROMS,FROME
STRING(J) = STRING(I)
J = J + 1
10 CONTINUE
END
SUBROUTINE GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LINCON,LLINO,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,PROMPT,
+IHELPS,IHELPE,FILEH,IDEVH)
C AUTHOR: RODGER STADEN
INTEGER RELPG(IDBSIZ),GELIDN
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER FILEH*(*),PROMPT*(*)
EXTERNAL GELIDN
IERR = 1
NCONTC = GELIDN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,LLINO,
+IDBSIZ,KBIN,KBOUT,IDEVN,PROMPT,
+IHELPS,IHELPE,FILEH,IDEVH,INFLAG)
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.4) RETURN
IF(NCONTC.EQ.0) RETURN
IGELNO = NCONTC
IF(LNBR(NCONTC).NE.0) THEN
WRITE(KBOUT,1013)RELPG(NCONTC)
1013 FORMAT(' Position of this reading=',I6)
25 CONTINUE
NCONTC = LNBR(NCONTC)
IF(LNBR(NCONTC).NE.0) GO TO 25
WRITE(KBOUT,1014)NCONTC
1014 FORMAT( ' Number of leftmost reading this contig=',I6)
END IF
30 CONTINUE
N = IDBSIZ - NCONTS
DO 20 J=N,IDBSIZ-1
IF(LNBR(J).EQ.NCONTC) THEN
LINCON=J
GO TO 21
END IF
20 CONTINUE
CALL ERROM(KBOUT,'No contig line for this reading!')
RETURN
21 CONTINUE
LLINO = NCONTC
IERR = 0
END
INTEGER FUNCTION GELIDN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+LLINO,IDBSIZ,KBIN,KBOUT,IDEVN,PROMPT,
+IHELPS,IHELPE,FILEH,IDEVH,INFLAG)
CHARACTER FILEH*(*),PROMPT*(*)
C AUTHOR: RODGER STADEN
C SEARCHES FOR ARCHIVE NAMES
INTEGER RELPG(IDBSIZ)
INTEGER LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER NAME1*17,NAME3*17,NFLAG
PARAMETER (NFLAG='/')
EXTERNAL NAMENO
NAME3 = ' '
IF(LLINO.NE.0) THEN
NAME3(1:1) = NFLAG
CALL READN(IDEVN,LLINO,NAME3(2:))
END IF
GELIDN = 0
10 CONTINUE
L = 0
IF(LLINO.NE.0) L = 17
CALL GTSTR(PROMPT,NAME3,
+NAME1,L,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.4) RETURN
IF(INFLAG.EQ.3) THEN
GELIDN = LLINO
RETURN
END IF
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,FILEH,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(NAME1(1:1).EQ.NFLAG) THEN
GELIDN = NAMENO(NAME1(2:),NGELS,IDEVN)
IF (GELIDN.EQ.0) CALL ERROM(KBOUT,'Reading name not found')
ELSE
CALL RJST(NAME1)
READ(NAME1,1001,ERR=10,END=10)GELIDN
1001 FORMAT(I17)
IF((GELIDN.LT.1).OR.(GELIDN.GT.NGELS)) THEN
CALL ERROM(KBOUT,'Illegal gel reading number')
GO TO 10
END IF
END IF
END
INTEGER FUNCTION NAMENO(NAME,NGELS,IDEVN)
CHARACTER NAME*(*)
CHARACTER*16 NAME1,NAME2
NAME1 = NAME
CALL CCASE(NAME1,1)
DO 10 I=1,NGELS
CALL READN(IDEVN,I,NAME2)
CALL CCASE(NAME2,1)
IF (NAME1.EQ.NAME2) THEN
NAMENO = I
RETURN
END IF
10 CONTINUE
NAMENO = 0
END
SUBROUTINE REMGBD(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+KBIN,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN,IDEV2,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
INTEGER RELPG(IDBSIZ),LNGTHG(IDBSIZ),LNBR(IDBSIZ),RNBR(IDBSIZ)
CHARACTER HELPF*(*),GEL(MAXGEL),NAMARC*16,FILNAM*(*)
INTEGER REMME,GCLIN,CHAINL,GNFFOF
PARAMETER (MAXPRM = 35)
CHARACTER PROMPT(4)*(MAXPRM)
EXTERNAL GCLIN,CHAINL,NAMENO,GNFFOF
C assumes db is logical consistent
FILNAM = ' '
PROMPT(1) = 'Define a region by reading names'
PROMPT(2) = 'Use a file of reading names'
PROMPT(3) = 'Move a reading to a separate contig'
PROMPT(4) = 'Make a list of unattached readings'
IOPT = 1
CALL RADION('Select list definition mode',PROMPT,4,IOPT,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.1) RETURN
IF(IOPT.EQ.4) THEN
C
C here we find all contigs with single readings and write their
C names to a file
C
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Name for file of reading names',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
DO 5 I=IDBSIZ-NCONTS,IDBSIZ-1
IF (LNBR(I).EQ.RNBR(I)) THEN
IF (LNBR(I).NE.0) THEN
CALL READN(IDEVN,LNBR(I),NAMARC)
WRITE(IDEV2,1000)NAMARC
END IF
END IF
5 CONTINUE
CLOSE(UNIT=IDEV2)
RETURN
END IF
IF(IOPT.EQ.3) THEN
C
C here we start a new contig with the selected reading
C
C we get the reading number igelno and move a copy of it
C to ngels+1. Then we use the remove reading routine to delete
C the original copy and move the new one to fill the hole. The
C reason for this convoluted route is that remgel cleans up
C all the mess. We must write a new contig line and check the
C orientation.
C
C
IF(NGELS+3.GE.IDBSIZ-NCONTS) THEN
CALL ERROM(KBOUT,'Insufficient space for new contig')
RETURN
END IF
NGELST = NGELS + 1
NCONTT = NCONTS + 1
CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ LINCOL,LLINOL,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+ 'Reading to disconnect',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF (IERR.NE.0) RETURN
CALL READW(IDEVW,IGELNO,GEL,MAXGEL)
C
C move reading info over end of gel list
C
CALL READN(IDEVN,IGELNO,NAMARC)
CALL WRITEN(IDEVN,NGELST,NAMARC)
LNBR(NGELST) = 0
RNBR(NGELST) = 0
C
C leave orientation the same
C
LNGTHG(NGELST) = LNGTHG(IGELNO)
RELPG(NGELST) = 1
CALL WRITER(IDEVR,NGELST,RELPG(NGELST),LNGTHG(NGELST),
+ LNBR(NGELST),RNBR(NGELST))
CALL WRITEW(IDEVW,NGELST,GEL,MAXGEL)
CALL MOVTAG(IGELNO,NGELST)
C
C start a new contig
C
I = IDBSIZ - NCONTT
LNBR(I) = NGELST
RNBR(I) = NGELST
LNGTHG(I) = 0
RELPG(I) = ABS(LNGTHG(NGELST))
CALL WRITER(IDEVR,I,RELPG(I),LNGTHG(I),
+ LNBR(I),RNBR(I))
NGELS = NGELST
NCONTS = NCONTT
CALL WRITER(IDEVR,IDBSIZ,NGELS,NCONTS,0,0)
IGLNO = IGELNO
CALL REMGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+ IGLNO,LINCOL,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN)
RETURN
END IF
IF(IOPT.EQ.1) THEN
10 CONTINUE
LLINOL = 0
LLINOR = 0
CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ LINCOL,LLINOL,IGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+ 'Leftmost reading',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF (IERR.NE.0) RETURN
CALL GETLN3(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ LINCOR,LLINOR,JGELNO,IERR,IDBSIZ,KBIN,KBOUT,IDEVN,
+ 'Rightmost reading',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF (IERR.NE.0) RETURN
IF (LLINOL.NE.LLINOR) THEN
CALL ERROM(KBOUT,
+ 'For this mode readings must be in the same contig')
GO TO 10
END IF
IF (RELPG(IGELNO).GT.RELPG(JGELNO)) THEN
CALL ERROM(KBOUT,
+ 'For this mode readings must be in left to right order')
GO TO 10
END IF
C
C IGELNO is first read to remove, JGELNO the last
C
C we must make a list of reads because removal changes numbers
C
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Name for temporary file of reading names',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
C
C write out their names
C
J = IGELNO
20 CONTINUE
CALL READN(IDEVN,J,NAMARC)
WRITE(IDEV2,1000)NAMARC
WRITE(KBOUT,1001)NAMARC
1000 FORMAT(A)
1001 FORMAT(' ',A)
IF (J.NE.JGELNO) THEN
IF (J.NE.0) THEN
J=RNBR(J)
GO TO 20
END IF
END IF
CALL BPAUSE(KBIN,KBOUT,IOK)
IF (IOK.NE.0) THEN
CLOSE(UNIT=IDEV2)
RETURN
END IF
REWIND(UNIT=IDEV2)
ELSE IF(IOPT.EQ.2) THEN
C
C here we start from a file of file names
C
CALL OPENF1(IDEV2,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Name of file of reading names',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
ELSE
CALL ERROM(KBOUT,'How the hell did we get here?')
RETURN
END IF
30 CONTINUE
IOK = GNFFOF(IDEV2,NAMARC)
IF(IOK.EQ.1) GO TO 100
IF(IOK.NE.0) GO TO 30
REMME = NAMENO(NAMARC,NGELS,IDEVN)
I = CHAINL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,
+ IDBSIZ,REMME)
ICONT = GCLIN(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,I)
IF(ICONT.EQ.0) THEN
CALL ERROM(KBOUT,'No contig line for this reading')
IOK = 1
GO TO 100
END IF
CALL REMGEL(RELPG,LNGTHG,LNBR,RNBR,NGELS,NCONTS,IDBSIZ,
+REMME,ICONT,KBOUT,GEL,MAXGEL,IDEVR,IDEVW,IDEVN)
GO TO 30
100 CONTINUE
CLOSE(UNIT=IDEV2)
END