staden-lg/src/staden/anals89.f

10996 lines
333 KiB
Fortran

C ANALS89 SUBROUTINES FOR NIP
C AUTHOR: RODGER STADEN
C 11-8-92 Fixed bug in x version: use of gtstr and opratr='all' when
C using feature tables: xversion returns default, fortran returns
C lin=0: now check for both.
C 29-7-92 Added routines for finding repeated words
C 6-8-92 trna: previously did not allow for case in conserved base check
C 29-5-92 set max open reading frame to idim/3 in opentr
C 27-5-92 trnad made max intron length default = min intron length (was 0)
C 21-5-92 fixed bug in restriction enzyme search: if 4 cutter file was
C selected it always got listed!
C 16-4-92 fixed bug in find inverted repeats: the second position was out!
C 8-1-90
C Changed all kstran to jstran
C Changed all mstran to jstran
C Replaced calls codid1 by trand1 and deleted codid1
C Added clos(unit=ideve) to tran3,codimp
C Changed format statement in trpiro to produce embl ft output
C 26-3-90 TRNA changed it to allow intron lengths from 0 to N (previously
C by mistake, had to be M to N where M is non zero).
C 29-3-90 Minor change to CODTDK call to CODTBO was erroneously sent ANST
C not ANSTO. Now sent ANSTO.
C 2-4-90 Major change to codtdk: normalised data not written to disk!
C 3-5-90 Minor change to inverted repeat search for case when a pallindrome
C is found: stop the subtraction of 2 from position for listed
C results by making intx1=intx2
C 9-7-90 Removed menu routine
C 21-8-90 Changed getpar for option 39 to use completely new routines
C Changed TRPIRP and TRPIR and their callers
C 5-11-90 Changed calls to rdwmt to include flag for show/not show title
C 7-11-90 Huge number of changes to replace radio by radion, including
C writing new routine gstrnd. This has increased the returned
C value of the choice flag by 1 and so is a potential source
C of minor cockup
C 21-12-90 Changed weight matrix searches so that text output gives position
C as middle + i (was i)
C 11-1-91 Greatly modified positional base preferences method adding
C facility to read codon table from disk.
C 19-2-91 Modified s2 to accomodate longer restriction enzyme recognition
C seqs, and moved a write statement from findl4, findl5 to s2.
C For output from s2 names are now limited to 15 chars
C recognition seqs to 35 chars.
C 22-2-91 trpir had forgotten to reverse translations from the complementary
C strand. Added call to sqrev.
C 11-4-91 modified trand7 to make 6 phase translation more obvious
C 18-4-91 Replaced splice search and changed setpar
C 23-4-91 Minor change to trand8 to allow both strands
C New routines: trnpir, trndop, trndom, openfr, trndp, trndm,
C wpair, pstop, mstop, minil. For translating open frames to disk
C and finding open frames
C 11-6-91 Following routines replaced or added.Changes relating to new fts
C 24-2-92 Bracketed the factor of -1 in MBPRIM
C 2-3-92 set FILNAM = ' ' for all openf1 calls
C SUBROUTINE OPENTR(SEQ,IDIM,KSTART,IDEV,KBIN,KBOUT,PAA,
C SUBROUTINE TRANEM(SEQ,IDIM,J1,J2,FRAMEC,
C SUBROUTINE TRAND8(IDIM,J1,J2,KSTRAN,IP1,IP2,PROMPT,
C SUBROUTINE TRAND7(KBIN,KBOUT,I1,I2,
C SUBROUTINE EMBLF(IDEVE,FILNAM,
C SUBROUTINE TRAND9(KBIN,KBOUT,IDEVE,FILNAM,OPRATR,
C SUBROUTINE TRANEN(SEQ,IDIM,J1,J2,FRAMEC,JSTRAN,IDEVE,
C SUBROUTINE TRAND1(JSTRAN,ANSE,IDEVE,FILNAM,
C SUBROUTINE TRANDK(SEQ1,IDIM1,OUTP1,IDIMP,ISTART,ISTOP,IDIMT,
C SUBROUTINE TRNEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,IDEV,
C SUBROUTINE CODTDK(SEQ,IDIM,J1,J2,IDEVE,FILNAM,KBIN,KBOUT,
C SUBROUTINE CODND1(JSTRAN,NORM,ANSE,IDEVE,FILNAM,OPRATR,
C SUBROUTINE CTDEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,
C SUBROUTINE CODEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,IDEV,
C SUBROUTINE CODONS(SEQ,IDIM,J1,J2,IDEV,IDEVE,FILNAM,KBIN,KBOUT,
C SUBROUTINE CODIMP(SEQ,IDIM,FTABLE,IDFTAB,IDEVE,FILNAM,
C SUBROUTINE CDIEMB(SEQ,IDIM,J1,J2,SEQW,JSTRAN,IDEVE,IDEV,
C SUBROUTINE PLTEMB(JSTRAN,IDEVE,
C SUBROUTINE PLTMAQ(IDEV,FILNAM,POSNS,MAXPOS,
C SUBROUTINE PLTBAR(POSNL,POSNR,YF,YF,BLIPB,BLIPT,
C SUBROUTINE MBPRIM(FRAMEC,IDIMS,J1,J2,PRIME)
C 17-7-91 trndom replced line to write out title! which had disapeared
SUBROUTINE GETPAR(IOPT,NPAM,IOK,
+I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,
+I11,I12,I13,I14,I15,I16,I17,I18,I19,I20,
+I21,I22,I23,I24,I25,I26,I27,I28,I29,I30,
+I31,I32,I33,I34,I35)
PARAMETER (MINOPT = 17,
+ MAXOPT = 68,
+ NUMBER = 35)
INTEGER VALUES(NUMBER,MINOPT:MAXOPT)
COMMON /PAMDEF/VALUES
IOK = 0
IF((IOPT.GE.MINOPT).AND.(IOPT.LE.MAXOPT)) THEN
I = 1
I1 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I2 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I3 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I4 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I5 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I6 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I7 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I8 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I9 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I10 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I11 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I12 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I13 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I14 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I15 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I16 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I17 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I18 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I19 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I20 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I21 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I22 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I23 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I24 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I25 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I26 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I27 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I28 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I29 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I30 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I31 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I32 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I33 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I34 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
I = I + 1
I35 = VALUES(I,IOPT)
IF(I.EQ.NPAM) RETURN
END IF
IOK = 1
END
SUBROUTINE SETPAR(IOK)
PARAMETER (MINOPT = 17,
+ MAXOPT = 68,
+ NUMBER = 35)
INTEGER VALUES(NUMBER,MINOPT:MAXOPT)
C INTEGER TEMP(NUMBER)
COMMON /PAMDEF/VALUES
SAVE /PAMDEF/
IOK = 0
C SEARCH
C CALL GETPAR(17,14,IOK,
C +MINLEN,MAXLEN,LINLEN,MNCUT,MAXCUT,MINCUT,
C +OPTION,OPTOUT,IFILE,JTRAN,ITRAN,IROUND,IDEF)
VALUES(1,17) = 10
VALUES(2,17) = 100
VALUES(3,17) = 60
VALUES(4,17) = 0
VALUES(5,17) = 100
VALUES(6,17) = 0
VALUES(7,17) = 0
VALUES(8,17) = 0
VALUES(9,17) = 2
VALUES(10,17) = 0
VALUES(11,17) = 0
VALUES(12,17) = 0
VALUES(13,17) = 0
C PERCENT MATCH
VALUES(1,18) = 0
VALUES(2,18) = 70
C SCORE MATRIX MATCH
VALUES(1,19) = 0
C WEIGHT MATRIX
VALUES(1,20) = 0
C WRITE(KBOUT,*)' CALCULATE CODON USAGE, BASE COMPOSITION'
C WRITE(KBOUT,*)' AND AMINO ACID COMPOSITION'
C CALL GETPAR(23,3,IOK,JSTRAN,NORM,ANSE)
VALUES(1,23) = 0
VALUES(2,23) = 0
VALUES(3,23) = 0
C
C PLOT BASE COMPOSITION
C
C CALL GETPAR(24,10,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
C +SCORES(1),SCORES(2),SCORES(3),SCORES(4))
C
VALUES(1,24) = 1
VALUES(2,24) = 201
VALUES(3,24) = 31
VALUES(4,24) = 1
VALUES(5,24) = 11
VALUES(6,24) = 5
VALUES(7,24) = 1
VALUES(8,24) = 0
VALUES(9,24) = 1
VALUES(10,24) = 0
C WRITE(KBOUT,*)' PLOT BASE COMPOSITION DIFFERENCES AS CHI SQUARED'
C CALL GETPAR(25,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,25) = 11
VALUES(2,25) = 301
VALUES(3,25) = 33
VALUES(4,25) = 1
VALUES(5,25) = 21
VALUES(6,25) = 5
C WRITE(KBOUT,*)' PLOT DI COMPOSITION DIFFERENCES AS CHI SQUARED'
C CALL GETPAR(26,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,26) = 11
VALUES(2,26) = 301
VALUES(3,26) = 33
VALUES(4,26) = 1
VALUES(5,26) = 21
VALUES(6,26) = 5
C WRITE(KBOUT,*)' PLOT TRI COMPOSITION DIFFERENCES AS CHI SQUARED'
C CALL GETPAR(27,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,27) = 11
VALUES(2,27) = 301
VALUES(3,27) = 33
VALUES(4,27) = 1
VALUES(5,27) = 21
VALUES(6,27) = 5
C WRITE(KBOUT,*)' CALCULATE CODON IMPROBABILITY'
C CALL GETPAR(28,2,IOK,JSTRAN,ANSE)
VALUES(1,28) = 0
VALUES(2,28) = 0
C WRITE(KBOUT,*)' SEARCH FOR HAIRPIN LOOPS'
C CALL GETPAR(30,10,IOK,MINLPI,MAXLPI,MINLP,
C +MINLPX,MAXLPX,MAXLP,MINBP,MAXBP,MININ,IGON)
VALUES(1,30) = 1
VALUES(2,30) = 30
VALUES(3,30) = 1
VALUES(4,30) = 3
VALUES(5,30) = 120
VALUES(6,30) = 3
VALUES(7,30) = 2
VALUES(8,30) = 20
VALUES(9,30) = 6
VALUES(10,30) = 0
C WRITE(KBOUT,*)' PLOT INVERTED REPEATS'
C CALL GETPAR(31,4,IOK,MINRP,MAXRP,MINREP,IGON)
VALUES(1,31) = 6
VALUES(2,31) = 30
VALUES(3,31) = 12
VALUES(4,31) = 0
C WRITE(KBOUT,*)' PLOT REPEATS'
C CALL GETPAR(32,4,IOK,MINRP,MAXRP,MINREP,IGON)
VALUES(1,32) = 6
VALUES(2,32) = 30
VALUES(3,32) = 12
VALUES(4,32) = 0
C WRITE(KBOUT,*)' PLOT Z DNA POTENTIAL '
C CALL GETPAR(33,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,33) = 3
VALUES(2,33) = 33
VALUES(3,33) = 15
VALUES(4,33) = 1
VALUES(5,33) = 21
VALUES(6,33) = 3
C WRITE(KBOUT,*)' PLOT Z DNA POTENTIAL '
C CALL GETPAR(34,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,34) = 3
VALUES(2,34) = 33
VALUES(3,34) = 21
VALUES(4,34) = 1
VALUES(5,34) = 11
VALUES(6,34) = 1
C WRITE(KBOUT,*)' PLOT Z DNA POTENTIAL '
C CALL GETPAR(35,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,35) = 5
VALUES(2,35) = 33
VALUES(3,35) = 15
VALUES(4,35) = 1
VALUES(5,35) = 21
VALUES(6,35) = 1
C WRITE(KBOUT,*)' SEARCH FOR LOCAL SIMILARITY OR COMPLEMENTARITY'
C CALL GETPAR(36,15,IOK,MINSP,MAXSP,SPAN,MINPR,MAXPR,IPR,
C +MNIR1,MXIR1,IRAN1,MNIR2,MXIR2,IRAN2,
C +ANSRV,ANSC,IGON)
VALUES(1,36) = 5
VALUES(2,36) = 200
VALUES(3,36) = 15
VALUES(4,36) = 5
VALUES(5,36) = 100
VALUES(6,36) = 70
VALUES(7,36) = 1
VALUES(8,36) = 50
VALUES(9,36) = 1
VALUES(10,36) = 1
VALUES(11,36) = 50
VALUES(12,36) = 1
VALUES(13,36) = 0
VALUES(14,36) = 0
VALUES(15,36) = 0
C WRITE(KBOUT,*)' LIST TRANSLATION (UP TO 3 PHASES)'
C CALL GETPAR(38,6,IOK,MINLEN,MAXLEN,LINLEN,JSTRAN,ANSE,ITRAN)
VALUES(1,38) = 30
VALUES(2,38) = 120
VALUES(3,38) = 60
VALUES(4,38) = 0
VALUES(5,38) = 0
VALUES(6,38) = 0
C WRITE(KBOUT,*)' TRANSLATE IN SIX PHASES'
C CALL GETPAR(39,11,IOK,MINLEN,MAXLEN,LINLEN,ITRAN,
C CALL GETPAR(39,10,IOK,MINO,MINOPN,JSTRAN,
C MINLEN,MAXLEN,LINLEN,ITRAN,JTRAN,INUM,ANSE,
VALUES(1,39) = 0
VALUES(2,39) = 30
VALUES(3,39) = 0
VALUES(4,39) = 30
VALUES(5,39) = 120
VALUES(6,39) = 60
VALUES(7,39) = 0
VALUES(8,39) = 0
VALUES(9,39) = 0
VALUES(10,39) = 3
C WRITE(KBOUT,*)' TRANSLATION AND WRITE PROTEIN SEQUENCE TO DISK'
C CALL GETPAR(40,7,IOK,JSTRAN,ANSE,IOPEN,MINO,MAXO,MINOPN,JSTRAN)
VALUES(1,40) = 0
VALUES(2,40) = 0
VALUES(3,40) = 0
VALUES(4,40) = 5
VALUES(5,40) = 1000
VALUES(6,40) = 30
VALUES(7,40) = 0
C WRITE(KBOUT,*)' CALCULATE AND WRITE CODON TABLE TO DISK'
C CALL GETPAR(41,5,IOK,JSTRAN,NORM,ANSE,ANST,ANSTO)
VALUES(1,41) = 0
VALUES(2,41) = 0
VALUES(3,41) = 0
VALUES(4,41) = 0
VALUES(5,41) = 0
C WRITE(KBOUT,*)'STADEN AND MCLACHLAN CODON USAGE METHOD'
C CALL GETPAR(42,7,IOK,MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,ANSTY)
VALUES(1,42) = 11
VALUES(2,42) = 101
VALUES(3,42) = 25
VALUES(4,42) = 1
VALUES(5,42) = 11
VALUES(6,42) = 5
VALUES(7,42) = 0
C WRITE(KBOUT,*)' POSITIONAL BASE PREFERENCES METHOD TO FIND',
C +' PROTEIN GENES'
C CALL GETPAR(43,9,IOK,MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,
C +ANSABS,ANSTY,ANSS)
VALUES(1,43) = 31
VALUES(2,43) = 101
VALUES(3,43) = 67
VALUES(4,43) = 1
VALUES(5,43) = 11
VALUES(6,43) = 5
VALUES(7,43) = 0
VALUES(8,43) = 0
VALUES(9,43) = 0
C WRITE(KBOUT,*)' UNEVEN POSITIONAL BASE FREQUENCIES METHOD'
C CALL GETPAR(44,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,44) = 11
VALUES(2,44) = 101
VALUES(3,44) = 67
VALUES(4,44) = 1
VALUES(5,44) = 21
VALUES(6,44) = 5
C WRITE(KBOUT,*)' SEARCH FOR PROTEIN GENES USING MCLACHLAN, STADEN'
C WRITE(KBOUT,*)' AND BOSWELL IMPROBABILITY METHOD'
C WRITE(KBOUT,*)' EXPECTING CODON COMPOSITION TO DEPEND ON'
C WRITE(KBOUT,*)' BASE COMPOSITION'
C CALL GETPAR(45,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,45) = 67
VALUES(2,45) = 99
VALUES(3,45) = 99
VALUES(4,45) = 1
VALUES(5,45) = 21
VALUES(6,45) = 5
C WRITE(KBOUT,*)' SEARCH FOR PROTEIN GENES USING MCLACHLAN, STADEN'
C WRITE(KBOUT,*)' AND BOSWELL IMPROBABILITY METHOD'
C WRITE(KBOUT,*)' EXPECTING CODON COMPOSITION TO DEPEND ON'
C WRITE(KBOUT,*)' AMINO ACID COMPOSITION'
C CALL GETPAR(46,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,46) = 67
VALUES(2,46) = 99
VALUES(3,46) = 99
VALUES(4,46) = 1
VALUES(5,46) = 21
VALUES(6,46) = 5
C WRITE(KBOUT,*)' SHEPHERDS METHOD'
C CALL GETPAR(47,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,47) = 11
VALUES(2,47) = 101
VALUES(3,47) = 67
VALUES(4,47) = 1
VALUES(5,47) = 21
VALUES(6,47) = 5
C WRITE(KBOUT,*)' FICKETTS METHOD'
C CALL GETPAR(48,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,48) = 11
VALUES(2,48) = 101
VALUES(3,48) = 67
VALUES(4,48) = 1
VALUES(5,48) = 21
VALUES(6,48) = 5
C WRITE(KBOUT,*)' TRNA SEARCH '
C CALL GETPAR(49,35,IOK,
C +MNLEN,MXLEN,MAXLNT,
C +MNAC,MXAC,MINACL,
C +MNTU,MXTU,MINTU,
C +MNAN,MXAN,MINANT,
C +MND,MXD,MIND,
C +MNI1,MXI1,INT1,
C +MNI2,MXI2,INT2,
C +MNTUL1,MXTUL1,LTUMIN,
C +MNTUL2,MXTUL2,LTUMAX,
C +MNB,MXB,MINBAS,
C +MNC,MXC,MINCON,IGON,ANSCON)
VALUES(1,49) = 70
VALUES(2,49) = 130
VALUES(3,49) = 92
VALUES(4,49) = 0
VALUES(5,49) = 14
VALUES(6,49) = 11
VALUES(7,49) = 0
VALUES(8,49) = 10
VALUES(9,49) = 8
VALUES(10,49) = 0
VALUES(11,49) = 10
VALUES(12,49) = 8
VALUES(13,49) = 0
VALUES(14,49) = 8
VALUES(15,49) = 3
VALUES(16,49) = 0
VALUES(17,49) = 30
VALUES(18,49) = 0
VALUES(19,49) = 30
VALUES(20,49) = 30
VALUES(21,49) = 0
VALUES(22,49) = 4
VALUES(23,49) = 12
VALUES(24,49) = 6
VALUES(25,49) = 6
VALUES(26,49) = 12
VALUES(27,49) = 9
VALUES(28,49) = 0
VALUES(29,49) = 44
VALUES(30,49) = 0
VALUES(31,49) = 0
VALUES(32,49) = 0
VALUES(33,49) = 0
VALUES(34,49) = 0
VALUES(35,49) = 1
C WRITE(KBOUT,*)' FIND OPEN READING FRAMES'
C CALL GETPAR(54,4,IOK,MINO,MAXO,MINOPN,JSTRAN)
VALUES(1,54) = 5
VALUES(2,54) = 1000
VALUES(3,54) = 30
VALUES(4,54) = 0
C WRITE(KBOUT,*)' PLOT NEGENTROPY'
C CALL GETPAR(29,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,59) = 11
VALUES(2,59) = 301
VALUES(3,59) = 33
VALUES(4,59) = 1
VALUES(5,59) = 21
VALUES(6,59) = 5
C SPLICE SEARCH
VALUES(1,62) = 0
C WRITE(KBOUT,*)'PLOT COMPOSITION DIFFERENCES (OBS-EXP))'
C CALL GETPAR(64,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT)
VALUES(1,64) = 3
VALUES(2,64) = 401
VALUES(3,64) = 101
VALUES(4,64) = 1
VALUES(5,64) = 20
VALUES(6,64) = 5
C PATTERNS TO DO
C OPEN(UNIT=99,FILE='ANALYSEP.PAR',STATUS ='OLD')
C10 CONTINUE
C READ(99,*,ERR=100,END=200)TEMP
C IF((TEMP(1).GE.MINOPT).AND.(TEMP(1).LE.MAXOPT)) THEN
C DO 20 I = 1,NUMBER-1
C VALUES(I,TEMP(1)) = TEMP(I+1)
C WRITE(*,*)TEMP(1),TEMP(I+1)
C20 CONTINUE
C END IF
C GO TO 10
C100 CONTINUE
C WRITE(*,*)'ERROR IN PARMS FILE'
C GO TO 10
C200 CONTINUE
C CLOSE(UNIT = 99)
IOK = 0
END
SUBROUTINE SEARCH(SEQ,IDSEQ,J1,J2,KSTART,
+RENZYM,MAXREN,RECSEQ,MAXREC,
+NAMES,MAXNAM,CUTADD,PSAVE,NSAVE,SORTA,MAXMAT,
+NAMEP,NAMLEN,NUMREC,LENEN,RECSTR,NAMLST,
+CUTSIT,PCUT,MAXENZ,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,
+IDEVEN,FILNAM,FILE4,FILE6,FILEA,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,POSN,WORDP,MAXDIC,DIALOG)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),FILEA*(*),FILE4*(*),FILE6*(*),HELPF*(*)
CHARACTER SEQ(IDSEQ),RENZYM(MAXREN),RECSEQ(MAXREC),NAMES(MAXNAM)
CHARACTER PAA(125)
INTEGER OPTION,OPTOUT
INTEGER CUTSIT(MAXENZ),PCUT(MAXENZ)
INTEGER CUTADD(MAXMAT),PSAVE(MAXMAT),NSAVE(MAXMAT),SORTA(MAXMAT)
INTEGER NAMEP(MAXENZ),NAMLEN(MAXENZ),NUMREC(MAXENZ)
INTEGER LENEN(MAXENZ)
INTEGER RECSTR(MAXENZ),NAMLST(MAXENZ),DIALOG
INTEGER POSN(IDSEQ),WORDP(MAXDIC),CONST(0:24)
PARAMETER (IBH=256)
CALL SHOWFU(KBOUT,'Search for restriction enzyme sites')
CALL GETPAR(17,13,IOK,
+MINLEN,MAXLEN,LINLEN,MNCUT,MAXCUT,MINCUT,
+OPTION,OPTOUT,IFILE,JTRAN,KTRAN,IROUND,IDEF,
+IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
INC = 1
IDNDEX = 1024
LENW = 5
LENC = 4*LENW
CALL SETCN(CONST,LENW,5,LENC)
ITRAN = KTRAN
10 CONTINUE
IDREN = MAXREN
IDRSEQ = MAXREC
IDNAML = MAXNAM
IDNLST = MAXENZ
MAXEN = MAXENZ
IDMAT = MAXMAT
LEVEL = 0
CALL SERCHD(RENZYM,IDREN,RECSEQ,IDRSEQ,
+NAMES,IDNAML,MARGT,
+NAMEP,NAMLEN,NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,
+IDEVEN,FILNAM,FILE4,FILE6,FILEA,KBIN,KBOUT,
+OPTION,OPTOUT,NOPOUT,IFILE,DIALOG,
+IHELPS,IHELPE,HELPF,IDEVH,IOK,CUTSIT,PCUT,MAXS,LINLEN,ITRAN,
+JTRAN,MINLEN,MAXLEN,IROUND,IDEF,MINCUT,MNCUT,MAXCUT)
IF(IOK.NE.0) RETURN
CALL BUSY(KBOUT)
CALL CONNUM(SEQ(J1),POSN(J1),J2-J1+1)
CALL ENCOND(POSN(J1),J2-J1+1,WORDP,IDNDEX,5,CONST,LENW,LENC)
IF(OPTOUT.EQ.2) THEN
CALL FINDL1(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,NAMES,IDNAML,
+ NAMEP,NAMLEN,
+ NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+ PSAVE,NSAVE,CUTADD,SORTA,IDMAT,NOPOUT,IBH,LEVEL,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+ IHELPS,IHELPE,HELPF,IDEVH,MINCUT,IDEF,
+ POSN(J1),WORDP,IDNDEX,CONST,LENW,LENC,IOK)
ELSE IF((OPTOUT.EQ.0).AND.(NOPOUT.EQ.1)) THEN
CALL FINDL2(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,NAMES,IDNAML,
+ NAMEP,NAMLEN,
+ NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+ PSAVE,NSAVE,CUTADD,SORTA,IDMAT,NOPOUT,IBH,LEVEL,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+ IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+ POSN(J1),WORDP,IDNDEX,CONST,LENW,LENC,IOK)
ELSE IF((OPTOUT.EQ.1).AND.(NOPOUT.EQ.1)) THEN
CALL FINDL3(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,NAMES,IDNAML,
+ NAMEP,NAMLEN,
+ NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+ PSAVE,NSAVE,CUTADD,SORTA,IDMAT,NOPOUT,IBH,LEVEL,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+ IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+ POSN(J1),WORDP,IDNDEX,CONST,LENW,LENC,IOK)
ELSE IF((OPTOUT.EQ.0).AND.(NOPOUT.EQ.0)) THEN
CALL FINDL4(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,NAMES,IDNAML,
+ NAMEP,NAMLEN,
+ NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+ PSAVE,NSAVE,CUTADD,SORTA,IDMAT,NOPOUT,IBH,LEVEL,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+ IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+ POSN(J1),WORDP,IDNDEX,CONST,LENW,LENC,IOK)
ELSE IF((OPTOUT.EQ.1).AND.(NOPOUT.EQ.0)) THEN
CALL FINDL5(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,NAMES,IDNAML,
+ NAMEP,NAMLEN,
+ NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+ PSAVE,NSAVE,CUTADD,SORTA,IDMAT,NOPOUT,IBH,LEVEL,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+ IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+ POSN(J1),WORDP,IDNDEX,CONST,LENW,LENC,IOK)
ELSE IF((OPTOUT.EQ.3).AND.(NOPOUT.EQ.0)) THEN
CALL FINDL6(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,NAMES,IDNAML,
+ NAMEP,NAMLEN,
+ NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+ PSAVE,NSAVE,CUTADD,SORTA,IDMAT,NOPOUT,IBH,LEVEL,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+ IHELPS,IHELPE,HELPF,IDEVH,ITRAN,LINLEN,PAA,IDEF,
+ POSN(J1),WORDP,IDNDEX,CONST,LENW,LENC,IOK)
END IF
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
DIALOG = 1
GO TO 10
END
SUBROUTINE SERCHD(RENZYM,IDREN,RECSEQ,IDRSEQ,
+NAMES,IDNAML,MARGT,
+NAMEP,NAMLEN,NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,
+IDEVEN,FILNAM,FILE4,FILE6,FILEA,KBIN,KBOUT,
+OPTION,OPTOUT,NOPOUT,IFILE,DIALOG,
+IHELPS,IHELPE,HELPF,IDEVH,IOK,CUTSIT,PCUT,MAXS,LINLEN,ITRAN,
+JTRAN,MINLEN,MAXLEN,IROUND,IDEF,MINCUT,MNCUT,MAXCUT)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),FILEA*(*),FILE4*(*),FILE6*(*),HELPF*(*)
CHARACTER RENZYM(IDREN),RECSEQ(IDRSEQ),NAMES(IDNAML)
INTEGER OPTION,OPTOUT
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN)
INTEGER LENEN(MAXEN)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),DIALOG
INTEGER CUTSIT(IDNLST),PCUT(IDNLST)
PARAMETER (MAXPRM = 30)
CHARACTER PROMPT(4)*(MAXPRM)
IOK = 0
NOPOUT = 0
CALL GETREN(RENZYM,IDREN,IDEVEN,FILNAM,FILE4,FILE6,FILEA,
+IOK,KBIN,KBOUT,IFILE,DIALOG,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
IF(IDREN.LT.5) THEN
IOK = 1
RETURN
END IF
CALL DECREN(RENZYM,IDREN,RECSEQ,IDRSEQ,NAMES,IDNAML,
+NAMEP,NAMLEN,NUMREC,LENEN,RECSTR,MAXEN,KBOUT,CUTSIT,PCUT,MAXS)
IF(MAXEN.LT.1) THEN
IOK = 1
RETURN
END IF
IF(DIALOG.EQ.0) RETURN
IOP = OPTION
CALL YESONO(IOP,'Search for all names','select names',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
OPTION = IOP
IF(OPTION.EQ.1)THEN
CALL GETNAM(NAMLST,IDNLST,NAMES,IDNAML,NAMEP,NAMLEN,
+ MAXEN,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IDNLST.LT.1) THEN
IOK = 1
RETURN
END IF
END IF
IOP = OPTOUT + 1
PROMPT(1) = 'Order results enzyme by enzyme'
PROMPT(2) = 'Order results by position'
PROMPT(3) = 'Show only infrequent cutters'
PROMPT(4) = 'Show names above the sequence'
CALL RADION('Select results display mode',PROMPT,4,IOP,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.1) THEN
IOK = 1
RETURN
END IF
OPTOUT = IOP - 1
IF(OPTOUT.EQ.2) THEN
CALL GETINT(MNCUT,MAXCUT,MINCUT,'Maximum number of cuts',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINCUT = IVAL
END IF
IF(OPTOUT.EQ.3) THEN
IOP = JTRAN
CALL YESONO(IOP,'Hide translation','Show translation',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
JTRAN = IOP
IF(JTRAN.EQ.1) THEN
IOP = ITRAN
IF(IOP.EQ.3) IOP = 1
CALL YESONO(IOP,'Use 1 letter codes',
+ 'Use 3 letter codes',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
IF(IOP.EQ.1) ITRAN = 3
IF(IOP.EQ.0) ITRAN = 0
ELSE IF(JTRAN.EQ.0) THEN
ITRAN = 1
END IF
IF(ITRAN.NE.1) THEN
MAXLEN = 90
MINLEN = 30
END IF
LENDEF = LINLEN
CALL GETINT(MINLEN,MAXLEN,LENDEF,'Line length',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
LINLEN = IVAL
IF((ITRAN.NE.1).AND.(MOD(LINLEN,3).NE.0)) LINLEN = 60
END IF
IOP = NOPOUT
IF((OPTOUT.EQ.0).OR.(OPTOUT.EQ.1)) THEN
CALL YESONO(IOP,'List matches','Plot matches',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
NOPOUT = IOP
END IF
C IF(NOPOUT.EQ.1)THEN
C LEVEL = LEVELI
C CALL SD2(IBLIPH,IBH,LEVEL,MARGT,
C + IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
C LEVELI = LEVEL
C IF(IOK.NE.0) RETURN
C END IF
C CALL GETINT(MININC,MAXINC,INC,'Step',
C +IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
C IF(IOK.NE.0) RETURN
C INC = IVAL
IOP = IROUND
CALL YESONO(IOP,'The sequence is linear',
+'The sequence is circular',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
IROUND = IOP
IOP = IDEF
CALL YESONO(IOP,'Search for definite matches',
+'Search for possible matches',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
IDEF = IOP
END
SUBROUTINE FINDL1(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,
+NAMES,IDNAML,NAMEP,NAMLEN,
+NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+PSAVE,NSAVE,CUTADD,SORTA,MAXMAT,NOPOUT,IBH,LEVEL,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,MINCUT,IDEF,
+POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),RECSEQ(IDRSEQ)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXS)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),PSAVE(MAXMAT)
INTEGER NSAVE(MAXMAT),OPTION,OPTOUT
INTEGER CUTSIT(MAXS),PCUT(MAXEN),CUTADD(MAXMAT),SORTA(MAXMAT)
CHARACTER HELPF*(*)
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC)
C OPTOUT=0 ENZYME BY ENZYME
C LOW FREQUENCY CUTTERS
C names max length 20 chars, strings 20 chars
LAST=MAXEN
IF(OPTION.EQ.1)LAST=IDNLST
DO 100 IEN=1,LAST
IFOUND=0
JEN=IEN
IF(OPTION.EQ.1)JEN=NAMLST(IEN)
NREC=NUMREC(JEN)
IF(IDEF.EQ.0) THEN
CALL S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,
+ POSN,WORDP,IDNDEX,
+ CONST,LENW,LENC,KBOUT,IOK)
ELSE
CALL S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
END IF
IF(IOK.NE.0) RETURN
IF(IFOUND.LE.MINCUT) THEN
K1=NAMEP(JEN)
K2=K1+NAMLEN(JEN)-1
WRITE(IDEV,1009)IFOUND,(NAMES(K),K=K1,K2)
1009 FORMAT(' ',I6,' ',20A1)
END IF
100 CONTINUE
END
SUBROUTINE FINDL2(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,
+NAMES,IDNAML,NAMEP,NAMLEN,
+NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+PSAVE,NSAVE,CUTADD,SORTA,MAXMAT,NOPOUT,IBH,LEVEL,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),RECSEQ(IDRSEQ)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXS)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),PSAVE(MAXMAT)
INTEGER NSAVE(MAXMAT),OPTION,OPTOUT
INTEGER CUTSIT(MAXS),PCUT(MAXEN),CUTADD(MAXMAT),SORTA(MAXMAT)
CHARACTER HELPF*(*)
CHARACTER*20 ATOS
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC)
EXTERNAL ATOS
C OPTOUT=0 ENZYME BY ENZYME
C NOPOUT=1 GRAPHICAL OUTPUT
C FOR GRAPHICS NEED TO CHECK FOR END OF SCREEN
CALL CLEARV
CALL VECTOM
LEVEL1=LEVEL
LEVELM=MARGT-IBH
C names max length 20 chars, strings 20 chars
LAST=MAXEN
IF(OPTION.EQ.1)LAST=IDNLST
DO 100 IEN=1,LAST
IFOUND=0
JEN=IEN
IF(OPTION.EQ.1)JEN=NAMLST(IEN)
NREC=NUMREC(JEN)
IF(IDEF.EQ.0) THEN
CALL S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,
+ POSN,WORDP,IDNDEX,
+ CONST,LENW,LENC,KBOUT,IOK)
ELSE
CALL S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
END IF
IF(IOK.NE.0) THEN
CALL VT100M
RETURN
END IF
IF(IFOUND.GT.0) THEN
IF(LEVEL.GE.LEVELM)THEN
CALL PSRCHX(LEVEL1,LEVEL,KBIN,KBOUT,IQUIT,
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IQUIT.EQ.1) THEN
CALL VT100M
RETURN
END IF
END IF
CALL PLSRCH(J1-KSTART+1,J2-KSTART+1,
+ PSAVE,IFOUND,IBH,LEVEL,
+ ATOS(NAMES(NAMEP(JEN)),NAMLEN(JEN)),
+ NAMLEN(JEN),
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
100 CONTINUE
CALL VT100M
END
SUBROUTINE FINDL3(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,
+NAMES,IDNAML,NAMEP,NAMLEN,
+NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+PSAVE,NSAVE,CUTADD,SORTA,MAXMAT,NOPOUT,IBH,LEVEL,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),RECSEQ(IDRSEQ)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXS)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),PSAVE(MAXMAT)
INTEGER NSAVE(MAXMAT),OPTION,OPTOUT
INTEGER CUTSIT(MAXS),PCUT(MAXEN),CUTADD(MAXMAT),SORTA(MAXMAT)
CHARACTER HELPF*(*)
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC)
C OPTOUT=1 ALL ENZYMES TOGETHER
C NOPOUT=1 GRAPHICAL OUTPUT
C FOR GRAPHICS NEED TO CHECK FOR END OF SCREEN
CALL CLEARV
CALL VECTOM
LEVEL1=LEVEL
LEVELM=MARGT-IBH
LAST=MAXEN
IF(OPTION.EQ.1)LAST=IDNLST
IFOUND=0
DO 100 IEN=1,LAST
JEN=IEN
IF(OPTION.EQ.1)JEN=NAMLST(IEN)
NREC=NUMREC(JEN)
IF(IDEF.EQ.0) THEN
CALL S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,
+ POSN,WORDP,IDNDEX,
+ CONST,LENW,LENC,KBOUT,IOK)
ELSE
CALL S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
END IF
IF(IOK.NE.0) THEN
CALL VT100M
RETURN
END IF
100 CONTINUE
IF(IFOUND.GT.0)THEN
CALL PLSRCH(J1-KSTART+1,J2-KSTART+1,
+ PSAVE,IFOUND,IBH,LEVEL,'ALL',3,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
CALL VT100M
IF(IFOUND.EQ.0)WRITE(IDEV,3000)
3000 FORMAT(' No matches found')
END
SUBROUTINE FINDL4(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,
+NAMES,IDNAML,NAMEP,NAMLEN,
+NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+PSAVE,NSAVE,CUTADD,SORTA,MAXMAT,NOPOUT,IBH,LEVEL,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),RECSEQ(IDRSEQ)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXS)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),PSAVE(MAXMAT)
INTEGER NSAVE(MAXMAT),OPTION,OPTOUT
INTEGER CUTSIT(MAXS),PCUT(MAXEN),CUTADD(MAXMAT),SORTA(MAXMAT)
CHARACTER HELPF*(*)
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC)
C OPTOUT=0 ENZYME BY ENZYME
C names max length 20 chars, strings 20 chars
LAST=MAXEN
IF(OPTION.EQ.1)LAST=IDNLST
DO 100 IEN=1,LAST
IFOUND=0
JEN=IEN
IF(OPTION.EQ.1)JEN=NAMLST(IEN)
NREC=NUMREC(JEN)
IF(IDEF.EQ.0) THEN
CALL S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,
+ POSN,WORDP,IDNDEX,
+ CONST,LENW,LENC,KBOUT,IOK)
ELSE
CALL S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
END IF
IF(IOK.NE.0) RETURN
IF(IFOUND.GT.0) THEN
WRITE(IDEV,1010)IFOUND
1010 FORMAT(' Matches found=',I6)
CALL BUB3AS(PSAVE,NSAVE,CUTADD,IFOUND)
C calc lengths
IF(IROUND.EQ.0) THEN
LENN = J1 - KSTART + 1
SORTA(1) = PSAVE(1) - LENN
ELSE IF(IROUND.EQ.1) THEN
LENN = J2 - PSAVE(IFOUND) + 1
SORTA(1) = LENN + PSAVE(1) - (J1 - KSTART + 1)
LENN = 1 - LENN
END IF
DO 45 KOUT=2,IFOUND
SORTA(KOUT)=PSAVE(KOUT)-PSAVE(KOUT-1)
45 CONTINUE
C do length to end
IFP1 = IFOUND
IF(IROUND.EQ.0) THEN
SORTA(IFOUND+1)=J2-KSTART+1-PSAVE(IFOUND)+1
IFP1=IFOUND+1
END IF
CALL BUBBLE(SORTA,IFP1)
CALL S2(SEQ,IDSEQ,IFOUND,NAMEP,NAMLEN,LENEN,MAXEN,
+ NSAVE,PSAVE,SORTA,MAXMAT,NAMES,IDNAML,
+ CUTSIT,MAXS,CUTADD,IROUND,
+ LENN,IDEV,J2,KSTART,0)
END IF
100 CONTINUE
END
SUBROUTINE FINDL5(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,
+NAMES,IDNAML,NAMEP,NAMLEN,
+NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+PSAVE,NSAVE,CUTADD,SORTA,MAXMAT,NOPOUT,IBH,LEVEL,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,IDEF,
+POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),RECSEQ(IDRSEQ)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXS)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),PSAVE(MAXMAT)
INTEGER NSAVE(MAXMAT),OPTION,OPTOUT
INTEGER CUTSIT(MAXS),PCUT(MAXEN),CUTADD(MAXMAT),SORTA(MAXMAT)
CHARACTER HELPF*(*)
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC)
C OPTOUT=1 ALL ENZYMES TOGETHER
C names max length 20 chars, strings 20 chars
LAST=MAXEN
IF(OPTION.EQ.1)LAST=IDNLST
IFOUND=0
DO 100 IEN=1,LAST
JEN=IEN
IF(OPTION.EQ.1)JEN=NAMLST(IEN)
NREC=NUMREC(JEN)
IF(IDEF.EQ.0) THEN
CALL S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,
+ POSN,WORDP,IDNDEX,
+ CONST,LENW,LENC,KBOUT,IOK)
ELSE
CALL S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
END IF
IF(IOK.NE.0) RETURN
100 CONTINUE
IF(IFOUND.GT.0) THEN
CALL BUB3AS(PSAVE,NSAVE,CUTADD,IFOUND)
IF(IROUND.EQ.0) THEN
LENN = J1 - KSTART + 1
SORTA(1) = PSAVE(1) - LENN
ELSE IF(IROUND.EQ.1) THEN
LENN = J2 - PSAVE(IFOUND) + 1
SORTA(1) = LENN + PSAVE(1) - (J1 - KSTART + 1)
LENN = 1 - LENN
END IF
DO 45 KOUT=2,IFOUND
SORTA(KOUT)=PSAVE(KOUT)-PSAVE(KOUT-1)
45 CONTINUE
CALL S2(SEQ,IDSEQ,IFOUND,NAMEP,NAMLEN,LENEN,MAXEN,
+ NSAVE,PSAVE,SORTA,MAXMAT,NAMES,IDNAML,
+ CUTSIT,MAXS,CUTADD,IROUND,
+ LENN,IDEV,J2,KSTART,1)
ELSE
WRITE(IDEV,1010)
1010 FORMAT(' No matches found')
END IF
END
SUBROUTINE FINDL6(SEQ,IDSEQ,J1,J2,KSTART,RECSEQ,IDRSEQ,
+NAMES,IDNAML,NAMEP,NAMLEN,
+NUMREC,LENEN,RECSTR,MAXEN,NAMLST,IDNLST,OPTION,OPTOUT,
+ PCUT,CUTSIT,MAXS,IROUND,
+PSAVE,NSAVE,CUTADD,SORTA,MAXMAT,NOPOUT,IBH,LEVEL,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,INC,KBIN,KBOUT,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,ITRAN,LINLEN,PAA,IDEF,
+POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),RECSEQ(IDRSEQ)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXS)
INTEGER RECSTR(MAXEN),NAMLST(IDNLST),PSAVE(MAXMAT)
INTEGER NSAVE(MAXMAT),OPTION,OPTOUT
INTEGER CUTSIT(MAXS),PCUT(MAXEN),CUTADD(MAXMAT),SORTA(MAXMAT)
CHARACTER HELPF*(*),PAA(125)
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC)
C OPTOUT=3 NAMES ABOVE SEQUENCE
C names max length 20 chars, strings 20 chars
LAST=MAXEN
IF(OPTION.EQ.1)LAST=IDNLST
IFOUND=0
DO 100 IEN=1,LAST
JEN=IEN
IF(OPTION.EQ.1)JEN=NAMLST(IEN)
NREC=NUMREC(JEN)
IF(IDEF.EQ.0) THEN
CALL S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,
+ POSN,WORDP,IDNDEX,
+ CONST,LENW,LENC,KBOUT,IOK)
ELSE
CALL S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+ RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+ PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
END IF
IF(IOK.NE.0) RETURN
100 CONTINUE
IF(IFOUND.GT.0) THEN
CALL BUB2AS(PSAVE,NSAVE,IFOUND)
CALL PETER(SEQ,IDSEQ,J1,J2,KSTART,
+ NAMES,IDNAML,NAMEP,NAMLEN,MAXEN,
+ PSAVE,NSAVE,IDEV,IFOUND,LINLEN,PAA,ITRAN)
ELSE
WRITE(IDEV,1010)
1010 FORMAT(' No matches')
END IF
END
SUBROUTINE S1D0(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,POSN,WORDP,IDNDEX,
+CONST,LENW,LENC,KBOUT,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),RECSEQ(IDRSEQ),TEMP3(40)
INTEGER RECSTR(MAXEN),PSAVE(MAXMAT),NSAVE(MAXMAT)
INTEGER PCUT(MAXEN),CUTSIT(MAXS),CUTADD(MAXMAT),SPOINT
INTEGER LENEN(MAXS)
INTEGER POSN(IDSEQ),WORDP(IDNDEX),CONST(0:LENC),GENRCI
EXTERNAL MC,GENRCI
IOK = 1
LREC = 0
DO 50 IREC=1,NREC
C POINT TO START OF STRING
ISTR=RECSTR(JEN)+(IREC-1)*(LREC)
SPOINT = PCUT(JEN) + IREC - 1
LREC = LENEN (SPOINT)
C FIND FIRST AND LAST NON N CHARS IN STRING
CALL GETNON(RECSEQ(ISTR),LREC,INON,LNON)
C CHECK FOR ALL N'S IN STRING!!
IF(LNON.GT.0)THEN
ISTR = ISTR + INON - 1
IENTRY = 0
5 CONTINUE
NINDEX = GENRCI(RECSEQ(ISTR),LNON,LENW,CONST,LENC,IENTRY)
IF(NINDEX.NE.0) THEN
K1 = WORDP(NINDEX)
IF(K1.NE.0) THEN
IMATCH = 0
K2 = K1 + J1 - 1
IF(LNON.GT.LENW) THEN
IF((K2+LNON-1).LE.IDSEQ) THEN
IMATCH = MC(SEQ(K2+LENW),RECSEQ(ISTR+LENW),
+ LNON-LENW)
ELSE
IMATCH = 1
END IF
END IF
IF(IMATCH.EQ.0) THEN
IFOUND = IFOUND + 1
IF(IFOUND.GT.MAXMAT)THEN
CALL ERROM(KBOUT,'Too many matches')
RETURN
END IF
NSAVE(IFOUND) = JEN
C NEED THE CUTPOINT FOR THIS STRING; THE FIRST ONE THIS NAME
C IS AT PCUT(JEN) IN ARRAY CUTSIT, SO THIS ONE IS AT
C PCUT(JEN)+IREC-1 = SPOINT IN CUTSIT
CUTADD(IFOUND) = SPOINT
C SAVE POSITION RELATIVE TO ARRAY
KT = K2 + CUTSIT(SPOINT) - INON + 1
IF(KT.GT.IDSEQ)KT = KT - IDSEQ
IF(KT.LT.0)KT = ABS(KT) + 1
PSAVE(IFOUND) = KT
END IF
10 CONTINUE
K3 = K1
K1 = POSN(K3)
IF(K1.NE.0) THEN
IMATCH = 0
K2 = K1 + J1 - 1
IF(LNON.GT.LENW) THEN
IF((K2+LNON-1).LE.IDSEQ) THEN
IMATCH = MC(SEQ(K2+LENW),RECSEQ(ISTR+LENW),
+ LNON-LENW)
ELSE
IMATCH = 1
END IF
END IF
IF(IMATCH.EQ.0) THEN
IFOUND = IFOUND + 1
IF(IFOUND.GT.MAXMAT)THEN
CALL ERROM(KBOUT,'Too many matches')
RETURN
END IF
NSAVE(IFOUND) = JEN
CUTADD(IFOUND) = SPOINT
KT = K2 + CUTSIT(SPOINT) - INON + 1
IF(KT.GT.IDSEQ)KT = KT - IDSEQ
IF(KT.LT.0)KT = ABS(KT) + 1
PSAVE(IFOUND) = KT
END IF
GO TO 10
END IF
END IF
GO TO 5
END IF
IF(LNON.GE.IDSEQ)GO TO 50
IF(IROUND.NE.1)GO TO 50
IAT = 0
LNONM1 = LNON - 1
IF(LNONM1.GT.0)THEN
CALL SQCOPY(SEQ(IDSEQ-LNON+2),TEMP3,LNONM1)
CALL SQCOPY(SEQ,TEMP3(LNON),LNONM1)
END IF
51 CONTINUE
IAT = IAT + 1
ILEFT = 2 * LNONM1 - IAT + 1
IF(ILEFT.GT.0)THEN
CALL FIND6(TEMP3(IAT),ILEFT,RECSEQ(ISTR),LNON,INC,IMATCH)
IF(IMATCH.GT.0)THEN
IFOUND = IFOUND + 1
IF(IFOUND.GT.MAXMAT)THEN
CALL ERROM(KBOUT,'Too many matches')
RETURN
END IF
NSAVE(IFOUND)=JEN
C NEED THE CUTPOINT FOR THIS STRING; THE FIRST ONE THIS NAME
C IS AT PCUT(JEN) IN ARRAY CUTSIT, SO THIS ONE IS AT
C PCUT(JEN)+IREC-1 = SPOINT IN CUTSIT
CUTADD(IFOUND) = SPOINT
IAT = IAT + IMATCH - 1
KT=IDSEQ - LNON + 1 + IAT + CUTSIT(SPOINT) - INON + 1
IF(KT.GT.IDSEQ) KT = KT - IDSEQ
IF(KT.LT.1) KT = ABS(KT) + 1
PSAVE(IFOUND) = KT
END IF
GO TO 51
END IF
END IF
50 CONTINUE
IOK = 0
END
SUBROUTINE S1D1(SEQ,IDSEQ,NREC,JEN,IREC,J1,J2,INC,KSTART,
+RECSTR,MAXEN,PSAVE,NSAVE,MAXMAT,RECSEQ,IDRSEQ,IFOUND,
+PCUT,CUTSIT,LENEN,MAXS,CUTADD,IROUND,KBOUT,IOK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),RECSEQ(IDRSEQ),TEMP3(40)
INTEGER RECSTR(MAXEN),PSAVE(MAXMAT),NSAVE(MAXMAT)
INTEGER PCUT(MAXEN),CUTSIT(MAXS),CUTADD(MAXMAT),SPOINT
INTEGER LENEN(MAXS)
IOK = 1
LREC = 0
DO 50 IREC=1,NREC
C POINT TO START OF STRING
ISTR=RECSTR(JEN)+(IREC-1)*(LREC)
C NOTE WE NEED TO POINT TO THE CORRECT ARRAY ELEMENT BUT
C TO REMEMBER THE SEQUENCE ELEMENT FOR OUTPUT
C IAT IS THE ARRAY ELEMENT
IAT = J1-INC-KSTART+1
SPOINT = PCUT(JEN) + IREC - 1
LREC = LENEN (SPOINT)
C FIND FIRST AND LAST NON N CHARS IN STRING
CALL GETNON(RECSEQ(ISTR),LREC,INON,LNON)
C CHECK FOR ALL N'S IN STRING!!
IF(LNON.GT.0)THEN
ISTR = ISTR + INON - 1
10 CONTINUE
IAT = IAT+INC
ILEFT = J2-IAT+1-KSTART+1
IF(ILEFT.GT.0)THEN
CALL FIND7(SEQ(IAT),ILEFT,
+ RECSEQ(ISTR),LNON,INC,IMATCH)
IF(IMATCH.GT.0)THEN
IFOUND = IFOUND + 1
IF(IFOUND.GT.MAXMAT)THEN
CALL ERROM(KBOUT,'Too many matches')
RETURN
END IF
IAT = IAT + IMATCH - 1
NSAVE(IFOUND) = JEN
C NEED THE CUTPOINT FOR THIS STRING; THE FIRST ONE THIS NAME
C IS AT PCUT(JEN) IN ARRAY CUTSIT, SO THIS ONE IS AT
C PCUT(JEN)+IREC-1 = SPOINT IN CUTSIT
CUTADD(IFOUND) = SPOINT
C SAVE POSITION RELATIVE TO ARRAY
KT = IAT + CUTSIT(SPOINT) - INON + 1
IF(KT.GT.IDSEQ)KT = KT - IDSEQ
IF(KT.LT.0)KT = ABS(KT) + 1
PSAVE(IFOUND) = KT
GO TO 10
END IF
END IF
IF(LNON.GE.IDSEQ)GO TO 50
IF(IROUND.NE.1)GO TO 50
IAT = 0
LNONM1 = LNON - 1
IF(LNONM1.GT.0)THEN
CALL SQCOPY(SEQ(IDSEQ-LNON+2),TEMP3,LNONM1)
CALL SQCOPY(SEQ,TEMP3(LNON),LNONM1)
END IF
51 CONTINUE
IAT = IAT + 1
ILEFT = 2 * LNONM1 - IAT + 1
IF(ILEFT.GT.0)THEN
CALL FIND7(TEMP3(IAT),ILEFT,RECSEQ(ISTR),LNON,INC,IMATCH)
IF(IMATCH.GT.0)THEN
IFOUND = IFOUND + 1
IF(IFOUND.GT.MAXMAT)THEN
CALL ERROM(KBOUT,'Too many matches')
RETURN
END IF
NSAVE(IFOUND)=JEN
C NEED THE CUTPOINT FOR THIS STRING; THE FIRST ONE THIS NAME
C IS AT PCUT(JEN) IN ARRAY CUTSIT, SO THIS ONE IS AT
C PCUT(JEN)+IREC-1 = SPOINT IN CUTSIT
CUTADD(IFOUND) = SPOINT
IAT = IAT + IMATCH - 1
KT=IDSEQ - LNON + 1 + IAT + CUTSIT(SPOINT) - INON + 1
IF(KT.GT.IDSEQ) KT = KT - IDSEQ
IF(KT.LT.1) KT = ABS(KT) + 1
PSAVE(IFOUND) = KT
END IF
GO TO 51
END IF
END IF
50 CONTINUE
IOK = 0
END
SUBROUTINE DECREN(RENZYM,IDREN,RECSEQ,IDRSEQ,NAMES,IDNAML,
+NAMEP,NAMLEN,NUMREC,LENEN,RECSTR,MAXEN,KBOUT,CUTSIT,PCUT,MAXS)
C ROUTINE TO DECODE A RESTRICTION ENZYME FILE ARRAY RENZYM IDREN
C INTO THE FOLLOWING:
C A CONCATENATED ARRAY OF RECOGNITION SEQUENCES RECSEQ IDRSEQ PREC
C A CONCATENATED ARRAY OF ENZYME NAMES NAMES IDNAML,PNAM
C A LIST OF POINTERS TO ENZYME NAMES IN NAMES NAMEP MAXEN ITOTEN
C A LIST OF ENZYME NAME LENGTHS NAMLEN MAXEN ITOTEN
C A LIST OF NUMBERS OF RECOGNITION SEQUENCES PER ENZYME NUMREC MAXEN
C A LIST OF LENGHTS OF RECOGNITION SEQUENCES PER RECOGNITION SEQUENCE
C LENEN MAXEN ITOTEN
C A LIST OF POINTERS TO THE FIRST CHARACTER OF THE FIRST RECOGNITION
C SEQUENCE PER ENZYME RECSTR MAXEN ITOTEN
C A LIST OF CUTSITES PER SEQUENCE CUTSIT
C A LIST OF START POSITIONS FOR CUTSITES PER NAME PCUT (ALSO USED
C FOR LENGTHS OF RECOGNITION SEQUENCES)
C AUTHOR: RODGER STADEN
CHARACTER RENZYM(IDREN),RECSEQ(IDRSEQ),NAMES(IDNAML)
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),NUMREC(MAXEN),LENEN(MAXEN)
INTEGER RECSTR(MAXEN),TOTEN,PREC,PREN,PNAM
INTEGER CUTSIT(MAXEN),PCUT(MAXEN)
CHARACTER CUT
EXTERNAL INDEXA
SAVE CUT
DATA CUT/''''/
C NB CUTSIT SHOULD REALLY BE DIMENSIONED TO BE LARGER THAN THE
C MAX NUMBER OF ENZYMES AS IT WILL BE LONGER
C
C set enzyme count to zero
TOTEN=0
C point to string store
PREC=1
C point to renzym
PREN=0
C point to names array
PNAM=1
C SET NUMBER OF CUTPOINTS TO ZERO. IT IS EQUAL TO THE NUMBER OF
C OF RECOGNITION SEQUENCES. NOTE THEY CAN ALMOST ALL BE CODED AS SINGLE
C SEQUENCES USING THE NC-IUB CODES, BUT SOMETIMES, IF THE CUTSITE IS
C NON-SYMMETRICAL TWO ARE NEEDED
ICUT = 0
C
10 CONTINUE
PREN=PREN+1
IF(PREN.LT.IDREN)THEN
C some data left
C should be pointing at start of name, count its chars
CALL CHARCT(RENZYM,IDREN,PREN,NCHAR)
IF(NCHAR.GT.0)THEN
C found a name
C increment number of enzymes
TOTEN=TOTEN+1
IF(TOTEN.LT.MAXEN)THEN
C space left for enzymes
IF((PNAM+NCHAR-1).GT.IDNAML)THEN
CALL ERROM(KBOUT,'Names array full')
GO TO 101
END IF
C space for name
C store name start position, length of name
NAMEP(TOTEN)=PNAM
NAMLEN(TOTEN)=NCHAR
CALL SQCOPY(RENZYM(PREN),NAMES(PNAM),NCHAR)
PNAM=PNAM+NCHAR
C count number of strings this enzyme
NREC=0
20 CONTINUE
C point to first char of string
PREN=PREN+NCHAR+1
C count chars in string
NCHAR=0
IF(PREN.LT.IDREN)CALL CHARCT(RENZYM,IDREN,PREN,NCHAR)
IF(NCHAR.EQ.0)THEN
C no chars in string. If first string then error, else no more strings
IF(NREC.EQ.0)THEN
CALL ERROM(KBOUT,'Name with no strings')
C decrease number of enzymes
TOTEN=TOTEN-1
GO TO 100
END IF
C some strings
C save number of strings this enzyme
NUMREC(TOTEN)=NREC
C
GO TO 10
END IF
C nonzero string length
NREC=NREC+1
******************************************
C SAVE ITS START POSITION IN RECSEQ IF FIRST THIS NAME
IF(NREC.EQ.1)RECSTR(TOTEN)=PREC
C FIND CUTSITE
JCUT=INDEXA(RENZYM(PREN),NCHAR,CUT)
C IF JCUT IS ZERO THEN NO CUTSITE MARKED
IF(ICUT.GE.MAXEN)GO TO 200
ICUT = ICUT + 1
CUTSIT(ICUT) = 0
C IF FIRST CUTSITE THIS NAME SAVE ITS POINTER
IF(NREC.EQ.1)PCUT(TOTEN) = ICUT
IF(JCUT.GT.0)THEN
NL = JCUT - 1
NR = NCHAR - JCUT
CUTSIT(ICUT) = NL
C COPY UP TO CUT
IF(NL.GT.0)THEN
CALL SQCOPY(RENZYM(PREN),RECSEQ(PREC),NL)
PREC = PREC + NL
END IF
C COPY AFTER CUT
IF(NR.GT.0)THEN
IPREN = PREN + NL + 1
CALL SQCOPY(RENZYM(IPREN),RECSEQ(PREC),NR)
PREC = PREC + NR
END IF
LENEN(ICUT) = NCHAR - 1
C (REDUCED NCHAR BY 1 TO ALLOW FOR CUT)
ELSE
C
C NO CUT SITE MARKED, USE OLD CODE
******************************************
C copy string to recseq
CALL SQCOPY(RENZYM(PREN),RECSEQ(PREC),NCHAR)
C
PREC=PREC+NCHAR
LENEN(ICUT) = NCHAR
END IF
C now look for next string
GO TO 20
END IF
C too many enzymes
CALL ERROM(KBOUT,'Too many names')
GO TO 100
END IF
C deal with name of zero length
CALL ERROM(KBOUT,'Error in names and strings file')
END IF
C now at end of file
100 CONTINUE
101 CONTINUE
C return length of names array
IDNAML=PNAM-1
C return number of enzymes
MAXEN=TOTEN
C return length of recseq
IDRSEQ=PREC
C RETURN NUMBER OF RECOGNITION SEQUENCES
MAXS = ICUT
RETURN
200 CONTINUE
C COME HERE WITH OVERFLOW OF CUTSITES
C
C REDUCE NUMBER OF NAMES BY 1
MAXEN = TOTEN - 1
IDNAML = PNAM - 1
IDRSEQ = PREC
C RETURN NUMBER OF RECOGNITION SEQUENCES
MAXS = ICUT
END
SUBROUTINE GETREN(RENZYM,IDREN,IDEV,FILNAM,FILE4,FILE6,FILEA,
+ IOK,KBIN,KBOUT,IFILE,DIALOG,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),FILE4*(*),FILE6*(*),FILEA*(*),RENZYM(IDREN)
INTEGER DIALOG
PARAMETER (MAXPRM = 16)
CHARACTER PROMPT(5)*(MAXPRM)
IOK = 1
IF(DIALOG.EQ.0) NOPT = IFILE
IOP = 1
1 CONTINUE
IF(DIALOG.EQ.1) THEN
IOP = 1
PROMPT(1) = 'Search'
PROMPT(2) = 'List enzyme file'
PROMPT(3) = 'Clear text'
PROMPT(4) = 'Clear graphics'
CALL RADION('Select operation',PROMPT,4,IOP,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.1) THEN
IOK = 1
RETURN
END IF
IF(IOP.EQ.3) THEN
CALL CLEARV
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
GO TO 1
END IF
IF(IOP.EQ.4) THEN
CALL CLEARG
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
GO TO 1
END IF
IF((IOP.EQ.1).OR.(IOP.EQ.2)) THEN
NOPT = IFILE
PROMPT(1) = 'All enzymes file'
PROMPT(2) = 'Six cutter file'
PROMPT(3) = 'Four cutter file'
PROMPT(4) = 'Personal file'
PROMPT(5) = 'Keyboard'
CALL RADION('Select input source',PROMPT,5,NOPT,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(NOPT.LT.1) THEN
IOK = 1
RETURN
END IF
END IF
END IF
IF(NOPT.EQ.3)THEN
CALL OPENRS(IDEV,FILE4,IOK,LRECL,2)
IF(IOK.NE.0)GO TO 100
IF(IOP.EQ.2)THEN
CALL TTEXT1(IDEV,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IQUIT)
GO TO 1
END IF
CALL ARRFIL(IDEV,RENZYM,IDREN,KBOUT)
CLOSE(UNIT=IDEV)
RETURN
END IF
IF(NOPT.EQ.2)THEN
CALL OPENRS(IDEV,FILE6,IOK,LRECL,2)
IF(IOK.NE.0)GO TO 100
IF(IOP.EQ.2)THEN
CALL TTEXT1(IDEV,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IQUIT)
GO TO 1
END IF
CALL ARRFIL(IDEV,RENZYM,IDREN,KBOUT)
CLOSE(UNIT=IDEV)
RETURN
END IF
IF(NOPT.EQ.1)THEN
CALL OPENRS(IDEV,FILEA,IOK,LRECL,2)
IF(IOK.NE.0)GO TO 100
IF(IOP.EQ.2)THEN
CALL TTEXT1(IDEV,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IQUIT)
GO TO 1
END IF
CALL ARRFIL(IDEV,RENZYM,IDREN,KBOUT)
CLOSE(UNIT=IDEV)
RETURN
END IF
IF(NOPT.EQ.4)THEN
10 CONTINUE
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Restriction enzyme file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.EQ.2) GO TO 1
IF(IOK.NE.0)GO TO 100
IF(IOP.EQ.2)THEN
CALL TTEXT1(IDEV,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IQUIT)
GO TO 1
END IF
CALL ARRFIL(IDEV,RENZYM,IDREN,KBOUT)
CLOSE(UNIT=IDEV)
RETURN
END IF
IF(NOPT.EQ.5)THEN
CALL GETRKB(RENZYM,IDREN,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+ IOK)
RETURN
END IF
GO TO 1
100 CONTINUE
CALL ERROM(KBOUT,'Error opening file')
IOK=1
GO TO 1
END
SUBROUTINE FIND6(SEQ,IDIM1,STRING,IDIM2,INC,IMATCH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER PSEQ,PSTR
EXTERNAL IUBM1
PSEQ=1-1*INC
PSTR=1
IMATCH=0
100 CONTINUE
PSEQ=PSEQ+1-PSTR
PSEQ=PSEQ+INC-1
PSTR=0
500 CONTINUE
PSEQ=PSEQ+1
IF(PSEQ.GT.IDIM1)RETURN
PSTR=PSTR+1
IUBMAT = IUBM1(STRING(PSTR),SEQ(PSEQ))
IF(IUBMAT.EQ.0)GO TO 100
IF(PSTR.LT.IDIM2)GO TO 500
IMATCH=PSEQ-IDIM2+1
END
SUBROUTINE FIND7(SEQ,IDIM1,STRING,IDIM2,INC,IMATCH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER PSEQ,PSTR
EXTERNAL IUBM2
PSEQ=1-1*INC
PSTR=1
IMATCH=0
100 CONTINUE
PSEQ=PSEQ+1-PSTR
PSEQ=PSEQ+INC-1
PSTR=0
500 CONTINUE
PSEQ=PSEQ+1
IF(PSEQ.GT.IDIM1)RETURN
PSTR=PSTR+1
IUBMAT = IUBM2(STRING(PSTR),SEQ(PSEQ))
IF(IUBMAT.EQ.0)GO TO 100
IF(PSTR.LT.IDIM2)GO TO 500
IMATCH=PSEQ-IDIM2+1
END
SUBROUTINE GETNON(STRING,IDS,LNOTN,RNOTN)
C AUTHOR: RODGER STADEN
CHARACTER STRING(IDS),CHAR
INTEGER RNOTN
EXTERNAL NNDEXL,NNDEXR
PARAMETER (CHAR='N')
C FIND ENDS OF CHARACTER STRING THAT ARE NOT N
C RNOTN IS RETURNED AS THE LENGTH OF THE STRING WITHOUT N'S
C IT WILL BE < 1 IF THE WHOLE STRING IS N'S
LNOTN = NNDEXL(STRING,IDS,CHAR)
RNOTN = NNDEXR(STRING,IDS,CHAR)
RNOTN = RNOTN - LNOTN + 1
END
INTEGER FUNCTION NNDEXL(STRING,ID,CHAR)
CHARACTER STRING(ID),CHAR
C FUNCTION TO FIND FIRST NON-OCCURRENCE OF CHAR IN STRING
DO 10 I = 1,ID
IF(STRING(I).NE.CHAR)THEN
NNDEXL = I
RETURN
END IF
10 CONTINUE
NNDEXL = ID + 1
END
INTEGER FUNCTION NNDEXR(STRING,ID,CHAR)
CHARACTER STRING(ID),CHAR
C FUNCTION TO FIND FIRST NON-OCCURRENCE OF CHAR IN STRING
DO 10 I = ID,1,-1
IF(STRING(I).NE.CHAR)THEN
NNDEXR = I
RETURN
END IF
10 CONTINUE
NNDEXR = 0
END
SUBROUTINE S2(SEQ,IDSEQ,IFOUND,NAMEP,NAMLEN,LENEN,MAXEN,
+NSAVE,PSAVE,LENGTH,MAXMAT,NAMES,IDNAML,
+CUTSIT,MAXS,CUTADD,IROUND,
+LENN,IDEV,J2,KSTART,JOB)
C 19-2-91 Changed idt and idtrs from 20 each to 15 and 35
C so names have max length 15 chars and recognition seqs 35
C for this part of the code.
PARAMETER (IDT = 15, IDTRS = 35)
CHARACTER SEQ(IDSEQ),TEMP1(IDT),TEMP2(IDTRS),NAMES(IDNAML),CUTSYM
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),LENEN(MAXS)
INTEGER NSAVE(MAXMAT),PSAVE(MAXMAT),LENGTH(MAXMAT)
INTEGER CUTSIT(MAXS),CUTADD(MAXMAT)
EXTERNAL LWRAP3
SAVE CUTSYM
DATA CUTSYM/''''/
WRITE(IDEV,1011)
1011 FORMAT(
+' Name Sequence',
+' Position',
+' Fragment lengths')
DO 60 KOUT=1,IFOUND
KR1=NAMEP(NSAVE(KOUT))
KS1=PSAVE(KOUT) - CUTSIT(CUTADD(KOUT))
CALL FILLC(TEMP1,IDT,' ')
CALL FILLC(TEMP2,IDTRS,' ')
KR2 = NAMLEN(NSAVE(KOUT))
CALL SQCOPY(NAMES(KR1),TEMP1,KR2)
ITEMP1 = 0
DO 58 ITEMP = 1,CUTSIT(CUTADD(KOUT))
ITEMP1 = ITEMP
KS2 = LWRAP3(J2,KS1)
TEMP2(ITEMP) = SEQ(KS2)
KS1 = KS1 + 1
58 CONTINUE
ITEMP1 = ITEMP1 + 1
TEMP2(ITEMP1) = CUTSYM
ITEMP1 = ITEMP1 + 1
DO 59 ITEMP = ITEMP1,LENEN(CUTADD(KOUT))+1
KS2 = LWRAP3(J2,KS1)
TEMP2(ITEMP) = SEQ(KS2)
KS1 = KS1 + 1
59 CONTINUE
LENN=PSAVE(KOUT)-LENN
IF(JOB.EQ.0) THEN
WRITE(IDEV,1008)KOUT,
+ TEMP1,TEMP2,PSAVE(KOUT)+KSTART-1,LENN,LENGTH(KOUT)
ELSE IF(JOB.EQ.1) THEN
WRITE(IDEV,1008)KOUT,
+ TEMP1,TEMP2,PSAVE(KOUT)+KSTART-1,LENGTH(KOUT)
END IF
LENN=PSAVE(KOUT)
1008 FORMAT(' ',I4,' ',15A1,1X,35A1,1X,I6,1X,I6,1X,I6)
60 CONTINUE
IF(IROUND.EQ.0)THEN
C need length to end
LEND = J2-PSAVE(IFOUND)+1-KSTART+1
IF(JOB.EQ.0)WRITE(IDEV,1020)LEND,LENGTH(IFOUND+1)
IF(JOB.EQ.1)WRITE(IDEV,1020)LEND
1020 FORMAT(' ',64X,I6,1X,I6)
END IF
END
SUBROUTINE PETER(SEQ,IDSEQ,J1,J2,KSTART,
+NAMES,IDNAML,NAMEP,NAMLEN,MAXEN,
+PSAVE,NSAVE,IDEV,IFOUND,LINLEN,PAA,ITRAN)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ),NAMES(IDNAML),TEMP1*130
INTEGER NAMEP(MAXEN),NAMLEN(MAXEN),PSAVE(IFOUND),NSAVE(IFOUND)
CHARACTER TRANF3*3,PAA(125)
EXTERNAL TRANF3
EXTERNAL NOTIRL
C KSTART = SEQ ELEMENT IN SEQ(1)
C J1 = START OF ACTIVE REGION RELATIVE TO SEQ(1)
C PSAVE ARE RELATIVE TO SEQ(1)
C WORK RELATIVE TO ARRAY SEQ
NLINES = 1 + (J2 - J1)/LINLEN
I1 = J1
I2 = MIN(J2,I1+LINLEN-1)
NEXTE = 1
ITMAX = J2 - 2
DO 100 KK = 1,NLINES
TEMP1 = ' '
LP1 = 0
DO 50 I = NEXTE,IFOUND
KS1=PSAVE(I)
IF(KS1.GT.I2)THEN
NEXTES = I
GO TO 51
END IF
KR1=NAMEP(NSAVE(I))
LP = KS1 - I1 + 1
C TRAP FOR CUTSITE OUT OF RANGE!!!!!!
IF(LP.LT.0)THEN
WRITE(IDEV,1004)(NAMES(K),K=KR1,KR1+NAMLEN(NSAVE(I))-1)
1004 FORMAT(' WARNING:',
+ ' Recognition site with cut off end for enzyme ',20A1)
WRITE(IDEV,1005)
1005 FORMAT(//)
END IF
IF(LP.GT.0)THEN
IF(LP.LE.LP1)THEN
WRITE(IDEV,1000)TEMP1(1:MAX(1,NOTIRL(TEMP1,130,' ')))
1000 FORMAT(' ',A)
CALL DOTTY(TEMP1)
END IF
DO 66 ITEMP=1,NAMLEN(NSAVE(I))
TEMP1(LP:LP)=NAMES(KR1)
KR1=KR1+1
LP = LP + 1
66 CONTINUE
LP1 = LP
END IF
NEXTES = I + 1
50 CONTINUE
51 CONTINUE
WRITE(IDEV,1000)TEMP1(1:MAX(1,NOTIRL(TEMP1,130,' ')))
NEXTE = NEXTES
WRITE(IDEV,1001)(SEQ(K),K=I1,I2)
WRITE(IDEV,1002)(K,K=I1+9+KSTART-1,I2+KSTART-1,10)
IF(ITRAN.NE.1)THEN
WRITE(IDEV,1010)(TRANF3(SEQ(K),PAA,ITRAN),
+ K=I1,MIN(ITMAX,I2-1),3)
WRITE(IDEV,1011)(TRANF3(SEQ(K),PAA,ITRAN),
+ K=I1+1,MIN(ITMAX,I2),3)
WRITE(IDEV,1012)(TRANF3(SEQ(K),PAA,ITRAN),
+ K=I1+2,MIN(ITMAX,I2+1),3)
END IF
WRITE(IDEV,1003)
1003 FORMAT(' ')
1002 FORMAT(' ',10I10)
1001 FORMAT(' ',100A1)
1010 FORMAT(1X,40(A))
1011 FORMAT(2X,40(A))
1012 FORMAT(3X,40(A))
I1 = I2 + 1
I2 = MIN((I1+LINLEN-1),J2)
100 CONTINUE
END
SUBROUTINE DOTTY(TEMP)
CHARACTER TEMP*(*),DOT
PARAMETER (DOT='.')
DO 10 I = LEN(TEMP),2,-1
IF(TEMP(I:I).NE.' ')THEN
IF(TEMP(I:I).NE.DOT)THEN
IF((TEMP(I-1:I-1).EQ.' ').OR.(TEMP(I-1:I-1).EQ.DOT))THEN
TEMP(I:I) = DOT
ELSE
TEMP(I:I) = ' '
END IF
END IF
END IF
10 CONTINUE
IF(TEMP(1:1).NE.' ')TEMP(1:1) = DOT
END
SUBROUTINE SEQFIT(SEQNCE,IDIM1,SEQ2,IDIM3I,ITOT,ITOTEL,ITOTID,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,
+IDEV,IDEVIN,FILE2,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
C AUTHOR: RODGER STADEN
INTEGER DIALOG
PARAMETER (MAXSTR = 50)
CHARACTER HELPF*(*)
CHARACTER SEQNCE(IDIM1),STRING(MAXSTR)
CHARACTER SEQ2(IDIM3I),FILE2*(*)
CHARACTER MATCH(MAXSTR)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
INTEGER ANSTY
CALL SHOWFU(KBOUT,'Find percentage matches')
IDIM2 = 0
IDIM2I = MAXSTR
IDIM3 = IDIM3I
IS = J1
IE = J2
ISS = 1
ISE = 10
CALL GETPAR(18,2,IOK,ANSTY,IPR,
+IPAR3,IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
PR = REAL(IPR)
5 CONTINUE
IF(DIALOG.EQ.1) THEN
CALL SQPFD1(SEQ2,IDIM3I,IDIM3,ANSTY,IDEVIN,FILE2,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
10 CONTINUE
CALL SQPFD2(SEQ2,IDIM3,MATCH,STRING,
+J1,J2,KSTART,ISS,ISE,IS,IE,IDIM2I,IDIM2,MAXSTR,PR,ANSTY,MINP,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL SQPF6(SEQNCE,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,KSTART,J1,J2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IOK)
IF(IOK.EQ.2)THEN
WRITE(KBOUT,1017)ITOTID
1017 FORMAT(/,' More than',I6,' matches.',
+ ' Try changing percentage or region'/)
GO TO 10
END IF
IF(IOK.NE.0) RETURN
CALL SQPF7(SEQNCE,IDIM1,STRING,IDIM2,MATCH,ITOT,ITOTEL,ITOTID,
+ITOTP,KSTART,J1,J2,ISS,PR,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
GO TO 10
END
SUBROUTINE SQPFD2(SEQ2,IDIM3,MATCH,STRING,
+J1,J2,KSTART,ISS,ISE,IS,IE,IDIM2I,IDIM2,MAXSTR,PR,ANSTY,MINP,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*)
CHARACTER STRING(MAXSTR)
CHARACTER SEQ2(IDIM3)
CHARACTER MATCH(MAXSTR)
INTEGER ANSC,ANSTY
IOK = 1
CALL YESNO(ANSC,'Keep picture',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANSC.LT.0)RETURN
IF(ANSC.EQ.1)CALL CLEARG
IF(ANSTY.EQ.1) THEN
CALL SQPF2(SEQ2,IDIM3,STRING,IDIM2I,IDIM2,ISS,ISE,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
IF(ANSTY.EQ.0) THEN
CALL SQPF3(STRING,MATCH,IDIM2I,IDIM2,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL SQPF8(STRING,IDIM2,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
C CALL SQPF4(KBIN,KBOUT,J1,J2,IS,IE,
C +IHELPS,IHELPE,HELPF,IDEVH,IOK)
C IF(IOK.NE.0) RETURN
CALL SQPF5(IDIM2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PR,MINP,IOK)
C IF(IOK.NE.0) RETURN
RETURN
END
SUBROUTINE SQPF6(SEQNCE,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,KSTART,J1,J2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IOK)
CHARACTER SEQNCE(IDIM1),STRING(IDIM2)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
IOK = 1
CALL FILLI(ITOT,ITOTID,0)
CALL FILLI(ITOTEL,ITOTID,0)
CALL SQFIT6(SEQNCE,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,KSTART)
IF(ITOTP.GT.ITOTID)THEN
IOK = 2
RETURN
END IF
IF(ITOTP.GT.0)THEN
XMIN=J1
XMAX=J2
YMIN=0.
YMAX=IDIM2
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
DO 203 I=1,ITOTP
X=ITOTEL(I)
Y=ITOT(I)
CALL LINE(X,X,YMIN,Y,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
203 CONTINUE
END IF
CALL VT100M
IOK = 0
END
SUBROUTINE SQPF8(STRING,LENGTH,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER STRING(LENGTH)
IOK = 1
IANSC = 0
WRITE(KBOUT,1000)(STRING(K),K=1,LENGTH)
1000 FORMAT(' STRING=',50A1)
CALL YESNO(IANSC,'This sense',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IANSC.LT.0)RETURN
IF(IANSC.EQ.1)THEN
CALL SQREV(STRING,LENGTH)
CALL SQCOM2(STRING,LENGTH)
WRITE(KBOUT,1000)(STRING(K),K=1,LENGTH)
END IF
IOK = 0
END
SUBROUTINE SEPFIT(SEQNCE,IDIM1,SEQ2,IDIM3I,ITOT,ITOTEL,ITOTID,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,
+IDEV,IDEVIN,FILE2,KBIN,KBOUT,MATRIX,IDM,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
C AUTHOR: RODGER STADEN
INTEGER DIALOG
PARAMETER (MAXSTR = 50)
CHARACTER HELPF*(*)
CHARACTER SEQNCE(IDIM1),STRING(MAXSTR)
CHARACTER SEQ2(IDIM3I),FILE2*(*)
CHARACTER MATCH(MAXSTR)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID),MATRIX(IDM,IDM)
INTEGER ANSTY,SMIN,SMAX
CALL SHOWFU(KBOUT,'Find matches using a score matrix')
MINP = 0
IDIM2 = 0
IDIM2I = MAXSTR
IDIM3 = IDIM3I
IS = J1
IE = J2
ISS = 1
ISE = 10
CALL GETPAR(19,1,IOK,ANSTY,
+IPAR2,IPAR3,IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
5 CONTINUE
IF(DIALOG.EQ.1) THEN
CALL SQPFD1(SEQ2,IDIM3I,IDIM3,ANSTY,IDEVIN,FILE2,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
10 CONTINUE
CALL SEPFD2(SEQ2,IDIM3,MATCH,STRING,
+J1,J2,KSTART,ISS,ISE,IS,IE,IDIM2I,IDIM2,MAXSTR,ANSTY,MINP,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,MATRIX,IDM,
+SMIN,SMAX,IOK)
IF(IOK.NE.0) RETURN
CALL SEPF6(SEQNCE,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,KSTART,J1,J2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,MATRIX,IDM,
+SMIN,SMAX,IOK)
IF(IOK.EQ.2)THEN
WRITE(KBOUT,1017)ITOTID
1017 FORMAT(/,' More than',I6,' matches.',
+ ' Try changing score or region'/)
GO TO 10
END IF
IF(IOK.NE.0) RETURN
CALL SEPF7(SEQNCE,IDIM1,STRING,IDIM2,MATCH,ITOT,ITOTEL,ITOTID,
+ITOTP,MINP,KSTART,J1,J2,ISS,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
GO TO 10
END
SUBROUTINE SEPFD2(SEQ2,IDIM3,MATCH,STRING,
+J1,J2,KSTART,ISS,ISE,IS,IE,IDIM2I,IDIM2,MAXSTR,ANSTY,MINP,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,MATRIX,IDM,
+SMIN,SMAX,IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*)
CHARACTER STRING(MAXSTR)
CHARACTER SEQ2(IDIM3)
CHARACTER MATCH(MAXSTR)
INTEGER ANSC,ANSTY,MATRIX(IDM,IDM),SMIN,SMAX
IOK = 1
CALL YESNO(ANSC,'Keep picture',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANSC.LT.0)RETURN
IF(ANSC.EQ.1)CALL CLEARG
IF(ANSTY.EQ.1) THEN
CALL SQPF2(SEQ2,IDIM3,STRING,IDIM2I,IDIM2,ISS,ISE,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
IF(ANSTY.EQ.0) THEN
CALL SQPF3(STRING,MATCH,IDIM2I,IDIM2,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL SQPF8(STRING,IDIM2,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
C CALL SEPF4(KBIN,KBOUT,J1,J2,IS,IE,
C +IHELPS,IHELPE,HELPF,IDEVH,IOK)
C IF(IOK.NE.0) RETURN
CALL SEPF5(IDIM2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,MINP,MATRIX,IDM,
+SMIN,SMAX,STRING,IOK)
C IF(IOK.NE.0) RETURN
RETURN
END
SUBROUTINE SEPF5(IDIM2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,MINP,MATRIX,IDM,
+SMIN,SMAX,STRING,IOK)
CHARACTER HELPF*(*)
CHARACTER STRING(IDIM2)
INTEGER MATRIX(IDM,IDM),SMIN,SMAX,VALUE
IOK = 1
CALL GTSCR(STRING,IDIM2,MATRIX,IDM,SMIN,SMAX)
WRITE(KBOUT,1000)SMIN,SMAX
1000 FORMAT(' Minimum score=',I6,' Maximum score=',I6)
MININ = SMIN
MAXIN = SMAX
IF(MINP.EQ.0) MINP = SMAX
CALL GETINT(MININ,MAXIN,MINP,'Score',VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINP = VALUE
IOK = 0
END
SUBROUTINE SEPF6(SEQNCE,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,KSTART,J1,J2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,MATRIX,IDM,
+SMIN,SMAX,IOK)
CHARACTER SEQNCE(IDIM1),STRING(IDIM2)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID),MATRIX(IDM,IDM)
INTEGER SMIN,SMAX
IOK = 1
CALL FILLI(ITOT,ITOTID,0)
CALL FILLI(ITOTEL,ITOTID,0)
CALL SPFIT(SEQNCE,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,MATRIX,IDM,KSTART)
IF(ITOTP.GT.ITOTID)THEN
IOK = 2
RETURN
END IF
IF(ITOTP.GT.0)THEN
XMIN = J1
XMAX = J2
YMIN = SMIN
YMAX = SMAX
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
DO 203 I=1,ITOTP
X=ITOTEL(I)
Y=ITOT(I)
CALL LINE(X,X,YMIN,Y,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
203 CONTINUE
END IF
CALL VT100M
IOK = 0
END
C SQFITP
SUBROUTINE SQFITP(SEQ,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,KSTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
IDIF=(IE-IS+2)-IDIM2
IPSEQ=IS-KSTART+1
ITOTP=0
DO 200 I=1,IDIF
NTOT=0
IP=IPSEQ
DO 100 J=1,IDIM2
IF(SEQ(IP).EQ.STRING(J))THEN
NTOT=NTOT+1
ELSE IF (STRING(J).EQ.'-')THEN
NTOT=NTOT+1
END IF
IP=IP+1
100 CONTINUE
IF(NTOT.GE.MINP)THEN
ITOTP=ITOTP+1
IF(ITOTP.GT.ITOTID)RETURN
ITOT(ITOTP)=NTOT
ITOTEL(ITOTP)=IP-IDIM2+KSTART-1
END IF
IPSEQ=IPSEQ+1
200 CONTINUE
END
SUBROUTINE SIGNAL(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEV,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IDEVOT,
+SUM,WT,TOT,IDM,MAXLEN,LINE,CHRSET,DIALOG,JOB)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILNAM*(*),HELPF*(*)
CHARACTER LINE(MAXLEN),CHRSET(IDM)
INTEGER SUM(IDM,MAXLEN),TOT(MAXLEN),DIALOG
REAL WT(IDM,MAXLEN)
CALL SHOWFU(KBOUT,'Motif search using weight matrix')
C IF(JOB.EQ.2)CALL SHOWFU(KBOUT,'on complementary strand')
CALL GETPAR(20,1,IOK,IGON,
+IPAR2,IPAR3,IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL SIGNLD(IDEV,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+SUM,IDM,MAXLEN,TOT,WT,LENGTH,MIDDLE,YMIN,YMAX,
+IGON,JOB,IDEVOT,LINE,CHRSET,DIALOG,IOK)
IF(IOK.NE.0) RETURN
CALL SIGNLP(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WT,IDM,MAXLEN,LENGTH,
+YMIN,YMAX,IGON,MIDDLE)
END
SUBROUTINE SIGNLD(IDEV,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+SUM,IDM,MAXLEN,TOT,WT,LENGTH,MIDDLE,YMIN,YMAX,
+IGON,JOB,IDEVOT,LINE,CHRSET,DIALOG,IOK)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),HELPF*(*),LINE(MAXLEN),CHRSET(IDM)
INTEGER SUM(IDM,MAXLEN),TOT(MAXLEN),DIALOG
REAL WT(IDM,MAXLEN)
PARAMETER (MAXPRM = 21)
CHARACTER PROMPT(3)*(MAXPRM)
IOK = 1
IDO = 1
PROMPT(1) = 'Use weight matrix'
PROMPT(2) = 'Make weight matrix'
PROMPT(3) = 'Rescale weight matrix'
CALL RADION('Select operation',PROMPT,3,IDO,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IDO.LT.1) RETURN
IF(IDO.GT.1) THEN
CALL MKWT(WT,SUM,TOT,CHRSET,IDM,MAXLEN,
+ IDEV,IDEVOT,KBIN,KBOUT,LINE,
+ FILNAM,IDO,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IOK = 1
RETURN
END IF
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+'Motif weight matrix file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
LENGTH=MAXLEN
CALL RDWMT(TOT,SUM,MIDDLE,LENGTH,MAXLEN,YMIN,YMAX,IDEV,
+ IOK,IDM,KBOUT,0)
IF(IOK.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
IF(DIALOG.EQ.1) THEN
AMN = -9999.
AMX = 9999.
CALL GETRL(AMN,AMX,YMIN,'Cutoff score',VAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMIN = VAL
END IF
IF(YMIN.LT.0.0)CALL GETW(TOT,SUM,WT,LENGTH,IDM,MAXLEN)
IF(YMIN.GE.0.0)CALL GETW2(SUM,WT,LENGTH,IDM,MAXLEN)
IF(JOB.EQ.2)CALL COMPWM(WT,LENGTH,MAXLEN,IDM)
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
IOK = 0
END
SUBROUTINE SIGNLP(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WT,IDM,MAXLEN,LENGTH,
+YMIN,YMAX,IGON,MIDDLE)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL WT(IDM,MAXLEN)
INTEGER CTONUM
EXTERNAL CTONUM
XMAX=J2
XMIN=J1
IF(IGON.EQ.0)THEN
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
DO 10 I=J1,J2-LENGTH+1
SUM1=0.
K=0
DO 5 J=I,I+LENGTH-1
K=K+1
SUM1=SUM1+WT(CTONUM(SEQ(J)),K)
5 CONTINUE
IF(SUM1.GE.YMIN)THEN
IF(IGON.EQ.0)THEN
X=I+MIDDLE
CALL LINE(X,X,YMIN,SUM1,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
IF(IGON.EQ.1)THEN
CALL VT100M
WRITE(IDEVOT,1010)I+MIDDLE,SUM1,(SEQ(K),K=I,I+LENGTH-1)
1010 FORMAT(' ',I7,F10.2,' ',120A1)
END IF
END IF
10 CONTINUE
CALL VT100M
RETURN
END
SUBROUTINE COMPWM(WTR,LENGTH,MAXL,IDM)
C AUTHOR: RODGER STADEN
REAL WTR(IDM,MAXL)
C COMPLEMENT THE WEIGHT MATRIX
DO 60 I=1,LENGTH
DO 60 J=1,2
T = WTR(J,I)
WTR(J,I)=WTR(J+2,I)
WTR(J+2,I)=T
60 CONTINUE
L = LENGTH/2
DO 70 I=1,L
L1 = 1 + LENGTH-I
DO 70 J=1,IDM
T = WTR(J,L1)
WTR(J,L1)=WTR(J,I)
WTR(J,I)=T
70 CONTINUE
END
C AACOMP
SUBROUTINE AACOMP(SUM,IDEV,PAA)
C AUTHOR: RODGER STADEN
REAL SUM(4,4,4),AAWTS(20),EXPERC(20),APERC(20),ASUM(20)
REAL HYDRO(20)
CHARACTER PAA(5,5,5),AA(20)
SAVE AA,AAWTS,EXPERC,HYDRO
DATA AA/'A','C','D','E','F',
+'G','H','I','K','L',
+'M','N','P','Q','R',
+'S','T','V','W','Y'/
DATA AAWTS/71.09,103.15,115.10,129.13,147.19,
+57.07,137.16,113.17,128.19,113.17,
+131.21,114.12,97.13,128.15,156.20,
+87.09,101.12,99.15,186.23,163.19/
C ARGOS VALUES
DATA EXPERC/8.3,1.7,5.3,6.2,3.9,7.2,2.2,5.2,5.7,
+9.0,2.4,4.4,5.1,4.0,5.7,6.9,5.8,6.6,1.3,3.2/
C DAYHOFF VALUES
C DATA EXPERC/8.6,2.9,5.5,6.0,3.6,8.4,2.0,4.5,6.6,
C 17.4,1.7,4.3,5.2,3.9,4.9,7.0,6.1,6.6,1.3,3.4/
DATA HYDRO/-0.4,0.17,-1.31,-1.22,1.92,
+-0.67,-0.64,1.25,-0.67,1.22,
+1.02,-0.92,-0.49,-0.91,-0.59,
+-0.55,-0.28,0.91,0.50,1.67/
C hydrophobicity values from r. m. sweet and d. eisenberg, j mol. biol
C (1983) 171, 479-488
C
C calc amino acid composition
CALL FILLR(ASUM,20,0.0)
TOTAL=0.
ATOT=0.
DO 200 I=1,4
DO 200 J=1,4
DO 200 K=1,4
DO 150 L=1,20
IF(PAA(K,J,I).EQ.AA(L)) THEN
ASUM(L)=ASUM(L)+SUM(I,J,K)
ATOT=ATOT+SUM(I,J,K)
GO TO 151
END IF
150 CONTINUE
151 CONTINUE
200 CONTINUE
C set hydrophobicity to zero
HYDROP=0.
DO 300 I=1,20
HYDROP=HYDROP+ASUM(I)*HYDRO(I)
IF(ATOT.NE.0.)APERC(I)=100.*((100.*ASUM(I)/ATOT)-EXPERC(I))
+ /EXPERC(I)
C botch-up to allow use of dayhoff aawts-18.0 (H2O really=18.015)
TOTAL=TOTAL+ASUM(I)*(AAWTS(I)-0.015)
300 CONTINUE
C add on one water molecule
TOTAL=TOTAL+18.015
WRITE(IDEV,1002)(AA(K),K=1,10)
WRITE(IDEV,1003)(ASUM(K),K=1,10)
WRITE(IDEV,1005)(APERC(K),K=1,10)
WRITE(IDEV,1002)(AA(K),K=11,20)
WRITE(IDEV,1003)(ASUM(K),K=11,20)
WRITE(IDEV,1005)(APERC(K),K=11,20)
1005 FORMAT(' O-E % ',10F5.0)
1006 FORMAT(' EXP % ',10F4.0)
1002 FORMAT(/' ',10(3X,A1,1X))
1003 FORMAT(' ',10F5.0)
WRITE(IDEV,1004)ATOT,TOTAL,HYDROP
1004 FORMAT(' Total acids=',F6.0,' Molecular weight=',F10.0,
+' Hydrophobicity=',F6.1)
END
C BCOMPC
SUBROUTINE BCOMPC(SUM,IDEV)
C AUTHOR: RODGER STADEN
REAL SUM(4,4,4),COMP(4,3),COMPO(4,3),BCOMP(4),BCOMPX(4)
CHARACTER BASE(4)
SAVE BASE
DATA BASE/'T','C','A','G'/
C calc base composition
TCOMP=0.
DO 50 I=1,4
BCOMP(I)=0.
DO 50 J=1,3
COMPO(I,J)=0.
COMP(I,J)=0.
50 CONTINUE
DO 55 J=1,4
DO 55 K=1,4
DO 55 L=1,4
TCOMP=TCOMP+SUM(J,K,L)
BCOMP(J)=BCOMP(J)+SUM(J,K,L)+SUM(K,J,L)+SUM(K,L,J)
COMP(J,1)=COMP(J,1)+SUM(J,K,L)
COMP(J,2)=COMP(J,2)+SUM(K,J,L)
COMP(J,3)=COMP(J,3)+SUM(K,L,J)
55 CONTINUE
WRITE(IDEV,1013)TCOMP
1013 FORMAT(' Total codons=',F10.0)
C calc % composition
DO 60 I=1,4
DO 60 J=1,3
IF(TCOMP.NE.0.)COMPO(I,J)=COMP(I,J)*100./TCOMP
IF(BCOMP(I).NE.0.)COMP(I,J)=COMP(I,J)*100./BCOMP(I)
60 CONTINUE
WRITE(IDEV,1010)BASE
1010 FORMAT(4(10X,A1))
WRITE(IDEV,1011)(J,(COMP(I,J),I=1,4),J=1,3)
WRITE(IDEV,1000)
1000 FORMAT(' ',4(' -----'))
WRITE(IDEV,1002)
1002 FORMAT(' =',4(' 100% '))
WRITE(IDEV,1001)(J,(COMPO(I,J),I=1,4),J=1,3)
1011 FORMAT(3(/2X,I1,(4(F10.2,1X))))
1001 FORMAT(3(/2X,I1,(4(F10.2,1X)),' = 100%'))
DO 65 I=1,4
IF(TCOMP.NE.0.)BCOMP(I)=BCOMP(I)*33.3333/TCOMP
65 CONTINUE
WRITE(IDEV,1012)BCOMP
1012 FORMAT(2X,'%',4(F10.2,1X),' Observed, overall totals')
1014 FORMAT(2X,'%',4(F10.2,1X),' Expected, even codons per acid')
CALL BCOMPE(SUM,BCOMPX)
WRITE(IDEV,1014)BCOMPX
END
SUBROUTINE BCOMPE(FABC,BCOMP)
C AUTHOR: RODGER STADEN
INTEGER NCODON(21),IACID(4,4,4)
REAL FABC(4,4,4),FA(21),BCOMP(4)
C array with number of codons per acid
SAVE IACID,NCODON
DATA IACID/
+ 1,2,3,5,6,7,8,9,
+ 10,12,14,16,18,20,6,21,
+ 1,2,3,5,6,7,8,9,
+ 10,12,14,16,18,20,6,21,
+ 2,2,3,5,6,7,8,9,
+ 11,13,15,17,11,20,20,21,
+ 2,2,4,5,6,7,8,9,
+ 11,13,15,17,19,20,20,21/
DATA NCODON/
+ 2,6,3,1,4,6,4,4,4,2,3,2,2,2,2,2,2,2,1,6,4/
TOTAL=0.
CALL FILLR(BCOMP,4,0.)
CALL FILLR(FA,21,0.)
DO 20 I=1,4
DO 20 J=1,4
DO 20 K=1,4
C point to this acid
JACID=IACID(I,J,K)
C sum its codons
FA(JACID)=FA(JACID)+FABC(I,J,K)
TOTAL=TOTAL+FABC(I,J,K)
20 CONTINUE
C calculate even usage of each acids codons
C need to look up the acids number, its composition is divided by
C its number of codons
DO 21 I=1,4
DO 21 J=1,4
DO 21 K=1,4
C get acid number this codon
JACID=IACID(I,J,K)
BCOMP(I)=BCOMP(I)+FA(JACID)/(REAL(NCODON(JACID)))
BCOMP(J)=BCOMP(J)+FA(JACID)/(REAL(NCODON(JACID)))
BCOMP(K)=BCOMP(K)+FA(JACID)/(REAL(NCODON(JACID)))
21 CONTINUE
DO 22 I=1,4
IF(TOTAL.NE.0.)BCOMP(I)=BCOMP(I)*33.3333/TOTAL
22 CONTINUE
END
SUBROUTINE CALCOD(SUM,SEQ,IDIM)
C AUTHOR: RODGER STADEN
C calculates codon totals for defined region
REAL SUM(4,4,4)
CHARACTER SEQ(IDIM)
INTEGER CTONUM
EXTERNAL CTONUM
MIN=IDIM/3
J=1
DO 100 I=1,MIN
C test for bad chars
DO 50 L=1,3
IP=J-1+L
IF(CTONUM(SEQ(IP)).EQ.5)GO TO 59
50 CONTINUE
JP0=J
JP1=J+1
JP2=J+2
JP0=CTONUM(SEQ(JP0))
JP1=CTONUM(SEQ(JP1))
JP2=CTONUM(SEQ(JP2))
SUM(JP0,JP1,JP2)=SUM(JP0,JP1,JP2)+1.
59 CONTINUE
J=J+3
100 CONTINUE
END
C codcom
C complements a codon table
SUBROUTINE CODCOM(SUM)
C AUTHOR: RODGER STADEN
REAL SUM(4,4,4),TEMP(4,4,4)
INTEGER IP(4)
SAVE IP
DATA IP/3,4,1,2/
DO 10 I=1,4
DO 10 J=1,4
DO 10 K=1,4
IC=IP(I)
JC=IP(J)
KC=IP(K)
TEMP(I,J,K)=SUM(KC,JC,IC)
10 CONTINUE
DO 20 I=1,4
DO 20 J=1,4
DO 20 K=1,4
SUM(I,J,K)=TEMP(I,J,K)
20 CONTINUE
END
SUBROUTINE CODTBL(IDEV,FILNAM,S2,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,ANSE,IOK)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),HELPF*(*)
INTEGER ANSE
REAL S2(64)
IOK = 1
IOP = ANSE
CALL YESONO(IOP,'Start with empty table',
+'Start with an existing table',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) RETURN
ANSE = IOP
IF(ANSE.EQ.1) THEN
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Name of codon table file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL REDCOD(S2,IDEV)
CLOSE(UNIT=IDEV)
END IF
IOK = 0
END
SUBROUTINE LSTSEQ(SEQ,IDIM,IDEV,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,J1,J2,IDIMT,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER DIALOG
C J1 IS THE FIRST BASE IN THE RAM BUFFER
C J2 IS THE LAST BASE IN THE RAM BUFFER
C IDIM=J2-J1+1
C IDIMT IS THE ACTUAL SEQUENCE LENGTH
C WRAP AROUND IS ONLY POSSIBLE IF J1=1, AND J2=IDIMT
CALL SHOWFU(KBOUT,'List the sequence')
I1 = J1
I2 = J2
LENGTH = 60
NSTRND = 0
IF(DIALOG.EQ.1) THEN
CALL LSTSQD(IDIM,KBIN,KBOUT,J1,J2,IDIMT,NSTRND,
+ IHELPS,IHELPE,HELPF,IDEVH,I1,I2,LENGTH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL FMTSEQ(SEQ,IDIM,I1,I2,NSTRND,LENGTH,IDEV,J1,IDIMT)
END
SUBROUTINE FMTSEQ(SEQNCE,IDIM,IS1,IE1,NSTRND,LENGTH,IDEV,KSTART,
+IDIMT)
C AUTHOR: RODGER STADEN
CHARACTER SEQNCE(IDIM),SCOMP
INTEGER K(12)
EXTERNAL LWRAP,SCOMP
IS=IS1
IE=IE1
LD10=LENGTH/10
ITOT=IE-IS+1
IF(IE.LT.IS)ITOT=IDIMT+ITOT
IDONE=0
N=IS-1
WRITE(IDEV,1005)
50 CONTINUE
IF(IDONE.GE.ITOT)RETURN
DO 20 I=1,LD10
N=N+10
K(I)=N
IF(K(I).GT.IDIMT)K(I)=K(I)-IDIMT
20 CONTINUE
C need to find end of this line
IADD=LENGTH
IF((IDONE+IADD).GT.ITOT)IADD=ITOT-IDONE
IE=IS+IADD-1
LD10M=MIN(LD10,(IE-IS+1)/10)
IDONE=IDONE+IADD
1001 FORMAT(' ',(12(5X,I6)))
WRITE(IDEV,1002)
+(SEQNCE(LWRAP(IDIMT,M)),M=IS-KSTART+1,IE-KSTART+1)
IF(NSTRND.EQ.1)WRITE(IDEV,1002)(SCOMP(
1(SEQNCE(LWRAP(IDIMT,M)))),M=IS-KSTART+1,IE-KSTART+1)
WRITE(IDEV,1001)(K(I),I=1,LD10M)
1002 FORMAT( ' ',12(10A1,1X))
IS=IE+1
WRITE(IDEV,1005)
1005 FORMAT(' ')
GO TO 50
END
SUBROUTINE LSTSQD(IDIM,KBIN,KBOUT,J1,J2,IDIMT,NSTRND,
+IHELPS,IHELPE,HELPF,IDEVH,I1,I2,LENGTH,IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*)
INTEGER VALUE
IOK = 1
1 CONTINUE
VALUE = I1
CALL GETINT(I1,I2,VALUE,
+'List from ',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
I1 = VALUE
VALUE = I2
CALL GETINT(1,I2,VALUE,
+'List to ',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
I2 = VALUE
C IS WRAP AROUND ALLOWED?
IF(I2.LT.I1)THEN
IF((J1.NE.1).OR.(J2.NE.IDIMT))THEN
WRITE(KBOUT,1004)
1004 FORMAT(' A sequence of this length cannot be treated as a',
+ ' circle')
GO TO 1
END IF
END IF
IOP = NSTRND
CALL YESONO(IOP,'Single stranded','Double stranded',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) RETURN
NSTRND = IOP
MININ = 10
MAXIN = 120
LENGTH = 60
CALL GETINT(MININ,MAXIN,LENGTH,
+'Line length',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
IF(MOD(VALUE,10).NE.0)VALUE = LENGTH
LENGTH = VALUE
END
SUBROUTINE COMPN(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,SCORES(5),DIALOG
CALL SHOWFU(KBOUT,'Plot base composition')
SCORES(5) = 0
CALL GETPAR(24,10,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+SCORES(1),SCORES(2),SCORES(3),SCORES(4),
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
MAXSP = MIN(MXSPAN,MAXSP,J2-J1+1)
IF(DIALOG.EQ.1) THEN
CALL COMPD(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+ SCORES,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL COMPP(SEQ,IDIM1P,MXSPAN,SPAN,IWRIT,SCORES,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2)
CALL VT100M
END
SUBROUTINE COMPD(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+SCORES,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER HELPF*(*)
INTEGER SCORES(5)
CALL CHECK4('T','C','A','G',
+SCORES(1),SCORES(2),SCORES(3),SCORES(4),
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
END
SUBROUTINE COMPP(SEQ,IDIM1P,MXSPAN,SPAN,IWRIT,SCORES,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,SCORES(5),CTONUM
EXTERNAL CTONUM
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
SCRMAX=1.
I1INM1=1+MXSPAN/2
J1P=J1+I1INM1
J2P=J2+I1INM1
XMIN=J1
XMAX=J2
C set ymax
YMAX=SPAN*SCRMAX
YMIN=0.
C do edge
CALL START(SEQ(J1P-LB),IDIM1P,SCORES,5,SPAN,SUM)
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C do rest
IDONE=0
XF=J1
YF=SUM
DO 100 I=J1P,J2P
IDONE=IDONE+1
SUM=SUM+SCORES(CTONUM(SEQ(I+LF)))
+ -SCORES(CTONUM(SEQ(I-LB)))
IF(MOD(I,IWRIT).EQ.0)THEN
XT=I-I1INM1
CALL LINE(XF,XT,YF,SUM,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF=XT
YF=SUM
END IF
100 CONTINUE
END
SUBROUTINE START(SEQ,IDIM,SCORES,IDIMS,LENW,SUM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER SCORES(IDIMS),CTONUM
EXTERNAL CTONUM
SUM=0.
DO 10 I=1,LENW
SUM=SUM+SCORES(CTONUM(SEQ(I)))
10 CONTINUE
END
SUBROUTINE CCHI(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,WORK,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
PARAMETER (ISIZE = 5)
INTEGER SPAN,DIALOG
REAL EXPEC(ISIZE),AVER(ISIZE),OBS(ISIZE),WORK(IDIM1P)
C NOTE J1 J2 IS THE ACTIVE REGION, KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IDIM1 IS THE LENGTH FROM KSTART
C TO THE END OF SEQ. PLOTS ARE FROM J1 TO J2 BUT STANDARDS CAN
C BE TAKEN FROM KSTART TO KSTART+IDIM1-1 = IENDB
C CHANGED TO ALLOW USER TO DEFINE REGION FOR STANDARD
C AND TO DO SCALING ONLY OVER REAL SEQUENCE (NOT EDGE) 23-08-84
CALL SHOWFU(KBOUT,
+'Plot base composition differences as chi squared')
CALL GETPAR(25,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
K1 = KSTART
K2 = IENDB
IF(DIALOG.EQ.1) THEN
CALL CHID(KSTART,IENDB,MINSP,MAXSP,SPAN,
+ MINIW,MAXIW,IWRIT,K1,K2,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL CCHIP1(SEQ,IDIM1P,MXSPAN,EXPEC,OBS,AVER,ISIZE,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
END
SUBROUTINE CCHIP1(SEQ,IDIM1P,MXSPAN,EXPEC,OBS,AVER,ISIZE,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,CTONUM
REAL EXPEC(ISIZE),AVER(ISIZE),OBS(ISIZE),WORK(IDIM1P)
EXTERNAL CTONUM,CIE
DIF = K2 - K1 + 1
IF(DIF.LT.1.) RETURN
CALL FILLR(AVER,ISIZE,0.)
I1INM1= 1 + (MXSPAN/2)-KSTART+1
CALL CHI1(SEQ(I1INM1+1),IDIM1P,K1,K2,AVER)
DO 3 I=1,ISIZE
AVER(I)=AVER(I) / DIF
3 CONTINUE
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
J1P=J1+I1INM1
J2P=J2+I1INM1
C calc expected scores
DO 29 I=1,ISIZE
EXPEC(I)=AVER(I)*SPAN
29 CONTINUE
CALL BUSY(KBOUT)
C do edge
CALL FILLR(OBS,ISIZE,0.)
CALL CHI1(SEQ,IDIM1P,J1P,J1P+SPAN-1,OBS)
C do rest
SUMMAX=-99999.
SUMMIN=999999.
J = 1
WORK(J) = CIE(OBS,EXPEC)
DO 200 I=J1P+LB,J2P-LF
ISF=CTONUM(SEQ(I+LF))
ISB=CTONUM(SEQ(I-LB))
OBS(ISF)=OBS(ISF)+1.
OBS(ISB)=OBS(ISB)-1.
SUM = CIE(OBS,EXPEC)
SUMMAX = MAX(SUMMAX,SUM)
SUMMIN = MIN(SUMMIN,SUM)
J = J + 1
WORK(J) = SUM
200 CONTINUE
CALL CLEARV
CALL VECTOM
IDIMW = J2 - J1 + 1 - SPAN
XMIN = -LB
XMAX = IDIMW + LB
YMAX = SUMMAX
YMIN = SUMMIN
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL PLOTR(WORK,IDIMW,XMAX,XMIN,YMAX,YMIN,IWRIT,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
WRITE(KBOUT,4444)SUMMAX,SUMMIN
4444 FORMAT(' Observed range=',2F12.6)
END
REAL FUNCTION CIE(OBS,EXPEC)
REAL OBS(4),EXPEC(4)
SUM = 0.
DO 10 K = 1,4
TE = EXPEC(K)
IF(TE.EQ.0.0) GO TO 10
TO = OBS(K)
D = TO - TE
SUM = SUM + D*D/TE
10 CONTINUE
CIE = SUM
END
SUBROUTINE PLOTR(POINTS,IDIMP,XMAX,XMIN,YMAX,YMIN,IWRIT,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
REAL POINTS(IDIMP)
XF = XMIN
YF = POINTS(1)
DO 100 I=1,IDIMP,IWRIT
XT = I
YT = POINTS(I)
CALL LINE(XF,XT,YF,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF = XT
YF = YT
100 CONTINUE
END
SUBROUTINE CHI1(SEQ,IDIM1P,J1P,J2P,OBS)
REAL OBS(5)
CHARACTER SEQ(IDIM1P)
INTEGER CTONUM
EXTERNAL CTONUM
DO 10 I=J1P,J2P
IS = CTONUM(SEQ(I))
OBS(IS) = OBS(IS) + 1.
10 CONTINUE
END
SUBROUTINE DICHI(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,WORK,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
PARAMETER (ISIZE = 25)
INTEGER SPAN,DIALOG
REAL EXPEC1(ISIZE),AVER1(ISIZE),OBS1(ISIZE)
REAL EXPEC(5,5),AVER(5,5),OBS(5,5),WORK(IDIM1P)
EQUIVALENCE (EXPEC1,EXPEC),(AVER1,AVER),(OBS1,OBS)
C NOTE J1 J2 IS THE ACTIVE REGION, KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IDIM1 IS THE LENGTH FROM KSTART
C TO THE END OF SEQ. PLOTS ARE FROM J1 TO J2 BUT STANDARDS CAN
C BE TAKEN FROM KSTART TO KSTART+IDIM1-1 = IENDB
CALL SHOWFU(KBOUT,
+'Plot dinucleotide composition differences as chi squared')
C CHANGED TO ALLOW USER TO DEFINE REGION FOR STANDARD
C AND TO DO SCALING ONLY OVER REAL SEQUENCE (NOT EDGE) 23-08-84
CALL GETPAR(26,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
K1 = KSTART
K2 = IENDB
IF(DIALOG.EQ.1) THEN
CALL CHID(KSTART,IENDB,MINSP,MAXSP,SPAN,
+ MINIW,MAXIW,IWRIT,K1,K2,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL DCHIP1(SEQ,IDIM1P,MXSPAN,EXPEC,OBS,AVER,ISIZE,
+EXPEC1,OBS1,AVER1,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
END
SUBROUTINE DCHI1(SEQ,IDIM1P,J1P,J2P,OBS)
REAL OBS(5,5)
CHARACTER SEQ(IDIM1P)
INTEGER CTONUM
EXTERNAL CTONUM
DO 10 I=J1P,J2P
IS = CTONUM(SEQ(I))
IS1 = CTONUM(SEQ(I+1))
OBS(IS,IS1) = OBS(IS,IS1) + 1.
10 CONTINUE
END
SUBROUTINE DCHIP1(SEQ,IDIM1P,MXSPAN,EXPEC,OBS,AVER,ISIZE,
+EXPEC1,OBS1,AVER1,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,CTONUM
REAL EXPEC1(ISIZE),AVER1(ISIZE),OBS1(ISIZE)
REAL EXPEC(5,5),AVER(5,5),OBS(5,5),WORK(IDIM1P)
EXTERNAL CTONUM,CED
DIF = K2 - K1
IF(DIF.LT.2.) RETURN
CALL BUSY(KBOUT)
CALL FILLR(AVER1,ISIZE,0.)
I1INM1=1+(MXSPAN/2)-KSTART+1
CALL DCHI1(SEQ(I1INM1+1),IDIM1P,K1,K2-1,AVER)
DO 3 I=1,5
DO 3 J=1,5
AVER(I,J)=AVER(I,J) / DIF
3 CONTINUE
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
J1P=J1+I1INM1
J2P=J2+I1INM1
C calc expected scores
DO 29 I=1,5
DO 29 J=1,5
EXPEC(I,J)=AVER(I,J)*SPAN
29 CONTINUE
C set ymax
C do edge
CALL FILLR(OBS1,ISIZE,0.)
CALL DCHI1(SEQ,IDIM1P,J1P,J1P+SPAN-1,OBS)
J = 1
WORK(J) = CED(OBS,EXPEC)
C do rest
SUMMAX=-999999.
SUMMIN=9999999.
DO 200 I=J1P+LB,J2P-1-LF
ISF=CTONUM(SEQ(I+LF))
ISF1=CTONUM(SEQ(I+LF+1))
ISB=CTONUM(SEQ(I-LB))
ISB1=CTONUM(SEQ(I-LB+1))
OBS(ISF,ISF1)=OBS(ISF,ISF1)+1.
OBS(ISB,ISB1)=OBS(ISB,ISB1)-1.
SUM = CED(OBS,EXPEC)
J = J + 1
WORK(J) = SUM
SUMMAX=MAX(SUMMAX,SUM)
SUMMIN=MIN(SUMMIN,SUM)
200 CONTINUE
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
IDIMW = J2 - J1 - SPAN
XMIN = -LB
XMAX = IDIMW + LB
YMAX = SUMMAX
YMIN = SUMMIN
CALL PLOTR(WORK,IDIMW,XMAX,XMIN,YMAX,YMIN,IWRIT,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
WRITE(KBOUT,4444)SUMMAX,SUMMIN
4444 FORMAT(' Observed range=',2F12.6)
RETURN
END
REAL FUNCTION CED(OBS,EXPEC)
REAL OBS(5,5),EXPEC(5,5)
SUM = 0.
DO 10 K=1,4
DO 10 J=1,4
TE = EXPEC(K,J)
IF(TE.EQ.0.) GO TO 10
TO = OBS(K,J)
D = TO - TE
SUM = SUM + D*D/TE
10 CONTINUE
CED = SUM
END
SUBROUTINE TRICHI(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,WORK,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
PARAMETER (ISIZE = 125)
INTEGER SPAN,DIALOG
REAL EXPEC1(ISIZE),AVER1(ISIZE),OBS1(ISIZE)
REAL EXPEC(5,5,5),AVER(5,5,5),OBS(5,5,5),WORK(IDIM1P)
EQUIVALENCE (EXPEC1,EXPEC),(AVER1,AVER),(OBS1,OBS)
C NOTE J1 J2 IS THE ACTIVE REGION, KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IDIM1 IS THE LENGTH FROM KSTART
C TO THE END OF SEQ. PLOTS ARE FROM J1 TO J2 BUT STANDARDS CAN
C BE TAKEN FROM KSTART TO KSTART+IDIM1-1 = IENDB
CALL SHOWFU(KBOUT,
+'Plot trinucleotide composition differences as chi squared')
C CHANGED TO ALLOW USER TO DEFINE REGION FOR STANDARD
C AND TO DO SCALING ONLY OVER REAL SEQUENCE (NOT EDGE) 23-08-84
CALL GETPAR(27,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
K1 = KSTART
K2 = IENDB
IF(DIALOG.EQ.1) THEN
CALL CHID(KSTART,IENDB,MINSP,MAXSP,SPAN,
+ MINIW,MAXIW,IWRIT,K1,K2,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL TCHIP1(SEQ,IDIM1P,MXSPAN,EXPEC,OBS,AVER,ISIZE,
+EXPEC1,OBS1,AVER1,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
END
SUBROUTINE TCHIP1(SEQ,IDIM1P,MXSPAN,EXPEC,OBS,AVER,ISIZE,
+EXPEC1,OBS1,AVER1,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,CTONUM
REAL EXPEC1(ISIZE),AVER1(ISIZE),OBS1(ISIZE)
REAL EXPEC(5,5,5),AVER(5,5,5),OBS(5,5,5),WORK(IDIM1P)
EXTERNAL CTONUM,CET
DIF = K2 - K1 - 2
IF(DIF.LT.3.) RETURN
CALL BUSY(KBOUT)
CALL FILLR(AVER1,ISIZE,0.)
I1INM1 = 1 + (MXSPAN/2) - KSTART + 1
CALL TCHI1(SEQ(I1INM1+1),IDIM1P,K1,K2-2,AVER)
DO 3 I=1,5
DO 3 J=1,5
DO 3 L=1,5
AVER(I,J,L)=AVER(I,J,L) / DIF
3 CONTINUE
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
J1P=J1+I1INM1
J2P=J2+I1INM1
C calc expected scores
DO 29 I=1,5
DO 29 J=1,5
DO 29 L=1,5
EXPEC(I,J,L)=AVER(I,J,L)*SPAN
29 CONTINUE
C do edge
CALL FILLR(OBS1,ISIZE,0.)
CALL TCHI1(SEQ,IDIM1P,J1P,J1P+SPAN-1,OBS)
J = 1
WORK(J) = CET(OBS,EXPEC)
C do rest
SUMMAX=-99999.
SUMMIN=999999.
DO 200 I=J1P+LB,J2P-2-LF
IS=CTONUM(SEQ(I+LF))
IS1=CTONUM(SEQ(I+LF+1))
IS2=CTONUM(SEQ(I+LF+2))
IB=CTONUM(SEQ(I-LB))
IB1=CTONUM(SEQ(I-LB+1))
IB2=CTONUM(SEQ(I-LB+2))
OBS(IS,IS1,IS2)=OBS(IS,IS1,IS2)+1.
OBS(IB,IB1,IB2)=OBS(IB,IB1,IB2)-1.
SUM = CET(OBS,EXPEC)
J = J + 1
WORK(J) = SUM
SUMMAX=MAX(SUMMAX,SUM)
SUMMIN=MIN(SUMMIN,SUM)
200 CONTINUE
CALL FILLR(OBS1,ISIZE,0.)
CALL TCHI1(SEQ,IDIM1P,J1P,J1P+SPAN-1,OBS)
J = 1
WORK(J) = CET(OBS,EXPEC)
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
IDIMW = J2 - J1 - 1 - SPAN
XMIN = -LB
XMAX = IDIMW + LB
YMAX = SUMMAX
YMIN = SUMMIN
CALL PLOTR(WORK,IDIMW,XMAX,XMIN,YMAX,YMIN,IWRIT,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
WRITE(KBOUT,4444)SUMMAX,SUMMIN
4444 FORMAT(' Observed range=',2F12.6)
RETURN
END
REAL FUNCTION CET(OBS,EXPEC)
REAL OBS(5,5,5),EXPEC(5,5,5)
SUM=0.
DO 10 K=1,4
DO 10 J=1,4
DO 10 L=1,4
TEXP=EXPEC(K,J,L)
IF(TEXP.EQ.0.)GO TO 10
TOBS=OBS(K,J,L)
OME=TOBS-TEXP
SUM=SUM+(OME*OME)/TEXP
10 CONTINUE
CET = SUM
END
SUBROUTINE TCHI1(SEQ,IDIM1P,J1P,J2P,OBS)
CHARACTER SEQ(IDIM1P)
REAL OBS(5,5,5)
INTEGER CTONUM
EXTERNAL CTONUM
DO 10 I=J1P,J2P
IS=CTONUM(SEQ(I))
IS1=CTONUM(SEQ(I+1))
IS2=CTONUM(SEQ(I+2))
OBS(IS,IS1,IS2)=OBS(IS,IS1,IS2)+1.
10 CONTINUE
END
SUBROUTINE NEGP1(SEQ,IDIM1P,MXSPAN,OBS,AVER,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,OBS(5),CTONUM
REAL AVER(5),WORK(IDIM1P)
REAL NEI
EXTERNAL CTONUM,NEI
I1INM1=1+(MXSPAN/2)-KSTART+1
J1P=J1+I1INM1
J2P=J2+I1INM1
DIF = K2 - K1 + 1
IF(DIF.LT.1.) RETURN
CALL FILLR(AVER,5,0.)
CALL NEG1(SEQ(I1INM1+1),IDIM1P,K1,K2,AVER)
DIF = DIF * SPAN
DO 3 I=1,5
AVER(I)=AVER(I) / DIF
3 CONTINUE
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
CALL BUSY(KBOUT)
C calc expected scores
C do edge
CALL FILLI(OBS,5,0)
DO 166 I=J1P,J1P+SPAN-1
ISF=CTONUM(SEQ(I+LF))
ISB=CTONUM(SEQ(I-LB))
OBS(ISF)=OBS(ISF)+1
OBS(ISB)=OBS(ISB)-1
166 CONTINUE
SUMMAX=-99999.
SUMMIN=999999.
WIND=SPAN
J = 1
WORK(J) = NEI(OBS,AVER,WIND)
DO 200 I=J1P+LB,J2P-LF
ISF=CTONUM(SEQ(I+LF))
ISB=CTONUM(SEQ(I-LB))
OBS(ISF)=OBS(ISF)+1
OBS(ISB)=OBS(ISB)-1
SUM = NEI(OBS,AVER,WIND)
J = J + 1
WORK(J) = SUM
SUMMAX=MAX(SUMMAX,SUM)
SUMMIN=MIN(SUMMIN,SUM)
200 CONTINUE
C set ymax
CALL CLEARV
CALL VECTOM
IDIMW = J2 - J1 + 1 - SPAN
XMIN = -LB
XMAX = IDIMW + LB
YMAX = SUMMAX
YMIN = SUMMIN
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL PLOTR(WORK,IDIMW,XMAX,XMIN,YMAX,YMIN,IWRIT,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
WRITE(KBOUT,4444)SUMMAX,SUMMIN
4444 FORMAT(' Observed range=',2F12.6)
END
REAL FUNCTION NEI(OBS,AVER,WIND)
REAL AVER(5)
INTEGER OBS(5)
SUM=0.
DO 10 K=1,4
TEMP=REAL(OBS(K))*AVER(K)
IF(TEMP.GT.0.) TEMP1 = TEMP*LOG(TEMP)
SUM=SUM-TEMP1
10 CONTINUE
SUM=SUM/WIND
NEI = SUM
END
SUBROUTINE NEG1(SEQ,IDIM1P,K1,K2,AVER)
CHARACTER SEQ(IDIM1P)
REAL AVER(5)
INTEGER CTONUM
EXTERNAL CTONUM
DO 2 I=K1,K2
IS=CTONUM(SEQ(I))
AVER(IS)=AVER(IS)+1.
2 CONTINUE
END
SUBROUTINE NEGENT(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,WORK,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
PARAMETER (ISIZE = 5)
INTEGER SPAN,DIALOG
REAL AVER(ISIZE),WORK(IDIM1P)
INTEGER OBS(ISIZE)
C NOTE J1 J2 IS THE ACTIVE REGION, KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IDIM1 IS THE LENGTH FROM KSTART
C TO THE END OF SEQ. PLOTS ARE FROM J1 TO J2 BUT STANDARDS CAN
C BE TAKEN FROM KSTART TO KSTART+IDIM1-1 = IENDB
C CHANGED TO ALLOW USER TO DEFINE REGION FOR STANDARD
C AND TO DO SCALING ONLY OVER REAL SEQUENCE (NOT EDGE) 23-08-84
CALL SHOWFU(KBOUT,'Plot negentropy')
CALL GETPAR(59,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
K1 = KSTART
K2 = IENDB
IF(DIALOG.EQ.1) THEN
CALL CHID(KSTART,IENDB,MINSP,MAXSP,SPAN,
+ MINIW,MAXIW,IWRIT,K1,K2,
+ KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL NEGP1(SEQ,IDIM1P,MXSPAN,OBS,AVER,
+SPAN,IWRIT,K1,K2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBOUT,WORK)
END
SUBROUTINE CHID(KSTART,IENDB,MINSP,MAXSP,SPAN,
+MINIW,MAXIW,IWRIT,K1,K2,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
INTEGER SPAN
IOK = 1
CALL GTREG(KBIN,KBOUT,KSTART,IENDB,K1,K2,
+'Define region for standard',
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
C IF(IOK.NE.0) RETURN
RETURN
END
SUBROUTINE ZDNA(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,SCORE(5,5),CTONUM,DIALOG
REAL OBS(5,5)
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'Plot z dna potential')
CALL GETPAR(33,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
I1INM1=1+MXSPAN/2
DO 1 I=1,5
DO 1 J=1,5
SCORE(I,J)=0
1 CONTINUE
C at
SCORE(3,1)=1
SCORE(1,3)=1
C cg
SCORE(2,4)=1
SCORE(4,2)=1
C gt
SCORE(4,1)=1
SCORE(1,4)=1
C ac
SCORE(3,2)=1
SCORE(2,3)=1
J1P=J1+I1INM1
J2P=J2+I1INM1
XMIN=J1
XMAX=J2
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
C calc expected scores
YMAX=SPAN
YMIN=0.
C do edge
DO 65 I=1,5
DO 65 J=1,5
OBS(I,J)=0.
65 CONTINUE
DO 66 I=J1P-LB,J1P-LB+SPAN-1
IS=CTONUM(SEQ(I))
IS1=CTONUM(SEQ(I+1))
OBS(IS,IS1)=OBS(IS,IS1)+1.
66 CONTINUE
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C do rest
XF=J1
YF=SUM
DO 100 I=J1P,J2P-1
ILF=CTONUM(SEQ(I+LF))
ILF1=CTONUM(SEQ(I+LF+1))
ILB=CTONUM(SEQ(I-LB))
ILB1=CTONUM(SEQ(I-LB+1))
OBS(ILF,ILF1)=OBS(ILF,ILF1)+1.
OBS(ILB,ILB1)=OBS(ILB,ILB1)-1.
IF(MOD(I,IWRIT).EQ.0)THEN
SUM=0.
DO 99 K=1,4
DO 99 J=1,4
SUM=SUM+SCORE(K,J)*OBS(K,J)
99 CONTINUE
XT=I-I1INM1
CALL LINE(XF,XT,YF,SUM,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF=XT
YF=SUM
END IF
100 CONTINUE
CALL VT100M
END
SUBROUTINE ZDNARD(SEQ,IDIM1P,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,RVAL(5),YVAL(5),CTONUM,DIALOG
EXTERNAL CTONUM
SAVE RVAL,YVAL
DATA RVAL/0,0,1,1,0/,YVAL/1,1,0,0,0/
CALL SHOWFU(KBOUT,'Plot z dna potential')
CALL GETPAR(35,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
I1INM1=1+MXSPAN/2
J1P=J1+I1INM1
J2P=J2+I1INM1
XMIN=J1
XMAX=J2
C length forward and back
LF=SPAN/2
LB=1+SPAN/2
C calc expected scores
YMAX=SPAN+1
YMIN=LF+1
C do edge
RSUM1=0.
YSUM1=0.
RSUM2=0.
YSUM2=0.
DO 66 I=J1P-LB,J1P-LB+SPAN-1,2
IS=CTONUM(SEQ(I))
IS1=CTONUM(SEQ(I+1))
RSUM1=RSUM1+RVAL(IS)
YSUM1=YSUM1+YVAL(IS1)
RSUM2=RSUM2+RVAL(IS1)
YSUM2=YSUM2+YVAL(IS)
66 CONTINUE
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C do rest
XF=J1
YF=SUM
DO 100 I=J1P,J2P-1,2
ILB=CTONUM(SEQ(I-LB))
ILB1=CTONUM(SEQ(I-LB+1))
ILF1=CTONUM(SEQ(I+LF+1))
ILF2=CTONUM(SEQ(I+LF+2))
RSUM1=RSUM1-RVAL(ILB)+RVAL(ILF1)
YSUM1=YSUM1-YVAL(ILB1)+YVAL(ILF2)
RSUM2=RSUM2-RVAL(ILB1)+RVAL(ILF2)
YSUM2=YSUM2-YVAL(ILB)+YVAL(ILF1)
IF(MOD(I,IWRIT).EQ.0)THEN
SUM=MAX((RSUM1+YSUM1),(RSUM2+YSUM2))
XT=I-I1INM1
CALL LINE(XF,XT,YF,SUM,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF=XT
YF=SUM
END IF
100 CONTINUE
CALL VT100M
END
SUBROUTINE ZDNARN(SEQ,IDIM1,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1)
INTEGER SPAN,CTONUM,DIALOG
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'Plot z dna potential')
CALL GETPAR(34,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
XMIN=J1
XMAX=J2
YMAX=SPAN
YMIN=0.
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C do rest
XF=J1
YF=0.
I=J1-1
100 CONTINUE
C
I=I+1
IF(I.GT.J2)GO TO 500
IS=CTONUM(SEQ(I))
IF(IS.EQ.5)GO TO 100
SUM=0.
IF(IS.LT.3)GO TO 300
C must be a or g
200 CONTINUE
C want c or t
I=I+1
IF(I.GT.J2)GO TO 500
IS=CTONUM(SEQ(I))
IF(IS.EQ.5)GO TO 100
IF(IS.GT.2)GO TO 100
C have c or t
SUM=SUM+1.
IF(MOD(I,IWRIT).EQ.0)THEN
XT=I
CALL LINE(XF,XT,YF,SUM,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF=XT
YF=SUM
END IF
C now want a or g
300 CONTINUE
I=I+1
IF(I.GT.J2)GO TO 500
IS=CTONUM(SEQ(I))
IF(IS.EQ.5)GO TO 100
IF(IS.LT.3)GO TO 100
C have a or g
SUM=SUM+1.
IF(MOD(I,IWRIT).EQ.0)THEN
XT=I
CALL LINE(XF,XT,YF,SUM,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF=XT
YF=SUM
END IF
C want c or t
GO TO 200
500 CONTINUE
CALL VT100M
END
SUBROUTINE GETRC1(KBIN,KBOUT,J1,J2,IDIMT,I1,I2,IDIM1,
+P,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*),P*(*)
INTEGER VALUE
C VERSION FOR LIST REGION
IOK = 1
WRITE(KBOUT,1000)P
1000 FORMAT(' ',A)
1 CONTINUE
VALUE = J1
I1 = J1
I2 = J2
CALL GETINT(I1,I2,VALUE,
+'From',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
I1 = VALUE
VALUE = I2
K1 = MIN(1,J1)
CALL GETINT(K1,J2,VALUE,
+'To',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
I2 = VALUE
C IS WRAP AROUND ALLOWED?
IF(I2.LT.I1)THEN
IF((J1.NE.1).OR.(J2.NE.IDIMT))THEN
WRITE(KBOUT,1001)
1001 FORMAT(' a sequence of this length cannot be treated as a',
+ ' circle')
GO TO 1
END IF
I2 = I2 + IDIM1
END IF
END
SUBROUTINE GETRC2(KBIN,KBOUT,J1,J2,IDIMT,I1,I2,
+P,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*),P*(*)
INTEGER VALUE
C VERSION FOR TRANSLATE REGION
IOK = 1
WRITE(KBOUT,1000)P
1000 FORMAT(' ',A)
1 CONTINUE
VALUE = 0
I1 = 0
I2 = J2
CALL GETINT(I1,I2,VALUE,
+'From',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
I1 = VALUE
IF(I1.EQ.0) THEN
IOK = 0
RETURN
END IF
VALUE = J2
I2 = J2
CALL GETINT(1,I2,VALUE,
+'To',
+VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0)RETURN
I2 = VALUE
C IS WRAP AROUND ALLOWED?
IF(I2.LT.I1)THEN
IF((J1.NE.1).OR.(J2.NE.IDIMT))THEN
WRITE(KBOUT,1001)
1001 FORMAT(' A sequence of this length cannot be treated as a',
+ ' circle')
GO TO 1
END IF
END IF
END
SUBROUTINE FMTRAN(SEQ,IDIM1,IS,IE,OUTP,IDMP,LENGTH,IDEV,
+ KSTART,IDIMT,CODE)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),OUTP(IDMP,3),BLANK
INTEGER IO1(12),CODE
CHARACTER IT,OUTLIN(132),THREEL*3,THREE*3
EXTERNAL LWRAP,THREEL
SAVE BLANK
DATA BLANK/' '/
LD10 = LENGTH/10
ITOT = IE - IS + 1
IF(IE.LE.IS)ITOT = IDIMT - ITOT
IDONE = 0
C SET POINTER TO ELEMENT NUMBERS (TREATING ARRAY AS LINEAR) TO
C START POINTER
IBEG = IS - KSTART + 1
I1 = IS - 1
WRITE(IDEV,1000)
50 CONTINUE
IF(IDONE.GE.ITOT)RETURN
C NEED TO FIND END OF THIS LINE AND ADD TO COUNT
IADD = LENGTH
IF((IDONE + IADD).GT.ITOT)IADD = ITOT - IDONE
IEND = IBEG + IADD - 1
IDONE = IDONE + IADD
LD10M = MIN(LD10,(IEND - IBEG + 1)/10)
DO 200 J = 1,3
IBEGP = IBEG + J - 1
IENDP = IEND + J - 1
C NUMBER OF CODONS THIS LINE?
NP = (IENDP - IBEGP + 1)/3
C TEST FOR NO CODONS
IF(NP.GT.0)THEN
C THIS LINE STARTS AT IBEG AND ENDS AT IEND (TREATING ARRAY AS
C LINEAR) NEED TO KNOW IF THERE ARE ANY AA'S TO OUTPUT.
C TRANSLATE POINTERS AND TEST THEM
C FILL OUTP WITH BLANKS
CALL FILLC(OUTLIN,132,' ')
IEMPT = 0
ITMPCS = IBEGP
L = J - 2
DO 100 I = 1,NP
IOUT = LWRAP(IDIM1,ITMPCS)/3 + 1
ITMPCS = ITMPCS + 3
IT = OUTP(IOUT,J)
IF(IT.NE.BLANK)IEMPT = 1
L = L + 3
IF(CODE.EQ.3)THEN
THREE = THREEL(IT)
OUTLIN(L - 1) = THREE(1:1)
OUTLIN(L) = THREE(2:2)
OUTLIN(L + 1) = THREE(3:3)
ELSE
OUTLIN(L) = IT
END IF
100 CONTINUE
IF(IEMPT.GT.0)
+ WRITE(IDEV,1002)(OUTLIN(K),K = 1,LENGTH + 6)
END IF
200 CONTINUE
WRITE(IDEV,1002)(SEQ(LWRAP(IDIM1,K)),K = IBEG,IEND)
1002 FORMAT(' ',132A1)
DO 300 K = 1,LD10
I1 = I1 + 10
IO1(K) = I1
IF(IO1(K).GT.IDIMT)IO1(K) = IO1(K) - IDIMT
300 CONTINUE
WRITE(IDEV,1001)(IO1(K),K = 1,LD10M)
1001 FORMAT( ' ',12(4X,I6))
IBEG = IEND + 1
WRITE(IDEV,1000)
1000 FORMAT(/)
GO TO 50
END
SUBROUTINE TRAND4(MINO,MAXO,MINOPN,JSTRAN,FILNAM,IDEV,IOK,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*),FILNAM*(*)
IOK = 1
CALL GETINT(MINO,MAXO,MINOPN,
+'Minimum open frame in amino acids',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINOPN = IVAL
IVAL = JSTRAN + 1
CALL GSTRND(IVAL,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IVAL.LT.1) THEN
IOK = 1
RETURN
END IF
JSTRAN = IVAL - 1
30 CONTINUE
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,1,IOK,KBIN,KBOUT,
+'File name for translation',
+IHELPS,IHELPE,HELPF,IDEVH)
END
SUBROUTINE TRAND5(MINO,MAXO,MINOPN,JSTRAN,IOK,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
IOK = 1
CALL GETINT(MINO,MAXO,MINOPN,
+'Minimum open frame in amino acids',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINOPN = IVAL
IVAL = JSTRAN + 1
CALL GSTRND(IVAL,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IVAL.LT.1) THEN
IOK = 1
RETURN
END IF
JSTRAN = IVAL - 1
END
SUBROUTINE TRNPIR(SEQ,IDIM,I1,I2,KSTART,KBIN,KBOUT,IDEV,PAA,
+OUTP,IDIMP,FILNAM,MINO,MAXO,MINOPN,JSTRAN,
+IHELPS,IHELPE,HELPF,IDEVH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIMP),PAA(125),OUTP(IDIMP)
CHARACTER FILNAM*(*),HELPF*(*)
CALL TRAND4(MINO,MAXO,MINOPN,JSTRAN,FILNAM,IDEV,IOK,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEV)
RETURN
END IF
MINOPD = 3 * MINOPN
IF(JSTRAN.EQ.0) THEN
CALL TRNDOP(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
ELSE IF (JSTRAN.EQ.1) THEN
CALL TRNDOM(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
ELSE IF (JSTRAN.EQ.2) THEN
CALL TRNDOP(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
CALL TRNDOM(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
END IF
CLOSE(UNIT=IDEV)
END
SUBROUTINE TRNDOP(SEQ,IDDNA,J1,J2,
+ACIDS,IDACID,PAA,MINOPN,IDEV)
CHARACTER SEQ(IDDNA),ACIDS(IDACID),PAA(5,5,5)
CHARACTER LINE*60
INTEGER STEP(3),FRAME,PSTOP,WPAIR
EXTERNAL PSTOP,MINIL,WPAIR,ITOSL
C Find open reading frames in their order of occurrence
C For each of 3 frames remember next position to try
STEP(3) = J1 + 2
STEP (1) = J1
STEP(2) = J1 + 1
10 CONTINUE
FRAME = MINIL(STEP,3)
I = STEP(FRAME)
IF(I.LT.J2) THEN
J = PSTOP(SEQ,IDDNA,I,J2,ACIDS,IDACID,PAA,IACID)
STEP(FRAME) = J + 4
IF((J-I+1).GE.MINOPN) THEN
LINE(1:) = '>'
IF (ITOSL(LINE(2:),I).EQ.0) WRITE(*,*)'Scream: ITOSL'
IF(WPAIR(LINE(22:),I,J).NE.0) WRITE(*,*)'Scream: WPAIR'
WRITE(IDEV,1001)LINE
IACID = IACID + 1
ACIDS(IACID) = '*'
CALL FMTDKN(IDEV,ACIDS,IACID)
END IF
GO TO 10
END IF
1001 FORMAT(A)
END
SUBROUTINE TRNDOM(SEQ,IDDNA,J1,J2,ACIDS,IDACID,PAA,MINOPN,IDEV)
CHARACTER SEQ(IDDNA),ACIDS(IDACID),PAA(5,5,5),LINE*60
INTEGER STEP(3),FRAME,MSTOP,WPAIR
EXTERNAL MSTOP,MINIL,WPAIR,ITOSL
C 17-7-91 replced line to write out title! which had disapeared
C Find open reading frames in their order of occurrence
C For each of 3 frames remember next position to try
STEP(3) = J1 + 2
STEP (1) = J1
STEP(2) = J1 + 1
10 CONTINUE
FRAME = MINIL(STEP,3)
I = STEP(FRAME)
IF(I.LT.J2) THEN
J = MSTOP(SEQ,IDDNA,I,J2,ACIDS,IDACID,PAA,IACID)
STEP(FRAME) = J + 4
IF((J-I+1).GE.MINOPN) THEN
LINE(1:) = '> complement('
IF (ITOSL(LINE(2:),I).EQ.0) WRITE(*,*)'Scream: ITOSL'
IF(WPAIR(LINE(33:),I,J).NE.0) WRITE(*,*)'Scream: WPAIR'
LINE(32+INDEX(LINE(33:),' '):) = ')'
WRITE(IDEV,1001)LINE
CALL SQREV(ACIDS,IACID)
IACID = IACID + 1
ACIDS(IACID) = '*'
CALL FMTDKN(IDEV,ACIDS,IACID)
END IF
GO TO 10
END IF
1001 FORMAT(A)
END
SUBROUTINE OPENFR(SEQ,IDIM,I1,I2,KSTART,IDEV,KBIN,KBOUT,PAA,
+OUTP,IDIMP,
+IHELPS,IHELPE,HELPF,IDEVH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIMP),PAA(125),OUTP(IDIMP)
CHARACTER HELPF*(*)
CALL SHOWFU(KBOUT,'Find open reading frames')
CALL GETPAR(54,4,IOK,MINO,MAXO,MINOPN,JSTRAN,
+IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL TRAND5(MINO,MAXO,MINOPN,JSTRAN,IOK,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
MINOPD = 3* MINOPN
IF(JSTRAN.EQ.0) THEN
CALL TRNDP(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
ELSE IF (JSTRAN.EQ.1) THEN
CALL TRNDM(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
ELSE IF (JSTRAN.EQ.2) THEN
CALL TRNDP(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
CALL TRNDM(SEQ,IDIM,I1,I2,OUTP,IDIMP,PAA,MINOPD,IDEV)
END IF
END
SUBROUTINE TRNDP(SEQ,IDDNA,J1,J2,
+ACIDS,IDACID,PAA,MINOPN,IDEV)
CHARACTER SEQ(IDDNA),ACIDS(IDACID),PAA(5,5,5)
CHARACTER LINE*60
INTEGER STEP(3),FRAME,PSTOP,WPAIR
EXTERNAL PSTOP,MINIL,WPAIR
C Find open reading frames in their order of occurrence
C For each of 3 frames remember next position to try
STEP(3) = J1 + 2
STEP (1) = J1
STEP(2) = J1 + 1
10 CONTINUE
FRAME = MINIL(STEP,3)
I = STEP(FRAME)
IF(I.LT.J2) THEN
J = PSTOP(SEQ,IDDNA,I,J2,ACIDS,IDACID,PAA,IACID)
STEP(FRAME) = J + 4
IF((J-I+1).GE.MINOPN) THEN
LINE(1:) = 'FT CDS'
IF(WPAIR(LINE(22:),I,J).NE.0) WRITE(*,*)'Scream: WPAIR'
WRITE(IDEV,1001)LINE
END IF
GO TO 10
END IF
1001 FORMAT(A)
END
SUBROUTINE TRNDM(SEQ,IDDNA,J1,J2,ACIDS,IDACID,PAA,MINOPN,IDEV)
CHARACTER SEQ(IDDNA),ACIDS(IDACID),PAA(5,5,5),LINE*60
INTEGER STEP(3),FRAME,MSTOP,WPAIR
EXTERNAL MSTOP,MINIL,WPAIR
C Find open reading frames in their order of occurrence
C For each of 3 frames remember next position to try
STEP(3) = J1 + 2
STEP (1) = J1
STEP(2) = J1 + 1
10 CONTINUE
FRAME = MINIL(STEP,3)
I = STEP(FRAME)
IF(I.LT.J2) THEN
J = MSTOP(SEQ,IDDNA,I,J2,ACIDS,IDACID,PAA,IACID)
STEP(FRAME) = J + 4
IF((J-I+1).GE.MINOPN) THEN
LINE(1:) = 'FT CDS complement('
IF(WPAIR(LINE(33:),I,J).NE.0) WRITE(*,*)'Scream: WPAIR'
LINE(32+INDEX(LINE(33:),' '):) = ')'
WRITE(IDEV,1001)LINE
END IF
GO TO 10
END IF
1001 FORMAT(A)
END
INTEGER FUNCTION PSTOP(SEQ,IDDNA,IDNA1,IDNA2,
+ACIDS,IDACID,PAA,IACID)
CHARACTER PAA(5,5,5),SEQ(IDDNA),ACIDS(IDACID),TRANF,ACID
EXTERNAL TRANF
C Returns position of next stop codon or IDNA2 if end reached
C Also the number of acids excluding any stop codon
C Also the acids
IACID = 0
DO 10 IDNA=IDNA1,IDNA2-2,3
ACID = TRANF(SEQ(IDNA),PAA)
IF(ACID.EQ.'*') THEN
PSTOP = IDNA - 1
RETURN
ELSE
IACID = IACID + 1
ACIDS(IACID) = ACID
END IF
10 CONTINUE
PSTOP = IDNA2
END
INTEGER FUNCTION MSTOP(SEQ,IDDNA,IDNA1,IDNA2,
+ACIDS,IDACID,PAA,IACID)
CHARACTER PAA(5,5,5),SEQ(IDDNA),ACIDS(IDACID),TRANB,ACID
EXTERNAL TRANB
C Returns position of next stop codon or IDNA2 if end reached
C Also the number of acids excluding any stop codon
C Also the acids
IACID = 0
DO 10 IDNA=IDNA1,IDNA2-2,3
ACID = TRANB(SEQ(IDNA),PAA)
IF(ACID.EQ.'*') THEN
MSTOP = IDNA - 1
RETURN
ELSE
IACID = IACID + 1
ACIDS(IACID) = ACID
END IF
10 CONTINUE
MSTOP = IDNA2
END
INTEGER FUNCTION MINIL(LIST,NLIST)
INTEGER LIST(NLIST)
C sent list, return element number of first element whose
C value equals the minimum value in the list.
MX = LIST(1)
DO 10 I=2,NLIST
MX = MIN(MX,LIST(I))
10 CONTINUE
DO 20 I=1,NLIST
IF(MX.EQ.LIST(I)) THEN
MINIL = I
RETURN
END IF
20 CONTINUE
C Should never get here!
WRITE(*,*)'Scream: MINIL'
MINIL = 1
END
INTEGER FUNCTION WPAIR(LINE,I,J)
CHARACTER LINE*(*)
EXTERNAL ITOSL
C puts a pair of integers into string line, as for a feature table
WPAIR = 1
IF(ITOSL(LINE,I).EQ.0) RETURN
K = INDEX(LINE,' ')
LINE(K:) = '..'
IF(ITOSL(LINE(K+2:),J).EQ.0) RETURN
WPAIR = 0
END
SUBROUTINE TRPIRP(SEQ,IDIM,I1,I2,KSTART,MINOP,IDEV,PAA,
+JSTRAN,OUTP,FRAMEC,IDIMP)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIMP),PAA(125),OUTP(IDIMP)
INTEGER B,FRAME,FRAMEC(IDIM)
EXTERNAL IPDNA
MINOPN = MINOP + 1
DO 200 FRAME = 1,3
B = I1 - KSTART + FRAME
LAA = IDIMP
CALL TRANSD(SEQ,IDIM,
+ B,I2-KSTART+1,OUTP,LAA,PAA,JSTRAN)
LAA = (I2 - B + 1) / 3
B = 1
I = 0
20 CONTINUE
I = I + 1
IF(I.LE.LAA) THEN
IF(OUTP(I).EQ.'*') THEN
LENGTH = I - B + 1
IF(LENGTH.GE.MINOPN) THEN
IF(JSTRAN.EQ.0) THEN
IP1 = IPDNA(B,FRAME,JSTRAN) + I1 - KSTART
IP2 = IPDNA(I,FRAME,JSTRAN) + I1 - KSTART
IP2 = MIN(IP2,IDIM)
C WRITE(IDEV,1002)IP1,IP2,SENSE(1),FRAME,IP2-IP1+1
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,2)
ELSE IF(JSTRAN.EQ.1) THEN
IP1 = IPDNA(B,FRAME,JSTRAN) + I1 - KSTART
IF(B.NE.1) IP1 = IP1 - 3
IP2 = IPDNA(I,FRAME,JSTRAN) + I1 - KSTART - 3
IP2 = MIN(IP2,IDIM)
C WRITE(IDEV,1002)IP2,IP1,SENSE(2),FRAME,IP2-IP1+1
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,3)
END IF
END IF
B = I + 1
END IF
GO TO 20
END IF
LENGTH = I - B
IF(LENGTH.GE.MINOPN) THEN
IF(JSTRAN.EQ.0) THEN
IP1 = IPDNA(B,FRAME,JSTRAN) + I1 - KSTART
IP2 = IPDNA(I,FRAME,JSTRAN) + I1 - KSTART
IP2 = MIN(IP2,IDIM)
C WRITE(IDEV,1002)IP1,IP2,SENSE(1),FRAME,IP2-IP1+1
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,2)
LENGTH = LENGTH + 1
ELSE IF(JSTRAN.EQ.1) THEN
IP1 = IPDNA(B,FRAME,JSTRAN) + I1 - KSTART
IF(B.NE.1) IP1 = IP1 - 3
IP2 = IPDNA(I,FRAME,JSTRAN) + I1 - KSTART
IP2 = MIN(IP2,IDIM)
C WRITE(IDEV,1002)IP2,IP1,SENSE(2),FRAME,IP2-IP1+1
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,3)
END IF
END IF
200 CONTINUE
C 1002 FORMAT(' FT ',I7,I7,' ',A,I4,I7)
END
SUBROUTINE TRAN6X(SEQ,IDIM,JDEV,I1,I2,PAA,ITRAN,JSTRAN,
+INUM,LINLEN,FC)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),TRANF4*3,TRANB4*3,SCOMP,PAA(125)
CHARACTER LN*133,TICKS*120
INTEGER FC(IDIM)
EXTERNAL TRANF4,TRANB4,SCOMP,NOTRL
SAVE TICKS
DATA TICKS(1:60)/
+' . : . : . : . : . : . :'/
DATA TICKS(61:120)/
+' . : . : . : . : . : . :'/
K1=I1
K2=I1+LINLEN-1
K2 = MIN(K2,I2)
LN = ' '
20 CONTINUE
WRITE(JDEV,1004)
WRITE(LN,1000)(TRANF4(SEQ(K),PAA,ITRAN,FC(K)),K=K1,K2-1,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)LN(1:NOTRL(LN,133,' '))
WRITE(LN,1001)(TRANF4(SEQ(K),PAA,ITRAN,FC(K)),K=K1+1,K2,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)LN(1:NOTRL(LN,133,' '))
IF(K1.NE.1) THEN
WRITE(LN,1002)(TRANF4(SEQ(K),PAA,ITRAN,FC(K)),
+ K=K1-1,K2-2,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)
+ LN(1:NOTRL(LN,133,' '))
ELSE
WRITE(LN,1008)(TRANF4(SEQ(K),PAA,ITRAN,FC(K)),K=3,K2-2,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)
+ LN(1:NOTRL(LN,133,' '))
END IF
IF(INUM.NE.0) THEN
WRITE(JDEV,1003)(SEQ(K),K=K1,K2)
WRITE(JDEV,1005)(N,N=K1+9,K2,10)
ELSE
WRITE(LN,1003)(SEQ(K),K=K1,K2)
WRITE(LN(NOTRL(LN,133,' ')+2:),1007)K2
WRITE(JDEV,1006)LN(1:NOTRL(LN,133,' '))
IF(JSTRAN.NE.0) WRITE(JDEV,1009)TICKS(1:K2-K1+1)
END IF
IF(JSTRAN.NE.0)WRITE(JDEV,1003)(SCOMP(SEQ(K)),K=K1,K2)
WRITE(LN,1000)(TRANB4(SEQ(K),PAA,ITRAN,FC(K)),K=K1,K2-1,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)LN(1:NOTRL(LN,133,' '))
WRITE(LN,1001)(TRANB4(SEQ(K),PAA,ITRAN,FC(K)),K=K1+1,K2,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)LN(1:NOTRL(LN,133,' '))
IF(K1.NE.1) THEN
WRITE(LN,1002)(TRANB4(SEQ(K),PAA,ITRAN,FC(K)),K=K1-1,K2-2,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)
+ LN(1:NOTRL(LN,133,' '))
ELSE
WRITE(LN,1008)(TRANB4(SEQ(K),PAA,ITRAN,FC(K)),K=3,K2-2,3)
IF(NOTRL(LN,133,' ').NE.0)WRITE(JDEV,1006)
+ LN(1:NOTRL(LN,133,' '))
END IF
IF(K2.EQ.I2) RETURN
K1=K2+1
K2=K2+LINLEN
K2 = MIN(K2,I2)
GO TO 20
1000 FORMAT(4X,40(A))
1001 FORMAT(5X,40(A))
1002 FORMAT(3X,40(A))
1003 FORMAT(4X,120A1)
1004 FORMAT( )
1005 FORMAT(4X,12I10)
1006 FORMAT(A)
1007 FORMAT(I7)
1008 FORMAT(6X,40(A))
1009 FORMAT(4X,A)
END
INTEGER FUNCTION IPDNA(IACID,FRAME,ISTRAN)
INTEGER FRAME
C returns a dna position for an amino acid in a translation
C frames on opposing strands: 123123123
C 123123123
IPDNA = FRAME + (IACID-1)*3
END
CHARACTER*3 FUNCTION TRANF4(CODON,PAA,CODE,FRAMEC)
C AUTHOR RODGER STADEN
CHARACTER CODON(3),PAA(5,5,5),THREEL*3,TRANF,THREE*3
INTEGER CODE,FRAMEC
EXTERNAL THREEL,TRANF
THREE(1:3)=' '
IF(MOD(FRAMEC,2).EQ.0) THEN
IF(CODE.EQ.3)THEN
THREE(1:3)=THREEL(TRANF(CODON,PAA))
ELSE
THREE(2:2)=TRANF(CODON,PAA)
END IF
END IF
TRANF4 = THREE
END
C TRANB3
CHARACTER*3 FUNCTION TRANB4(CODON,PAA,CODE,FRAMEC)
C AUTHOR RODGER STADEN
CHARACTER CODON(3),PAA(5,5,5),THREEL*3,TRANB,THREE*3
INTEGER CODE,FRAMEC
EXTERNAL THREEL,TRANB
THREE=' '
IF(MOD(FRAMEC,3).EQ.0) THEN
IF(CODE.EQ.3)THEN
THREE(1:3)=THREEL(TRANB(CODON,PAA))
ELSE
THREE(2:2)=TRANB(CODON,PAA)
END IF
END IF
TRANB4 = THREE(1:3)
END
SUBROUTINE HAIRPN(SEQ,IDIM,ITOT,ITOTEL,ITOTAL,IDTOT,
+MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,KSTART,IDEV,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER ITOT(IDTOT),ITOTEL(IDTOT),ITOTAL(IDTOT)
CALL SHOWFU(KBOUT,'Search for hairpin loops')
CALL GETPAR(30,10,IOK,MINLPI,MAXLPI,MINLP,
+MINLPX,MAXLPX,MAXLP,MINBP,MAXBP,MININ,IGON,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
CALL HAIRPD(J1,J2,KBIN,KBOUT,
+MINLPI,MAXLPI,MINLP,MINLPX,MAXLPX,MAXLP,MINBP,MAXBP,MININ,
+IGON,IOK,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL HAIRPP(SEQ,IDIM,ITOT,ITOTEL,ITOTAL,IDTOT,
+MINLP,MAXLP,MININ,IGON,
+MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,KSTART,IDEV,KBOUT)
END
SUBROUTINE HAIRPP(SEQ,IDIM,ITOT,ITOTEL,ITOTAL,IDTOT,
+MINLP,MAXLP,MININ,IGON,
+MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,IS,IN,KSTART,IDEV,KBOUT)
C AUTHOR: RODGER STADEN
INTEGER SCORES(30)
CHARACTER SEQ(IDIM)
INTEGER ITOT(IDTOT),ITOTEL(IDTOT),ITOTAL(IDTOT)
INTEGER CTONUM
EXTERNAL CTONUM,LWRAP2
CALL BUSY(KBOUT)
C set scores as at=1,gc=1,gt=1
C could make gt selectable
DO 1 I=1,30
1 SCORES(I)=0
SCORES(16)=1
SCORES(21)=1
SCORES(22)=1
SCORES(8)=1
SCORES(9)=1
SCORES(14)=1
IDO=IN-IS+1
I=IS-1
IP=0
DO 301 K=1,IDO
I=I+1
DO 300 J=MINLP,MAXLP
N=0
100 CONTINUE
IMN=I-N
IPNPJ=I+N+J
IT=CTONUM(SEQ(LWRAP2(IN,IMN)))+
+ 5*CTONUM(SEQ(LWRAP2(IN,IPNPJ)))
IT=SCORES(IT)
IF(IT.NE.0)THEN
N=N+IT
GO TO 100
END IF
C IS THIS LOOP WORTH KEEPING?
IF(N.GE.MININ)THEN
IP=IP+1
IF(IP.GT.IDTOT)THEN
WRITE(KBOUT,1234)IP,I
1234 FORMAT(' Maximum of',I5,
+ ' loops found up to',I6,' No more searching')
GO TO 302
END IF
ITOT(IP)=N
ITOTEL(IP)=LWRAP2(IN,I)
ITOTAL(IP)=J-1
END IF
300 CONTINUE
301 CONTINUE
302 CONTINUE
IF(IP.GT.0)THEN
NOUT=0
DO 700 J=1,IP
JP1=J+1
ID1 = ITOTEL(J) + ITOTAL(J)/2
DO 690 I=JP1,IP
ID2 = ITOTEL(I) + ITOTAL(I)/2
IF(ID1.NE.ID2)GO TO 690
LP2=ITOTAL(I)+2*ITOT(I)
IF(ITOTAL(J).GT.LP2)GO TO 690
ITOT(J)=0
ITOTAL(J)=0
ITOTEL(J)=0
NOUT=NOUT+1
GO TO 700
690 CONTINUE
700 CONTINUE
CALL BUB3AS(ITOTEL,ITOT,ITOTAL,IP)
IF(IGON.EQ.0)THEN
XMAX=IN
XMIN=IS
C PLOT ON SCALE 0 TO 4*MININ IN Y
YMIN=0.
YMAX=MININ*4
YF=0.
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
DO 800 I=NOUT+1,IP
XF=ITOTEL(I)+ITOTAL(I)/2
XT=XF
YT=ITOT(I)
CALL LINE(XF,XT,YF,YT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
800 CONTINUE
CALL VT100M
ELSE IF(IGON.EQ.1) THEN
DO 900 I=NOUT+1,IP
CALL DRAWPN(
+ SEQ,IDIM,ITOT(I),ITOTAL(I),ITOTEL(I),
+ IDEV,IN,KSTART)
900 CONTINUE
END IF
END IF
WRITE(KBOUT,1005)IP-NOUT
1005 FORMAT(' Total loops found=',I6)
END
SUBROUTINE HAIRPD(IS,IN,KBIN,KBOUT,
+MINLPI,MAXLPI,MINLP,MINLPX,MAXLPX,MAXLP,MINBP,MAXBP,MININ,
+IGON,IOK,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
WRITE(KBOUT,1004)
1004 FORMAT(' Define the range of loop sizes')
CALL GETINT(MINLPI,MAXLPI,MINLP,'Minimum loop size',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINLP = IVAL
MINLPX = MAX(MINLPX,MINLP)
MAXLP = MINLPX
CALL GETINT(MINLPX,MAXLPX,MAXLP,'Maximum loop size',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MAXLP = IVAL
MINLP = MINLP + 1
MAXLP = MAXLP + 1
CALL GETINT(MINBP,MAXBP,MININ,'Minimum number of basepairs',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MININ = IVAL
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
C IF(IGON.LT.0)RETURN
END
C
C subroutine to draw hairpin loops on device idev
C hairpins are defined by:
C 1) nstem the number of basepairs in the stem
C 2) nloop the number of bases in the loop
C 3) ip the position of the rightmost base in the left side of the stem
C
C draw the stems very simply, just standing up with a few bases
C before and after
C maximum size is 20 up and 14 across
SUBROUTINE DRAWPN(SEQ,IDIM,NSTEM,NLOOP,IP,IDEV,IN,KSTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),ARRAY(14,20),SCORE(5,5),SPACE
INTEGER CTONUM
EXTERNAL CTONUM
SAVE SPACE
DATA SPACE/' '/
C CHECK FOR WRAP AROUND
IF((IP-NSTEM).LT.0)RETURN
IF((IP+NLOOP+NSTEM+1).GT.IN)RETURN
DO 1 I=1,5
DO 1 J=1,5
SCORE(I,J)=' '
1 CONTINUE
SCORE(1,3)='-'
SCORE(3,1)='-'
SCORE(2,4)='-'
SCORE(4,2)='-'
SCORE(1,4)='.'
SCORE(4,1)='.'
DO 2 I=1,14
DO 2 J=1,20
ARRAY(I,J)=SPACE
2 CONTINUE
C point to left stem start
IPL=IP-NSTEM
C point to right stem start
IPR=IP+NSTEM+NLOOP+1
C point to temporary array
IAL=6
IAR=8
KA=1
C fill in stem and loop
DO 10 I=1,MIN(NSTEM+NLOOP/2,19)
IPL=IPL+1
IPR=IPR-1
KA=KA+1
ARRAY(IAL,KA)=SEQ(IPL)
ARRAY(IAR,KA)=SEQ(IPR)
C basepaired?
ARRAY(IAL+1,KA)=
+ SCORE(CTONUM(SEQ(IPL)),CTONUM(SEQ(IPR)))
10 CONTINUE
C odd number of bases in loop?
IF(MOD(NLOOP,2).NE.0)THEN
C put in extra base
IF((KA+1).LE.20)ARRAY((IAL+1),KA+1)=SEQ(IPL+1)
END IF
C20 CONTINUE
C now do 6 bases before the stem and 7 after
IPL=IP-NSTEM-6
DO 30 I=1,6
IPL=IPL+1
IF(IPL.GT.0)ARRAY(I,1)=SEQ(IPL)
30 CONTINUE
IPR=IP+NSTEM+NLOOP
DO 40 I=8,14
IPR=IPR+1
IF(IPR.LT.IN)ARRAY(I,1)=SEQ(IPR)
40 CONTINUE
DO 50 I=1,20
K=21-I
WRITE(IDEV,1000)(ARRAY(L,K),L=1,14)
50 CONTINUE
1000 FORMAT(' ',5X,14A1)
1001 FORMAT(' ',5X,I6)
WRITE(IDEV,1001)IP-NSTEM+KSTART-1
RETURN
END
SUBROUTINE WORDFP(SEQ,IDIM1P,IDIM1,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH)
C AUTHOR: RODGER STADEN
C ROUTINE TO COMPARE OBSERVED WITH EXPECTED 'WORD' FREQUENCIES
C WORDS MAY BE UP TO 10 CHARACTERS IN LENGTH ALTHOUGH, AS THE
C EXPECTED FREQUENCIES ARE CALCULATED SIMPLY BY MULTIPLYING
C THE BASE FREQUENCIES TOGETHER, THE LONGER THE WORD THE MORE
C INAPPROPRIATE THE MODEL. USERS ARE SIMPLY PROMPTED FOR THE WORD
C TO SEARCH FOR - E.G CG - AND A WINDOW LENGTH TO COUNT OVER.
C THE PROGRAM PLOTS (OBSERVED - EXPECTED)
C IF IT IS TOO SLOW, FASTER ALGORITHMS ARE POSSIBLE.
CHARACTER SEQ(IDIM1P),WORD(10),HELPF*(*)
INTEGER SPAN
CALL SHOWFU(KBOUT,'Plot composition differences (obs-exp))')
WORD(1) = 'C'
WORD(2) = 'G'
LENGTH = 2
CALL GETPAR(64,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL WORDFD(WORD,LENGTH,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+YMIN,YMAX,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL WRDFP(SEQ,IDIM1P,IDIM1,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,
+YMIN,YMAX,SPAN,IWRIT,WORD,LENGTH)
END
SUBROUTINE WORDFD(WORD,LENGTH,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+YMIN,YMAX,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR: RODGER STADEN
CHARACTER WORD(10),WORDA(10),HELPF*(*)
INTEGER SPAN
CALL SQPF3(WORD,WORDA,10,LENGTH,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMAX1 = REAL(SPAN)/(4**LENGTH)
YMIN1 = -1.0*YMAX1
YMIN2 = 4.*YMIN1
YMAX2 = 4.*YMAX1
CALL GETRL(YMIN1,YMAX2,YMAX1,
+'Maximum plot value',VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMAX = VALUE
CALL GETRL(YMIN2,YMAX,YMIN1,
+'Minimum plot value',VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMIN = VALUE
IOK = 0
END
SUBROUTINE WRDFP(SEQ,IDIM1P,IDIM1,MXSPAN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,
+YMIN,YMAX,SPAN,IWRIT,WORD,LENGTH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P),WORD(10)
INTEGER SPAN,CTONUM,IP(10),OBS(5)
EXTERNAL CTONUM
CALL CLEARV
CALL FILLI(OBS,5,0)
DO 15 I = 1,LENGTH
IP(I) = CTONUM(WORD(I))
15 CONTINUE
I1INM1=1+(MXSPAN/2)-KSTART+1
INC = 1
J1P=J1+I1INM1
J2P=J2+I1INM1
XMIN=J1
XMAX=J2
LF=SPAN/2
LB=1+SPAN/2
DO 66 I=J1P,J1P+SPAN-1
IS=CTONUM(SEQ(I))
OBS(IS)=OBS(IS)+1
66 CONTINUE
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
XF = J1
XT = J2
YF = YMIN + (YMAX-YMIN)/2.
YT = YF
CALL LINE(XF,XT,YF,YT,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
MARG = NINT(0.05*MARGT)
MARGB1 = MARGB+MARG
MARGT1 = MARGT - 2*MARG
XF=J1
YF=0.
DO 100 I=J1P+LB,J2P-(LENGTH-1)
ISF=CTONUM(SEQ(I+LF))
ISB=CTONUM(SEQ(I-LB))
OBS(ISF)=OBS(ISF)+1
OBS(ISB)=OBS(ISB)-1
IF(MOD(I,IWRIT).EQ.0)THEN
EXPEC = 1.0
DO 80 J=1,LENGTH
EXPEC = EXPEC * REAL(OBS(IP(J)))/SPAN
80 CONTINUE
EXPEC = EXPEC * SPAN
NFOUND = 0
IAT=I-LB
IEND = I+LF
90 CONTINUE
IAT=IAT+INC
IDIM = IEND - IAT + 1
IF(IDIM.GT.0)THEN
CALL FIND6(SEQ(IAT),IDIM,WORD,LENGTH,INC,JMATCH)
IF(JMATCH.NE.0)THEN
IAT=IAT+JMATCH-1
NFOUND = NFOUND + 1
GO TO 90
END IF
END IF
SUM = NFOUND - EXPEC
XT=I-I1INM1
CALL LINE(XF,XT,YF,SUM,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB1,MARGT1,ISXMAX,ISYMAX)
XF=XT
YF=SUM
END IF
100 CONTINUE
CALL VT100M
END
SUBROUTINE BPHASE(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,MARGL1,MARGR1,MARGB1,MARGT1,J1,J2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,DIALOG
CALL SHOWFU(KBOUT,'Uneven positional base frequencies method')
CALL GETPAR(44,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
MAXSP = MIN(MAXSP,IDIM1/3)
SPAN = MIN(MAXSP,SPAN)
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL BPHASP(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,MARGL1,MARGR1,MARGB1,MARGT1,J1,J2,SPAN,IWRIT)
END
SUBROUTINE BPHASP(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,MARGL1,MARGR1,MARGB1,MARGT1,J1,J2,LENW,IWRIT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
REAL POS(5,3),COMP(5),POS1(15)
EQUIVALENCE (POS,POS1)
I1IN=2+MXSPAN/2
I1INM1=I1IN-1
YMIN=0.4
YMAX=1.34
IDIMJ=J2-J1+1
J1P=J1+I1INM1
J2P=J2+I1INM1
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
IDONE=0
XMIN=J1
XMAX=J2
CALL CLEARV
CALL VECTOM
XP=J1
YP=0.
X=J2
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
C SET UP MARGINS
MARGB=MARGB1+NINT(0.006*ISYMAX)
MARGT=MARGT1-NINT(0.012*ISYMAX)
CALL LINE(XP,X,.78,.78,XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX)
CALL TEXT(XMIN,0.78,'76%',3,0,XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX)
CALL BPHAS1(SEQ(J1P-LB),IDIM1,POS,COMP,LENW3,POS1)
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
TESTC=ROTPHS(SEQ,IDIM1P,IP,LB,LF,POS,COMP)
IF(MOD(IDONE,IWRIT).EQ.0)THEN
X=IP-I1INM1
CALL LINE(XP,X,YP,TESTC,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX)
YP=TESTC
XP=X
END IF
300 CONTINUE
CALL VT100M
END
REAL FUNCTION ROTPHS(SEQ,IDIM,IAT,LB,LF,POS,BCOMP)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL POS(5,3),BCOMP(5),DIFF(5,3),DIFF1(15)
INTEGER CTONUM
EXTERNAL CTONUM
EQUIVALENCE (DIFF,DIFF1)
C zero variables
CALL FILLR(DIFF1,15,0.)
C LOOK BACK TO SUBTRACT A CODON
IPB=IAT-LB
C sum for each base for each codon position
K=0
DO 10 J=IPB,IPB+2
K=K+1
JJ=CTONUM(SEQ(J))
BCOMP(JJ)=BCOMP(JJ)-1.
POS(JJ,K)=POS(JJ,K)-1.
10 CONTINUE
C look forwards and add a value
IPF=IAT+LF
K=0
DO 20 J=IPF,IPF+2
K=K+1
JJ=CTONUM(SEQ(J))
BCOMP(JJ)=BCOMP(JJ)+1.
POS(JJ,K)=POS(JJ,K)+1.
20 CONTINUE
DO 30 I=1,4
TEMP=BCOMP(I)/3.
DO 30 J=1,3
DIFF(I,J)=ABS(POS(I,J)-TEMP)
30 CONTINUE
R=0.
DO 60 I=1,4
DO 60 J=1,3
IF(BCOMP(I).NE.0.)R=R+DIFF(I,J)/BCOMP(I)
60 CONTINUE
ROTPHS=R
END
SUBROUTINE BPHAS1(SEQ,IDIM,POS,COMP,LWIND3,POS1)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL POS(5,3),COMP(5),POS1(15)
INTEGER CTONUM
EXTERNAL CTONUM
CALL FILLR(POS1,15,0.)
CALL FILLR(COMP,5,0.)
DO 10 I=1,LWIND3-1,3
K=0
I1=I
I2=I1+2
DO 9 J=I1,I2
K=K+1
JJ=CTONUM(SEQ(J))
COMP(JJ)=COMP(JJ)+1.
POS(JJ,K)=POS(JJ,K)+1.
9 CONTINUE
10 CONTINUE
END
SUBROUTINE FICKET(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,MARGL1,MARGR1,MARGB1,MARGT1,J1,J2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,DIALOG
CALL SHOWFU(KBOUT,'Ficketts method to find protein genes')
CALL GETPAR(48,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL FICKTP(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,MARGL1,MARGR1,MARGB1,MARGT1,J1,J2,SPAN,IWRIT)
END
SUBROUTINE FICKTP(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,MARGL1,MARGR1,MARGB1,MARGT1,J1,J2,LENW,IWRIT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P)
REAL POS(5,3),COMP(5),POS1(15)
EQUIVALENCE (POS,POS1)
EXTERNAL ROTFCK
I1IN=2+MXSPAN/2
I1INM1=I1IN-1
IDIMJ=J2-J1+1.
J1P=J1+I1INM1
J2P=J2+I1INM1
YMIN=0.
YMAX=1.
XMIN=J1
XMAX=J2
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
C calc fraction of window length for composition
FRAC=1./LENW3
IDONE=0
CALL CLEARV
CALL VECTOM
C set initial values
XP=J1
YP=0.
X=J2
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL LINE(XP,X,.4,.4,XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL LINE(XP,X,.77,.77,XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FCKSTR(SEQ(J1P-LB),IDIM1,POS,COMP,LENW3,POS1)
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
TESTC=ROTFCK(SEQ,IDIM1P,IP,LB,LF,POS,COMP,FRAC)
IF(MOD(IDONE,IWRIT).EQ.0)THEN
X=IP-I1INM1
CALL LINE(XP,X,YP,TESTC,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
YP=TESTC
XP=X
END IF
300 CONTINUE
CALL VT100M
END
REAL FUNCTION ROTFCK(SEQ,IDIM,IAT,LB,LF,POS,BCOMP,FRAC)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL POS(5,3),BASMAX(4),BASMIN(4),BPOS(4),BCOMP(5),BCOMPT(5)
REAL WCOMP(4),WPOS(4),PCODP(10,4),PCODC(10,4),PCODCR(10)
REAL PCODPR(10)
INTEGER CTONUM
EXTERNAL CTONUM
SAVE WCOMP,WPOS,PCODCR,PCODPR,PCODP,PCODC
DATA WCOMP/.14,.12,.11,.15/
DATA WPOS/.33,.18,.26,.31/
DATA PCODCR/.17,.19,.21,.23,.25,.27,.29,.31,.33,.99/
DATA PCODPR/1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,99./
DATA PCODC/.58,.51,.69,.56,.75,.55,.40,.39,.24,.28,
1.31,.39,.44,.43,.59,.59,.64,.51,.64,.82,.21,.81,.65,.67,
2.49,.62,.55,.44,.49,.28,.29,.33,.41,.41,.73,.64,.64,.47,
3.54,.40/
DATA PCODP/.09,.09,.20,.54,.44,.69,.68,.91,.97,.97,
1.23,.30,.33,.51,.48,.66,.81,.70,.70,.80,
2.22,.20,.34,.45,.68,.58,.93,.84,.68,.94,
3.08,.08,.16,.27,.48,.53,.64,.74,.88,.90/
C DATA TESTCP/.00,.04,.07,.29,.40,.77,.92,.98,1.0,1.0/
C DATA TESTCR/.43,.53,.64,.74,.84,.95,1.05,1.16,1.26,99./
DO 5 I=1,4
BASMAX(I)=0.
BASMIN(I)=IDIM
BPOS(I)=0.
5 CONTINUE
C look back to subtract a codon
IPB=IAT-LB
C sum for each base for each codon position
K=0
DO 10 J=IPB,IPB+2
K=K+1
JS=CTONUM(SEQ(J))
BCOMP(JS)=BCOMP(JS)-FRAC
POS(JS,K)=POS(JS,K)-1.
10 CONTINUE
C look forwards and add a value
IPF=IAT+LF
K=0
DO 20 J=IPF,IPF+2
K=K+1
JS=CTONUM(SEQ(J))
BCOMP(JS)=BCOMP(JS)+FRAC
POS(JS,K)=POS(JS,K)+1.
20 CONTINUE
C get max and min values for each base in each codon position
DO 30 J=1,3
DO 30 I=1,4
IF(POS(I,J).GT.BASMAX(I))BASMAX(I)=POS(I,J)
IF(POS(I,J).LT.BASMIN(I))BASMIN(I)=POS(I,J)
30 CONTINUE
DO 40 I=1,4
BPOS(I)=BASMAX(I)/(BASMIN(I)+1.)
40 CONTINUE
C look up prob of coding
DO 46 I=1,4
DO 45 J=1,10
IF(BCOMP(I).GT.PCODCR(J))GO TO 45
C lt current so set prob
BCOMPT(I)=PCODC(J,I)
GO TO 46
45 CONTINUE
46 CONTINUE
DO 48 I=1,4
DO 47 J=1,10
IF(BPOS(I).GT.PCODPR(J))GO TO 47
C lt current so set prob
BPOS(I)=PCODP(J,I)
GO TO 48
47 CONTINUE
48 CONTINUE
C apply weights
DO 50 I=1,4
BCOMPT(I)=BCOMPT(I)*WCOMP(I)
BPOS(I)=BPOS(I)*WPOS(I)
50 CONTINUE
C calc testcode
TESTC=0.
DO 60 I=1,4
TESTC=TESTC+BPOS(I)+BCOMPT(I)
60 CONTINUE
ROTFCK=TESTC
END
SUBROUTINE FCKSTR(SEQ,IDIM,POS,COMP,LWIND3,POS1)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL POS(5,3),COMP(5),POS1(15)
INTEGER CTONUM
EXTERNAL CTONUM
C calc fraction to add ie window length recip
FRAC=1./LWIND3
C zero variables
CALL FILLR(COMP,5,0.)
CALL FILLR(POS1,15,0.)
C sum for each base for each codon position
DO 10 I=1,LWIND3-1,3
K=0
I1=I
I2=I1+2
DO 9 J=I1,I2
K=K+1
JS=CTONUM(SEQ(J))
COMP(JS)=COMP(JS)+FRAC
POS(JS,K)=POS(JS,K)+1.
9 CONTINUE
10 CONTINUE
END
SUBROUTINE SHEPED(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1P),PAA(5,5,5)
INTEGER SPAN,DIALOG
CALL SHOWFU(KBOUT,'Shepherds method to find protein genes')
CALL GETPAR(47,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL SHEPP(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,PAA,SPAN,IWRIT)
END
SUBROUTINE SHEPP(SEQ,IDIM1P,IDIM1,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,PAA,LENW,IWRIT)
C AUTHOR: RODGER STADEN
REAL SUMW(3),PROB(3)
CHARACTER SEQ(IDIM1P),PAA(5,5,5)
CHARACTER SPACE,RLINE(3),STAR
SAVE SPACE,STAR
DATA SPACE,STAR/' ','*'/
IMH=NINT(0.005*ISYMAX)
I1IN=2+MXSPAN/2
I1INM1=I1IN-1
IDIMJ=J2-J1+1.
J1P=J1+I1INM1
J2P=J2+I1INM1
YMIN=0.2
YMAX=0.4
YMID=YMIN+(YMAX-YMIN)/2.
XMIN=J1
XMAX=J2
C SORT OUT MARGIN IE FIND MAX AND DIVIDE BY THREE
IMARG=NINT(MARGT/3.)
MARGL1=MARGL
MARGR1=MARGR
MARGB1=MARGB
MARGB2=MARGB+IMARG
MARGB3=MARGB+2*IMARG
MARGT3=IMARG
MARGT2=IMARG
MARGT1=IMARG
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
CALL FILLR(SUMW,3,0.)
IDONE=0
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL SHEPST(SEQ(J1P-LB),IDIM1,LENW3,SUMW(1))
CALL SHEPST(SEQ(J1P+1-LB),IDIM1,LENW3,SUMW(2))
CALL SHEPST(SEQ(J1P+2-LB),IDIM1,LENW3,SUMW(3))
C set initial values
XP=J1
YP1=0.5
YP2=0.5
YP3=0.5
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
IP0=IP
IP1=IP+1
IP2=IP+2
CALL RTSHEP(SEQ,IDIM1P,IP0,LB,LF,SUMW(1))
CALL RTSHEP(SEQ,IDIM1P,IP1,LB,LF,SUMW(2))
CALL RTSHEP(SEQ,IDIM1P,IP2,LB,LF,SUMW(3))
IF(MOD(IDONE,IWRIT).EQ.0)THEN
T=SUMW(1)+SUMW(2)+SUMW(3)
PROB(1)=SUMW(1)/T
PROB(2)=SUMW(2)/T
PROB(3)=SUMW(3)/T
C get largest
DO 302 II=1,3
RLINE(II)=SPACE
302 CONTINUE
TP=MAX(SUMW(1),SUMW(2),SUMW(3))
DO 303 II=1,3
IF(SUMW(II).EQ.TP)RLINE(II)=STAR
303 CONTINUE
C set actual ip
IPA=IP-I1INM1
X=IPA
Y1=PROB(1)
Y2=PROB(2)
Y3=PROB(3)
CALL LINE(XP,X,YP1,Y1,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1+IMH,MARGT1-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP2,Y2,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2+IMH,MARGT2-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP3,Y3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3+IMH,MARGT3-2*IMH,ISXMAX,ISYMAX)
IF(RLINE(1).EQ.STAR)
+ CALL POINT(X,YMID,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
IF(RLINE(2).EQ.STAR)
+ CALL POINT(X,YMID,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
IF(RLINE(3).EQ.STAR)
+ CALL POINT(X,YMID,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
YP1=Y1
YP2=Y2
YP3=Y3
XP=X
END IF
300 CONTINUE
CALL STARTS(SEQ,IDIM1P,J1P,J2P,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL STOPS(SEQ,IDIM1P,J1P,J2P,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL VT100M
END
SUBROUTINE SHEPST(SEQ,IDIM,LENW,SUM)
C AUTHOR: RODGER STADEN
REAL SUM
CHARACTER SEQ(IDIM)
INTEGER SCORER(5),SCOREY(5),CTONUM
EXTERNAL CTONUM
SAVE SCORER,SCOREY
DATA SCORER,SCOREY/0,0,1,1,0,1,1,0,0,0/
SUM=0.
DO 100 I=1,LENW,3
SUM=SUM+SCORER(CTONUM(SEQ(I)))+SCOREY(CTONUM(SEQ(I+2)))
100 CONTINUE
END
SUBROUTINE RTSHEP(SEQ,IDIM1,I,LB,LF,H)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1)
INTEGER SCORER(4),SCOREY(4),CTONUM
EXTERNAL CTONUM
SAVE SCORER,SCOREY
DATA SCORER,SCOREY/0,0,1,1,1,1,0,0/
C set up dummy values
XS=0.
XA=0.
C look back
IPB=I-LB-1
DO 10 J=1,3
IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 30
10 CONTINUE
C set value to subtract
IPB=IPB+1
IT1=CTONUM(SEQ(IPB))
IT2=CTONUM(SEQ(IPB+2))
XS=SCORER(IT1)+SCOREY(IT2)
C now look forward
30 CONTINUE
IPB=I+LF-1
C look for bad char
DO 45 J=1,3
IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 50
45 CONTINUE
C ok so use
IPB=IPB+1
IT1=CTONUM(SEQ(IPB))
IT2=CTONUM(SEQ(IPB+2))
XA=SCORER(IT1)+SCOREY(IT2)
50 CONTINUE
H=H-XS+XA
END
SUBROUTINE IMPBC(SEQ,IDIM1P,IDIM1,MXSPAN,FTABLE,IDFTAB,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,DIALOG)
CHARACTER HELPF*(*),PAA(5,5,5)
C AUTHOR: RODGER STADEN
INTEGER SPAN,DIALOG
REAL FTABLE(IDFTAB)
CHARACTER SEQ(IDIM1P)
CALL SHOWFU(KBOUT,'McLachlan,Staden,Boswell codon improbability')
C WRITE(KBOUT,*)' Search for protein genes using McLachlan, Staden'
C WRITE(KBOUT,*)' and Boswell improbability method'
C WRITE(KBOUT,*)' expecting codon composition to depend on'
C WRITE(KBOUT,*)' base composition'
CALL GETPAR(45,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL IMPBCP(SEQ,IDIM1P,IDIM1,MXSPAN,FTABLE,IDFTAB,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,PAA,SPAN,IWRIT)
END
SUBROUTINE IMPBCP(SEQ,IDIM1P,IDIM1,MXSPAN,FTABLE,IDFTAB,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,PAA,LENW,IWRIT)
CHARACTER PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL RNBASE(4)
REAL W(3),RNFAC(3),COMP(3),FTABLE(IDFTAB),FACN
INTEGER NBASE1(4),NBASE2(4),NBASE3(4)
INTEGER NCOD1(4,4,4),NCOD2(4,4,4),NCOD3(4,4,4)
CHARACTER SEQ(IDIM1P),RLINE(3),STAR
SAVE STAR
DATA STAR/'*'/
C SORT OUT MARGIN IE FIND MAX AND DIVIDE BY THREE
IMARG=NINT(MARGT/3.)
MARGL1=MARGL
MARGR1=MARGR
MARGB1=MARGB
MARGB2=MARGB+IMARG
MARGB3=MARGB+2*IMARG
MARGT3=IMARG
MARGT2=IMARG
MARGT1=IMARG
IMH=NINT(0.005*ISYMAX)
SD=4.1
EXPM=0.
YMIN=EXPM
YMAX=EXPM+12.*SD
IBH=NINT(0.005*ISXMAX)
C ONLY CALC FACTORS UPTO 99!
CALL FACTAB(FTABLE,99)
I1IN=2+MXSPAN/2
I1INM1=I1IN-1
IDIMJ=J2-J1+1.
J1P=J1+I1INM1
J2P=J2+I1INM1
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
RLENW3=LENW3
FACN=FACTOR(LENW,FTABLE,IDFTAB)
XMIN=J1
XMAX=J2
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB1,MARGT1,ISXMAX,ISYMAX,SD,IBH,YMIN,2)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB2,MARGT2,ISXMAX,ISYMAX,SD,IBH,YMIN,2)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB3,MARGT3,ISXMAX,ISYMAX,SD,IBH,YMIN,2)
CALL ROT1(SEQ(J1P-LB),IDIM1,NCOD1,RNFAC(1),FTABLE,
+IDFTAB,NBASE1,LENW3)
CALL ROT1(SEQ(J1P+1-LB),IDIM1,NCOD2,RNFAC(2),FTABLE,
+IDFTAB,NBASE2,LENW3)
CALL ROT1(SEQ(J1P+2-LB),IDIM1,NCOD3,RNFAC(3),FTABLE,
+IDFTAB,NBASE3,LENW3)
C set initial values
XP=J1
YP1=0.
YP2=0.
YP3=0.
IDONE=0
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
IP0=IP
IP1=IP+1
IP2=IP+2
CALL ROTFAC(SEQ,IDIM1P,IP0,LB,LF,NCOD1,RNFAC(1),
+ FTABLE,IDFTAB,NBASE1,COMP(1),LENW3)
CALL ROTFAC(SEQ,IDIM1P,IP1,LB,LF,NCOD2,RNFAC(2),
+ FTABLE,IDFTAB,NBASE2,COMP(2),LENW3)
CALL ROTFAC(SEQ,IDIM1P,IP2,LB,LF,NCOD3,RNFAC(3),
+ FTABLE,IDFTAB,NBASE3,COMP(3),LENW3)
DO 200 K=1,3
W(K)=RNFAC(K)-COMP(K)-FACN
200 CONTINUE
IF(MOD(IDONE,IWRIT).EQ.0)THEN
DO 302 II=1,3
RLINE(II)=' '
302 CONTINUE
T=MAX(W(1),W(2),W(3))
DO 303 II=1,3
IF(W(II).EQ.T)RLINE(II)=STAR
303 CONTINUE
IPA=IP-I1INM1
DO 787 JJJ=1,4
RNBASE(JJJ)=FLOAT(NBASE1(JJJ))/RLENW3
787 CONTINUE
CALL POISON(LENW,RNBASE,RNBASE,EW,WD,1)
YMAX=EXPM+12.*WD
WD3=WD*6.
X=IPA
Y1=W(1)-EW
Y2=W(2)-EW
Y3=W(3)-EW
CALL LINE(XP,X,YP1,Y1,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1+IMH,MARGT1-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP2,Y2,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2+IMH,MARGT2-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP3,Y3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3+IMH,MARGT3-2*IMH,ISXMAX,ISYMAX)
IF(RLINE(1).EQ.STAR)CALL POINT(X,WD3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
IF(RLINE(2).EQ.STAR)CALL POINT(X,WD3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
IF(RLINE(3).EQ.STAR)CALL POINT(X,WD3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
YP1=Y1
YP2=Y2
YP3=Y3
XP=X
END IF
300 CONTINUE
CALL STARTS(SEQ,IDIM1P,J1P,J2P,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL STOPS(SEQ,IDIM1P,J1P,J2P,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL VT100M
END
SUBROUTINE IMPAC(SEQ,IDIM1P,IDIM1,MXSPAN,FTABLE,IDFTAB,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,DIALOG)
CHARACTER HELPF*(*),PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL FTABLE(IDFTAB)
CHARACTER SEQ(IDIM1P)
INTEGER SPAN,DIALOG
CALL SHOWFU(KBOUT,'McLachlan,Staden,Boswell codon improbability')
C WRITE(KBOUT,*)' Search for protein genes using McLachlan, Staden'
C WRITE(KBOUT,*)' and Boswell improbability method'
C WRITE(KBOUT,*)' expecting codon composition to depend on'
C WRITE(KBOUT,*)' amino acid composition'
CALL GETPAR(46,6,IOK,MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,
+IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(DIALOG.EQ.1) THEN
CALL GSPIN(MINSP,MAXSP,SPAN,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL IMPACP(SEQ,IDIM1P,IDIM1,MXSPAN,FTABLE,IDFTAB,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,PAA,SPAN,IWRIT)
END
SUBROUTINE IMPACP(SEQ,IDIM1P,IDIM1,MXSPAN,FTABLE,IDFTAB,
+ISXMAX,ISYMAX,J1,J2,MARGL,MARGR,MARGB,MARGT,PAA,LENW,IWRIT)
CHARACTER PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL W(3),RNFAC(3),COMP(3),FTABLE(IDFTAB),FACN
INTEGER NBASE1(4),NBASE2(4),NBASE3(4)
INTEGER NCOD1(4,4,4),NCOD2(4,4,4),NCOD3(4,4,4)
INTEGER NCODON(21),IACID(4,4,4)
REAL FABC1(4,4,4),FA1(21)
REAL FABC2(4,4,4),FA2(21)
REAL FABC3(4,4,4),FA3(21)
CHARACTER SEQ(IDIM1P),RLINE(3),STAR
SAVE IACID,NCODON,STAR
DATA STAR/'*'/
C array with number of codons per acid
DATA IACID/
+1,2,3,5,6,7,8,9,
+10,12,14,16,18,20,6,21,
+1,2,3,5,6,7,8,9,
+10,12,14,16,18,20,6,21,
+2,2,3,5,6,7,8,9,
+11,13,15,17,11,20,20,21,
+2,2,4,5,6,7,8,9,
+11,13,15,17,19,20,20,21/
DATA NCODON/
+2,6,3,1,4,6,4,4,4,2,3,2,2,2,2,2,2,2,1,6,4/
C SORT OUT MARGIN IE FIND MAX AND DIVIDE BY THREE
IMARG=NINT(MARGT/3.)
MARGL1=MARGL
MARGR1=MARGR
MARGB1=MARGB
MARGB2=MARGB+IMARG
MARGB3=MARGB+2*IMARG
MARGT3=IMARG
MARGT2=IMARG
MARGT1=IMARG
SD=4.1
EXPM=0.
YMIN=EXPM
YMAX=EXPM+15.*SD
IBH=NINT(0.005*ISXMAX)
IMH=NINT(0.005*ISYMAX)
C ONLY CALC FACTORS UPTO 99!
CALL FACTAB(FTABLE,99)
I1IN=2+MXSPAN/2
I1INM1=I1IN-1
IDIMJ=J2-J1+1.
J1P=J1+I1INM1
J2P=J2+I1INM1
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
XLENW=LENW
FACN=FACTOR(LENW,FTABLE,IDFTAB)
XMIN=J1
XMAX=J2
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB1,MARGT1,ISXMAX,ISYMAX,SD,IBH,YMIN,2)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB2,MARGT2,ISXMAX,ISYMAX,SD,IBH,YMIN,2)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB3,MARGT3,ISXMAX,ISYMAX,SD,IBH,YMIN,2)
CALL ROT2(SEQ(J1P-LB),IDIM1,NCOD1,RNFAC(1),FTABLE,
+IDFTAB,NBASE1,LENW3,FA1,FABC1,NCODON,IACID)
CALL ROT2(SEQ(J1P+1-LB),IDIM1,NCOD2,RNFAC(2),FTABLE,
+IDFTAB,NBASE2,LENW3,FA2,FABC2,NCODON,IACID)
CALL ROT2(SEQ(J1P+2-LB),IDIM1,NCOD3,RNFAC(3),FTABLE,
+IDFTAB,NBASE3,LENW3,FA3,FABC3,NCODON,IACID)
C set initial values
XP=J1
YP1=0.
YP2=0.
YP3=0.
IDONE=0
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
IP0=IP
IP1=IP+1
IP2=IP+2
CALL ROTFAK(SEQ,IDIM1P,IP0,LB,LF,NCOD1,RNFAC(1),
+FTABLE,IDFTAB,NBASE1,COMP(1),LENW3,XLENW,FA1,FABC1,NCODON,IACID)
CALL ROTFAK(SEQ,IDIM1P,IP1,LB,LF,NCOD2,RNFAC(2),
+FTABLE,IDFTAB,NBASE2,COMP(2),LENW3,XLENW,FA2,FABC2,NCODON,IACID)
CALL ROTFAK(SEQ,IDIM1P,IP2,LB,LF,NCOD3,RNFAC(3),
+FTABLE,IDFTAB,NBASE3,COMP(3),LENW3,XLENW,FA3,FABC3,NCODON,IACID)
DO 200 K=1,3
W(K)=RNFAC(K)-COMP(K)-FACN
200 CONTINUE
IF(MOD(IDONE,IWRIT).EQ.0)THEN
DO 302 II=1,3
RLINE(II)=' '
302 CONTINUE
T=MAX(W(1),W(2),W(3))
DO 303 II=1,3
IF(W(II).EQ.T)RLINE(II)=STAR
303 CONTINUE
IPA=IP-I1INM1
CALL POISON(LENW,FABC1,FABC1,EW1,WD1,2)
CALL POISON(LENW,FABC2,FABC2,EW2,WD2,2)
CALL POISON(LENW,FABC3,FABC3,EW3,WD3,2)
WDT3=(WD1+WD2+WD3)/3.
YMAX=EXPM+15.*WDT3
WDT3=(YMAX-YMIN)/2.
X=IPA
Y1=W(1)-EW1
Y2=W(2)-EW2
Y3=W(3)-EW3
CALL LINE(XP,X,YP1,Y1,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1+IMH,MARGT1-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP2,Y2,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2+IMH,MARGT2-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP3,Y3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3+IMH,MARGT3-2*IMH,ISXMAX,ISYMAX)
IF(RLINE(1).EQ.STAR)CALL POINT(X,WDT3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
IF(RLINE(2).EQ.STAR)CALL POINT(X,WDT3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
IF(RLINE(3).EQ.STAR)CALL POINT(X,WDT3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
YP1=Y1
YP2=Y2
YP3=Y3
XP=X
END IF
300 CONTINUE
CALL STARTS(SEQ,IDIM1P,J1P,J2P,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL STOPS(SEQ,IDIM1P,J1P,J2P,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL VT100M
END
SUBROUTINE ROT1(SEQ,IDIM,NCOD,RNFAC,FTABLE,IDFTAB,
+NBASE,SPAN3)
C AUTHOR: RODGER STADEN
REAL RNFAC,FTABLE(IDFTAB),FACTOR
INTEGER NCOD(4,4,4),NBASE(4),SPAN3,CTONUM
CHARACTER SEQ(IDIM)
EXTERNAL FACTOR,CTONUM
C zero values
DO 1 I=1,4
NBASE(I)=0
DO 1 J=1,4
DO 1 K=1,4
1 NCOD(I,J,K)=0
C loop for span length
DO 10 I=1,SPAN3-1,3
IP=I-1
C check for bad chars
DO 5 J=1,3
5 IF(CTONUM(SEQ(IP+J)).EQ.5)GO TO 10
C ok
IS=CTONUM(SEQ(I))
IS1=CTONUM(SEQ(I+1))
IS2=CTONUM(SEQ(I+2))
NCOD(IS,IS1,IS2)=NCOD(IS,IS1,IS2)+1
NBASE(IS)=NBASE(IS)+1
NBASE(IS1)=NBASE(IS1)+1
NBASE(IS2)=NBASE(IS2)+1
10 CONTINUE
C set up rnfac
RNFAC=0.
DO 20 I=1,4
DO 20 J=1,4
DO 20 K=1,4
20 RNFAC=RNFAC+FACTOR(NCOD(I,J,K),FTABLE,IDFTAB)
END
C rotate factor
SUBROUTINE ROTFAC(SEQ,IDIM,I,LB,LF,NCOD,RNFAC,FTABLE,
+IDFTAB,NBASE,COMP,SPAN3)
C AUTHOR: RODGER STADEN
REAL RNFAC,FTABLE(IDFTAB),FACTOR
INTEGER NCOD(4,4,4),NBASE(4),SPAN3,CTONUM
CHARACTER SEQ(IDIM)
EXTERNAL FACTOR,CTONUM
C look back
IPB=I-LB-1
C look for bad char (it will not have been used)
DO 10 J=1,3
10 IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 30
C ok
IPB=IPB+1
IS1=CTONUM(SEQ(IPB))
IS2=CTONUM(SEQ(IPB+1))
IS3=CTONUM(SEQ(IPB+2))
C subtract the current factorial value from rotated score
RNFAC=RNFAC-FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
C subtract from codon table
NCOD(IS1,IS2,IS3)=NCOD(IS1,IS2,IS3)-1
C add this new value to the rotated score
RNFAC=RNFAC+FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
C now do the composition part
NBASE(IS1)=NBASE(IS1)-1
NBASE(IS2)=NBASE(IS2)-1
NBASE(IS3)=NBASE(IS3)-1
30 CONTINUE
C now look forwards
IPB=I+LF-1
C look for bad char
DO 40 J=1,3
40 IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 50
C ok
IPB=IPB+1
IS1=CTONUM(SEQ(IPB))
IS2=CTONUM(SEQ(IPB+1))
IS3=CTONUM(SEQ(IPB+2))
C subtract the current factorial value from rotated score
RNFAC=RNFAC-FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
NCOD(IS1,IS2,IS3)=NCOD(IS1,IS2,IS3)+1
C add this new value to the rotated score
RNFAC=RNFAC+FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
C now do the composition part
NBASE(IS1)=NBASE(IS1)+1
NBASE(IS2)=NBASE(IS2)+1
NBASE(IS3)=NBASE(IS3)+1
50 CONTINUE
COMP=0.
DO 60 J=1,4
FREQ=FLOAT(NBASE(J))/FLOAT(SPAN3)
IF(FREQ.GT.0.)THEN
COMP=COMP+FLOAT(NBASE(J))*LOG(FREQ)
GO TO 60
END IF
60 CONTINUE
END
REAL FUNCTION FACTOR(N,FTABLE,IDFTAB)
C AUTHOR: RODGER STADEN
REAL FTABLE(IDFTAB)
FACTOR=0.0
C in range?
IF(N.LT.1)GO TO 10
IF(N.GT.IDFTAB)GO TO 20
FACTOR=FTABLE(N)
RETURN
10 CONTINUE
FACTOR=1.
RETURN
20 CONTINUE
WRITE(*,1000)N
1000 FORMAT(' ERROR IN FACTOR, N=',I6)
END
C factorial table set up
SUBROUTINE FACTAB(FTABLE,IDFTAB)
C AUTHOR: RODGER STADEN
REAL FTABLE(IDFTAB),X1,LOG2PI
LOG2PI=0.5*LOG(6.2832)
X1=1.
DO 10 I=1,20
X1=X1*FLOAT(I)
10 FTABLE(I)=LOG(X1)
C stirlings formula:
C lni!=(i+.5)*lni-i+0.5ln2pi
C
DO 20 I=21,IDFTAB
X1=I
20 FTABLE(I)=(X1+0.5)*LOG(X1)-X1+LOG2PI
END
SUBROUTINE ROT2(SEQ,IDIM,NCOD,RNFAC,FTABLE,IDFTAB,
+NBASE,SPAN3,FA,FABC,NCODON,IACID)
C AUTHOR: RODGER STADEN
REAL RNFAC,FTABLE(IDFTAB),FACTOR,FA(21),FABC(4,4,4)
INTEGER NCOD(4,4,4),NBASE(4),NCODON(21),IACID(4,4,4),SPAN3
INTEGER CTONUM
CHARACTER SEQ(IDIM)
EXTERNAL FACTOR,CTONUM
SPAN=SPAN3/3.
C zero values
DO 1 I=1,4
NBASE(I)=0
DO 1 J=1,4
DO 1 K=1,4
FABC(I,J,K)=0.
1 NCOD(I,J,K)=0
DO 2 I=1,21
FA(I)=0.0
2 CONTINUE
C loop for span length
DO 10 I=1,SPAN3-1,3
IP=I-1
C check for bad chars
DO 5 J=1,3
5 IF(CTONUM(SEQ(IP+J)).EQ.5)GO TO 10
C ok
IS=CTONUM(SEQ(I))
IS1=CTONUM(SEQ(I+1))
IS2=CTONUM(SEQ(I+2))
NCOD(IS,IS1,IS2)=NCOD(IS,IS1,IS2)+1
NBASE(IS)=NBASE(IS)+1
NBASE(IS1)=NBASE(IS1)+1
NBASE(IS2)=NBASE(IS2)+1
10 CONTINUE
C set up rnfac
RNFAC=0.
DO 20 I=1,4
DO 20 J=1,4
DO 20 K=1,4
JACID=IACID(I,J,K)
FA(JACID)=FA(JACID)+NCOD(I,J,K)
20 RNFAC=RNFAC+FACTOR(NCOD(I,J,K),FTABLE,IDFTAB)
C CALC EVEN USE OF EACH ACIDS CODONS
DO 21 I=1,4
DO 21 J=1,4
DO 21 K=1,4
JACID=IACID(I,J,K)
FABC(I,J,K)=FA(JACID)/(SPAN*NCODON(JACID))
21 CONTINUE
END
C rotate factor
SUBROUTINE ROTFAK(SEQ,IDIM,I,LB,LF,NCOD,RNFAC,FTABLE,
+IDFTAB,NBASE,COMP,SPAN3,SPAN,FA,FABC,NCODON,IACID)
C AUTHOR: RODGER STADEN
REAL RNFAC,FTABLE(IDFTAB),FACTOR,FABC(4,4,4),FA(21)
INTEGER NCOD(4,4,4),NBASE(4),SPAN3,NCODON(21),IACID(4,4,4)
INTEGER CTONUM
CHARACTER SEQ(IDIM)
EXTERNAL FACTOR,CTONUM
C look back
IPB=I-LB-1
C look for bad char (it will not have been used)
DO 10 J=1,3
10 IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 30
C ok
IPB=IPB+1
IS1=CTONUM(SEQ(IPB))
IS2=CTONUM(SEQ(IPB+1))
IS3=CTONUM(SEQ(IPB+2))
C subtract the current factorial value from rotated score
RNFAC=RNFAC-FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
C subtract from codon table
NCOD(IS1,IS2,IS3)=NCOD(IS1,IS2,IS3)-1
C add this new value to the rotated score
RNFAC=RNFAC+FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
C now do the composition part
NBASE(IS1)=NBASE(IS1)-1
NBASE(IS2)=NBASE(IS2)-1
NBASE(IS3)=NBASE(IS3)-1
JACID=IACID(IS1,IS2,IS3)
FA(JACID)=FA(JACID)-1.
TOTA=FA(JACID)/(SPAN*NCODON(JACID))
CALL NORMA2(FABC,JACID,TOTA)
30 CONTINUE
C now look forwards
IPB=I+LF-1
C look for bad char
DO 40 J=1,3
40 IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 50
C ok
IPB=IPB+1
IS1=CTONUM(SEQ(IPB))
IS2=CTONUM(SEQ(IPB+1))
IS3=CTONUM(SEQ(IPB+2))
C subtract the current factorial value from rotated score
RNFAC=RNFAC-FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
NCOD(IS1,IS2,IS3)=NCOD(IS1,IS2,IS3)+1
C add this new value to the rotated score
RNFAC=RNFAC+FACTOR(NCOD(IS1,IS2,IS3),FTABLE,IDFTAB)
C now do the composition part
NBASE(IS1)=NBASE(IS1)+1
NBASE(IS2)=NBASE(IS2)+1
NBASE(IS3)=NBASE(IS3)+1
JACID=IACID(IS1,IS2,IS3)
FA(JACID)=FA(JACID)+1.
TOTA=FA(JACID)/(SPAN*NCODON(JACID))
CALL NORMA2(FABC,JACID,TOTA)
50 CONTINUE
COMP=0.
DO 60 J=1,4
FREQ=FLOAT(NBASE(J))/FLOAT(SPAN3)
IF(FREQ.GT.0.)THEN
COMP=COMP+FLOAT(NBASE(J))*LOG(FREQ)
GO TO 60
END IF
60 CONTINUE
END
SUBROUTINE POISON(LCOD,F,FABC,OUTW,OUTWD,JOB)
C AUTHOR: D. R BOSWELL AND RODGER STADEN
REAL F(4),FABC(4,4,4)
C
C modified by rs 2-2-83 to include correction of -.5*ln(2piN) where
C N is no of codons (lcod). This is required because I calc using
C multinomial distribution and poisson uses poisson distributon.
C maybe later I should also use poisson.
C Modified 25 Jan 83 to include corrections for lack of restriction
C of total codon number inherent in the Poisson model
C (see Andrew's notes of 7 Jan 83)
C
C The following arrays contain values of <W> and <(delta W)**2>
C computed for Poisson distributions for nu from 0.0 to 10.0 in
C steps of 0.1
C The functions are sufficiently smooth to allow linear interpolation
C between these values
C
REAL W(0:100),DW2(0:100),W10(10),DW210(10)
SAVE W10,DW2,W,DW210
DATA W/ 0.0,0.333677, 0.535378, 0.691144, 0.819071,0.927637,
1 1.021753, 1.104597, 1.178383, 1.244725, 1.304842,
1 1.359685, 1.410006, 1.456414, 1.499409, 1.539405,
1 1.576749, 1.611735, 1.644613, 1.675600, 1.704883,
1 1.732621, 1.758958, 1.784016, 1.807907, 1.830726,
1 1.852561, 1.873487, 1.893574, 1.912883, 1.931471,
1 1.949385, 1.966674, 1.983377, 1.999532, 2.015173,
1 2.030330, 2.045035, 2.059310, 2.073183, 2.086673,
1 2.099801, 2.112587, 2.125048, 2.137200, 2.149058,
1 2.160635, 2.171947, 2.183003, 2.193815, 2.204396,
1 2.214752, 2.224895, 2.234834, 2.244576, 2.254128,
1 2.263500, 2.272696, 2.281723, 2.290591, 2.299299,
1 2.307859, 2.316271, 2.324544, 2.332680, 2.340684,
1 2.348562, 2.356317, 2.363950, 2.371470, 2.378875,
1 2.386173, 2.393366, 2.400454, 2.407444, 2.414335,
1 2.421134, 2.427840, 2.434457, 2.440988, 2.447433,
1 2.453796, 2.460078, 2.466282, 2.472411, 2.478464,
1 2.484445, 2.490354, 2.496194, 2.501966, 2.507673,
1 2.513316, 2.518894, 2.524412, 2.529868, 2.535266,
1 2.540605, 2.545888, 2.551117, 2.556289, 2.561411/
DATA DW2/ 0.0,0.564147, 0.615536, 0.604980,0.578119,0.548425,
1 0.520633, 0.496362, 0.475974, 0.459312, 0.446008,
1 0.435632, 0.427759, 0.421991, 0.417978, 0.415411,
1 0.414025, 0.413597, 0.413938, 0.414884, 0.416304,
1 0.418091, 0.420146, 0.422401, 0.424790, 0.427265,
1 0.429780, 0.432308, 0.434822, 0.437301, 0.439725,
1 0.442090, 0.444380, 0.446592, 0.448719, 0.450763,
1 0.452725, 0.454593, 0.456378, 0.458077, 0.459696,
1 0.461236, 0.462698, 0.464085, 0.465401, 0.466650,
1 0.467835, 0.468958, 0.470021, 0.471033, 0.471987,
1 0.472895, 0.473758, 0.474571, 0.475347, 0.476081,
1 0.476778, 0.477444, 0.478074, 0.478669, 0.479243,
1 0.479780, 0.480300, 0.480791, 0.481261, 0.481711,
1 0.482136, 0.482542, 0.482935, 0.483305, 0.483668,
1 0.484009, 0.484333, 0.484652, 0.484950, 0.485244,
1 0.485518, 0.485790, 0.486046, 0.486291, 0.486531,
1 0.486763, 0.486985, 0.487203, 0.487408, 0.487606,
1 0.487800, 0.487990, 0.488172, 0.488348, 0.488517,
1 0.488678, 0.488844, 0.488993, 0.489148, 0.489290,
1 0.489438, 0.489575, 0.489704, 0.489840, 0.489961/
C
C The next two arrays are for nu from 10.0 to 100.0 in steps of 10.0
C
DATA W10/
1 2.561411, 2.912526, 3.116712, 3.261270, 3.373267,
1 3.464711, 3.541987, 3.608907, 3.667911, 3.720688/
DATA DW210/
1 0.489961, 0.495480, 0.497065, 0.497826, 0.498281,
1 0.498570, 0.498778, 0.498917, 0.499063, 0.499146/
C
C rs correction factor
CORREC=0.5*LOG(6.2832*LCOD)
C
C
C Initialise accumulators to zero
C (Note that SDW will accumulate the sum of squares of DW, and
C we will need to take its square root at the end
C
SW=0.0
SDW=0.0
C Step through all 64 codons
DO 200 I=1,4
DO 200 J=1,4
DO 200 K=1,4
C V (=nu) is the expected codon frequency in this slot of the table
IF(JOB.EQ.1)THEN
V=REAL(LCOD)*F(I)*F(J)*F(K)
ELSE IF(JOB.EQ.2)THEN
V=REAL(LCOD)*FABC(I,J,K)
END IF
C V<0.0 is a fatal error (implies bad parameters)
IF(V.LT.0.0)GO TO 900
C V>=10.0 requires separate handling
IF(V.GE.10.0)GOTO 100
C Now we can interpolate in the tables and accumulate the appropriate values
IV=IFIX(V*10.0)
DV=V*10.0-IV
SW=SW+W(IV)+DV*(W(IV+1)-W(IV))
SDW=SDW+DW2(IV)+DV*(DW2(IV+1)-DW2(IV))
GO TO 200
C Come here if V>=10.0
100 IF(V.GT.100.0)GOTO 110
IV=IFIX(V/10.0)
DV=V/10.0-IV
SW=SW+W10(IV)+DV*(W10(IV+1)-W10(IV))
SDW=SDW+DW210(IV)+DV*(DW210(IV+1)-DW210(IV))
GO TO 200
C If V>100.0 we're quite justified in using the "large V" approximations
110 SW=SW+0.5*(LOG(2.0*3.1415926536)+LOG(V)+1.0)
SDW=SDW+0.5
200 CONTINUE
C Include correction factors
OUTW=MAX(0.0,SW-64.0/(4.0*REAL(LCOD)))
OUTWD=SQRT(MAX(0.0,SDW-64.0*64.0/(4.0*REAL(LCOD))))
C rs correction
OUTW=OUTW-CORREC
RETURN
C Come here if there's an error in the parameters
900 WRITE(*,1)LCOD,F
1 FORMAT(' POISSON ERROR -- INVALID PARAMETERS'/,
1' SEQUENCE LENGTH: ',I5,' BASE FREQUENCIES: ',4F8.3)
OUTW=0.0
OUTWD=0.0
END
SUBROUTINE NORMA2(SUM,JACID,TOTA)
C AUTHOR: RODGER STADEN
C routine to set all the values for any acid to one
C particular value=tota for a table of codon freqs
C ncods is number of cods per acid numbering from f to g
C codnos are pointers to the values of codon usage in sum
C (which is 4,4,4) for each set of codons for each acid in
C turn from f to g (* is acid 11)
C codnos are pointers to the first element in codp for each
C acids set of codons
C if jacid gt 0 process only acid jacid, else process all
C tota is the total value each acid should sum to
REAL SUM(64)
INTEGER NCODS(21),CODNOS(21),CODP(64)
SAVE NCODS,CODNOS,CODP
DATA NCODS/2,6,3,1,4,6,4,4,4,2,3,2,2,2,2,2,2,2,1,6,4/
DATA CODNOS/1,3,9,12,13,17,23,27,31,35,37,40,42,44,46,
+48,50,52,54,55,61/
DATA CODP/1,17,33,49,2,18,34,50,3,19,35,51,
+4,20,36,52,5,21,37,53,15,31,6,22,38,54,
+7,23,39,55,8,24,40,56,9,25,41,57,45,
+10,26,42,58,11,27,43,59,12,28,44,60,
+13,29,61,14,30,46,62,47,63,16,32,48,64/
IACID=1
IF(JACID.NE.0)IACID=JACID
1 CONTINUE
C loop for acid iacid from codnos to codnos+ncods-1
DO 20 J=CODNOS(IACID),CODNOS(IACID)+NCODS(IACID)-1
SUM(CODP(J))=TOTA
20 CONTINUE
IF(JACID.NE.0)RETURN
IACID=IACID+1
IF(IACID.LT.22)GO TO 1
END
SUBROUTINE PCODUS(SEQ,IDIM1P,IEND,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,KSTART,MARGL,MARGR,MARGB,MARGT,IDEV,FILNAM,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA)
CHARACTER HELPF*(*),PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL FREQ(64),SUM(64),LOGFRQ(64)
CHARACTER SEQ(IDIM1P),FILNAM*(*)
INTEGER ANSTY
C NOTE J1 J2 IS THE ACTIVE REGION, KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IEND IS THE SEQUENCE NUMBER OF THELAST
C ELEMENT IN THE RAM BUFFER
C PLOTS ARE FROM J1 TO J2 BUT STANDARDS CAN
C BE TAKEN FROM KSTART TO IEND
CALL SHOWFU(KBOUT,'Staden and McLachlan codon usage')
C WRITE(KBOUT,*)'Staden and McLachlan codon usage method'
C WRITE(KBOUT,*)'Codon tables for standards may be read from disk'
C WRITE(KBOUT,*)'or calculated from parts of the current sequence'
CALL GETPAR(42,7,IOK,MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,ANSTY,
+IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL PCODD(SEQ,IDIM1P,IEND,MXSPAN,
+KSTART,IDEV,FILNAM,FREQ,SUM,LOGFRQ,ANSTY,
+MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,P1,P2,P3,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,IOK)
IF(IOK.NE.0) RETURN
CALL PCODPP(SEQ,IDIM1P,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,KSTART,MARGL,MARGR,MARGB,MARGT,PAA,
+LOGFRQ,LENW,IWRIT,P1,P2,P3)
END
SUBROUTINE PCODD(SEQ,IDIM1P,IEND,MXSPAN,
+KSTART,IDEV,FILNAM,FREQ,SUM,LOGFRQ,ANSTY,
+MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,P1,P2,P3,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,IOK)
CHARACTER HELPF*(*),PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL FREQ(64),SUM(64),LOGFRQ(64)
CHARACTER SEQ(IDIM1P),FILNAM*(*)
INTEGER ANSTY
CALL FILLR(FREQ,64,0.)
CALL PCODD1(ANSTY,PAA,FREQ,IDEV,FILNAM,IOK,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOK.NE.0) RETURN
I1INM1 = 1+(MXSPAN/2)-KSTART+1
IF(ANSTY.EQ.0) THEN
6 CONTINUE
I1 = 0
I2 = 0
CALL GTREG(KBIN,KBOUT,0,IEND,I1,I2,
+ 'Define standard',
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IF(I1.NE.0) THEN
IDIM=I2-I1+1
IF(IDIM.GT.3) THEN
CALL FILLR(SUM,64,0.)
CALL CALCOD(SUM,SEQ(I1+I1INM1),IDIM)
CALL ADDR(SUM,FREQ,64)
CALL WRTCOD(SUM,KBOUT,PAA)
GO TO 6
END IF
END IF
END IF
SUMCOD=0.
DO 50 I=1,64
SUMCOD=SUMCOD+FREQ(I)
50 CONTINUE
WRITE(KBOUT,1006)SUMCOD
1006 FORMAT(' Total codons in standard= ',F8.0)
C DONT CONTINUE IF LESS THAN 64. CODONS SPECIFIED
IF(SUMCOD.LT.64.)THEN
CALL ERROM(KBOUT,'At least 64 codons required in standard')
IOK = 1
RETURN
END IF
CALL AACODM(KBIN,KBOUT,FREQ,PAA,SUMCOD,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.LT.0)RETURN
CALL WRTCOD(FREQ,KBOUT,PAA)
SUMCOD=0.
DO 51 I=1,64
SUMCOD=SUMCOD+FREQ(I)
51 CONTINUE
C NORMALIZE, DO STOP CODONS AND ZERO CODONS
CALL PCOD2(FREQ,LOGFRQ,SUMCOD,LOGFRQ,PAA)
C CALL WRTCOD(LOGFRQ,KBOUT,PAA)
C CALC EXPECTED FREQS FRAME S AND T CODONS AND EXPECTED P'S FOR ALL FRAMES
LENWT = 11
DO 60 I = 1,3
LENWT = LENWT + 10
CALL PCOD3(FREQ,LOGFRQ,LENWT,P1,P2,P3)
WRITE(KBOUT,2000)LENWT,P1,P2,P3
60 CONTINUE
2000 FORMAT(' Span length ',I3,' expected mean values:',3(F6.1))
CALL GSPIN(MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL PCOD3(FREQ,LOGFRQ,LENW,P1,P2,P3)
END
SUBROUTINE PCODPP(SEQ,IDIM1P,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,KSTART,MARGL,MARGR,MARGB,MARGT,PAA,
+LOGFRQ,LENW,IWRIT,P1,P2,P3)
CHARACTER PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL LOGFRQ(64),SUMW(3)
CHARACTER SEQ(IDIM1P),RFRAME(3)
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
EPS=0.0000001
IMARG=NINT(MARGT/3.)
MARGL1=MARGL
MARGR1=MARGR
MARGB1=MARGB
MARGB2=MARGB+IMARG
MARGB3=MARGB+2*IMARG
MARGT3=IMARG
MARGT2=IMARG
MARGT1=IMARG
I1IN=2+(MXSPAN/2)-KSTART+1
I1INM1=I1IN-1
IDIMJ=J2-J1+1.
J1P=J1+I1INM1
J2P=J2+I1INM1
XMIN=J1
XMAX=J2
IBH=NINT(0.005*ISXMAX)
IMH=NINT(0.005*ISYMAX)
SD=1.
CALL FILLR(SUMW,3,0.)
IDONE=0
YMIN=MIN(P2,P3)
YMIN=YMIN
YMAX=P1
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
YFIRST=INT(YMIN)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB1,MARGT1,ISXMAX,ISYMAX,SD,IBH,YFIRST,2)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB2,MARGT2,ISXMAX,ISYMAX,SD,IBH,YFIRST,2)
CALL SCALES(XMAX,XMIN,YMAX,YMIN,MARGL1,MARGR1,
+MARGB3,MARGT3,ISXMAX,ISYMAX,SD,IBH,YFIRST,2)
CALL CODSTR(SEQ(J1P-LB),IDIM1P,LOGFRQ,LENW3,SUMW(1))
CALL CODSTR(SEQ(J1P+1-LB),IDIM1P,LOGFRQ,LENW3,SUMW(2))
CALL CODSTR(SEQ(J1P+2-LB),IDIM1P,LOGFRQ,LENW3,SUMW(3))
XP=J1
YP1=0.
YP2=0.
YP3=0.
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
IP0=IP
IP1=IP+1
IP2=IP+2
CALL ROTCOD(SEQ,IDIM1P,IP0,LB,LF,LOGFRQ,SUMW(1))
CALL ROTCOD(SEQ,IDIM1P,IP1,LB,LF,LOGFRQ,SUMW(2))
CALL ROTCOD(SEQ,IDIM1P,IP2,LB,LF,LOGFRQ,SUMW(3))
IF(MOD(IDONE,IWRIT).EQ.0)THEN
CALL PCOD1(SUMW,Y1,Y2,Y3,RFRAME)
C CHECK FOR OVERFLOW SHOWN BY VALUES =0.
IF(ABS(Y1).LT.EPS)Y1=YP1
IF(ABS(Y2).LT.EPS)Y2=YP2
IF(ABS(Y3).LT.EPS)Y3=YP3
IPA=IP-I1INM1
X=IPA
CALL LINE(XP,X,YP1,Y1,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1+IMH,MARGT1-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP2,Y2,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2+IMH,MARGT2-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP3,Y3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3+IMH,MARGT3-2*IMH,ISXMAX,ISYMAX)
IF(RFRAME(1).EQ.'*')
+ CALL POINT(X,0.,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
IF(RFRAME(2).EQ.'*')
+ CALL POINT(X,0.,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
IF(RFRAME(3).EQ.'*')
+ CALL POINT(X,0.,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
C SAVE CURRENT VALUES
YP1=Y1
YP2=Y2
YP3=Y3
XP=X
END IF
300 CONTINUE
C STARTS
CALL STARTS(SEQ,IDIM1P,J1P,J2P,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
C STOPS
CALL STOPS(SEQ,IDIM1P,J1P,J2P,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL VT100M
END
SUBROUTINE PCODD1(ANSTY,PAA,FREQ,IDEV,FILNAM,IOK,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
REAL FREQ(64)
CHARACTER PAA(125),HELPF*(*),FILNAM*(*)
INTEGER CHOICE,ANSTY
IOK = 1
CHOICE = ANSTY
CALL YESONO(CHOICE,'Define internal standard',
+'Read standard from file',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.0) RETURN
ANSTY = CHOICE
IF(ANSTY.EQ.1) THEN
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+ 'File name of standard',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL REDCOD(FREQ,IDEV)
CLOSE(UNIT=IDEV)
CALL WRTCOD(FREQ,KBOUT,PAA)
END IF
IOK = 0
END
SUBROUTINE AACODM(KBIN,KBOUT,FREQ,PAA,SUMCOD,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
REAL FREQ(64)
CHARACTER PAA(5,5,5)
CHARACTER HELPF*(*)
INTEGER CHOICE
PARAMETER (MAXPRM = 43)
CHARACTER PROMPT(3)*(MAXPRM)
CHOICE = 1
PROMPT(1) = 'Use observed frequencies'
PROMPT(2) = 'Normalize to average amino acid composition'
PROMPT(3) = 'Normalize to no amino acid bias'
CALL RADION('Select normalization',PROMPT,3,CHOICE,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.1) THEN
IOK = 1
RETURN
END IF
SUMC = SUMCOD/64.
IF(CHOICE.EQ.2) CALL APLAAC(FREQ,PAA)
IF(CHOICE.EQ.3) CALL NORMAA(FREQ,SUMC,PAA)
IOK = 0
END
SUBROUTINE APLAAC(SUM,PAA)
CHARACTER PAA(5,5,5),AA(21)
REAL SUM(4,4,4),EXPERC(21)
C ARGOS VALUES
SAVE EXPERC,AA
DATA EXPERC/8.3,1.7,5.3,6.2,3.9,7.2,2.2,5.2,5.7,
+9.0,2.4,4.4,5.1,4.0,5.7,6.9,5.8,6.6,1.3,3.2,0.0/
DATA AA/'A','C','D','E','F',
1'G','H','I','K','L',
2'M','N','P','Q','R',
3'S','T','V','W','Y','*'/
C LOOP FOR EACH ACID
DO 150 L=1,21
ASUM=0.
C LOOP FOR EACH CODON
DO 200 I=1,4
DO 200 J=1,4
DO 200 K=1,4
C COUNT CODONS
IF(PAA(K,J,I).EQ.AA(L))ASUM=ASUM+SUM(I,J,K)
200 CONTINUE
IF(ASUM.GT.0.0)ASUM = 6.4 / ASUM
C LOOP FOR EACH CODON
DO 100 I=1,4
DO 100 J=1,4
DO 100 K=1,4
C NORMALIZE
IF(PAA(K,J,I).EQ.AA(L))SUM(I,J,K)=SUM(I,J,K)*ASUM*EXPERC(L)
100 CONTINUE
150 CONTINUE
END
SUBROUTINE PCOD3(FREQ,LOGFRQ,LENW,P11,P22,P33)
C AUTHOR: RODGER STADEN
PARAMETER (SMALL = 1.0E-30)
DOUBLE PRECISION U,V,W,SL,UL,VL,WL,P1,P2,P3
REAL FREQ(64),LOGFRQ(64),S2(64),S3(64)
CALL F2FF1(FREQ,S2)
CALL F3FF1(FREQ,S3)
C CALC EXPECTED MEAN SCORES FOR THIS WINDOW LENGTH AND STANDARD
U=0.
V=0.
W=0.
DO 62 I=1,64
U=U+FREQ(I)*LOGFRQ(I)
V=V+S2(I)*LOGFRQ(I)
W=W+S3(I)*LOGFRQ(I)
62 CONTINUE
SL=U+V+W
SL=SL/3.
U=U-SL
V=V-SL
W=W-SL
UL=EXP(U*LENW)
VL=EXP(V*LENW)
WL=EXP(W*LENW)
SL=UL+VL+WL
P1=UL/SL
P2=VL/SL
P3=WL/SL
IF(ABS(P1-1.0).GT.SMALL)P11=LOG10(P1/(1.-P1))
IF(ABS(P2-1.0).GT.SMALL)P22=LOG10(P2/(1.-P2))
IF(ABS(P3-1.0).GT.SMALL)P33=LOG10(P3/(1.-P3))
END
SUBROUTINE PCOD2(FREQ,LOGFRQ,SUMCOD,LOGFR3,PAA)
C AUTHOR: RODGER STADEN
CHARACTER PAA(5,5,5)
REAL FREQ(64),LOGFRQ(64),LOGFR3(4,4,4)
C NORMALIZE
DO 51 I=1,64
FREQ(I)=FREQ(I)/SUMCOD
51 CONTINUE
C TAKE LOGS
DO 52 I=1,64
LOGFRQ(I)=0.0
IF(FREQ(I).NE.0.)LOGFRQ(I)=LOG(FREQ(I))
52 CONTINUE
C NEED TO DEAL WITH STOP CODONS AND ZERO FREQS
ODX=1./SUMCOD
ODXL=LOG(ODX)
STOPT=0.
NSTOP=0
C LOOK FOR STOPS IN PAA
DO 10 I=1,4
DO 10 J=1,4
DO 10 K=1,4
IF(PAA(K,J,I).EQ.'*')THEN
LOGFR3(I,J,K)=99.
STOPT=STOPT+99.
NSTOP=NSTOP+1
END IF
10 CONTINUE
C SET ZEROES TO LOG(1/TOTAL)
DO 53 I=1,64
IF(LOGFRQ(I).EQ.0.)LOGFRQ(I)=ODXL
53 CONTINUE
C SET STOPS TO MEAN
X1=0.
DO 54 I=1,64
X1=X1+LOGFRQ(I)
54 CONTINUE
IF(NSTOP.NE.64)X1=(X1-STOPT)/(64-NSTOP)
C LOOK FOR STOPS IN PAA
DO 20 I=1,4
DO 20 J=1,4
DO 20 K=1,4
IF(PAA(K,J,I).EQ.'*')LOGFR3(I,J,K)=X1
20 CONTINUE
END
SUBROUTINE PCOD1(SUMW,Y1,Y2,Y3,LINE)
C AUTHOR: RODGER STADEN
DOUBLE PRECISION SUMWT(3),T,T1
REAL SUMW(3)
CHARACTER LINE(3)
C calc mean of sums to add to each
T=SUMW(1)+SUMW(2)+SUMW(3)
T=T/3.
T=-1.*T
DO 80 II=1,3
C add mean to each
SUMWT(II)=SUMW(II)+T
C calc exponential of each
SUMWT(II)=EXP(SUMWT(II))
80 CONTINUE
C calc sum of exponentials
T=SUMWT(1)+SUMWT(2)+SUMWT(3)
DO 298 II=1,3
C divide each by sum of exponetials
IF(T.NE.0.)SUMWT(II)=SUMWT(II)/T
T1=1.-SUMWT(II)
IF(T1.NE.0.)SUMWT(II)=SUMWT(II)/T1
IF(SUMWT(II).GT.0.)SUMWT(II)=LOG10(SUMWT(II))
298 CONTINUE
C get largest
CALL FILLC(LINE,3,' ')
T=MAX(SUMWT(1),SUMWT(2),SUMWT(3))
DO 10 I=1,3
IF(T.EQ.SUMWT(I))LINE(I)='*'
10 CONTINUE
Y1=SUMWT(1)
Y2=SUMWT(2)
Y3=SUMWT(3)
END
SUBROUTINE CODSTR(SEQ,IDIM,LOGFRQ,LWIND3,SUM)
C AUTHOR: RODGER STADEN
REAL LOGFRQ(4,4,4)
CHARACTER SEQ(IDIM)
INTEGER CTONUM,IVAL(3)
EXTERNAL CTONUM
SUM=0.
C LOOP FOR WINDOW LENGTH
DO 100 I=1,LWIND3-1,3
IP=I-1
C CHECK FOR NULL CHARS
DO 10 J=1,3
IVAL(J)=CTONUM(SEQ(IP+J))
IF(IVAL(J).EQ.5)GO TO 100
10 CONTINUE
SUM=SUM+LOGFRQ(IVAL(1),IVAL(2),IVAL(3))
100 CONTINUE
END
SUBROUTINE F2FF1(SUM1,SUM2)
C AUTHOR: RODGER STADEN
REAL SUM1(4,4,4),SUM2(4,4,4),SXAB(4,4),SCXX(4)
DO 1 I=1,4
DO 1 J=1,4
DO 1 K=1,4
SUM2(K,J,I)=0.
1 CONTINUE
C CALC FROM XABCXX ABC IE XAB * CXX
C XAB
DO 10 J=1,4
DO 10 K=1,4
SXAB(J,K)=0.
DO 10 I=1,4
SXAB(J,K)=SXAB(J,K)+SUM1(I,J,K)
10 CONTINUE
C CXX
DO 20 I=1,4
SCXX(I)=0.
DO 20 J=1,4
DO 20 K=1,4
SCXX(I)=SCXX(I)+SUM1(I,J,K)
20 CONTINUE
C SUM2= XAB * CXX
DO 30 I=1,4
DO 30 J=1,4
DO 30 K=1,4
SUM2(I,J,K)=SXAB(I,J)*SCXX(K)
30 CONTINUE
END
C F3FF1
SUBROUTINE F3FF1(SUM1,SUM3)
C AUTHOR: RODGER STADEN
REAL SUM1(4,4,4),SUM3(4,4,4),SXXA(4),SBCX(4,4)
DO 1 I=1,4
DO 1 J=1,4
DO 1 K=1,4
SUM3(K,J,I)=0.
1 CONTINUE
C CALC ABC FROM XXABCXX IE XXA * BCX
C XXA
DO 10 K=1,4
SXXA(K)=0.
DO 10 I=1,4
DO 10 J=1,4
SXXA(K)=SXXA(K)+SUM1(I,J,K)
10 CONTINUE
C BCX
DO 20 I=1,4
DO 20 J=1,4
SBCX(I,J)=0.
DO 20 K=1,4
SBCX(I,J)=SBCX(I,J)+SUM1(I,J,K)
20 CONTINUE
C XXA * BCX
DO 30 I=1,4
DO 30 J=1,4
DO 30 K=1,4
SUM3(I,J,K)=SXXA(I)*SBCX(J,K)
30 CONTINUE
END
C THIS ROUTINE DOES NOT CHECK FOR GOING OUT OF ARRAY
SUBROUTINE ROTCOD(SEQ,IDIM1,I,LB,LF,LOGFRQ,H)
C AUTHOR: RODGER STADEN
REAL LOGFRQ(4,4,4)
CHARACTER SEQ(IDIM1)
INTEGER CTONUM,IVAL(3)
EXTERNAL CTONUM
XS=0.
XA=0.
C LOOK BACK
IPB=I-LB-1
DO 10 J=1,3
IVAL(J)=CTONUM(SEQ(IPB+J))
IF(IVAL(J).EQ.5)GO TO 30
10 CONTINUE
C SET VALUE TO SUBTRACT
XS=LOGFRQ(IVAL(1),IVAL(2),IVAL(3))
C NOW LOOK FORWARD
30 CONTINUE
IPB=I+LF-1
DO 45 J=1,3
IVAL(J)=CTONUM(SEQ(IPB+J))
IF(IVAL(J).EQ.5)GO TO 50
45 CONTINUE
C OK SO USE
XA=LOGFRQ(IVAL(1),IVAL(2),IVAL(3))
50 H=H-XS+XA
RETURN
END
C PSRCH
SUBROUTINE PSRCH(SEQ,IDIM1,J1,J2,STRING,IDIMS,INC,
+XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,MARGB,MARGT,ISXMAX,
+ISYMAX,Y,BH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIMS)
YP=Y+BH
IAT=J1-INC
10 CONTINUE
IAT=IAT+INC
IDIM=J2-IAT
IF(IDIM.GT.0)THEN
CALL FIND6(SEQ(IAT),IDIM,STRING,IDIMS,INC,JMATCH)
IF(JMATCH.NE.0)THEN
IAT=IAT+JMATCH-1
X=IAT
CALL LINE(X,X,YP,Y,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
GO TO 10
END IF
END IF
END
SUBROUTINE PBASEP(SEQ,IDIM1P,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,KSTART,IENDB,
+MARGL1,MARGR1,MARGB,MARGT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,IDEV,FILNAM,DIALOG)
CHARACTER HELPF*(*),PAA(5,5,5),FILNAM*(*)
C AUTHOR: RODGER STADEN
C 11-1-91 Added facility to read standard in the form of codon table
C and greatly reorganised code! and added new routines.
REAL EXPEC(5,3),SCORES(5,3),WT(3),S(3)
CHARACTER SEQ(IDIM1P)
INTEGER ANSTY,ANSABS,ANSS,DIALOG
C ARGOS VALUES
SAVE EXPEC
DATA EXPEC/0.1768,0.2108,0.2767,0.3357,0.,
+ 0.2707,0.2378,0.3097,0.1818,0.,
+ 0.2506,0.2506,0.2396,0.2592,0./
C ORIGINAL DAYHOFF VALUES
C DATA EXPEC/0.1835,0.1932,0.2719,0.3514,0.,
C 10.2382,0.2459,0.3173,0.1985,0.,
C 20.2548,0.2548,0.2377,0.2527,0./
C NOTE J1 J2 IS THE ACTIVE REGION, KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IDIM1 IS THE LENGTH FROM KSTART
C TO THE END OF SEQ. PLOTS ARE FROM J1 TO J2 BUT STANDARDS CAN
C BE TAKEN FROM KSTART TO KSTART+IDIM1-1 = IENDB
CALL SHOWFU(KBOUT,
+'Positional base preferences method to find protein genes')
CALL GETPAR(43,9,IOK,MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,
+ANSABS,ANSTY,ANSS,
+IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL PBASPD(SEQ,IDIM1P,MXSPAN,
+KSTART,IENDB,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,
+ANSTY,ANSABS,ANSS,MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,
+EXPEC,SCORES,WT,S,SMIN,SMAX,DIALOG,PAA,IDEV,FILNAM,IOK)
IF(IOK.NE.0) RETURN
CALL PBASPP(SEQ,IDIM1P,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,KSTART,ANSABS,LENW,IWRIT,
+EXPEC,SCORES,WT,S,SMIN,SMAX,
+MARGL1,MARGR1,MARGB,MARGT,PAA)
END
SUBROUTINE PBASPD(SEQ,IDIM1P,MXSPAN,
+KSTART,IENDB,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,
+ANSTY,ANSABS,ANSS,MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,
+EXPEC,SCORES,WT,S,SMIN,SMAX,DIALOG,PAA,IDEV,FILNAM,IOK)
C AUTHOR: RODGER STADEN
REAL EXPEC(5,3),SCORES(5,3),WT(3),S(3)
CHARACTER SEQ(IDIM1P)
INTEGER ANSTY,ANSABS,ANSS,CHOICE,DIALOG
REAL FREQ(64)
CHARACTER PAA(125),HELPF*(*),FILNAM*(*)
PARAMETER (MAXPRM = 28)
CHARACTER PROMPT(3)*(MAXPRM)
IOK = 1
DO 30 J=1,3
WT(J)=1.0
DO 30 I=1,5
SCORES(I,J)=EXPEC(I,J)
30 CONTINUE
IF(DIALOG.EQ.0) THEN
CALL PBEXP(SCORES,WT,S)
CALL PBSCAL(SMIN,SMAX,S,ANSABS,LENW,SMIN5,SMAX5,DIF)
IOK = 0
RETURN
END IF
C
C Select source of standard
C
PROMPT(1) = 'Use global standard'
PROMPT(2) = 'Use internal standard'
PROMPT(3) = 'Use codon usage table'
IOK = 1
CHOICE = 1
CALL RADION('Select standard source',PROMPT,3,CHOICE,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.1) RETURN
ANSTY = CHOICE
IF(ANSTY.EQ.3) THEN
C
C Get standard from a codon table
C
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+ 'File name of standard',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL REDCOD(FREQ,IDEV)
CLOSE(UNIT=IDEV)
CALL WRTCOD(FREQ,KBOUT,PAA)
CALL GETPBS(FREQ,SCORES)
ELSE IF(ANSTY.EQ.2) THEN
C
C Get standard from a section of the sequence
C
CALL PBD1(SEQ,IDIM1P,MXSPAN,KSTART,IENDB,KBIN,KBOUT,
+ SCORES,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
IF(ANSTY.NE.1) THEN
C
C Allow combination with global standard
C
PROMPT(1) = 'Use observed frequencies'
PROMPT(2) = 'Combine with global standard'
IOK = 1
CHOICE = 1
CALL RADION('Select normalisation',PROMPT,2,CHOICE,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.1) RETURN
IF(CHOICE.EQ.2)THEN
DO 36 I=1,2
DO 36 K=1,4
SCORES(K,I)=EXPEC(K,I)
36 CONTINUE
END IF
END IF
WRITE(KBOUT,1004)
1004 FORMAT(' T C A G Range')
DO 4 I=1,3
BIG=0.0
SMALL=99999.0
DO 5 K=1,4
BIG = MAX(SCORES(K,I),BIG)
SMALL = MIN(SCORES(K,I),SMALL)
5 CONTINUE
RANGE=BIG-SMALL
WRITE(KBOUT,1005)I,(SCORES(K,I),K=1,4),RANGE
1005 FORMAT(' ',I3,4(1X,F6.3),1X,F6.3)
4 CONTINUE
C
C Allow different weights for each position
C
IOK = 1
CHOICE = 0
CALL YESONO(CHOICE,
+'Use 1.0 for positional weights',
+'Change positional weights',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.0) RETURN
IOK = 0
IF(CHOICE.EQ.1) CALL PBPD2(WT,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL PBEXP(SCORES,WT,S)
WRITE(KBOUT,1001)S
1001 FORMAT(
+ ' Expected scores per codon in each frame',/,' ',3F10.3)
CALL GSPIN(MINSP,MAXSP,LENW,MINIW,MAXIW,IWRIT,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
C
C Select relative or absolute values
C
IOK = 1
CHOICE = ANSABS
CALL YESONO(CHOICE,
+ 'Plot relative scores',
+ 'Plot absolute scores',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.0) RETURN
ANSABS = CHOICE
CALL PBSCAL(SMIN,SMAX,S,ANSABS,LENW,SMIN5,SMAX5,DIF)
WRITE(KBOUT,1002)SMIN,SMAX,DIF
1002 FORMAT(' Scaling values:',/,
+' Minimum maximum range',/,2X,3F9.4)
C
C Allow plot scaling
C
IOK = 1
CHOICE = ANSS
CALL YESONO(CHOICE,
+'Leave scaling values unchanged',
+'Change scaling values',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.0) RETURN
ANSS = CHOICE
IF(ANSS.EQ.1) THEN
CALL GETRL(SMIN5,SMAX5,SMIN,'Minimum',VALUE,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
SMIN = VALUE
CALL GETRL(SMIN,SMAX5,SMAX,'Maximum',VALUE,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
SMAX = VALUE
END IF
IOK = 0
END
SUBROUTINE PBSCAL(SMIN,SMAX,S,ANSABS,LENW,SMIN5,SMAX5,DIF)
C AUTHOR: RODGER STADEN
REAL S(3)
INTEGER ANSABS
SM=0.
IF(ANSABS.EQ.0) THEN
DO 25 I=1,3
SM=SM+S(I)
25 CONTINUE
DO 26 I=1,3
S(I)=S(I)/SM
26 CONTINUE
ELSE
DO 27 I=1,3
S(I)=S(I)*LENW
27 CONTINUE
END IF
SMAX=MAX(S(1),S(2),S(3))
SMIN=MIN(S(1),S(2),S(3))
DIF=SMAX-SMIN
D20=DIF*0.2
SMAX=SMAX+D20
SMIN=SMIN-D20
D50 = DIF*0.5
SMAX5 = SMAX + D50
SMIN5 = SMIN - D50
END
SUBROUTINE PBPD2(WT,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
REAL WT(3)
CHARACTER HELPF*(*)
WRITE(KBOUT,1000)
1000 FORMAT(' Give weights between 0.0 and 1.0',/,
+ ' to each of the 3 codon positions')
WMIN = 0.
WMAX = 1.
DEF = 1.
9 CONTINUE
CALL GETRL(WMIN,WMAX,DEF,'Position 1',VALUE,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
WT(1) = VALUE
CALL GETRL(WMIN,WMAX,DEF,'Position 2',VALUE,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
WT(2) = VALUE
CALL GETRL(WMIN,WMAX,DEF,'Position 3',VALUE,
+ KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
WT(3) = VALUE
C CHECK FOR ZERO
X=WT(1)+WT(2)+WT(3)
IF(X.LT.0.0000001)THEN
WRITE(KBOUT,*)
+ 'All weights zero, at least one must be non-zero'
GO TO 9
END IF
IOK = 0
END
SUBROUTINE PBD1(SEQ,IDIM1P,MXSPAN,KSTART,IENDB,KBIN,KBOUT,
+SCORES,IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER SEQ(IDIM1P)
REAL SCORES(5,3)
CHARACTER HELPF*(*)
INTEGER CTONUM
EXTERNAL CTONUM
C NOTE KSTART IS THE NUMBER OF THE
C BASE IN ELEMENT 1 OF SEQ, AND IDIM1 IS THE LENGTH FROM KSTART
C STANDARDS CAN
C BE TAKEN FROM KSTART TO KSTART+IDIM1-1 = IENDB
I1INM1=1+(MXSPAN/2)-KSTART+1
IPASS = 0
11 CONTINUE
IPASS = IPASS + 1
K1 = 0
K2 = 0
KS1 = KSTART-1
CALL GTREG(KBIN,KBOUT,KS1,IENDB,K1,K2,
+ 'Define region for standard',
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IF((K2-K1).LT.191) THEN
CALL ERROM(KBOUT,'At least 64 codons required in standard')
IF(IPASS.LT.2) GO TO 11
IOK = 1
RETURN
END IF
DO 1 I=1,5
DO 1 J=1,3
SCORES(I,J)=0.
1 CONTINUE
NELTS=0
DO 2 I=K1,K2-2,3
NELTS=NELTS+1
IS=CTONUM(SEQ(I+I1INM1))
IS1=CTONUM(SEQ(I+I1INM1+1))
IS2=CTONUM(SEQ(I+I1INM1+2))
SCORES(IS,1)=SCORES(IS,1)+1.
SCORES(IS1,2)=SCORES(IS1,2)+1.
SCORES(IS2,3)=SCORES(IS2,3)+1.
2 CONTINUE
DO 3 I=1,5
DO 3 J=1,3
SCORES(I,J)=SCORES(I,J)/NELTS
3 CONTINUE
IOK = 0
END
SUBROUTINE GETPBS(SUM,COMP)
C AUTHOR: RODGER STADEN
REAL SUM(4,4,4),COMP(5,3)
TCOMP=0.
DO 50 I=1,5
DO 50 J=1,3
COMP(I,J)=0.
50 CONTINUE
DO 55 J=1,4
DO 55 K=1,4
DO 55 L=1,4
TCOMP=TCOMP+SUM(J,K,L)
COMP(J,1)=COMP(J,1)+SUM(J,K,L)
COMP(J,2)=COMP(J,2)+SUM(K,J,L)
COMP(J,3)=COMP(J,3)+SUM(K,L,J)
55 CONTINUE
IF(TCOMP.NE.0) THEN
DO 60 I=1,4
DO 60 J=1,3
COMP(I,J)=COMP(I,J)/TCOMP
60 CONTINUE
END IF
END
SUBROUTINE PBASPP(SEQ,IDIM1P,MXSPAN,
+ISXMAX,ISYMAX,J1,J2,KSTART,ANSABS,LENW,IWRIT,
+EXPEC,SCORES,WT,S,SMIN,SMAX,
+MARGL1,MARGR1,MARGB,MARGT,PAA)
CHARACTER PAA(5,5,5)
C AUTHOR: RODGER STADEN
REAL SUMW(3),PROB(3),EXPEC(5,3),SCORES(5,3),WT(3),S(3)
CHARACTER SEQ(IDIM1P)
CHARACTER TLINE(3),STAR
INTEGER ANSABS
SAVE STAR
DATA STAR/'*'/
CALL FILLR(SUMW,3,0.)
IDONE=0
IMARG=NINT(MARGT/3.)
MARGB1=MARGB
MARGB2=MARGB+IMARG
MARGB3=MARGB+2*IMARG
MARGT3=IMARG
MARGT2=IMARG
MARGT1=IMARG
I1IN=2+(MXSPAN/2)-KSTART+1
I1INM1=I1IN-1
IDIMJ=J2-J1+1.
J1P=J1+I1INM1
J2P=J2+I1INM1
IMH=NINT(0.005*ISYMAX)
LB=1+LENW/2
LB=3*LB
LF=LENW/2
LF=3*LF
LENW3=3*LENW
YMIN=SMIN
YMAX=SMAX
XMIN=J1
XMAX=J2
C set initial values
XP=J1
YMID=YMIN+(YMAX-YMIN)/2.
YP1=YMID
YP2=YMID
YP3=YMID
CALL CLEARV
CALL VECTOM
XMAX1=XMIN+(XMAX-XMIN)/80.
CALL LINE(XMIN,XMAX1,S(1),S(1),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(2),S(2),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(3),S(3),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(1),S(1),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(2),S(2),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(3),S(3),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(1),S(1),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(2),S(2),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX1,S(3),S(3),XMAX,XMIN,YMAX,YMIN,
+MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL FRAME(MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
CALL PBASES(SEQ(J1P-LB),IDIM1P,LENW3,SUMW(1),SCORES,WT)
CALL PBASES(SEQ(J1P+1-LB),IDIM1P,LENW3,SUMW(2),SCORES,WT)
CALL PBASES(SEQ(J1P+2-LB),IDIM1P,LENW3,SUMW(3),SCORES,WT)
DO 300 IP=J1P,J2P,3
IDONE=IDONE+1
IP0=IP
IP1=IP+1
IP2=IP+2
CALL ROTPB1(SEQ,IDIM1P,IP0,LB,LF,SUMW(1),SCORES,WT)
CALL ROTPB1(SEQ,IDIM1P,IP1,LB,LF,SUMW(2),SCORES,WT)
CALL ROTPB1(SEQ,IDIM1P,IP2,LB,LF,SUMW(3),SCORES,WT)
IF(MOD(IDONE,IWRIT).EQ.0)THEN
PROB(1)=SUMW(1)
PROB(2)=SUMW(2)
PROB(3)=SUMW(3)
IF(ANSABS.EQ.0) THEN
T=SUMW(1)+SUMW(2)+SUMW(3)
PROB(1)=PROB(1)/T
PROB(2)=PROB(2)/T
PROB(3)=PROB(3)/T
END IF
CALL FILLC(TLINE,3,' ')
T=MAX(PROB(1),PROB(2),PROB(3))
DO 303 II=1,3
IF(PROB(II).EQ.T)TLINE(II)=STAR
303 CONTINUE
IPA=IP-I1INM1
X=IPA
CALL LINE(XP,X,YP1,PROB(1),XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1+IMH,MARGT1-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP2,PROB(2),XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2+IMH,MARGT2-2*IMH,ISXMAX,ISYMAX)
CALL LINE(XP,X,YP3,PROB(3),XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3+IMH,MARGT3-2*IMH,ISXMAX,ISYMAX)
YP1=PROB(1)
YP2=PROB(2)
YP3=PROB(3)
IF(TLINE(1).EQ.STAR)CALL POINT(
+ X,YMID,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
IF(TLINE(2).EQ.STAR)CALL POINT(
+ X,YMID,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB2,MARGT2,ISXMAX,ISYMAX)
IF(TLINE(3).EQ.STAR)CALL POINT(
+ X,YMID,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB3,MARGT3,ISXMAX,ISYMAX)
XP=X
END IF
300 CONTINUE
CALL STARTS(SEQ,IDIM1P,J1P,J2P,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL STOPS(SEQ,IDIM1P,J1P,J2P,
+MARGL1,MARGR1,MARGB,MARGT,ISXMAX,ISYMAX,PAA)
CALL VT100M
END
SUBROUTINE PBASES(SEQ,IDIM,LENW,SUM,SCORES,WT)
C AUTHOR: RODGER STADEN
REAL SUM
CHARACTER SEQ(IDIM)
REAL SCORES(5,3),WT(3)
INTEGER CTONUM
EXTERNAL CTONUM
SUM=0.
DO 100 I=1,LENW,3
SUM=SUM+SCORES(CTONUM(SEQ(I)),1)*WT(1)+
+ SCORES(CTONUM(SEQ(I+1)),2)*WT(2)
+ +SCORES(CTONUM(SEQ(I+2)),3)*WT(3)
100 CONTINUE
RETURN
END
SUBROUTINE ROTPB1(SEQ,IDIM1,I,LB,LF,H,SCORES,WT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1)
INTEGER CTONUM
REAL SCORES(5,3),WT(3)
EXTERNAL CTONUM
C set up dummy values
XS=0.
XA=0.
C look back
IPB=I-LB-1
DO 10 J=1,3
IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 30
10 CONTINUE
C set value to subtract
IPB=IPB+1
XS=SCORES(CTONUM(SEQ(IPB)),1)*WT(1)+
+SCORES(CTONUM(SEQ(IPB+1)),2)*WT(2)
++SCORES(CTONUM(SEQ(IPB+2)),3)*WT(3)
C now look forward
30 CONTINUE
IPB=I+LF-1
C look for bad char
DO 45 J=1,3
IF(CTONUM(SEQ(IPB+J)).EQ.5)GO TO 50
45 CONTINUE
C ok so use
IPB=IPB+1
XA=SCORES(CTONUM(SEQ(IPB)),1)*WT(1)+
+SCORES(CTONUM(SEQ(IPB+1)),2)*WT(2)
++SCORES(CTONUM(SEQ(IPB+2)),3)*WT(3)
50 H=H-XS+XA
END
SUBROUTINE PBEXP(SCORES,WT,S)
C AUTHOR: RODGER STADEN
REAL SCORES(5,3),S(3),WT(3)
DO 50 I=1,3
S(I)=0.
50 CONTINUE
DO 60 I=1,4
S(1)=S(1)+SCORES(I,1)*SCORES(I,1)*WT(1)
+ +SCORES(I,2)*SCORES(I,2)*WT(2)
+ +SCORES(I,3)*SCORES(I,3)*WT(3)
S(2)=S(2)+SCORES(I,2)*SCORES(I,1)*WT(1)
+ +SCORES(I,3)*SCORES(I,2)*WT(2)
+ +SCORES(I,1)*SCORES(I,3)*WT(3)
S(3)=S(3)+SCORES(I,3)*SCORES(I,1)*WT(1)
+ +SCORES(I,1)*SCORES(I,2)*WT(2)
+ +SCORES(I,2)*SCORES(I,3)*WT(3)
60 CONTINUE
END
SUBROUTINE TRNA(SEQ,IDIM,ISS,ISE,KSTART,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER CONSC(18)
INTEGER ANSCON,DIALOG
CALL SHOWFU(KBOUT,'tRNA search ')
CALL GETPAR(49,35,IOK,
+MNLEN,MXLEN,MAXLNT,
+MNAC,MXAC,MINACL,
+MNTU,MXTU,MINTU,
+MNAN,MXAN,MINANT,
+MND,MXD,MIND,
+MNI1,MXI1,INT1,
+MNI2,MXI2,JNT2,
+MNTUL1,MXTUL1,LTUMIN,
+MNTUL2,MXTUL2,LTUMAX,
+MNB,MXB,MINBAS,
+MNC,MXC,MINCON,IGON,ANSCON)
IF(IOK.NE.0) RETURN
IF(DIALOG.EQ.1) CALL TRNAD(
+MNLEN,MXLEN,MAXLNT,
+MNAC,MXAC,MINACL,
+MNTU,MXTU,MINTU,
+MNAN,MXAN,MINANT,
+MND,MXD,MIND,
+MNI1,MXI1,INT1,
+MNI2,MXI2,JNT2,
+MNTUL1,MXTUL1,LTUMIN,
+MNTUL2,MXTUL2,LTUMAX,
+MNB,MXB,MINBAS,
+MINCON,IGON,ANSCON,
+CONSC,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL TRNAP(SEQ,IDIM,ISS,ISE,KSTART,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,KBOUT,
+MAXLNT,MINACL,MINTU,MINANT,MIND,INT1,JNT2,
+LTUMIN,LTUMAX,MINBAS,MINCON,IGON,ANSCON,CONSC)
CALL VT100M
END
SUBROUTINE TRNAP(SEQ,IDIM,ISS,ISE,KSTART,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,KBOUT,
+MAXLNT,MINACL,MINTU,MINANT,MIND,INT1,JNT2,
+LTUMIN,LTUMAX,MINBAS,MINCON,IGON,ANSCON,CONSC)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER RAM,RAN,RTU
INTEGER SCORES(5,5)
INTEGER CONSC(18),TOTCON,CTONUM
INTEGER ANSCON
EXTERNAL CTONUM
SAVE SCORES
DATA SCORES/25*0/
SCORES(1,3)=2
SCORES(3,1)=2
SCORES(1,4)=1
SCORES(4,1)=1
SCORES(2,4)=2
SCORES(4,2)=2
CALL CLEARV
CALL BUSY(KBOUT)
IF(IGON.EQ.0)THEN
XMIN=ISS
XMAX=ISE
C ymin= half stems base-paired, ymax= all stems base-paired
YMIN=18.
YMAX=42.
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
C loop for all stem start points
DO 300 N=ISS,ISE
IS=N+60+INT1
IE=N+MAXLNT+JNT2
IF(IE.GT.ISE)IE=ISE
IF(IE.LT.IS)RETURN
C STEM LEFT END IS N,RIGHT END RANGES FROM IS TO IE
C LOOP FOR ALL POSITIONS THIS STEM START POINT
DO 200 I=IS,IE
C SET UP POINTER TO LEFT STEM
LAM=N-1
C SET IANALL TO 0 AS FLAG FOR RE-ENTRY TO ANLOOP
IANALL=0
ISUM=0
C LOOP FOR STEM LENGTH 7
DO 100 J=1,7
C POINTER TO STEM LEFT END
LAM=LAM+1
C RIGHT HAND POINTER (MOVES BACKWARDS)
RAM=I-J
ISUM=ISUM+SCORES(
+ CTONUM(SEQ(LAM)),CTONUM(SEQ(RAM)))
100 CONTINUE
IF(ISUM.LT.MINACL)GO TO 200
C SET TULOOP START POINT
MTU=0
140 CONTINUE
IANALL=0
CALL TULOOP(SEQ,IDIM,RAM,LTUMIN,
+ LTUMAX,MINTU,ISUMTU,SCORES,RTU,LTU,MTU)
IF(ISUMTU.LT.MINTU)GO TO 200
150 CONTINUE
CALL ANLOOP(SEQ,IDIM,MINANT,ISUMAN,N,
+ RTU,SCORES,JNT2,RAN,LAN,IANALL,INT1)
IF(ISUMAN.LT.MINANT)GO TO 140
CALL DLOOP(SEQ,IDIM,ISUMD,LAM,LAN,SCORES)
IF(ISUMD.GE.MIND)THEN
C HIGH ENOUGH OVERALL SCORE?
ITOTBS=ISUM+ISUMTU+ISUMAN+ISUMD
IF(ITOTBS.GE.MINBAS)THEN
C FILTER ON CONSERVED BASES
IF(ANSCON.EQ.0)THEN
CALL CONSCR(SEQ,IDIM,N,
+ LAN,RTU,CONSC,TOTCON)
IF(TOTCON.LT.MINCON)
+ GO TO 150
END IF
ISUMD=ISUMD-MIND
ISUMAN=ISUMAN-MINANT
IF(IGON.EQ.0)THEN
X=N
Y=ITOTBS
CALL LINE(X,X,0.,Y,XMAX,XMIN,
+ YMAX,YMIN,MARGL,MARGR,MARGB,
+ MARGT,ISXMAX,ISYMAX)
END IF
IF(IGON.EQ.1) CALL TRNADR(SEQ,IDIM,I,N,LAN,
+ RAN,LTU,RTU,IDEV,KSTART)
C HAVE ALL POSSIBLE ANTICODONS BEEN TRIED?
END IF
END IF
GO TO 150
200 CONTINUE
300 CONTINUE
END
SUBROUTINE TRNAD(
+MNLEN,MXLEN,MAXLNT,
+MNAC,MXAC,MINACL,
+MNTU,MXTU,MINTU,
+MNAN,MXAN,MINANT,
+MND,MXD,MIND,
+MNI1,MXI1,INT1,
+MNI2,MXI2,JNT2,
+MNTUL1,MXTUL1,LTUMIN,
+MNTUL2,MXTUL2,LTUMAX,
+MNB,MXB,MINBAS,
+MINCON,IGON,ANSCON,
+CONSC,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
INTEGER CONSC(18),ANSCON
CHARACTER HELPF*(*)
IOK = 1
CALL GETINT(MNLEN,MXLEN,MAXLNT,
+'Maximum trna length',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MAXLNT = IVAL
CALL GETINT(MNAC,MXAC,MINACL,
+'Aminoacyl stem score',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINACL = IVAL
CALL GETINT(MNTU,MXTU,MINTU,
+'Tu stem score',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINTU = IVAL
CALL GETINT(MNAN,MXAN,MINANT,
+'Anticodon stem score',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINANT = IVAL
CALL GETINT(MND,MXD,MIND,
+'D stem score',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MIND = IVAL
MNB = MIND + MINTU + MINACL + MINANT
C MXB = MXD + MXTU + MXAC + MNAN
MINBAS = MNB
CALL GETINT(MNB,MXB,MINBAS,
+'Minimum base pairing total',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINBAS = IVAL
CALL GETINT(MNI1,MXI1,INT1,
+'Minimum intron length',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
INT1 = IVAL
MNI2 = INT1
JNT2 = INT1
C IF(INT1.GT.0) THEN
CALL GETINT(MNI2,MXI2,JNT2,
+ 'Maximum intron length',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
JNT2 = IVAL
C END IF
CALL GETINT(MNTUL1,MXTUL1,LTUMIN,
+'Minimum length for TU loop',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
LTUMIN = IVAL
MNTUL2 = LTUMIN
CALL GETINT(MNTUL2,MXTUL2,LTUMAX,
+'Maximum length for TU loop',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
LTUMAX = IVAL
C conserved bases
CALL YESONO(ANSCON,
+'Search for conserved bases',
+'Skip search for conserved bases',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANSCON.LT.0) THEN
IOK = 1
RETURN
END IF
IOK = 1
IF(ANSCON.EQ.0)CALL CONSET(CONSC,MINCON,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(MINCON.LT.0)RETURN
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
IOK = 0
END
C
C conset
SUBROUTINE CONSET(CONSC,MINCON,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
INTEGER CONSC(18),NUM(18),CONSER(18)
CHARACTER CONBAS(18)
CHARACTER STRING*35
SAVE NUM,CONSER,CONBAS
DATA NUM/8,10,11,14,15,21,32,33,37,48,53,54,55,56,57,
+58,60,61/
DATA CONSER/100,95,96,100,100,97,100,98,91,100,100,
+95,97,100,100,100,92,100/
DATA CONBAS/'T','G','Y','A','R','A','Y','T','A','Y',
+'G','T','T','C','R','A','Y','C'/
C SET FLAG FOR RETURN
MINCON=-9
ITOT = 0
MN = 0
MX = 100
MNT = 0
WRITE(KBOUT,1002)
1002 FORMAT(' Give a score for each base, then a minimum',
+' total at the end')
DO 10 J=1,18
STRING = ' '
WRITE(STRING,1000)NUM(J),CONBAS(J),CONSER(J)
CALL GETINT(MN,MX,MN,
+ STRING,
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CONSC(J) = IVAL
1000 FORMAT( 'Base ',I2,', ',A1,' is ',I3,'% conserved.',
+ ' Score')
ITOT = ITOT + CONSC(J)
10 CONTINUE
CALL GETINT(MNT,ITOT,MNT,
+'Minimum total conserved base score',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINCON = IVAL
END
SUBROUTINE CONSCR(SEQ,IDIM,N,LAN,RTU,CONSC,TOTCON)
C AUTHOR: RODGER STADEN
C bug fix 6-8-92: previously did not allow for case!
CHARACTER SEQ(IDIM)
INTEGER CONSC(18),NUMS(18),TOTCON,RTU,B,CHAR1(18),CHAR2(18)
INTEGER CTONUM
EXTERNAL CTONUM
SAVE CHAR1,CHAR2,NUMS
DATA CHAR1/1,4,2,3,3,3,2,1,3,2,4,1,1,2,3,3,2,2/
DATA CHAR2/1,4,1,3,4,3,1,1,3,1,4,1,1,2,4,3,1,2/
DATA NUMS/7,9,10,13,14,-10,1,2,6,-5,0,1,2,3,4,5,7,8/
C DATA CHAR1/'T','G','C','A','A','A','C','T','A','C','G',
C + 'T','T','C','A','A','C','C'/
C DATA CHAR2/'T','G','T','A','G','A','T','T','A','T','G',
C + 'T','T','C','G','A','T','C'/
C
C test those from left end
TOTCON=0
DO 10 J=1,5
K=N+NUMS(J)
B = CTONUM(SEQ(K))
IF((B.EQ.CHAR1(J)).OR.(B.EQ.CHAR2(J)))
+ TOTCON=TOTCON+CONSC(J)
10 CONTINUE
C those from anticodon
DO 20 J=6,9
K=LAN+NUMS(J)
B = CTONUM(SEQ(K))
IF((B.EQ.CHAR1(J)).OR.(B.EQ.CHAR2(J)))
+ TOTCON=TOTCON+CONSC(J)
20 CONTINUE
C THOSE FROM TU LOOP
DO 30 J=10,18
K=RTU+NUMS(J)
B = CTONUM(SEQ(K))
IF((B.EQ.CHAR1(J)).OR.(B.EQ.CHAR2(J)))
+ TOTCON=TOTCON+CONSC(J)
30 CONTINUE
END
SUBROUTINE ANLOOP(SEQ,IDIM,MINANT,ISUMAN,N,RTU,SCORES,JNT2,
+RAN,LAN,IANALL,INT1)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER SCORES(5,5),RTU,RAN,CTONUM
EXTERNAL CTONUM
C NEED TO TEST FLAG FOR MULTIPLE ENTRY TO THIS SUBROUTINE
C WHEN SEARCHING FOR BEST ANTICODON. IANALL =0 MEANS NO
C POSITIONS TRIED FOR THIS START, IANALL=1 MEANS SOME TRIED
C MIN DIST FROM N TO FIRST BASE IS 6+2+3+4+3+1=19
C MAX IS TUSTEM-3=RTU-8
C ALLOWING 3 IN VARIABLE LOOP
IAS=N+19
C allow 2 in variable loop to be sure
IEND=RTU-7
C restrict max d loop size to 17
IAE=N+36
IF(IAE.GT.IEND)IAE=IEND
C LOOP FOR ALL LEFT STEM STARTS
C NN IS EFFECTIVELY THE LEFT STEM START
C RIGHT STEM RANGE FROM NEAREST TO NEAREST + INTRON
C IE NN+16 TO NN+16+INTRON
C IAS TO IAE DEFINES THE REGION TO THRU TRYING ALL START
C POSITIONS AND ALL INTRON LENGTHS
C TEST FLAG FOR MULTIPLE ENTRY. IF SET START SEARCHING 1 ON FROM
C LAST ANTICODON
IF(IANALL.EQ.1)IAS=LAN-3
C reset flag
IANALL=1
ISUMAN=0
IF(IAE.LT.IAS)RETURN
DO 750 NN=IAS,IAE
IIS=NN+16+INT1
IIE=NN+16+JNT2
IF(IIE.GT.IEND)IIE=IEND
IF(IIE.LT.IIS)RETURN
C
C try all positions for the right stem from this left stem start
C
DO 740 I=IIS,IIE
C
C will only try one position if intron=0
C or intron max=intron min
LAN=NN-1
ISUMAN=0
RAN=I+1
C
C loop for stem
C
DO 730 J=1,5
LAN=LAN+1
RAN=RAN-1
ISUMAN=ISUMAN+SCORES
+ (CTONUM(SEQ(RAN)),CTONUM(SEQ(LAN)))
730 CONTINUE
C score high enough?
IF(ISUMAN.GE.MINANT)RETURN
740 CONTINUE
750 CONTINUE
END
C
C dloop
SUBROUTINE DLOOP(SEQ,IDIM,ISUMD,LAM,LAN,SCORES)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER SCORES(5,5),RD,CTONUM
EXTERNAL CTONUM
C first base left end is lam+3
C first base right end is lan-6
C
LD=LAM+2
RD=LAN-5
ISUMD=0
C
DO 10 I=1,4
LD=LD+1
RD=RD-1
ISUMD=ISUMD+SCORES(CTONUM(SEQ(LD)),CTONUM(SEQ(RD)))
10 CONTINUE
END
C trnadr
SUBROUTINE TRNADR(SEQ,IDIM,I,N,LAN,RAN,LTU,RTU,IDEV,KSTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
CHARACTER BLANK
CHARACTER ARRAY(30,30)
INTEGER RAN,RTU
CHARACTER SCORES(5,5),SCOREZ(5,5)
INTEGER CTONUM
EXTERNAL CTONUM
SAVE SCORES,SCOREZ,BLANK
DATA BLANK/' '/
DATA SCORES/25*' '/,SCOREZ/25*' '/
SCORES(1,3)='-'
SCORES(3,1)='-'
SCORES(2,4)='-'
SCORES(4,2)='-'
SCORES(1,4)='+'
SCORES(4,1)='+'
SCOREZ(1,3)='!'
SCOREZ(3,1)='!'
SCOREZ(2,4)='!'
SCOREZ(4,2)='!'
SCOREZ(1,4)='+'
SCOREZ(4,1)='+'
C
C blank arrays
DO 10 J=1,30
DO 10 K=1,30
ARRAY(K,J)=BLANK
10 CONTINUE
C aminoacyl
ARRAY(16,5)=(SEQ(I))
J=I-1
K=N
DO 20 L=6,12
ARRAY(14,L)=(SEQ(K))
ARRAY(16,L)=(SEQ(J))
ARRAY(15,L)=SCORES(CTONUM(SEQ(K)),CTONUM(SEQ(J)))
K=K+1
J=J-1
20 CONTINUE
C
C gap between aminoacyl d stem
ARRAY(13,13)=(SEQ(N+7))
ARRAY(12,14)=(SEQ(N+8))
C
C
C anticodon
K=LAN-4
J=RAN+4
DO 30 L=19,23
ARRAY(13,L)=(SEQ(K))
ARRAY(15,L)=(SEQ(J))
ARRAY(14,L)=SCORES(CTONUM(SEQ(K)),CTONUM(SEQ(J)))
K=K+1
J=J-1
30 CONTINUE
C
C anticodon loop
ARRAY(12,24)=(SEQ(LAN+1))
ARRAY(12,25)=(SEQ(LAN+2))
ARRAY(16,25)=(SEQ(LAN+6))
ARRAY(16,24)=(SEQ(RAN-1))
K=LAN+3
DO 35 L=13,15
ARRAY(L,26)=(SEQ(K))
K=K+1
35 CONTINUE
C
C gap between dstem and anticodon
ARRAY(12,18)=(SEQ(LAN-5))
C
C tustem
K=LTU+4
L=RTU-4
DO 40 J=17,21
ARRAY(J,13)=(SEQ(K))
ARRAY(J,15)=(SEQ(L))
ARRAY(J,14)=SCOREZ(CTONUM(SEQ(K)),CTONUM(SEQ(L)))
K=K-1
L=L+1
40 CONTINUE
C tu loop
C length of loop
LNGTH=K-L+1
IF(LNGTH.GT.18)RETURN
C return if loop length lt 3
IF(LNGTH.LT.3)RETURN
C number in outer
LU=(LNGTH-1)/2
M=22
DO 45 J=1,LU
ARRAY(M,12)=(SEQ(K))
ARRAY(M,16)=(SEQ(L))
K=K-1
L=L+1
M=M+1
45 CONTINUE
C
ARRAY(M,15)=(SEQ(L))
L=L+1
IF(MOD(LNGTH,2).EQ.0)ARRAY(M,14)=(SEQ(L))
C
C
46 CONTINUE
C dstem
K=N+9
L=LAN-6
LNGTH=L-K+1
J2=4
M=11
IF(LNGTH.GT.10)GO TO 47
C need stem of 3 not 4
M=10
J2=3
47 CONTINUE
DO 50 J=1,J2
ARRAY(M,15)=(SEQ(K))
ARRAY(M,17)=(SEQ(L))
ARRAY(M,16)=SCOREZ(CTONUM(SEQ(K)),CTONUM(SEQ(L)))
K=K+1
L=L-1
M=M-1
50 CONTINUE
C
C dloop
C length of loop
LNGTH=L-K+1
IF(LNGTH.GT.14)RETURN
C if loop length lt 3 return
IF(LNGTH.LT.3)RETURN
C number in outer
LU=(LNGTH-1)/2
DO 80 J=1,LU
ARRAY(M,14)=(SEQ(K))
ARRAY(M,18)=(SEQ(L))
K=K+1
L=L-1
M=M-1
80 CONTINUE
ARRAY(M,17)=(SEQ(L))
L=L-1
IF(MOD(LNGTH,2).EQ.0)ARRAY(M,16)=(SEQ(L))
C
C
85 CONTINUE
C variable loop
LNGTH=(RTU-5)-(RAN+5)+1
IF(LNGTH.GT.26)RETURN
IF(LNGTH.LT.3)RETURN
C number of elements in lower diagonal
LL=(LNGTH/2)-1
C correction made in june 81 for lngthg=3
IF(LL.EQ.0)LL=1
C lsi must go thru loops even with 0!
C number in upper
LU=(LNGTH+1)/2
C do lower
K=16
L=19
M=RAN+5
DO 60 J=1,LL
ARRAY(K,L)=(SEQ(M))
K=K+1
L=L+1
M=M+1
60 CONTINUE
C check for silly loop size
IF(LNGTH.LT.3)RETURN
C put in extra base
K=K+1
L=L-1
IF(LNGTH.GT.3)ARRAY(K,L)=(SEQ(M))
C do upper
K=17
L=16
M=RTU-5
DO 70 J=1,LU
ARRAY(K,L)=(SEQ(M))
K=K+1
L=L+1
M=M-1
70 CONTINUE
C
C output
WRITE(IDEV,1002)N+KSTART-1
1002 FORMAT(/5X,I6)
DO 200 M=1,30
DO 100 J=1,30
IF(ARRAY(J,M).EQ.BLANK)GO TO 100
WRITE(IDEV,1000)(ARRAY(L,M),L=1,30)
GO TO 200
100 CONTINUE
200 CONTINUE
1000 FORMAT(5X,30A)
END
SUBROUTINE TULOOP(SEQ,IDIM,RAM,LTUMIN,LTUMAX,MINTU,ISUMTU,
+SCORES,RTU,LTU,MTU)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER SCORES(5,5)
INTEGER RAM,RTU,RAMM2,CTONUM
EXTERNAL CTONUM
ISUMTU=0
C loop must start at ram-1
C vary loop from ltumin to ltumax
RAMM2=RAM-2
M=LTUMIN+9
N=LTUMAX+9
C is this first pass thru this aminoacyl?
IF(MTU.GT.0)M=MTU+1
C all done?
IF(M.GT.N)RETURN
C
DO 650 I=M,N
ISUMTU=0
RTU=RAMM2-I
DO 640 J=1,5
LTU=RAM-J
RTU=RTU+1
ISUMTU=ISUMTU+SCORES(
+ CTONUM(SEQ(RTU)),CTONUM(SEQ(LTU)))
640 CONTINUE
MTU=I
C score high enough?
IF(ISUMTU.GE.MINTU)RETURN
650 CONTINUE
END
SUBROUTINE CODMPP(SEQ,IDIM,FTABLE,IDFTAB,
+KBOUT,J1,J2,I1,I2,IDIM1,IDEV)
C AUTHOR: RODGER STADEN
REAL RNFAC,FTABLE(IDFTAB),FACTOR,FACN
INTEGER NBASE(4),NCOD(4,4,4)
EXTERNAL FACTOR
CHARACTER SEQ(IDIM)
INTEGER NCODON(21),IACID(4,4,4)
REAL FABC(4,4,4),FA(21)
C array with number of codons per acid
SAVE IACID,NCODON
DATA IACID/
+1,2,3,5,6,7,8,9,
+10,12,14,16,18,20,6,21,
+1,2,3,5,6,7,8,9,
+10,12,14,16,18,20,6,21,
+2,2,3,5,6,7,8,9,
+11,13,15,17,11,20,20,21,
+2,2,4,5,6,7,8,9,
+11,13,15,17,19,20,20,21/
DATA NCODON/
+2,6,3,1,4,6,4,4,4,2,3,2,2,2,2,2,2,2,1,6,4/
LB=150
LF=147
IF(IDIM1.GT.310)THEN
CALL BUSY(KBOUT)
FACN=FACTOR(99,FTABLE,IDFTAB)
SUMY=0.
C do calc
CALL ROT2(SEQ(I1-J1+1),IDIM1,NCOD,RNFAC,FTABLE,IDFTAB,
+ NBASE,297,FA,FABC,NCODON,IACID)
COMP=0.
DO 90 J=I1+LB-J1+1,I2-LF-2-J1+1,3
CALL ROTFAK(SEQ,IDIM,J,LB,LF,
+ NCOD,RNFAC,FTABLE,IDFTAB,
+ NBASE,COMP,297,99.,
+ FA,FABC,NCODON,IACID)
W=RNFAC-COMP-FACN
CALL POISON(99,FABC,FABC,EW,EWD,2)
Y=(W-EW)/EWD
SUMY=SUMY+Y
90 CONTINUE
SUMY=SUMY/((I2-LF-2-(I1+LB))/3.)
WRITE(IDEV,5555)SUMY
5555 FORMAT(' Mean (W-EW)/EWD, window 99 ',F10.1)
END IF
END
SUBROUTINE PREPT1(SEQ,POSNS,IDIM,J1IN,J2IN,KSTART,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IDEV,DIALOG,
+WORDP,WORDN,MAXWRD,MAXDIC)
INTEGER WORDP(MAXDIC),WORDN(MAXDIC)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
INTEGER POSNS(IDIM),DIALOG
CHARACTER SEQ(IDIM)
INTEGER R
CALL SHOWFU(KBOUT,'Plot repeats')
CALL GETPAR(32,4,IOK,MINRP,MAXRP,MINREP,IGON,
+IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
L = J1IN
R = J2IN
IF(DIALOG.EQ.1) THEN
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
CALL GTREG(KBIN,KBOUT,J1IN,J2IN,L,R,
+'Define restricted region',
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL GETINT(MINRP,MAXRP,MINREP,
+'Minimum repeat',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINREP = IVAL
END IF
IDIM1=R-L+1
IDIM2=J2IN-J1IN+1
L1=L-J1IN+1
CALL BUSY(KBOUT)
CALL REPEAT(SEQ(L-KSTART+1),
+POSNS,IDIM1,MINREP,L1,J1IN,IDIM2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON,
+WORDP,WORDN,MAXWRD,MAXDIC)
CALL VT100M
END
C REPEAT
SUBROUTINE REPEAT(SEQ,POSNS,IDIM,MINR,L,KSTART,IDIM1,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON,
+WORDP,WORDN,MAXWRD,MAXDIC)
INTEGER WORDP(MAXDIC),WORDN(MAXDIC)
C AUTHOR: RODGER STADEN
INTEGER POSNS(IDIM),CONST(6)
CHARACTER SEQ(IDIM)
LENGTH=MAXWRD
LE4=MAXDIC
CALL ENCO(SEQ,IDIM,POSNS,CONST,LENGTH)
CALL ENCONA(POSNS,IDIM,WORDP,WORDN,LE4,LENGTH)
CALL REPT1(SEQ,IDIM,IDIM1,L,KSTART,
+POSNS,WORDN,WORDP,LENGTH,LE4,MINR,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON)
END
SUBROUTINE REPT1(SEQ,IDIM,IDIM1,LEFT,KSTART,
+POSNS,WORDN,WORDP,LENGTH,LE4,MINR,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON)
C AUTHOR: RODGER STADEN
INTEGER POSNS(IDIM),WORDP(LE4)
INTEGER WORDN(LE4)
CHARACTER SEQ(IDIM)
C POSNS CONTAIN THE WORD NUMBERS ENCODED BY 4**LENGTH
C WORDN CONTAINS THE NUMBER OF OCCURENCES OF EACH POSSIBLE WORD
C WORDP CONTAINS THE POINTERS TO THE FIRST OCCURRENCE OF THE WORDS
C ENCODED--- EG POINTER TO FIRST OCCURRENCE OF THE WORD ENCODED AS 101
C WILL BE STORED IN ELEMENT 101
IF(IGON.EQ.0) CALL VECTOM
XMIN=1.
XMAX=IDIM1
YMIN=0.
YMAX=1.
X1=LEFT
Y1=0.
YINC=1./IDIM1
C LOOK FOR MATCHES BY THOSE ELEMENTS OF WORDN>1
DO 100 I=1,LE4
IF(WORDN(I).GT.1)THEN
C POINT TO FIRST POSITION AS ITS IN WORDP
IP=WORDP(I)
C A MATCH, TRY ALL PAIRS
DO 50 J=1,WORDN(I)
C COMPARE THIS POSITION WITH ALL OTHERS
IPS=IP
DO 40 K=J+1,WORDN(I)
C FIND LENGTH OF MATCH
L=LENGTH
IP1=IP+LENGTH
IPS=POSNS(IPS)
IP2=IPS+LENGTH
20 CONTINUE
IF((IP1.LE.IDIM).AND.(IP2.LE.IDIM))THEN
IF(SEQ(IP1).EQ.SEQ(IP2))THEN
IP1=IP1+1
IP2=IP2+1
L=L+1
GO TO 20
END IF
END IF
C END OF MATCH, IS IT LONG ENOUGH?
IF(L.GE.MINR)THEN
X1=LEFT+IP-1
X2=LEFT+IPS-1
Y2=Y1+YINC*ABS(X2-X1)
IF(IGON.EQ.0)THEN
CALL LINE(X1,X1,Y1,Y2,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,
+ MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(X1,X2,Y2,Y2,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,
+ MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(X2,X2,Y2,Y1,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,
+ MARGB,MARGT,ISXMAX,ISYMAX)
END IF
IF(IGON.EQ.1)THEN
INTX1=X1+KSTART-1
INTX2=X2+KSTART-1
WRITE(IDEV,1000)INTX1,INTX2,L,(SEQ(KK),KK=IP,IP+L-1)
END IF
END IF
1000 FORMAT(' ',I6,2X,I6,2X,I6,(' ',50A1))
40 CONTINUE
IP=POSNS(IP)
50 CONTINUE
END IF
100 CONTINUE
END
SUBROUTINE PREPT2(SEQ,POSNS,IDIM,J1IN,J2IN,KSTART,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IDEV,DIALOG,
+WORDP,WORDN,MAXWRD,MAXDIC)
INTEGER WORDP(MAXDIC),WORDN(MAXDIC)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
INTEGER POSNS(IDIM),DIALOG
CHARACTER SEQ(IDIM)
INTEGER R
CALL SHOWFU(KBOUT,'Plot long-range inverted repeats')
CALL GETPAR(31,4,IOK,MINRP,MAXRP,MINREP,IGON,
+IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
L = J1IN
R = J2IN
IF(DIALOG.EQ.1) THEN
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
CALL GTREG(KBIN,KBOUT,J1IN,J2IN,L,R,
+'Define restricted region',
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL GETINT(MINRP,MAXRP,MINREP,
+'Minimum inverted repeat',
+IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINREP = IVAL
END IF
IDIM1=R-L+1
IDIM2=J2IN-J1IN+1
L1=L-J1IN+1
CALL BUSY(KBOUT)
CALL REPT2(SEQ(L-KSTART+1),POSNS,IDIM1,MINREP,L1,J1IN,IDIM2,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON,
+WORDP,WORDN,MAXWRD,MAXDIC)
CALL VT100M
END
C REPT2
SUBROUTINE REPT2(SEQ,POSNS,IDIM,MINR,L,KSTART,IDIM1,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON,
+WORDP,WORDN,MAXWRD,MAXDIC)
INTEGER WORDP(MAXDIC),WORDN(MAXDIC)
C AUTHOR: RODGER STADEN
INTEGER POSNS(IDIM),CONST(6)
CHARACTER SEQ(IDIM)
LENGTH=MAXWRD
LE4=MAXDIC
CALL ENCO(SEQ,IDIM,POSNS,CONST,LENGTH)
CALL ENCONA(POSNS,IDIM,WORDP,WORDN,LE4,LENGTH)
CALL REPT3(SEQ,IDIM,IDIM1,L,KSTART,
+POSNS,WORDN,WORDP,LENGTH,LE4,MINR,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON)
END
SUBROUTINE REPT3(SEQ,IDIM,IDIM1,LEFT,KSTART,
+POSNS,WORDN,WORDP,LENGTH,LE4,MINR,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IDEV,IGON)
C AUTHOR: RODGER STADEN
INTEGER POSNS(IDIM),WORDP(LE4)
INTEGER WORDN(LE4),CONST(6)
CHARACTER SEQ(IDIM),SCOMP
EXTERNAL SCOMP
C POSNS CONTAIN THE WORD NUMBERS ENCODED BY 4**LENGTH
C WORDN CONTAINS THE NUMBER OF OCCURENCES OF EACH POSSIBLE WORD
C WORDP CONTAINS THE POINTERS TO THE FIRST OCCURRENCE OF THE WORDS
C ENCODED--- EG POINTER TO FIRST OCCURRENCE OF THE WORD ENCODED AS 101
C WILL BE STORED IN ELEMENT 101
IF(IGON.EQ.0) CALL VECTOM
XMIN=1.
XMAX=IDIM1
YMIN=0.
YMAX=1.
X1=LEFT
Y1=0.
YINC=1./IDIM1
C SET UP CONST FOR ROUTINE THAT LOOKS FOR COMPLEMENTARY WORD POINTERS
ISTART=1
DO 1 I=1,LENGTH
CONST(I)=4**(I-1)
ISTART=ISTART-CONST(I)
1 CONTINUE
C
C LOOK FOR MATCHES BY THOSE ELEMENTS OF WORDN>1
DO 100 I=1,LE4
IF(WORDN(I).GE.1)THEN
C POINT TO FIRST POSITION AS ITS IN WORDP
IP=WORDP(I)
C DOES ITS COMPLEMENT EXIST?
JCOMPN=ICOMPN(SEQ,IDIM,IP,CONST,LENGTH,ISTART)
NCOMP=WORDN(JCOMPN)
IF(NCOMP.NE.0)THEN
C YES, NEED TO COMPARE ALL PAIRS IE COMPARE WORDN(I) POSITIONS
C WITH NCOMP POSITIONS. POINT TO FIRST OCCURRENCE
IPN=WORDP(JCOMPN)
C A MATCH, TRY ALL PAIRS
DO 50 J=1,WORDN(I)
C COMPARE THIS POSITION WITH ALL OTHERS
IPN1=IPN
DO 40 K=1,NCOMP
C FIND LENGTH OF MATCH
L=LENGTH
IP1=IP+LENGTH
IP2=IPN1-1
20 CONTINUE
IF((IP1.LE.IDIM).AND.(IP2.GT.1))THEN
IF(SEQ(IP1).EQ.SCOMP(SEQ(IP2)))THEN
IP1=IP1+1
IP2=IP2-1
L=L+1
GO TO 20
END IF
END IF
IF(L.GE.MINR)THEN
X1=LEFT+IP-1
X2 = LEFT + IP2
C X2=LEFT+IPN1-3
Y2=Y1+YINC*ABS(X2-X1)
IF(IGON.EQ.0)THEN
CALL LINE(X1,X1,Y1,Y2,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,
+ MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(X1,X2,Y2,Y2,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,
+ MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(X2,X2,Y2,Y1,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,
+ MARGB,MARGT,ISXMAX,ISYMAX)
END IF
IF(IGON.EQ.1)THEN
INTX1=X1+KSTART-1
INTX2=X2+KSTART-1
IF(IP.EQ.IPN1) INTX2 = INTX1
WRITE(IDEV,1000)INTX1,INTX2,L,(SEQ(KK),KK=IP,IP+L-1)
END IF
END IF
IPN1=POSNS(IPN1)
1000 FORMAT(' ',I6,2X,I6,2X,I6,(' ',50A1))
40 CONTINUE
C POINT TO NEXT WORD
IP=POSNS(IP)
50 CONTINUE
WORDN(JCOMPN)=0
END IF
C SET THIS WORD TO ZERO SO WE DONT USE IT AGAIN
WORDN(I)=0
END IF
100 CONTINUE
END
INTEGER FUNCTION ICOMPN(SEQ,IDIM,IP,CONST,LENGTH,ISTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER CONST(LENGTH),CTONUM
EXTERNAL CTONUM,ICOMP
N=ISTART
L=IP+LENGTH
K=0
DO 1 J=1,LENGTH
K=K+1
L=L-1
N=N+CONST(K)*ICOMP(CTONUM(SEQ(L)))
1 CONTINUE
ICOMPN=N
END
SUBROUTINE DISIG(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEV,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IDEVOT,
+SUM,WT,TOT,IDM,MAXLEN,LINE,CHRSET,DIALOG)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILNAM*(*),HELPF*(*)
CHARACTER LINE(MAXLEN),CHRSET(IDM)
INTEGER SUM(IDM,IDM,MAXLEN),TOT(MAXLEN),DIALOG
REAL WT(IDM,IDM,MAXLEN)
CALL SHOWFU(KBOUT,
+'Motif search using dinucleotide weight matrix')
CALL GETPAR(60,1,IOK,IGON,
+IPAR2,IPAR3,IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL DISIGD(IDEV,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+SUM,IDM,MAXLEN,TOT,WT,LENGTH,MIDDLE,YMIN,YMAX,
+IGON,IDEVOT,LINE,CHRSET,DIALOG,IOK)
IF(IOK.NE.0) RETURN
CALL DISIGP(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WT,IDM,MAXLEN,LENGTH,
+YMIN,YMAX,IGON,MIDDLE)
END
SUBROUTINE DISIGD(IDEV,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+SUM,IDM,MAXLEN,TOT,WT,LENGTH,MIDDLE,YMIN,YMAX,
+IGON,IDEVOT,LINE,CHRSET,DIALOG,IOK)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),HELPF*(*),LINE(MAXLEN),CHRSET(IDM)
INTEGER SUM(IDM,IDM,MAXLEN),TOT(MAXLEN),DIALOG
REAL WT(IDM,IDM,MAXLEN)
PARAMETER (MAXPRM = 21)
CHARACTER PROMPT(3)*(MAXPRM)
IOK = 1
IDO = 1
PROMPT(1) = 'Use weight matrix'
PROMPT(2) = 'Make weight matrix'
PROMPT(3) = 'Rescale weight matrix'
CALL RADION('Select operation',PROMPT,3,IDO,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IDO.LT.1) RETURN
IF(IDO.GT.1) THEN
CALL MKWTD(WT,SUM,TOT,CHRSET,IDM,MAXLEN,
+ IDEV,IDEVOT,KBIN,KBOUT,LINE,
+ FILNAM,IDO,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IOK = 1
RETURN
END IF
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+'Motif weight matrix file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
LENGTH=MAXLEN
CALL RDWMTD(TOT,SUM,MIDDLE,LENGTH,MAXLEN,YMIN,YMAX,IDEV,
+ IOK,IDM,KBOUT)
IF(IOK.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
IF(DIALOG.EQ.1) THEN
AMN = -9999.
AMX = 9999.
CALL GETRL(AMN,AMX,YMIN,'Cutoff score',VAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMIN = VAL
END IF
IF(YMIN.LT.0.0)CALL GETWD(TOT,SUM,WT,LENGTH,IDM,MAXLEN)
IF(YMIN.GE.0.0)CALL GETW2D(SUM,WT,LENGTH,IDM,MAXLEN)
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
IOK = 0
END
SUBROUTINE GETCD(TOT,SUM,LINE,IDM,MAXLEN,IDEV,KBOUT,LENGTH,
+IOK)
INTEGER TOT(MAXLEN),SUM(IDM,IDM,MAXLEN)
CHARACTER LINE(MAXLEN)
INTEGER CTONUM
EXTERNAL CTONUM
IOK = 1
DO 2 I=1,MAXLEN
TOT(I)=0
DO 1 J=1,IDM
DO 1 K=1,IDM
SUM(K,J,I)=0
1 CONTINUE
2 CONTINUE
N=0
10 CONTINUE
1003 FORMAT(1X,120A1)
1004 FORMAT(' ',I6,' ',120A1)
READ(IDEV,1003,END=100)LINE
N=N+1
WRITE(KBOUT,1004)N,LINE
DO 20 I=1,MAXLEN
IF(LINE(I).EQ.' ')GO TO 10
J = CTONUM(LINE(I+1))
K = CTONUM(LINE(I))
SUM(K,J,I) = SUM(K,J,I) + 1
20 CONTINUE
GO TO 10
100 CONTINUE
IF(N.EQ.0)THEN
CALL ERROM(KBOUT,'Empty file of aligned sequences')
RETURN
END IF
C NOW FIND LENGTH OF MOTIF
DO 40 I=1,MAXLEN
K=0
L=I
DO 30 J=1,IDM
DO 30 M=1,IDM
K=K+SUM(M,J,I)
30 CONTINUE
IF(K.EQ.0)GO TO 50
TOT(I)=TOT(I)+K
40 CONTINUE
50 CONTINUE
LENGTH=L-2
IOK = 0
END
SUBROUTINE GETWD(TOT,SUM,FREQ,LENGTH,MAXCHR,MAXLEN)
INTEGER TOT(LENGTH),SUM(MAXCHR,MAXCHR,MAXLEN)
REAL FREQ(MAXCHR,MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
DO 60 J=1,MAXCHR
DO 60 K=1,MAXCHR
FREQ(K,J,I)=LOG((REAL(SUM(K,J,I)+1)/REAL(TOT(I)+MAXCHR)))
60 CONTINUE
70 CONTINUE
END
SUBROUTINE GETW2D(SUM,FREQ,LENGTH,MAXCHR,MAXLEN)
INTEGER SUM(MAXCHR,MAXCHR,MAXLEN)
REAL FREQ(MAXCHR,MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
DO 60 J=1,MAXCHR
DO 60 K=1,MAXCHR
FREQ(K,J,I)=REAL(SUM(K,J,I))
60 CONTINUE
70 CONTINUE
END
SUBROUTINE MKWTD(FREQ,SUM,TOT,CHRSET,IDM,MAXLEN,
+IDEV2,IDEV3,KBIN,KBOUT,LINE,
+FILNAM,IOPT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR RODGER STADEN
CHARACTER FILNAM*(*),HELPF*(*)
CHARACTER LINE(MAXLEN),TITLE*60,CHRSET(IDM)
INTEGER SUM(IDM,IDM,MAXLEN),TOT(MAXLEN)
REAL FREQ(IDM,IDM,MAXLEN)
IOK = 1
IF(IOPT.EQ.3)THEN
FILNAM = ' '
CALL OPENF1(IDEV2,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Name of existing weight matrix file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL RDWMTD(TOT,SUM,MIDDLE,LENGTH,MAXLEN,
+ CUTMIN,CUTMAX,IDEV2,
+ IOK,IDM,KBOUT)
IF(IOK.NE.0) RETURN
END IF
FILNAM = ' '
CALL OPENF1(IDEV2,FILNAM,0,IOK,KBIN,KBOUT,
+'Name of aligned sequences file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
IF(IOPT.EQ.2) THEN
CALL GETCD(TOT,SUM,LINE,IDM,MAXLEN,IDEV2,
+ KBOUT,LENGTH,IOK)
IF(IOK.NE.0)RETURN
END IF
WRITE(KBOUT,1006)LENGTH
1006 FORMAT(' Length of motif',I6)
IOK = 1
CALL YESNO(IOPT,'Sum logs of weights',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.0) RETURN
CUTMIN = -10.0
IF(IOPT.EQ.1)CUTMIN = 10.
CALL MASKWD(SUM,LENGTH,IDM,MAXLEN,KBIN,KBOUT,TITLE,
+IOPT,IHELPS,IHELPE,HELPF,IDEVH)
IF(IOPT.LT.0) RETURN
C NOW CALC WEIGHTS
IF(CUTMIN.LT.0.0)CALL GETWD(TOT,SUM,FREQ,LENGTH,IDM,MAXLEN)
IF(CUTMIN.GE.0.0)CALL GETW2D(SUM,FREQ,LENGTH,IDM,MAXLEN)
C NOW APPLY THE WEIGHTS
REWIND IDEV2
CALL APPLWD(FREQ,IDM,LENGTH,IDEV2,IDEV3,KBIN,KBOUT,LINE,MAXLEN,
+BOT,TOP,TITLE,MIDDLE,IOK)
CLOSE(UNIT=IDEV2)
IF(IOK.NE.0) RETURN
FILNAM = ' '
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+'Name for new weight matrix file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL WRTSCD(TITLE,LENGTH,MIDDLE,BOT,TOP,IDM,
+TOT,SUM,CHRSET,IDEV2,MAXLEN)
CLOSE(UNIT=IDEV2)
RETURN
END
SUBROUTINE MASKWD(SUM,LENGTH,IDM,MAXLEN,KBIN,KBOUT,MASK,
+IOPT,IHELPS,IHELPE,HELPF,IDEVH)
INTEGER SUM(IDM,IDM,MAXLEN)
CHARACTER MASK*(*),HELPF*(*)
CALL YESNO(IOPT,'Use all motif positions',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.0) RETURN
5 CONTINUE
IF(IOPT.EQ.1)THEN
WRITE(KBOUT,1002)
1002 FORMAT(' x means use, - means ignore',/,
+ ' e.g. xx-x---x-x means use positions 1,2,4,8,10')
LIN = 0
CALL GTSTR('Mask',' ',MASK,LIN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 5
END IF
IF(INFLAG.EQ.2) RETURN
IF(LIN.EQ.0) RETURN
DO 70 I=1,LENGTH
IF(MASK(I:I).EQ.'-')THEN
DO 60 J=1,IDM
DO 60 K=1,IDM
SUM(K,J,I) = 0
60 CONTINUE
END IF
70 CONTINUE
END IF
END
SUBROUTINE APPLWD(FREQ,IDM,LENGTH,IDEV,IDEV3,KBIN,KBOUT,
+LINE,MAXLEN,BOT,TOP,TITLE,MIDDLE,IOK)
REAL FREQ(IDM,IDM,MAXLEN)
CHARACTER LINE(MAXLEN),TITLE*(*)
INTEGER CTONUM
EXTERNAL CTONUM
IOK = 1
N=0
TOP=-99999.
BOT=9999999.
WRITE(KBOUT,*)' Applying weights to input sequences'
SMEAN = 0.
SUMSQ = 0.
1003 FORMAT(1X,120A1)
1004 FORMAT(' ',I4,' ',F12.3,' ',120A1)
200 CONTINUE
READ(IDEV,1003,END=300)LINE
N=N+1
SCORE=0.
DO 210 I=1,LENGTH
J = CTONUM(LINE(I+1))
K = CTONUM(LINE(I))
SCORE=SCORE+FREQ(K,J,I)
210 CONTINUE
WRITE(IDEV3,1004)N,SCORE,(LINE(K),K=1,LENGTH)
IF(SCORE.GT.TOP)TOP=SCORE
IF(SCORE.LT.BOT)BOT=SCORE
SMEAN=SMEAN+SCORE
SUMSQ=SUMSQ+SCORE*SCORE
GO TO 200
300 CONTINUE
IF(N.LT.1)THEN
CALL ERROM(KBOUT,'Error: empty sequence file')
RETURN
END IF
SMEAN=SMEAN/N
SM=SMEAN
SMEAN=SMEAN*SMEAN
SUMSQ=SUMSQ/N
SD = 0.
T = SUMSQ - SMEAN
IF(T.GT.0.)SD = SQRT(T)
SMM3=SM-3*SD
SMP3=SM+3*SD
WRITE(KBOUT,1000)TOP,BOT
1000 FORMAT(' Top score',F12.3,' Bottom score',F12.3)
WRITE(KBOUT,1001)SM,SD
1001 FORMAT(' Mean',F12.3,' Standard deviation',F12.3)
WRITE(KBOUT,1002)SMM3,SMP3
1002 FORMAT(' Mean minus 3.sd',F12.3,' Mean plus 3.sd',F12.3)
BOT=SMM3
TOP=SMP3
XMN = -999.
XMX = 9999.
CALL GETRL(XMN,XMX,BOT,'Cutoff score',VAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
BOT = VAL
XMN = BOT
XMX = 999.
CALL GETRL(XMN,XMX,TOP,'Top score for scaling plots',
+VAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
TOP = VAL
MN = 0
MX = LENGTH
MIDDLE = 1
CALL GETINT(MN,MX,MIDDLE,'Position to identify',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MIDDLE = IVAL
305 CONTINUE
LIN = 0
CALL GTSTR('Title',' ',TITLE,LIN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 305
END IF
IF(INFLAG.EQ.2) RETURN
IOK = 0
END
SUBROUTINE GETW1D(SUM,FREQ,LENGTH,MAXCHR,MAXLEN)
INTEGER SUM(MAXCHR,MAXCHR,MAXLEN)
REAL FREQ(MAXCHR,MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
DO 60 J=1,MAXCHR
DO 60 K=1,MAXCHR
FREQ(K,J,I) = 0.
IF(SUM(K,J,I).GT.0)FREQ(K,J,I) = 1.
60 CONTINUE
70 CONTINUE
END
SUBROUTINE WRTSCD(TITLE,LENGTH,MIDDLE,BOT,TOP,IDM,
+TOT,SUM,CHRSET,IDEV,MAXLEN)
INTEGER TOT(LENGTH),SUM(IDM,IDM,MAXLEN)
CHARACTER CHRSET(IDM),TITLE*(*)
C PROTEIN MATRICES DONT WRITE ROWS FOR -X? AND SPACE SO SET DIMENSION
C TO IDM-4
MINUS = 1
IF(IDM.EQ.26)MINUS = 4
WRITE(IDEV,1018)TITLE
1018 FORMAT(' ',A)
1019 FORMAT(' P',20I4)
1020 FORMAT(' N',20I4)
1021 FORMAT(' ',A,A,20I4)
1022 FORMAT(' ',2I6,2F10.3)
WRITE(IDEV,1022)LENGTH,MIDDLE,BOT,TOP
NLINES=1+(LENGTH-1)/20
K1=1
DO 400 J=1,NLINES
K2=MIN((K1+19),LENGTH)
WRITE(IDEV,1019)(K,K=K1-MIDDLE,K2-MIDDLE)
WRITE(IDEV,1020)(TOT(K),K=K1,K2)
DO 390 I=1,IDM-MINUS
DO 390 K=1,IDM-MINUS
WRITE(IDEV,1021)CHRSET(K),CHRSET(I),(SUM(K,I,KK),KK=K1,K2)
390 CONTINUE
K1=K1+20
IF(K1.GT.LENGTH)K1=LENGTH
400 CONTINUE
CLOSE(UNIT=IDEV)
END
SUBROUTINE RDWMTD(TOT,WT,MIDDLE,LENGTH,MAXLEN,YMIN,YMAX,IDEV,
+IFAIL,IDM,KBOUT)
C AUTHOR: RODGER STADEN
INTEGER WT(5,5,MAXLEN),TOT(MAXLEN)
CHARACTER LINE*79
C SET FAIL FLAG
IFAIL=1
1000 FORMAT( )
1001 FORMAT(3X,20I4)
1002 FORMAT(1X,2I6,2F10.3)
1003 FORMAT(A)
1004 FORMAT(' ',A)
DO 3 I = 1,MAXLEN
TOT(I) = 0
DO 2 J = 1,5
DO 1 K = 1,5
WT(K,J,I) = 0
1 CONTINUE
2 CONTINUE
3 CONTINUE
C READ TITLE
READ(IDEV,1003,ERR=100,END=100)LINE
WRITE(KBOUT,1004)LINE
C READ PLOT VALUES ETC
READ(IDEV,1002,ERR=100,END=100)
+LENGTH,MIDDLE,YMIN,YMAX
C HOW MANY LINES TO READ?
NLINES=1+(LENGTH-1)/20
K1=1
DO 10 JJ=1,NLINES
READ(IDEV,1000,ERR=100,END=100)
K2=MIN((K1+19),LENGTH)
C READ(IDEV,1001,ERR=100,END=100)(TOT(K),K=K1,K2)
READ(IDEV,1000,ERR=100,END=100)
DO 5 I=1,4
DO 5 J=1,4
READ(IDEV,1001,ERR=100,END=100)(WT(J,I,K),K=K1,K2)
DO 6 K=K1,K2
TOT(K) = TOT(K) + WT(J,I,K)
6 CONTINUE
5 CONTINUE
K1=K1+20
IF(K1.GT.LENGTH)K1=LENGTH
10 CONTINUE
CLOSE(UNIT=IDEV)
C SET FAIL FLAG TO GOOD
IFAIL=0
RETURN
100 CONTINUE
END
SUBROUTINE DISIGP(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WT,IDM,MAXLEN,LENGTH,
+YMIN,YMAX,IGON,MIDDLE)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL WT(IDM,IDM,MAXLEN)
INTEGER CTONUM
EXTERNAL CTONUM
XMAX=J2
XMIN=J1
IF(IGON.EQ.0)THEN
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
DO 10 I=J1,J2-LENGTH
SUM1=0.
K=0
DO 5 J=I,I+LENGTH-1
K=K+1
KT1=CTONUM(SEQ(J))
KT2=CTONUM(SEQ(J+1))
SUM1 = SUM1 + WT(KT1,KT2,K)
5 CONTINUE
IF(SUM1.GE.YMIN)THEN
IF(IGON.EQ.0)THEN
X=I+MIDDLE
CALL LINE(X,X,YMIN,SUM1,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
IF(IGON.EQ.1)THEN
CALL VT100M
WRITE(IDEVOT,1010)I+MIDDLE,SUM1,(SEQ(K),K=I,I+LENGTH-1)
1010 FORMAT(' ',I7,F10.2,' ',120A1)
END IF
END IF
10 CONTINUE
CALL VT100M
RETURN
END
C LOCALF
SUBROUTINE LOCALF(SEQNCE,IDIM1,STRING,MATCH,IDIM3I,
+ITOT,ITOTEL,ITOTID,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,J1,J2,KSTART,
+IDEV,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQNCE(IDIM1),STRING(IDIM3I),MATCH(IDIM3I)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
INTEGER ANSRV,ANSC
INTEGER SPAN,DIALOG
CALL SHOWFU(KBOUT,
+'Search for local similarity or complementarity')
CALL GETPAR(36,15,IOK,MINSP,MAXSP,SPAN,MINPR,MAXPR,IPR,
+MNIR1,MXIR1,IRAN1,MNIR2,MXIR2,IRAN2,
+ANSRV,ANSC,IGON,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
PR = REAL(IPR)
10 CONTINUE
CALL LOCALD(
+ANSRV,ANSC,IOK,MINSP,MAXSP,SPAN,MINPR,MAXPR,IPR,PR,
+MNIR1,MXIR1,IRAN1,MNIR2,MXIR2,IRAN2,IGON,
+J1,J2,IS,IE,KBIN,KBOUT,MINS,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL LOCALP(SEQNCE,IDIM1,STRING,MATCH,
+ITOT,ITOTEL,ITOTID,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,KSTART,
+ANSRV,SPAN,IRAN1,IRAN2,IGON,MINS,
+J1,J2,IS,IE,KBIN,KBOUT,IDEV)
DIALOG = 1
GO TO 10
END
SUBROUTINE LOCALD(
+ANSRV,ANSC,IOK,MINSP,MAXSP,SPAN,MINPR,MAXPR,IPR,PR,
+MNIR1,MXIR1,IRAN1,MNIR2,MXIR2,IRAN2,IGON,
+J1,J2,IS,IE,KBIN,KBOUT,MINS,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
INTEGER ANSRV,ANSC
INTEGER CHOICE,SPAN
IOK = 1
CHOICE = ANSRV
CALL YESONO(CHOICE,'Find direct repeats',
+'Find inverted repeats',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.0) RETURN
ANSRV = CHOICE
CALL YESNO(ANSC,'Keep picture',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANSC.LT.0) RETURN
IF(ANSC.EQ.1)CALL CLEARG
CALL GETINT(MINSP,MAXSP,SPAN,'Span',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
SPAN = IVAL
IS = J1
IE = J2
CALL GTREG(KBIN,KBOUT,0,J2,IS,IE,'Define restricted region',
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL SQPF5(SPAN,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PR,MINS,IOK)
IF(IOK.NE.0) RETURN
CALL GETINT(MNIR1,MXIR1,IRAN1,'Range start',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IRAN1 = IVAL
MNIR2 = MAX(MNIR2,IRAN1)
MXIR2 = MAX(MNIR2,MXIR2)
IRAN2 = MAX(IRAN2,MNIR2)
CALL GETINT(MNIR2,MXIR2,IRAN2,'Range end',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IRAN2 = IVAL
CALL YESONO(IGON,'Plot results','List results',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
IOK = 0
END
SUBROUTINE LOCALP(SEQNCE,IDIM1,STRING,MATCH,
+ITOT,ITOTEL,ITOTID,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,KSTART,
+ANSRV,SPAN,IRAN1,IRAN2,IGON,MINS,
+J1,J2,IS,IE,KBIN,KBOUT,IDEV)
C AUTHOR: RODGER STADEN
INTEGER SPAN
CHARACTER SEQNCE(IDIM1),STRING(SPAN),MATCH(SPAN)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
INTEGER ANSRV
XMIN=J1
XMAX=J2
YMIN=0.
YMAX=SPAN
CALL BUSY(KBOUT)
IF(IGON.EQ.0)THEN
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END IF
C LAST START POSITION
ILAST=IE-SPAN+1
C HOW MANY LOOPS?
IT1=IRAN1+SPAN-1
IT2=IRAN2+SPAN+SPAN-2
DO 50 J=IS,IE
IS1=J+IT1
IE1=J+IT2
IF(IE1.GT.IE)IE1=IE
IF(IS1.GT.ILAST)GO TO 51
CALL SQCOPY(SEQNCE(J-KSTART+1),STRING,SPAN)
IF(ANSRV.EQ.1)THEN
CALL SQREV(STRING,SPAN)
CALL SQCOM(STRING,SPAN)
END IF
CALL SQFIT(SEQNCE,IDIM1,STRING,SPAN,ITOT,ITOTEL,ITOTID,
+ IS1,IE1,MINS,ITOTP,KSTART)
IF(ITOTP.GT.ITOTID) THEN
IF(IGON.EQ.0)CALL VT100M
WRITE(KBOUT,1017)ITOTID
1017 FORMAT(/10X,'More than',I4,' matches.',
+ ' Try changing percentage or region'/)
RETURN
END IF
IF(ITOTP.GT.0)THEN
IF(IGON.EQ.0)THEN
X1=J
DO 41 I=1,ITOTP
X=ITOTEL(I)
Y=ITOT(I)
CALL LINE(X1,X,Y,YMIN,
+ XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,MARGB,
+ MARGT,ISXMAX,ISYMAX)
41 CONTINUE
END IF
IF(IGON.EQ.1)THEN
DO 49 I=1,ITOTP
K=ITOTEL(I)-KSTART+1
WRITE(IDEV,1008)
1008 FORMAT(/)
CALL SQMTCH(SEQNCE(K),STRING,MATCH,SPAN)
CALL FMT4LN(SEQNCE(K),STRING,MATCH,
+ SPAN,ITOTEL(I),J,IDEV)
49 CONTINUE
END IF
END IF
50 CONTINUE
51 CONTINUE
IF(IGON.EQ.0)CALL VT100M
END
C SETGEN
SUBROUTINE SETGEN(PAAS,PAA,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER PAAS(5,5,5),PAA(5,5,5)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*),UGA(3),AUA(3),CUA(3),AGA(3),AGG(3)
INTEGER CTONUM
PARAMETER (MAXPRM = 23)
CHARACTER PROMPT(4)*(MAXPRM)
EXTERNAL CTONUM
SAVE UGA,AUA,CUA,AGA,AGG
DATA UGA,AUA,CUA,AGA,AGG/'T','G','A','A','T','A','C','T','A',
+'A','G','A','A','G','G'/
NUM = 1
C SET STANDARD CODE
DO 10 I=1,5
DO 10 J=1,5
DO 10 K=1,5
PAA(I,J,K)=PAAS(I,J,K)
10 CONTINUE
PROMPT(1) = 'Standard'
PROMPT(2) = 'Mammalian mitochondrial'
PROMPT(3) = 'Yeast mitochondrial'
PROMPT(4) = 'Personal'
CALL RADION('Select genetic code',PROMPT,4,NUM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(NUM.LT.2) RETURN
IF(NUM.EQ.2)THEN
PAA(CTONUM(AUA(3)),CTONUM(AUA(2)),CTONUM(AUA(1)))='M'
PAA(CTONUM(UGA(3)),CTONUM(UGA(2)),CTONUM(UGA(1)))='W'
PAA(CTONUM(AGA(3)),CTONUM(AGA(2)),CTONUM(AGA(1)))='*'
PAA(CTONUM(AGG(3)),CTONUM(AGG(2)),CTONUM(AGG(1)))='*'
ELSE IF(NUM.EQ.3)THEN
PAA(CTONUM(CUA(3)),CTONUM(CUA(2)),CTONUM(CUA(1)))='T'
PAA(CTONUM(AUA(3)),CTONUM(AUA(2)),CTONUM(AUA(1)))='M'
PAA(CTONUM(UGA(3)),CTONUM(UGA(2)),CTONUM(UGA(1)))='W'
ELSE IF(NUM.EQ.4)THEN
CALL GETCOD(PAA,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH)
END IF
END
SUBROUTINE GETCOD(PAA,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*)
CHARACTER PAA(5,5,5),AA,CODON*3,AIN,AAA
INTEGER CTONUM
EXTERNAL CTONUM
WRITE(KBOUT,1004)
1004 FORMAT(' Define genetic code by typing a codon',
+/,' followed by a 1 letter amino acid symbol')
1 CONTINUE
L = 0
CALL GTSTR('Codon',' ',CODON,L,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 1
END IF
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.3) RETURN
AA =
+PAA(CTONUM(CODON(3:3)),CTONUM(CODON(2:2)),CTONUM(CODON(1:1)))
IF(AA.NE.'-')THEN
2 CONTINUE
L = 1
AAA = AA
CALL GTSTR('Amino acid symbol',AAA,AIN,L,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 2
END IF
IF(INFLAG.EQ.2) RETURN
IF(L.GT.0) AAA = AIN
PAA(CTONUM(CODON(3:3)),CTONUM(CODON(2:2)),
+ CTONUM(CODON(1:1))) = AAA
GO TO 1
END IF
GO TO 1
END
SUBROUTINE DINUCF(SEQ,IDIM,J1,J2,IDEV,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),BASE(4)
REAL OBS(5,5),EXPEC(5,5),COMP(5)
INTEGER CTONUM
EXTERNAL CTONUM
SAVE BASE
DATA BASE/'T','C','A','G'/
CALL SHOWFU(KBOUT,'Calculate dinucleotide frequencies')
DO 10 I=1,5
COMP(I)=0.
DO 10 J=1,5
EXPEC(I,J)=0.
OBS(I,J)=0.
10 CONTINUE
DO 20 I=J1,J2-1
K0=CTONUM(SEQ(I))
K1=CTONUM(SEQ(I+1))
COMP(K0)=COMP(K0)+1.
OBS(K0,K1)=OBS(K0,K1)+1.
20 CONTINUE
TOT=J2-J1
DO 25 I=1,5
COMP(I)=COMP(I)/TOT
25 CONTINUE
TOT=TOT/100.
DO 26 I=1,5
DO 26 J=1,5
EXPEC(I,J)=COMP(I)*COMP(J)
26 CONTINUE
DO 30 I=1,5
DO 30 J=1,5
EXPEC(I,J)=EXPEC(I,J)*100.
OBS(I,J)=OBS(I,J)/TOT
30 CONTINUE
WRITE(IDEV,1002)BASE
WRITE(IDEV,1001)
1002 FORMAT(/10X,4(4X,A1,9X))
1001 FORMAT(' ',7X,4(' Obs Expected'))
DO 40 K=1,4
WRITE(IDEV,1000)BASE(K),(OBS(K,J),EXPEC(K,J),J=1,4)
1000 FORMAT(' ',A1,8F7.2)
40 CONTINUE
END
INTEGER FUNCTION ICOMP(IN)
C AUTHOR: RODGER STADEN
INTEGER COMPS(5)
SAVE COMPS
DATA COMPS/3,4,1,2,5/
ICOMP=COMPS(IN)
END
C lwrap2
INTEGER FUNCTION LWRAP2(IDIM,I)
C AUTHOR: RODGER STADEN
LWRAP2=I
IF(LWRAP2.LT.1)LWRAP2=LWRAP2+IDIM+1
IF(LWRAP2.GT.IDIM)LWRAP2=LWRAP2-IDIM
END
C LWRAP3
INTEGER FUNCTION LWRAP3(IDIM,I)
C AUTHOR: RODGER STADEN
LWRAP3=I
IF(LWRAP3.LT.1)LWRAP3=LWRAP3+IDIM
IF(LWRAP3.GT.IDIM)LWRAP3=LWRAP3-IDIM
END
SUBROUTINE NORMAA(SUM,TOTA,PAA)
CHARACTER PAA(5,5,5),AA(21)
REAL SUM(4,4,4)
SAVE AA
DATA AA/'A','C','D','E','F',
+'G','H','I','K','L',
+'M','N','P','Q','R',
+'S','T','V','W','Y','*'/
C LOOP FOR EACH ACID
DO 150 L=1,21
ASUM=0.
C LOOP FOR EACH CODON
DO 200 I=1,4
DO 200 J=1,4
DO 200 K=1,4
C COUNT CODONS
IF(PAA(K,J,I).EQ.AA(L))ASUM=ASUM+SUM(I,J,K)
200 CONTINUE
IF(ASUM.NE.0.0)ASUM=TOTA/ASUM
C LOOP FOR EACH CODON
DO 100 I=1,4
DO 100 J=1,4
DO 100 K=1,4
C NORMALIZE
IF(PAA(K,J,I).EQ.AA(L))SUM(I,J,K)=SUM(I,J,K)*ASUM
100 CONTINUE
150 CONTINUE
END
SUBROUTINE PPROM1(SEQ,IDIM,ISXMAX,ISYMAX,I1,I2,MARGL,MARGR,
+ MARGB,MARGT,IDEV,FILEIN,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILEIN*(*)
INTEGER CTONUM
REAL WTM35(25,5),WTM10(19,5),WTP1(12,5),PENALT(7)
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'E coli promoter search')
CALL RPPROM(WTM35,WTM10,WTP1,IDEV,FILEIN,IFAIL)
IF(IFAIL.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
IMARG=NINT(MARGT/2.0)
MARGB2=MARGB+IMARG
MARGT2=IMARG
MARGB1=MARGB
MARGT1=IMARG
PENALT(1)=0.02
PENALT(2)=0.2
PENALT(3)=1.0
PENALT(4)=0.2
PENALT(5)=0.05
PENALT(6)=0.02
PENALT(7)=0.01
DO 1 I=1,7
PENALT(I)=LOG(PENALT(I))
1 CONTINUE
CUTM35=-36.85
CUTM10=-28.13
CUTP1=-21.8
XMAX=I2
XMIN=I1
YMIN=-77.3
YMAX=-60.4
YMIN1=-77.3*0.01
YMAX1=-60.4*0.01
YMIN1=-77.3
YMAX1=-60.4
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL,MARGR,MARGB2,MARGT2,ISXMAX,ISYMAX)
DO 100 I=I1,I2-65
C DO -35 REGION
SUMM35=0.
K=0
DO 5 J=I,I+24
K=K+1
SUMM35=SUMM35+WTM35(K,CTONUM(SEQ(J)))
5 CONTINUE
C HIGH ENOUGH?
IF(SUMM35.GE.CUTM35)THEN
C YES, TRY -10 REGION
BESTM1=-99999.
DO 20 J=I+25,I+25+6
K=0
SUMM10=0.
DO 10 L=J,J+18
K=K+1
SUMM10=SUMM10 + WTM10(K,CTONUM(SEQ(L)))
10 CONTINUE
C IS THIS -10 SCORE HIGH ENOUGH?
IF((SUMM10).GE.CUTM10)THEN
C YES, IS IT THE HIGHEST?
IF(SUMM10.GT.BESTM1)THEN
BESTM1=SUMM10
IBEST=J
IBESTA=IBEST-I-24
END IF
END IF
20 CONTINUE
C -10 DONE, HIGH ENOUGH SCORE?
IF(BESTM1.GE.CUTM10)THEN
C YES, DO +1 REGION
BESTP1=-99999.
DO 40 J=IBEST+19,IBEST+19+4
SUMP1=0.
K=0
DO 30 L=J,J+11
K=K+1
SUMP1=SUMP1 + WTP1(K,CTONUM(SEQ(L)))
30 CONTINUE
C HIGH ENOUGH?
IF(SUMP1.GE.CUTP1)THEN
C YES, HIGHEST?
IF(SUMP1.GT.BESTP1)THEN
BESTP1=SUMP1
JBEST=J
END IF
END IF
40 CONTINUE
C +1 REGION DONE, HIGH ENOUGH SCORE?
IF(BESTP1.GE.CUTP1)THEN
C YES, SO ALL HIGH ENOUGH
X=JBEST+2
Y=SUMM35+BESTM1+BESTP1
CALL LINE(X,X,YMIN,Y,XMAX,XMIN,
+ YMAX,YMIN,
+ MARGL,MARGR,MARGB2,MARGT2,
+ ISXMAX,ISYMAX)
Y=Y+PENALT(IBESTA)
CALL LINE(X,X,YMIN1,Y,XMAX,XMIN,
+ YMAX1,YMIN1,
+ MARGL,MARGR,MARGB1,MARGT1,
+ ISXMAX,ISYMAX)
END IF
END IF
END IF
100 CONTINUE
CALL VT100M
END
SUBROUTINE RPPROM(WTM35,WTM10,WTP1,IDEV,FILEIN,IFAIL)
C AUTHOR: RODGER STADEN
REAL WTM35(25,5),WTM10(19,5),WTP1(12,5)
INTEGER WT(25),TOT(25)
CHARACTER FILEIN*(*)
IFAIL=0
1001 FORMAT(2X,25I3)
CALL OPENRS(IDEV,FILEIN,IOK,LRECL,2)
IF(IOK.NE.0)GO TO 100
C READ SPACE,P
READ(IDEV,1000,ERR=100)
READ(IDEV,1000,ERR=100)
READ(IDEV,1000,ERR=100)
1000 FORMAT()
READ(IDEV,1001,ERR=100)TOT
DO 5 I=1,4
READ(IDEV,1001,ERR=100)(WT(K),K=1,25)
DO 5 J=1,25
IF(WT(J).NE.0)THEN
WTM35(J,I) = LOG(FLOAT(WT(J))/FLOAT(TOT(J)))
ELSE
WTM35(J,I)=LOG(0.5/FLOAT(TOT(J)))
END IF
5 CONTINUE
C DO 5'S
DO 20 I=1,25
WTM35(I,5)=LOG(0.25/TOT(I))
20 CONTINUE
C do -10 region
READ(IDEV,1000,ERR=100)
READ(IDEV,1000,ERR=100)
READ(IDEV,1001)(TOT(K),K=1,19)
DO 30 I=1,4
READ(IDEV,1001,ERR=100)(WT(K),K=1,19)
DO 30 J=1,19
IF(WT(J).NE.0)THEN
WTM10(J,I) = LOG(FLOAT(WT(J))/FLOAT(TOT(J)))
ELSE
WTM10(J,I)=LOG(0.5/FLOAT(TOT(J)))
END IF
30 CONTINUE
C do 5's
DO 35 I=1,19
WTM10(I,5)=LOG(0.25/TOT(I))
35 CONTINUE
C DO +1 REGION
READ(IDEV,1000,ERR=100)
READ(IDEV,1000,ERR=100)
READ(IDEV,1001,ERR=100)(TOT(K),K=1,12)
DO 40 I=1,4
READ(IDEV,1001,ERR=100)(WT(K),K=1,12)
DO 40 J=1,12
IF(WT(J).NE.0)THEN
WTP1(J,I) = LOG(FLOAT(WT(J))/FLOAT(TOT(J)))
ELSE
WTP1(J,I)=LOG(0.5/FLOAT(TOT(J)))
END IF
40 CONTINUE
C DO 5'S
DO 45 I=1,12
WTP1(I,5)=LOG(0.25/TOT(I))
45 CONTINUE
CLOSE(UNIT=IDEV)
RETURN
100 CONTINUE
IFAIL=1
END
SUBROUTINE PPROM2(SEQ,IDIM,ISXMAX,ISYMAX,I1,I2,MARGL,MARGR,
+MARGB,MARGT,IDEV,FILEIN,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILEIN*(*)
REAL WTM35(25,5),WTM10(19,5),WTP1(12,5)
INTEGER CTONUM
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'E coli promoter -35 and -10 regions search')
CALL RPPROM(WTM35,WTM10,WTP1,IDEV,FILEIN,IFAIL)
IF(IFAIL.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
IMARG=NINT(MARGT/2.0)
MARGB2=MARGB+IMARG
MARGT2=IMARG
MARGB1=MARGB
MARGT1=IMARG
XMAX=I2
XMIN=I1
YMIN35=-38.0
YMAX35=-23.5
YMIN1=-29.0
YMAX1=-18.4
YMID35=YMIN35+(YMAX35-YMIN35)/2.
YMID1=YMIN1+(YMAX1-YMIN1)/2.
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL FRAME(MARGL,MARGR,MARGB2,MARGT2,ISXMAX,ISYMAX)
CALL TEXT(XMIN,YMID35,'-35',3,0,XMAX,XMIN,YMAX35,YMIN35,
+MARGL,MARGR,MARGB1,MARGT1,ISXMAX,ISYMAX)
CALL TEXT(XMIN,YMID1,'-10',3,0,XMAX,XMIN,YMAX1,YMIN1,
+MARGL,MARGR,MARGB2,MARGT2,ISXMAX,ISYMAX)
C
DO 100 I=I1,I2-25
C DO -35 REGION
SUMM35=0.
K=0
DO 5 J=I,I+24
K=K+1
SUMM35=SUMM35+WTM35(K,CTONUM(SEQ(J)))
5 CONTINUE
X=I+48
Y=SUMM35
CALL LINE(X,X,YMIN35,Y,XMAX,XMIN,YMAX35,YMIN35,
+ MARGL,MARGR,MARGB2,MARGT2,ISXMAX,ISYMAX)
100 CONTINUE
DO 200 I=I1,I2-19
SUMM10=0.
K=0
DO 20 J=I,I+18
K=K+1
SUMM10=SUMM10+WTM10(K,CTONUM(SEQ(J)))
20 CONTINUE
X=I+22
Y=SUMM10
CALL LINE(X,X,YMIN1,Y,XMAX,XMIN,YMAX1,YMIN1,
+ MARGL,MARGR,MARGB1,MARGT1,ISXMAX,ISYMAX)
200 CONTINUE
CALL VT100M
END
C PROMOTERS ON THE COMPLEMENTARY STRAND
SUBROUTINE PPROM3(SEQ,IDIM,ISXMAX,ISYMAX,I1,I2,MARGL1,MARGR1,
+MARGB1,MARGT1,IDEV,FILEIN,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILEIN*(*)
REAL WTM35(25,5),WTM10(19,5),WTP1(12,5)
REAL WTM35C(25,5),WTM10C(19,5),WTP1C(12,5)
INTEGER CTONUM
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'E coli promters on the complementary strand')
CALL RPPROM(WTM35,WTM10,WTP1,IDEV,FILEIN,IFAIL)
IF(IFAIL.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
C REVERSE AND COMPLEMENT MATRICES
DO 5 I=1,25
DO 5 J=1,2
WTM35C(I,J+2)=WTM35(26-I,J)
WTM35C(I,J)=WTM35(26-I,J+2)
5 CONTINUE
DO 6 I=1,19
DO 6 J=1,2
WTM10C(I,J+2)=WTM10(20-I,J)
WTM10C(I,J)=WTM10(20-I,J+2)
6 CONTINUE
DO 7 I=1,12
DO 7 J=1,2
WTP1C(I,J+2)=WTP1(13-I,J)
WTP1C(I,J)=WTP1(13-I,J+2)
7 CONTINUE
DO 1 I = 1,25
WTM35C(I,5)=WTM35(26-I,5)
1 CONTINUE
DO 2 I = 1,19
WTM10C(I,5)=WTM10(20-I,5)
2 CONTINUE
DO 3 I = 1,12
WTP1C(I,5)=WTP1(13-I,5)
3 CONTINUE
CUTM35=-36.85
CUTM10=-28.13
CUTP1=-21.8
XMAX=I2
XMIN=I1
YMIN=-77.3
YMAX=-60.4
CALL CLEARV
CALL VECTOM
CALL FRAME(MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX)
DO 100 I=I1,I2-65
C DO +1 REGION
SUMP1=0.
K=0
DO 8 J=I,I+11
K=K+1
SUMP1=SUMP1+WTP1C(K,CTONUM(SEQ(J)))
8 CONTINUE
C HIGH ENOUGH?
IF(SUMP1.GE.CUTP1)THEN
C YES, TRY -10 REGION
BESTM1=-99999.
DO 20 J=I+12,I+12+4
K=0
SUMM10=0.
DO 10 L=J,J+18
K=K+1
SUMM10=SUMM10 + WTM10C(K,CTONUM(SEQ(L)))
10 CONTINUE
C IS THIS -10 SCORE HIGH ENOUGH?
IF(SUMM10.GE.CUTM10)THEN
C YES, IS IT THE HIGHEST?
IF(SUMM10.GT.BESTM1)THEN
BESTM1=SUMM10
IBEST=J
END IF
END IF
20 CONTINUE
C -10 DONE, HIGH ENOUGH SCORE?
IF(BESTM1.GE.CUTM10)THEN
C YES, DO -35 REGION
BESTM3=-99999.
DO 40 J=IBEST+19,IBEST+19+6
SUMM35=0.
K=0
DO 30 L=J,J+24
K=K+1
SUMM35=SUMM35+WTM35C(K,CTONUM(SEQ(L)))
30 CONTINUE
C HIGH ENOUGH?
IF(SUMM35.GE.CUTM35)THEN
C YES, HIGHEST?
IF(SUMM35.GT.BESTM3)THEN
BESTM3=SUMM35
END IF
END IF
40 CONTINUE
C -35 REGION DONE, HIGH ENOUGH SCORE?
IF(BESTM3.GE.CUTM35)THEN
C YES, SO ALL HIGH ENOUGH
X=I+10
Y=SUMP1+BESTM3+BESTM1
CALL LINE(X,X,YMIN,Y,
+ XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,
+ MARGT1,ISXMAX,ISYMAX)
END IF
END IF
END IF
100 CONTINUE
CALL VT100M
END
SUBROUTINE PRIBS(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,I1,I2,IDEV,FILE1,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILE1*(*)
INTEGER WEIGHT(505),MARGB1(3),MARGT1(3),CTONUM
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'Search for E coli ribosome binding sites')
C SORT OUT MARGIN IE FIND MAX AND DIVIDE BY THREE
IMARG=NINT(MARGT/3.)
MARGL1=MARGL
MARGR1=MARGR
MARGB1(1)=MARGB
MARGB1(2)=MARGB+IMARG
MARGB1(3)=MARGB+2*IMARG
MARGT1(1)=IMARG
MARGT1(2)=IMARG
MARGT1(3)=IMARG
YMAX=100.
YMIN=0.
CALL RPRIBS(IDEV,FILE1,WEIGHT,IOK,KBOUT)
IF(IOK.NE.0)RETURN
MINSMN = -50
MINSMX = 100
MINSI = 2
CALL GETINT(MINSMN,MINSMX,MINSI,'Minimum score',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINS = IVAL
XMAX=I2
XMIN=I1
Y=0.
YP=20.0
CALL CLEARV
CALL BUSY(KBOUT)
CALL VECTOM
DO 10 I=I1,I2-100
ISUM=0
K=0
II=I
III=I+100
DO 5 J=II,III
K=K+1
KK=CTONUM(SEQ(J))-1
KK=KK*101
IP=K+KK
ISUM=ISUM+WEIGHT(IP)
5 CONTINUE
IF(ISUM.GT.MINS)THEN
X=I+60
IMARG=1+MOD((II-I1),3)
CALL LINE(X,X,Y,YP,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB1(IMARG),MARGT1(IMARG),
+ ISXMAX,ISYMAX)
END IF
10 CONTINUE
CALL VT100M
END
SUBROUTINE RPRIBS(IDEV,FILE1,WT,IOK,KBOUT)
C AUTHOR: RODGER STADEN
INTEGER WT(505)
CHARACTER FILE1*(*)
1001 FORMAT(2X,25I3)
CALL OPENRS(IDEV,FILE1,IOK,LRECL,2)
IF(IOK.NE.0)GO TO 100
DO 20 J=1,4
C read space,p
READ(IDEV,1000,END=200,ERR=200)
READ(IDEV,1000,END=200,ERR=200)
1000 FORMAT()
JJ=(J-1)*25
DO 5 I=1,4
II=(I-1)*101
K1=JJ+II+1
READ(IDEV,1001,END=200,ERR=200)(WT(K),K=K1,K1+24)
5 CONTINUE
20 CONTINUE
C do last bit
READ(IDEV,1000,END=200,ERR=200)
READ(IDEV,1000,END=200,ERR=200)
K1=101
1002 FORMAT(2X,I3)
DO 30 I=1,4
READ(IDEV,1002,ERR=200,END=200)WT(K1)
K1=K1+101
30 CONTINUE
CLOSE(UNIT=IDEV)
DO 40 I=405,505
WT(I)=0
40 CONTINUE
RETURN
100 CONTINUE
CALL ERROM(KBOUT,'Error opening file')
RETURN
200 CONTINUE
CALL ERROM(KBOUT,'Error reading file')
IOK = 1
END
C SCANS FOR EUKARYOTIC RIBOSOME BINDING SITES
SUBROUTINE RIBEUK(SEQ,IDIM,ISXMAX,ISYMAX,J1,J2,
+MARGL,MARGR,MARGB,MARGT,IDEV,FILEIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILEIN*(*)
REAL WT(10,5)
INTEGER CTONUM,MARGB1(3),MARGT1(3)
EXTERNAL CTONUM
CALL SHOWFU(KBOUT,'Search for eukaryotic ribosome binding sites')
CALL RRIBEU(WT,IDEV,FILEIN,IFAIL)
IF(IFAIL.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
C SORT OUT MARGIN IE FIND MAX AND DIVIDE BY THREE
IMARG=NINT(MARGT/3.)
MARGL1=MARGL
MARGR1=MARGR
MARGB1(1)=MARGB
MARGB1(2)=MARGB+IMARG
MARGB1(3)=MARGB+2*IMARG
MARGT1(1)=IMARG
MARGT1(2)=IMARG
MARGT1(3)=IMARG
YMIN=-9.0
YMAX=0.0
XMAX=J2
XMIN=J1
CALL CLEARV
CALL VECTOM
C ADD 2 TO MAKE PHASE CORRECT
I1=J1+1
DO 20 II=1,3
DO 10 I=I1+II,J2-10,3
IF((SEQ(I+7).EQ.'A').AND.(SEQ(I+8).EQ.'T').
+ AND.(SEQ(I+9).EQ.'G'))THEN
SUM=0.
K=0
DO 5 J=I,I+9
K=K+1
SUM=SUM+WT(K,CTONUM(SEQ(J)))
5 CONTINUE
X=I+7
CALL LINE(X,X,YMIN,SUM,XMAX,
+ XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1(II),
+ MARGT1(II),ISXMAX,ISYMAX)
END IF
10 CONTINUE
20 CONTINUE
CALL VT100M
END
SUBROUTINE RRIBEU(WTR,IDEV,FILEIN,IFAIL)
C AUTHOR: RODGER STADEN
CHARACTER FILEIN*(*)
REAL WTR(10,5)
INTEGER WT(10),TOT(10)
IFAIL=0
1001 FORMAT(2X,10I3)
CALL OPENRS(IDEV,FILEIN,IOK,LRECL,2)
IF(IOK.NE.0)GO TO 50
C read space,p
READ(IDEV,1000,ERR=50)
READ(IDEV,1000,ERR=50)
1000 FORMAT()
READ(IDEV,1001,ERR=50)TOT
DO 6 I=1,4
READ(IDEV,1001,ERR=50)(WT(K),K=1,10)
DO 5 J=1,10
IF(WT(J).NE.0)THEN
WTR(J,I)=LOG(FLOAT(WT(J))/FLOAT(TOT(J)))
ELSE
WTR(J,I)=LOG(1./FLOAT(TOT(J)))
END IF
5 CONTINUE
6 CONTINUE
CLOSE(UNIT=IDEV)
Z=LOG(1./102.)
DO 40 I=1,10
WTR(I,5)=Z
40 CONTINUE
RETURN
50 CONTINUE
IFAIL=1
END
SUBROUTINE FSPLIC(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEV,DEFFIL,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IDEVOT,
+SUM,WTD,WTA,CEXACD,CEXACA,PEXACD,PEXACA,
+TOT,IDM,MAXLEN,LINE,CHRSET,DIALOG)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILNAM*(*),HELPF*(*),DEFFIL*(*)
CHARACTER LINE(MAXLEN),CHRSET(IDM)
INTEGER SUM(IDM,MAXLEN),TOT(MAXLEN),DIALOG,ANSFIL
REAL WTA(IDM,MAXLEN),WTD(IDM,MAXLEN)
INTEGER CEXACD(MAXLEN),PEXACD(MAXLEN)
INTEGER CEXACA(MAXLEN),PEXACA(MAXLEN)
CALL SHOWFU(KBOUT,'Splice search using weight matrix')
CALL GETPAR(62,1,IOK,IGON,
+IPAR2,IPAR3,IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL SPLICD(IDEV,DEFFIL,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+SUM,IDM,MAXLEN,TOT,WTD,LENGTD,MIDDLD,YMIND,YMAXD,
+WTA,LENGTA,MIDDLA,YMINA,YMAXA,
+IGON,IDEVOT,LINE,CHRSET,DIALOG,ANSFIL,IOK,
+CEXACD,PEXACD,IEXACD,CEXACA,PEXACA,IEXACA)
IF(IOK.NE.0) RETURN
MARGTD = MARGT/2
CALL SPLICP(SEQ,IDIM,MARGL,MARGR,MARGB,MARGTD,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WTD,IDM,MAXLEN,LENGTD,
+YMIND,YMAXD,IGON,MIDDLD,0,
+CEXACD,PEXACD,IEXACD)
MARGBA = MARGB + MARGTD
CALL SPLICP(SEQ,IDIM,MARGL,MARGR,MARGBA,MARGTD,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WTA,IDM,MAXLEN,LENGTA,
+YMINA,YMAXA,IGON,MIDDLA,1,
+CEXACA,PEXACA,IEXACA)
END
SUBROUTINE SPLICD(IDEV,DEFFIL,FILNAM,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+SUM,IDM,MAXLEN,TOT,WTD,LENGTD,MIDDLD,YMIND,YMAXD,
+WTA,LENGTA,MIDDLA,YMINA,YMAXA,
+IGON,IDEVOT,LINE,CHRSET,DIALOG,ANSFIL,IOK,
+CEXACD,PEXACD,IEXACD,CEXACA,PEXACA,IEXACA)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),HELPF*(*),DEFFIL*(*)
CHARACTER LINE(MAXLEN),CHRSET(IDM)
INTEGER SUM(IDM,MAXLEN),TOT(MAXLEN),DIALOG,ANSFIL
INTEGER CEXACD(MAXLEN),PEXACD(MAXLEN)
INTEGER CEXACA(MAXLEN),PEXACA(MAXLEN)
REAL WTA(IDM,MAXLEN),WTD(IDM,MAXLEN)
IOK = 1
ANSFIL = 0
IF(DIALOG.EQ.1) THEN
CALL YESNO(ANSFIL,'Use default weights file',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANSFIL.LT.0) RETURN
END IF
IF(ANSFIL.EQ.1) THEN
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Splice site weight matrix file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
ELSE
CALL OPENRS(IDEV,DEFFIL,IOK,LRECL,2)
IF(IOK.NE.0) RETURN
END IF
LENGTD=MAXLEN
CALL RDWMTN(TOT,SUM,MIDDLD,LENGTD,MAXLEN,YMIND,YMAXD,IDEV,
+ IOK,IDM,KBOUT,0)
IF(IOK.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
CLOSE(UNIT=IDEV)
RETURN
END IF
CALL GETWC(TOT,SUM,LENGTD,IDM,MAXLEN,CEXACD,PEXACD,IEXACD)
IF(YMIND.LT.0.0)CALL GETW(TOT,SUM,WTD,LENGTD,IDM,MAXLEN)
IF(YMIND.GE.0.0)CALL GETW2(SUM,WTD,LENGTD,IDM,MAXLEN)
LENGTA=MAXLEN
CALL RDWMTN(TOT,SUM,MIDDLA,LENGTA,MAXLEN,YMINA,YMAXA,IDEV,
+ IOK,IDM,KBOUT,0)
CLOSE(UNIT=IDEV)
IF(IOK.NE.0)THEN
CALL ERROM(KBOUT,'Error in weight matrix')
RETURN
END IF
CALL GETWC(TOT,SUM,LENGTA,IDM,MAXLEN,CEXACA,PEXACA,IEXACA)
IF(YMINA.LT.0.0)CALL GETW(TOT,SUM,WTA,LENGTA,IDM,MAXLEN)
IF(YMINA.GE.0.0)CALL GETW2(SUM,WTA,LENGTA,IDM,MAXLEN)
C assume donors in first file: and that the first and last bases in
C exons are marked as middle. Subtract 1 from middla to make frames
C equivalent, but send midc to splicp as a correction so that positions
C of matches are positions on first and last bases in exons. midc=0 for
C donors and 1 for acceptors
MIDDLA = MIDDLA - 1
IF(DIALOG.EQ.1) THEN
AMN = -9999.
AMX = 9999.
CALL GETRL(AMN,AMX,YMIND,'Donor cutoff score',VAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMIND = VAL
CALL GETRL(AMN,AMX,YMINA,'Acceptor cutoff score',VAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
YMINA = VAL
IOK = 1
CALL YESONO(IGON,'Plot results','List results',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IGON.LT.0)RETURN
END IF
IOK = 0
END
SUBROUTINE SPLICP(SEQ,IDIM,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,J1,J2,IDEVOT,WT,IDM,MAXLEN,LENGTH,
+YMIN,YMAX,IGON,MIDDLE,MIDC,CEXACT,PEXACT,IEXACT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL WT(IDM,MAXLEN)
INTEGER CEXACT(IEXACT),PEXACT(IEXACT)
INTEGER CTONUM
EXTERNAL CTONUM,MATWTC
XMAX=J2
XMIN=J1
IF(IGON.EQ.0)THEN
CALL CLEARV
CALL VECTOM
IMARG = MARGT/3
CALL FRAME(MARGL,MARGR,MARGB,IMARG,ISXMAX,ISYMAX)
CALL FRAME(MARGL,MARGR,MARGB+IMARG,IMARG,ISXMAX,ISYMAX)
CALL FRAME(MARGL,MARGR,MARGB+(2*IMARG),IMARG,ISXMAX,ISYMAX)
END IF
I = J1 - 1
1 CONTINUE
I = I + 1
IF(IEXACT.GT.0) I = MATWTC(SEQ,IDIM,J1,J2,I,CEXACT,PEXACT,IEXACT)
IF((I+LENGTH-2).LT.J2) THEN
SUM1=0.
K=0
DO 5 J=I,I+LENGTH-1
K=K+1
SUM1=SUM1+WT(CTONUM(SEQ(J)),K)
5 CONTINUE
IF(SUM1.GE.YMIN)THEN
IF(IGON.EQ.0)THEN
M = I + MIDDLE
X = M + MIDC
M = MARGB + (MOD(M,3)*IMARG)
CALL LINE(X,X,YMIN,SUM1,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,M,IMARG,ISXMAX,ISYMAX)
END IF
IF(IGON.EQ.1)THEN
CALL VT100M
M = MOD((I+MIDDLE),3)
WRITE(IDEVOT,1010)I+MIDDLE+MIDC,M,SUM1,
+ (SEQ(K),K=I,I+LENGTH-1)
1010 FORMAT(' ',I7,I2,F10.2,' ',120A1)
END IF
END IF
GO TO 1
END IF
CALL VT100M
END
SUBROUTINE SQCOM2(SEQ,IDIM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),COMP*17
INTEGER DTONUM
EXTERNAL DTONUM
SAVE COMP
DATA COMP/'AGTC-YRWSKMDVBHN-'/
DO 100 I=1,IDIM
J = DTONUM(SEQ(I))
SEQ(I) = COMP(J:J)
100 CONTINUE
END
C SQFIT
SUBROUTINE SQFIT(SEQ,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MIN,ITOTP,KSTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
C ALLOWS R,Y AND - SPECIAL CHARACTERS
IDIF=(IE-IS+2)-IDIM2
C IDIF IS THE NUMBER OF POSNS TO TRY
C IPSTR GOES FROM 1 TO IDIM2 IDIF TIMES
C TRY ALL POSSIBLE POSITIONS FOR MATCHING AND SCORE FOR EACH
C POINT TO ARRAY ELEMENT CORRESPONDING TO FIRST BASE
IPSEQ=IS-KSTART+1
ITOTP=0
DO 200 I=1,IDIF
NTOT=0
IP=IPSEQ
DO 100 J=1,IDIM2
IF(SEQ(IP).EQ.STRING(J))THEN
NTOT=NTOT+1
ELSE IF (STRING(J).EQ.'R')THEN
IF((SEQ(IP).EQ.'A').OR.(SEQ(IP).EQ.'G')) NTOT=NTOT+1
ELSE IF (STRING(J).EQ.'Y')THEN
IF((SEQ(IP).EQ.'C').OR.(SEQ(IP).EQ.'T')) NTOT=NTOT+1
ELSE IF (STRING(J).EQ.'-')THEN
NTOT=NTOT+1
END IF
IP=IP+1
100 CONTINUE
C END OF COUNTING FOR THIS POSITION.IS TOTAL HIGH ENOUGH?
IF(NTOT.GE.MIN)THEN
ITOTP=ITOTP+1
C TEST FOR OVERFLOW
IF(ITOTP.GT.ITOTID)RETURN
C SAVE TOTAL AND POSITION
ITOT(ITOTP)=NTOT
ITOTEL(ITOTP)=IP-IDIM2+KSTART-1
END IF
IPSEQ=IPSEQ+1
200 CONTINUE
END
C SQFIT6
SUBROUTINE SQFIT6(SEQ,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINS,ITOTP,KSTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
INTEGER IUBM1
EXTERNAL IUBM1
IDIF=(IE-IS+2)-IDIM2
IPSEQ=IS-KSTART+1
ITOTP=0
DO 200 I=1,IDIF
NTOT = 0
IP=IPSEQ
DO 100 J=1,IDIM2
NTOT = NTOT + IUBM1(STRING(J),SEQ(IP))
IP=IP+1
100 CONTINUE
IF(NTOT.GE.MINS)THEN
ITOTP=ITOTP+1
IF(ITOTP.GT.ITOTID)RETURN
ITOT(ITOTP)=NTOT
ITOTEL(ITOTP)=IP-IDIM2+KSTART-1
END IF
IPSEQ=IPSEQ+1
200 CONTINUE
END
SUBROUTINE SRCHP(SEQ,IDIM1,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,
+ISYMAX,J1,J2,KBOUT)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(6)
SAVE STRING
DATA STRING/'A','A','T','A','A','A'/
CALL SHOWFU(KBOUT,'PolyA search. i.e. AATAAA search')
XMIN=J1
XMAX=J2
YMIN=0.
YMAX=MARGT
Y0=0.
IBH=MARGT
CALL VECTOM
CALL SRCHP1(SEQ,IDIM1,J1,J2,STRING,6,1,
+XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,MARGB,MARGT,ISXMAX,
+ISYMAX,Y0,IBH)
CALL VT100M
END
SUBROUTINE SRCHP1(SEQ,IDIM1,J1,J2,STRING,IDIMS,INC,
+XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,MARGB,MARGT,ISXMAX,
+ISYMAX,Y,IBH)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(6)
YP=Y+IBH
IAT=J1-INC
10 CONTINUE
IAT=IAT+INC
IDIM=J2-IAT+1
IF(IDIM.LE.0)RETURN
CALL FIND6(SEQ(IAT),IDIM,STRING,IDIMS,INC,JMATCH)
IF(JMATCH.EQ.0)RETURN
IAT=IAT+JMATCH-1
X=IAT
CALL LINE(X,X,YP,Y,XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,MARGB,
+MARGT,ISXMAX,ISYMAX)
GO TO 10
END
SUBROUTINE STOPSB(SEQ,IDIM1,J1,J2,MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX,PAA)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),PAA(5,5,5)
IMARG=NINT(MARGT/2.)
MARGB3=MARGB
MARGB5=MARGB+IMARG
MARGT3=IMARG
MARGT5=IMARG
CALL STOPS(SEQ,IDIM1,J1,J2,
+MARGL,MARGR,MARGB5,MARGT5,
+ISXMAX,ISYMAX,PAA)
CALL STOPSC(SEQ,IDIM1,J1,J2,MARGL,MARGR,
+MARGB3,MARGT3,
+ISXMAX,ISYMAX,PAA)
END
C
C STOPS
SUBROUTINE STOPS(SEQ,IDIM1,J1,J2,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,PAA)
C AUTHOR: RODGER STADEN
CHARACTER STOP(3),SEQ(IDIM1),PAA(5,5,5),BASE(5)
SAVE BASE
DATA BASE/'T','C','A','G','-'/
CALL VECTOM
XMIN=J1
XMAX=J2
YMIN=0.
C DIVIDE SCREEN INTO 3 STRIPS AND PUT BLIPS AT 1/6,3/6,5/6
YMAX=ISYMAX-(MARGB1+MARGT1)
YMAX3=YMAX/3.
BLIPH=YMAX*0.02
DO 10 ICOD=1,5
DO 10 JCOD=1,5
DO 10 KCOD=1,5
IF(PAA(KCOD,JCOD,ICOD).EQ.'*')THEN
C NEED TO KNOW THE CODON
STOP(1)=BASE(ICOD)
STOP(2)=BASE(JCOD)
STOP(3)=BASE(KCOD)
BLIPF=YMAX3/2.0
CALL PSRCH(SEQ,IDIM1,J1,J2,STOP,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
BLIPF=BLIPF+YMAX3
J11=J1+1
CALL PSRCH(SEQ,IDIM1,J11,J2,STOP,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
BLIPF=BLIPF+YMAX3
J11=J1+2
CALL PSRCH(SEQ,IDIM1,J11,J2,STOP,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
END IF
10 CONTINUE
CALL VT100M
END
C STOPSC
SUBROUTINE STOPSC(SEQ,IDIM1,J1,J2,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,PAA)
C AUTHOR: RODGER STADEN
CHARACTER STOP(3),SEQ(IDIM1),PAA(5,5,5),BASE(5)
SAVE BASE
DATA BASE/'A','G','T','C','-'/
CALL VECTOM
XMIN=J1
XMAX=J2
YMIN=0.
C DIVIDE SCREEN INTO 3 STRIPS AND PUT BLIPS AT 1/12,5/12,9/12
YMAX=ISYMAX-(MARGB1+MARGT1)
YMAX3=YMAX/3.
BLIPH=YMAX*0.02
DO 10 ICOD=1,5
DO 10 JCOD=1,5
DO 10 KCOD=1,5
IF(PAA(KCOD,JCOD,ICOD).EQ.'*')THEN
C NEED TO KNOW THE CODON
STOP(1)=BASE(KCOD)
STOP(2)=BASE(JCOD)
STOP(3)=BASE(ICOD)
BLIPF=YMAX3/4.0
CALL PSRCH(SEQ,IDIM1,J1,J2,STOP,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
BLIPF=BLIPF+YMAX3
J11=J1+1
CALL PSRCH(SEQ,IDIM1,J11,J2,STOP,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
BLIPF=BLIPF+YMAX3
J11=J1+2
CALL PSRCH(SEQ,IDIM1,J11,J2,STOP,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
END IF
10 CONTINUE
CALL VT100M
END
C
C STARTS
SUBROUTINE STARTS(SEQ,IDIM1,J1,J2,
+MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,PAA)
C AUTHOR: RODGER STADEN
CHARACTER CODON(3),SEQ(IDIM1),PAA(5,5,5),BASE(5)
SAVE BASE
DATA BASE/'T','C','A','G','-'/
CALL VECTOM
XMIN=J1
XMAX=J2
C DIVIDE SCREEN INTO 3 STRIPS AND PUT BLIPS AT 0.0,1/3,2/3
YMAX=ISYMAX-(MARGB1+MARGT1)
YMAX3=YMAX/3.
BLIPH=YMAX*0.013
YMIN=0.
DO 10 ICOD=1,5
DO 10 JCOD=1,5
DO 10 KCOD=1,5
IF(PAA(KCOD,JCOD,ICOD).EQ.'M')THEN
C NEED TO KNOW THE CODON
CODON(1)=BASE(ICOD)
CODON(2)=BASE(JCOD)
CODON(3)=BASE(KCOD)
BLIPF=0.0
CALL PSRCH(SEQ,IDIM1,J1,J2,CODON,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
BLIPF=BLIPF+YMAX3
J11=J1+1
CALL PSRCH(SEQ,IDIM1,J11,J2,CODON,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
BLIPF=BLIPF+YMAX3
J11=J1+2
CALL PSRCH(SEQ,IDIM1,J11,J2,CODON,3,3,XMAX,XMIN,YMAX,YMIN,
+ MARGL1,MARGR1,MARGB1,MARGT1,ISXMAX,ISYMAX,BLIPF,BLIPH)
END IF
10 CONTINUE
CALL VT100M
END
CHARACTER*1 FUNCTION SCOMP(CHAR1)
C AUTHOR: RODGER STADEN
CHARACTER CHAR1,CHARSU(5),CHARSL(5)
INTEGER CTONUM
EXTERNAL CTONUM,ICOMP
SAVE CHARSU,CHARSL
DATA CHARSU/'T','C','A','G','-'/
DATA CHARSL/'t','c','a','g','-'/
JCHAR=CTONUM(CHAR1)
IF(CHAR1.EQ.CHARSU(JCHAR)) THEN
SCOMP=CHARSU(ICOMP(JCHAR))
ELSE
SCOMP=CHARSL(ICOMP(JCHAR))
END IF
END
C TRANS
C SUBROUTINE TO TRANSLATE A SEQUENCE IN 1 PHASE GIVEN START & STOP
C POSITIONS AND A SEQUENCE
SUBROUTINE TRANS(SEQNCE,IDIM1,IS,IE,OUTP,PAA,IDIM2,KSTART,
+JSTRAN)
C AUTHOR: RODGER STADEN
CHARACTER SEQNCE(IDIM1),PAA(125),OUTP(IDIM2),CODON(3),TRANF,TRANB
INTEGER POUT
EXTERNAL LWRAP,TRANF,TRANB
IX2=IE
C ADD ON TO END IF OVER JOIN. REALLY A COUNTER OF
C NUMBER OF ELEMENTS TO TRANSLATE. LWRAP TAKES CARE OF OVERFLOW.
C WRAP AROUND IF OVER JOIN
IF(IX2.LE.IS)IX2=IX2+IDIM1
DO 500 I=IS-KSTART+1,IX2-KSTART+1,3
DO 400 J=1,3
IP1=I+J-1
IP1=LWRAP(IDIM1,IP1)
CODON(J)=SEQNCE(IP1)
400 CONTINUE
IP=I
IP=LWRAP(IDIM1,IP)
POUT=IP/3+1
IF(JSTRAN.EQ.0)OUTP(POUT)=TRANF(CODON,PAA)
IF(JSTRAN.EQ.1)OUTP(POUT)=TRANB(CODON,PAA)
500 CONTINUE
END
SUBROUTINE TRANSD(SEQNCE,IDIM1,IS,IE,OUTP,IDIM2,PAA,JSTRAN)
C AUTHOR: RODGER STADEN
CHARACTER SEQNCE(IDIM1),PAA(125),OUTP(IDIM2),CODON(3),TRANF,TRANB
INTEGER POUT
EXTERNAL JCODNO,TRANF,TRANB,LWRAP
POUT=0
C SET UP LOOP VALUE
IX2=IE
C ADD ON TO END IF OVER JOIN. REALLY A COUNTER OF
C NUMBER OF ELEMENTS TO TRANSLATE. LWRAP TAKES CARE OF OVERFLOW.
C WRAP AROUND IF OVER JOIN
IF(IX2.LE.IS)IX2=IX2+IDIM1
DO 500 I=IS,IX2,3
DO 400 J=1,3
IP1=I+J-1
IP1=LWRAP(IDIM1,IP1)
CODON(J)=SEQNCE(IP1)
400 CONTINUE
POUT=POUT+1
IF(JSTRAN.EQ.0)OUTP(POUT)=TRANF(CODON,PAA)
IF(JSTRAN.EQ.1)OUTP(POUT)=TRANB(CODON,PAA)
500 CONTINUE
IDIM2=POUT
END
INTEGER FUNCTION JCODNO(SEQ,IDIM,I)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
INTEGER CTONUM
EXTERNAL CTONUM,LWRAP
C DECODE NEXT 3 CHARS TO GIVE POINTERS TO AMINO ACIDS
J0=I
J0=LWRAP(IDIM,J0)
J1=LWRAP(IDIM,J0+1)
J2=LWRAP(IDIM,J1+1)
K0=CTONUM(SEQ(J0))*25
K1=CTONUM(SEQ(J1))*5
K2=CTONUM(SEQ(J2))
JCODNO=K0+K1+K2-30
END
SUBROUTINE OPENTR(SEQ,IDIM,KSTART,IDEV,KBIN,KBOUT,PAA,
+OUTP,IDIMP,FRAMEC,POSNS,MAXPS,IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,DIALOG)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIMP),PAA(125),OUTP(IDIMP)
CHARACTER HELPF*(*),FILNAM*(*),OPRATR*20
INTEGER FRAMEC(IDIMP),POSNS(MAXPS),ANSE,DIALOG
CALL SHOWFU(KBOUT,
+'Find open reading frames, translate and list')
CALL GETPAR(39,10,IOK,MINO,MINOPN,JSTRAN,
+MINLEN,MAXLEN,LINLEN,ITRAN,JTRAN,INUM,ANSE,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
MAXPOS = MAXPS
MAXO = IDIM/3
I1 = 1
I2 = IDIM
IF(DIALOG.EQ.1)CALL TRAND7(KBIN,KBOUT,I1,I2,ITRAN,
+MINLEN,MAXLEN,LINLEN,
+MINO,MAXO,MINOPN,JSTRAN,INUM,IDEVE,ANSE,JTRAN,FILNAM,
+IOK,OPRATR,
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL FILLI(FRAMEC,IDIM,1)
SEQ(IDIM+1) = SEQ(1)
SEQ(IDIM+2) = SEQ(2)
IF(JTRAN.EQ.0) THEN
IF(ANSE.EQ.1) THEN
CALL TRANEM(SEQ,IDIM,I1,I2,FRAMEC,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
ELSE IF(ANSE.EQ.2) THEN
CALL TRANEN(SEQ,IDIM,I1,I2,FRAMEC,JSTRAN,IDEVE,
+ POSNS,MAXPOS,KBOUT,OPRATR,IOK)
ELSE
IF((JSTRAN.EQ.0).OR.(JSTRAN.EQ.1))THEN
CALL TRPIRP(SEQ,IDIM,I1,I2,KSTART,MINOPN,IDEV,PAA,
+ JSTRAN,OUTP,FRAMEC,IDIMP)
ELSE IF (JSTRAN.EQ.2) THEN
CALL TRPIRP(SEQ,IDIM,I1,I2,KSTART,MINOPN,IDEV,PAA,
+ 0,OUTP,FRAMEC,IDIMP)
CALL TRPIRP(SEQ,IDIM,I1,I2,KSTART,MINOPN,IDEV,PAA,
+ 1,OUTP,FRAMEC,IDIMP)
END IF
END IF
END IF
CALL TRAN6X(SEQ,IDIMP,IDEV,I1,I2,PAA,ITRAN,JSTRAN,INUM,LINLEN,
+FRAMEC)
END
SUBROUTINE TRANEM(SEQ,IDIM,J1,J2,FRAMEC,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
C get start and end points for translation, then set up
C the integer array for tran6x
CHARACTER SEQ(IDIM)
INTEGER FRAMEC(IDIM),FRAME
KSTRAN = 0
50 CONTINUE
KSTRAN = 0
CALL TRAND8(IDIM,J1,J2,KSTRAN,IP1,IP2,'Translate',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(IP1.EQ.0) RETURN
FRAME = MOD(IP1,3)
IF(FRAME.EQ.0) FRAME = 3
IF(KSTRAN.EQ.0) THEN
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,2)
ELSE
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,3)
END IF
GO TO 50
END
SUBROUTINE TRAND8(IDIM,J1,J2,KSTRAN,IP1,IP2,PROMPT,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
C get start and end points and strand for translation
CHARACTER PROMPT*(*),HELPF*(*)
CALL GETRC2(KBIN,KBOUT,J1,J2,IDIM,IP1,IP2,PROMPT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IF(IP1.EQ.0) RETURN
IOK = 1
IOP = KSTRAN
CALL YESONO(IOP,'+ strand','- strand',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) RETURN
KSTRAN = IOP
IOK = 0
END
SUBROUTINE TRAND7(KBIN,KBOUT,I1,I2,
+ITRAN,MINLEN,MAXLEN,LINLEN,MINO,MAXO,MINOPN,JSTRAN,INUM,
+IDEVE,ANSE,JTRAN,FILNAM,IOK,OPRATR,
+IHELPS,IHELPE,HELPF,IDEVH)
C AUTHOR: RODGER STADEN
C organise user interaction for translation to screen
CHARACTER HELPF*(*),FILNAM*(*)
PARAMETER (MAXPRM = 25)
CHARACTER PROMPT(4)*(MAXPRM),OPRATR*(*)
INTEGER ANSE
C Modified 11-4-91 to make 6 phase translation more obvious
C Translate if JTRAN = 0
C Keyboard input if ANSE = 1
C feature table input if ANSE = 2
C open reading frames is ANSE = 3
C + strand if JSTRAN = 0
C - strand if JSTRAN = 1
C both strands JSTRAN = 2
C one letter codes if ITRAN = 0
C three letter codes if ITRAN = 3
C number ends of lines if INUM = 0
C Display I1 to I2
IOK = 1
J1 = I1
J2 = I2
IOP = 0
CALL YESONO(IOP,'Show translation','Hide translation',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
JTRAN = IOP
IF(JTRAN.EQ.0) THEN
PROMPT(1) = 'Typed on the keyboard'
PROMPT(2) = 'Read from a feature table'
PROMPT(3) = 'Open reading frames'
PROMPT(4) = 'All six frames'
CALL RADION('The segments to translate can be',
+ PROMPT,4,ANSE,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ANSE.LT.1) RETURN
IF(ANSE.EQ.2) THEN
CALL TRAND9(KBIN,KBOUT,IDEVE,FILNAM,OPRATR,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
IF(ANSE.EQ.3) THEN
CALL GETINT(MINO,MAXO,MINOPN,
+ 'Minimum open frame in amino acids',
+ IVAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MINOPN = IVAL
END IF
IF(ANSE.EQ.4) THEN
MINOPN = 0
JSTRAN = 2
END IF
IOP = ITRAN
IF(IOP.EQ.3) IOP = 1
CALL YESONO(IOP,'Use 1 letter codes',
+ 'Use 3 letter codes',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
IF(IOP.EQ.1) ITRAN = 3
IF(IOP.EQ.0) ITRAN = 0
END IF
CALL GTREG(KBIN,KBOUT,J1,J2,I1,I2,
+'Define section of DNA to display',
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CALL GETINT(MINLEN,MAXLEN,LINLEN,'Line length',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
LINLEN = IVAL
IF(JTRAN.EQ.0) THEN
IF(MOD(LINLEN,3).NE.0) LINLEN = 60
END IF
IF(ANSE.NE.4) THEN
IVAL = JSTRAN + 1
CALL GSTRND(IVAL,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IVAL.LT.1) THEN
IOK = 1
RETURN
END IF
JSTRAN = IVAL - 1
END IF
IOP = INUM
CALL YESONO(IOP,'Number ends of lines',
+'Number every tenth base',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) THEN
IOK = 1
RETURN
END IF
INUM = IOP
IOK = 0
END
SUBROUTINE EMBLF(IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,ANSE,OPRATR,IOK)
C AUTHOR: RODGER STADEN
C ask about keyboard or ft input. If ft open file and anse = 1
CHARACTER FILNAM*(*),HELPF*(*),OPRATR*(*)
INTEGER ANSE
IOK = 1
IOP = ANSE
CALL YESONO(IOP,'Define segments using keyboard',
+'Define segments using embl feature table',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) RETURN
ANSE = IOP
IF(ANSE.EQ.1) THEN
CALL TRAND9(KBIN,KBOUT,IDEVE,FILNAM,OPRATR,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
IOK = 0
END
SUBROUTINE TRAND9(KBIN,KBOUT,IDEVE,FILNAM,OPRATR,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
C open feature table file and get ft operator
CHARACTER FILNAM*(*),HELPF*(*)
CHARACTER OPRATR*(*)
FILNAM = ' '
CALL OPENF1(IDEVE,FILNAM,0,IOK,KBIN,KBOUT,
+'Feature table file name',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
20 CONTINUE
IOK = 1
LIN = 3
CALL GTSTR('Operator','all',OPRATR,LIN,
+KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.2) RETURN
CALL CCASE(OPRATR,1)
IF((LIN.EQ.0).OR.(OPRATR(1:3).EQ.'ALL')) OPRATR(1:1) = ' '
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 20
END IF
IOK = 0
END
SUBROUTINE TRANEN(SEQ,IDIM,J1,J2,FRAMEC,JSTRAN,IDEVE,
+POSNS,MAXPOS,KBOUT,OPRATR,IOK)
C use ft to set up array for tran6x
CHARACTER SEQ(IDIM),OPRATR*(*)
INTEGER POSNS(MAXPOS),FRAMEC(IDIM)
INTEGER EMBLFT
EXTERNAL EMBLFT
ISTRAN = 0
IF(JSTRAN.EQ.1) ISTRAN = 1
10 CONTINUE
NOBJ = 0
IOK = EMBLFT(IDEVE,KBOUT,'CDS',OPRATR,ISTRAN,
+POSNS,MAXPOS,NPOS,NOBJ)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ISTRAN.EQ.0) THEN
K = NPOS
DO 100 I=1,NOBJ
NSEG = POSNS(K)
IADD = 0
ITOT = 0
DO 50 J=K-NSEG,K-1,2
IP1 = POSNS(J)
IP2 = POSNS(J+1)
ITOT = ITOT + IP2 - IP1 + 1
IP1 = IP1 - IADD
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,2)
IADD = MOD(ITOT,3)
50 CONTINUE
K = K - NSEG - 1
100 CONTINUE
ELSE
K = NPOS
DO 200 I=1,NOBJ
NSEG = POSNS(K)
IADD = 0
ITOT = 0
DO 60 J=K-1,K-NSEG,-2
IP1 = POSNS(J-1)
IP2 = POSNS(J)
ITOT = ITOT + IP2 - IP1 + 1
IP2 = IP2 + IADD
C mbprim expect complementary strand codon positions to start
C from the same position as their counterparts on the other strand
C ie the first position should correspond to the third
C + strand 123
C - strand 123
CALL MBPRIM(FRAMEC,IDIM,IP1,IP2,-3)
IADD = MOD(ITOT,3)
60 CONTINUE
K = K - NSEG - 1
200 CONTINUE
END IF
IF((JSTRAN.EQ.2).AND.(ISTRAN.EQ.0)) THEN
ISTRAN = 1
REWIND(IDEVE)
GO TO 10
END IF
CLOSE(UNIT=IDEVE)
END
SUBROUTINE TRAND1(JSTRAN,ANSE,IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,OPRATR,IOK)
C AUTHOR: RODGER STADEN
C ask for ft or kb input, get operator and strand
INTEGER ANSE
CHARACTER FILNAM*(*),HELPF*(*),OPRATR*(*)
IOK = 1
CALL EMBLF(IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,ANSE,OPRATR,IOK)
IF(IOK.NE.0)RETURN
IF(ANSE.EQ.1)THEN
IOP = JSTRAN + 1
CALL GSTRND(IOP,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.1) THEN
IOK = 1
RETURN
END IF
JSTRAN = IOP - 1
END IF
IOK = 0
END
SUBROUTINE TRANDK(SEQ1,IDIM1,OUTP1,IDIMP,ISTART,ISTOP,IDIMT,
+IDEVE,IDEV,FILNAM,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,J1,J2,POSNS,MAXPOS)
CHARACTER HELPF*(*),OPRATR*20
INTEGER ANSE,POSNS(MAXPOS)
C AUTHOR: RODGER STADEN
CHARACTER SEQ1(IDIM1),OUTP1(IDIMP),PAA(125),FILNAM*(*)
C J1 IS THE FIRST BASE IN THE RAM BUFFER
C J2 IS THE LAST BASE IN THE RAM BUFFER
C IDIMT IS THE ACTUAL SEQUENCE LENGTH
C WRAP AROUND IS ONLY POSSIBLE IF J1=1, AND J2=IDIMT
CALL SHOWFU(KBOUT,'Translate and write protein sequence to disk')
CALL GETPAR(40,7,IOK,JSTRAN,ANSE,IOPEN,MINO,MAXO,MINOPN,JSTRAN,
+IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IOP = IOPEN
CALL YESONO(IOP,'Translate selected regions',
+'Translate open reading frames',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) RETURN
IF(IOP.EQ.1) THEN
CALL TRNPIR(SEQ1,IDIM1,J1,J2,ISTART,KBIN,KBOUT,IDEVE,PAA,
+ OUTP1,IDIMP,FILNAM,MINO,MAXO,MINOPN,JSTRAN,
+ IHELPS,IHELPE,HELPF,IDEVH)
RETURN
END IF
CALL TRAND1(JSTRAN,ANSE,IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,OPRATR,IOK)
IF(IOK.NE.0) THEN
IF(ANSE.EQ.1) CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ANSE.EQ.1) THEN
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Output file name',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL TRNEMB(SEQ1,IDIM1,J1,J2,OUTP1,PAA,JSTRAN,IDEVE,IDEV,
+ POSNS,MAXPOS,KBOUT,OPRATR,IOK)
RETURN
END IF
IOUT = -4
CALL FILLC(OUTP1,IDIMP,'-')
C
50 CONTINUE
C
CALL TRAND8(IDIMT,J1,J2,JSTRAN,I1,N1,'Translate',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(I1.NE.0) THEN
IB = N1 - I1 + 1
IF(N1.LT.I1) IB = IB + IDIM1
IAA = IB/3 + 1
IF(IAA.GE.2) THEN
IOUT = IOUT + 5
CALL TRANSD(SEQ1,IDIM1,
+ I1-ISTART+1,N1-ISTART+1,OUTP1(IOUT),IAA,PAA,JSTRAN)
IF(JSTRAN.EQ.1) CALL SQREV(OUTP1(IOUT),IAA)
IOUT = IOUT + IAA - 1
END IF
GO TO 50
END IF
IF(IOUT.GT.0) THEN
FILNAM = ' '
CALL OPENF1(IDEVE,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Output file name',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL FMTDKN(IDEVE,OUTP1,IOUT)
CLOSE(UNIT=IDEVE)
RETURN
END IF
END
SUBROUTINE TRNEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,IDEV,
+POSNS,MAXPOS,KBOUT,OPRATR,IOK)
C translate to disk using ft
CHARACTER SEQ(IDIM),OPRATR*(*),SEQW(IDIM),PAA(125),LINE*60
INTEGER POSNS(MAXPOS)
INTEGER EMBLFT,WPAIR
EXTERNAL EMBLFT,ITOSL,WPAIR
ISTRAN = 0
IF(JSTRAN.EQ.1) ISTRAN = 1
10 CONTINUE
NOBJ = 0
IOK = EMBLFT(IDEVE,KBOUT,'CDS',OPRATR,ISTRAN,
+POSNS,MAXPOS,NPOS,NOBJ)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ISTRAN.EQ.0) THEN
K = NPOS
DO 100 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
DO 50 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
1001 FORMAT(A)
50 CONTINUE
IB = IB - 1
IACIDS = IB/3 + 1
CALL TRANSD(SEQW,IDIM,1,IB,SEQW,IACIDS,PAA,ISTRAN)
LINE(1:) = '>'
IF(ITOSL(LINE(2:),POSNS(K-NSEG)).EQ.0)
+ WRITE(*,*)'Scream: ITOSL'
IF(WPAIR(LINE(22:),POSNS(K-NSEG),POSNS(K-1)).NE.0)
+ WRITE(*,*)'Scream: WPAIR'
WRITE(IDEV,1001)LINE
IF (SEQW(IACIDS).NE.'*') THEN
IACIDS = IACIDS + 1
SEQW(IACIDS) = '*'
END IF
CALL FMTDKN(IDEV,SEQW,IACIDS)
END IF
K = K - NSEG - 1
100 CONTINUE
ELSE
K = NPOS
DO 200 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
DO 60 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
60 CONTINUE
IB = IB - 1
CALL SQREV(SEQW,IB)
CALL SQCOM2(SEQW,IB)
IACIDS = IB/3 + 1
CALL TRANSD(SEQW,IDIM,1,IB,SEQW,IACIDS,PAA,0)
IF (SEQW(IACIDS).NE.'*') THEN
IACIDS = IACIDS + 1
SEQW(IACIDS) = '*'
END IF
LINE(1:) = '> complement('
IF (ITOSL(LINE(2:),POSNS(K-1)).EQ.0) WRITE(*,*)'Scream: ITOSL'
IF(WPAIR(LINE(33:),POSNS(K-NSEG),POSNS(K-1)).NE.0)
+ WRITE(*,*)'Scream: WPAIR'
LINE(32+INDEX(LINE(33:),' '):) = ')'
WRITE(IDEV,1001)LINE
CALL FMTDKN(IDEV,SEQW,IACIDS)
END IF
K = K - NSEG - 1
200 CONTINUE
END IF
IF((JSTRAN.EQ.2).AND.(ISTRAN.EQ.0)) THEN
ISTRAN = 1
REWIND(IDEVE)
GO TO 10
END IF
CLOSE(UNIT=IDEVE)
CLOSE(UNIT=IDEV)
END
SUBROUTINE CODTDK(SEQ,IDIM,J1,J2,IDEVE,FILNAM,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,POSNS,MAXPOS,SEQW)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILNAM*(*),PAA(5,5,5),OPRATR*20
REAL S1(64),S2(64)
CHARACTER SEQW(IDIM)
INTEGER POSNS(MAXPOS)
INTEGER NORM,ANSE,ANST,ANSTO
CALL SHOWFU(KBOUT,'Calculate codon table and write it to disk')
CALL GETPAR(41,5,IOK,JSTRAN,NORM,ANSE,ANST,ANSTO,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
JSTRAN = 0
NORM = 0
ANSE = 0
ANST = 0
ANSTO = 0
C ANSTO REDUNDANT
CALL FILLR(S2,64,0.0)
CALL CODTBL(IDEVE,FILNAM,S2,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,ANST,IOK)
IF(IOK.NE.0) RETURN
CALL CODND1(JSTRAN,NORM,ANSE,IDEVE,FILNAM,OPRATR,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(ANSE.EQ.1) THEN
CALL CTDEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,
+ POSNS,MAXPOS,KBIN,KBOUT,OPRATR,S1,S2,NORM,FILNAM,IOK)
RETURN
END IF
100 CONTINUE
CALL FILLR(S1,64,0.0)
CALL TRAND8(IDIM,J1,J2,JSTRAN,I1,I2,'Count over',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(I1.EQ.0) GO TO 200
IDIM1 = I2-I1+1
CALL CALCOD(S1,SEQ(I1),IDIM1)
IF(JSTRAN.EQ.1)CALL CODCOM(S1)
CALL ADDR(S1,S2,64)
IF(NORM.EQ.1)CALL NORMAA(S1,100.,PAA)
CALL WRTCOD(S1,KBOUT,PAA)
GO TO 100
200 CONTINUE
WRITE(KBOUT,9991)
9991 FORMAT(5X,'Codon totals over all genes')
IF(NORM.EQ.1) CALL NORMAA(S2,100.,PAA)
CALL WRTCOD(S2,KBOUT,PAA)
FILNAM = ' '
CALL OPENF1(IDEVE,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Name for codon table file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL WRTCOD(S2,IDEVE,PAA)
CLOSE(UNIT=IDEVE)
END
SUBROUTINE CODND1(JSTRAN,NORM,ANSE,IDEVE,FILNAM,OPRATR,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
C AUTHOR: RODGER STADEN
C get observed or normalised, kb or ft, operator, strand
INTEGER ANSE
CHARACTER FILNAM*(*),HELPF*(*),OPRATR*(*)
IOK = 1
IOP = NORM
CALL YESONO(IOP,'Show observed counts',
+'Normalize counts',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOP.LT.0) RETURN
NORM = IOP
CALL TRAND1(JSTRAN,ANSE,IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,OPRATR,IOK)
IF(IOK.NE.0) THEN
IF(ANSE.EQ.1) CLOSE(UNIT=IDEVE)
RETURN
END IF
IOK = 0
END
SUBROUTINE CTDEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,
+POSNS,MAXPOS,KBIN,KBOUT,OPRATR,S1,S2,NORM,FILNAM,IOK)
C codons to disk using ft
CHARACTER SEQ(IDIM),OPRATR*(*),SEQW(IDIM),PAA(125)
INTEGER POSNS(MAXPOS)
REAL S1(64),S2(64)
CHARACTER FILNAM*(*)
INTEGER EMBLFT
EXTERNAL EMBLFT
ISTRAN = 0
IF(JSTRAN.EQ.1) ISTRAN = 1
10 CONTINUE
NOBJ = 0
IOK = EMBLFT(IDEVE,KBOUT,'CDS',OPRATR,ISTRAN,
+POSNS,MAXPOS,NPOS,NOBJ)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ISTRAN.EQ.0) THEN
K = NPOS
DO 100 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
WRITE(KBOUT,*)'Data from end point ',POSNS(K-NSEG)
DO 50 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
C note not checking for within j1,j2
50 CONTINUE
IB = IB - 1
IB = 3*(IB/3)
CALL FILLR(S1,64,0.0)
CALL CALCOD(S1,SEQW,IB)
CALL ADDR(S1,S2,64)
IF(NORM.EQ.1)CALL NORMAA(S1,100.,PAA)
CALL WRTCOD(S1,KBOUT,PAA)
END IF
K = K - NSEG - 1
100 CONTINUE
ELSE
K = NPOS
DO 200 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
WRITE(KBOUT,*)'Data from end point ',POSNS(K-1)
DO 60 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
60 CONTINUE
IB = IB - 1
CALL SQREV(SEQW,IB)
CALL SQCOM2(SEQW,IB)
IB = 3*(IB/3)
CALL FILLR(S1,64,0.0)
CALL CALCOD(S1,SEQW,IB)
CALL ADDR(S1,S2,64)
IF(NORM.EQ.1)CALL NORMAA(S1,100.,PAA)
CALL WRTCOD(S1,KBOUT,PAA)
END IF
K = K - NSEG - 1
200 CONTINUE
END IF
IF((JSTRAN.EQ.2).AND.(ISTRAN.EQ.0)) THEN
ISTRAN = 1
REWIND(IDEVE)
GO TO 10
END IF
CLOSE(UNIT=IDEVE)
WRITE(KBOUT,9991)
9991 FORMAT(5X,'Codon totals over all genes')
IF(NORM.EQ.1) CALL NORMAA(S2,100.,PAA)
CALL WRTCOD(S2,KBOUT,PAA)
FILNAM = ' '
CALL OPENF1(IDEVE,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Name for codon table file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL WRTCOD(S2,IDEVE,PAA)
CLOSE(UNIT=IDEVE)
END
SUBROUTINE CODEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,IDEV,
+POSNS,MAXPOS,KBIN,KBOUT,OPRATR,S1,S2,S3,NORM,FILNAM,IOK)
C codons, acids, bases to screen using ft
CHARACTER SEQ(IDIM),OPRATR*(*),SEQW(IDIM),PAA(125)
INTEGER POSNS(MAXPOS)
REAL S1(64),S2(64),S3(64)
CHARACTER FILNAM*(*)
INTEGER EMBLFT
EXTERNAL EMBLFT
CALL FILLR(S2,64,0.0)
ISTRAN = 0
IF(JSTRAN.EQ.1) ISTRAN = 1
10 CONTINUE
NOBJ = 0
IOK = EMBLFT(IDEVE,KBOUT,'CDS',OPRATR,ISTRAN,
+POSNS,MAXPOS,NPOS,NOBJ)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ISTRAN.EQ.0) THEN
K = NPOS
DO 100 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
WRITE(KBOUT,*)'Data from end point ',POSNS(K-NSEG)
DO 50 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
C note not checking for within j1,j2
50 CONTINUE
IB = IB - 1
IB = 3*(IB/3)
CALL FILLR(S1,64,0.0)
CALL CALCOD(S1,SEQW,IB)
CALL ADDR(S1,S2,64)
IF(NORM.EQ.1)THEN
CALL COPYR(S1,S3,64)
CALL NORMAA(S3,100.,PAA)
CALL WRTCOD(S3,IDEV,PAA)
ELSE
CALL WRTCOD(S1,IDEV,PAA)
END IF
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL BCOMPC(S1,IDEV)
CALL AACOMP(S1,IDEV,PAA)
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
END IF
K = K - NSEG - 1
100 CONTINUE
ELSE
K = NPOS
DO 200 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
WRITE(KBOUT,*)'Data from end point ',POSNS(K-1)
DO 60 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
60 CONTINUE
IB = IB - 1
CALL SQREV(SEQW,IB)
CALL SQCOM2(SEQW,IB)
IB = 3*(IB/3)
CALL FILLR(S1,64,0.0)
CALL CALCOD(S1,SEQW,IB)
CALL ADDR(S1,S2,64)
IF(NORM.EQ.1)THEN
CALL COPYR(S1,S3,64)
CALL NORMAA(S3,100.,PAA)
CALL WRTCOD(S3,IDEV,PAA)
ELSE
CALL WRTCOD(S1,IDEV,PAA)
END IF
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL BCOMPC(S1,IDEV)
CALL AACOMP(S1,IDEV,PAA)
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
END IF
K = K - NSEG - 1
200 CONTINUE
END IF
IF((JSTRAN.EQ.2).AND.(ISTRAN.EQ.0)) THEN
ISTRAN = 1
REWIND(IDEVE)
GO TO 10
END IF
WRITE(IDEV,9991)
9991 FORMAT(5X,'Codon totals over all genes')
IF(NORM.EQ.1)THEN
CALL COPYR(S2,S3,64)
CALL NORMAA(S3,100.,PAA)
CALL WRTCOD(S3,IDEV,PAA)
ELSE
CALL WRTCOD(S2,IDEV,PAA)
END IF
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL BCOMPC(S2,IDEV)
CALL AACOMP(S2,IDEV,PAA)
END
SUBROUTINE CODONS(SEQ,IDIM,J1,J2,IDEV,IDEVE,FILNAM,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PAA,POSNS,MAXPOS,SEQW)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM),FILNAM*(*),PAA(5,5,5),SEQW(IDIM),OPRATR*20
REAL S1(64),S2(64),S3(64)
INTEGER NORM,ANSE,POSNS(MAXPOS)
CALL SHOWFU(KBOUT,
+'Calculate base, codon and amino acid compositions')
CALL GETPAR(23,3,IOK,JSTRAN,NORM,ANSE,
+IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
JSTRAN = 0
NORM = 0
ANSE = 0
CALL CODND1(JSTRAN,NORM,ANSE,IDEVE,FILNAM,OPRATR,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(ANSE.EQ.1) THEN
CALL CODEMB(SEQ,IDIM,J1,J2,SEQW,PAA,JSTRAN,IDEVE,IDEV,
+ POSNS,MAXPOS,KBIN,KBOUT,OPRATR,S1,S2,S3,NORM,FILNAM,IOK)
CLOSE(UNIT=IDEVE)
RETURN
END IF
CALL FILLR(S2,64,0.0)
100 CONTINUE
CALL FILLR(S1,64,0.0)
CALL TRAND8(IDIM,J1,J2,JSTRAN,I1,I2,'Count over',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(I1.EQ.0) GO TO 200
IDIM1 = I2-I1+1
CALL CALCOD(S1,SEQ(I1),IDIM1)
IF(JSTRAN.EQ.1)CALL CODCOM(S1)
CALL ADDR(S1,S2,64)
IF(NORM.EQ.1)THEN
CALL COPYR(S1,S3,64)
CALL NORMAA(S3,100.,PAA)
CALL WRTCOD(S3,IDEV,PAA)
ELSE
CALL WRTCOD(S1,IDEV,PAA)
END IF
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL BCOMPC(S1,IDEV)
CALL AACOMP(S1,IDEV,PAA)
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
GO TO 100
200 CONTINUE
WRITE(IDEV,9991)
9991 FORMAT(5X,'Codon totals over all genes')
IF(NORM.EQ.1)THEN
CALL COPYR(S2,S3,64)
CALL NORMAA(S3,100.,PAA)
CALL WRTCOD(S3,IDEV,PAA)
ELSE
CALL WRTCOD(S2,IDEV,PAA)
END IF
IF(IDEV.EQ.KBOUT)THEN
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
END IF
CALL BCOMPC(S2,IDEV)
CALL AACOMP(S2,IDEV,PAA)
END
SUBROUTINE CODIMP(SEQ,IDIM,FTABLE,IDFTAB,IDEVE,FILNAM,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,J1,J2,IDEV,POSNS,MAXPOS,SEQW)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
REAL FTABLE(IDFTAB)
CHARACTER SEQ(IDIM),FILNAM*(*),OPRATR*20,SEQW(IDIM)
INTEGER ANSE,POSNS(MAXPOS)
CALL FACTAB(FTABLE,99)
CALL SHOWFU(KBOUT,'Calculate codon improbability')
CALL GETPAR(28,2,IOK,JSTRAN,ANSE,
+IPAR3,IPAR4,IPAR5,
+IPAR6,IPAR7,IPAR8,IPAR9,IPAR10,
+IPAR11,IPAR12,IPAR13,IPAR14,IPAR15,
+IPAR16,IPAR17,IPAR18,IPAR19,IPAR20,
+IPAR21,IPAR22,IPAR23,IPAR24,IPAR25,
+IPAR26,IPAR27,IPAR28,IPAR29,IPAR30,
+IPAR31,IPAR32,IPAR33,IPAR34,IPAR35)
IF(IOK.NE.0) RETURN
CALL TRAND1(JSTRAN,ANSE,IDEVE,FILNAM,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,OPRATR,IOK)
IF(IOK.NE.0) THEN
IF(ANSE.EQ.1)CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ANSE.EQ.1) THEN
CALL CDIEMB(SEQ,IDIM,J1,J2,SEQW,JSTRAN,IDEVE,IDEV,
+ POSNS,MAXPOS,KBOUT,FTABLE,IDFTAB,OPRATR,IOK)
CLOSE(UNIT=IDEVE)
RETURN
END IF
100 CONTINUE
CALL TRAND8(IDIM,J1,J2,JSTRAN,I1,I2,'Count over',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
IF(I1.EQ.0) RETURN
IDIM1 = I2-I1+1
IF(JSTRAN.EQ.1) THEN
CALL SQCOPY(SEQ(I1),SEQW,IDIM1)
CALL SQREV(SEQW,IDIM1)
CALL SQCOM2(SEQW,IDIM1)
CALL CODMPP(SEQW,IDIM,FTABLE,IDFTAB,KBOUT,1,IDIM1,1,IDIM1,
+ IDIM1,IDEV)
ELSE
CALL CODMPP(SEQ,IDIM,FTABLE,IDFTAB,
+ KBOUT,1,IDIM,I1,I2,IDIM1,IDEV)
END IF
GO TO 100
END
SUBROUTINE CDIEMB(SEQ,IDIM,J1,J2,SEQW,JSTRAN,IDEVE,IDEV,
+POSNS,MAXPOS,KBOUT,FTABLE,IDFTAB,OPRATR,IOK)
C codon improbability using ft
CHARACTER SEQ(IDIM),OPRATR*(*),SEQW(IDIM)
INTEGER POSNS(MAXPOS)
REAL FTABLE(IDFTAB)
INTEGER EMBLFT
EXTERNAL EMBLFT
ISTRAN = 0
IF(JSTRAN.EQ.1) ISTRAN = 1
10 CONTINUE
NOBJ = 0
IOK = EMBLFT(IDEVE,KBOUT,'CDS',OPRATR,ISTRAN,
+POSNS,MAXPOS,NPOS,NOBJ)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ISTRAN.EQ.0) THEN
K = NPOS
DO 100 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
WRITE(KBOUT,*)'Data from end point ',POSNS(K-NSEG)
DO 50 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
C note not checking for within j1,j2
50 CONTINUE
IB = IB - 1
IB = 3*(IB/3)
CALL CODMPP(SEQW,IDIM,FTABLE,IDFTAB,KBOUT,1,IB,1,IB,IB,IDEV)
END IF
K = K - NSEG - 1
100 CONTINUE
ELSE
K = NPOS
DO 200 I=1,NOBJ
NSEG = POSNS(K)
IB = 1
IF((POSNS(K-NSEG).GE.J1).AND.(POSNS(K-1).LE.J2)) THEN
WRITE(KBOUT,*)'Data from end point ',POSNS(K-1)
DO 60 J=K-NSEG,K-1,2
L = POSNS(J+1) - POSNS(J) + 1
CALL SQCOPY(SEQ(POSNS(J)),SEQW(IB),L)
IB = IB + L
60 CONTINUE
IB = IB - 1
CALL SQREV(SEQW,IB)
CALL SQCOM2(SEQW,IB)
IB = 3*(IB/3)
CALL CODMPP(SEQW,IDIM,FTABLE,IDFTAB,KBOUT,1,IB,1,IB,IB,IDEV)
END IF
K = K - NSEG - 1
200 CONTINUE
END IF
IF((JSTRAN.EQ.2).AND.(ISTRAN.EQ.0)) THEN
ISTRAN = 1
REWIND(IDEVE)
GO TO 10
END IF
END
SUBROUTINE PLTEMB(JSTRAN,IDEVE,
+POSNS,MAXPOS,KBOUT,KEYWRD,OPRATR,YF,BLIPB,BLIPT,
+ XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C plot features
CHARACTER OPRATR*(*),KEYWRD*(*)
INTEGER POSNS(MAXPOS)
INTEGER EMBLFT
EXTERNAL EMBLFT
ISTRAN = 0
IF(JSTRAN.EQ.1) ISTRAN = 1
10 CONTINUE
NOBJ = 0
IOK = EMBLFT(IDEVE,KBOUT,KEYWRD,OPRATR,ISTRAN,
+POSNS,MAXPOS,NPOS,NOBJ)
IF(IOK.NE.0) THEN
CLOSE(UNIT=IDEVE)
RETURN
END IF
IF(ISTRAN.EQ.0) THEN
K = NPOS
DO 100 I=1,NOBJ
NSEG = POSNS(K)
DO 50 J=K-NSEG,K-1,2
CALL PLTBAR(REAL(POSNS(J)),REAL(POSNS(J+1)),
+ YF,BLIPB,BLIPT,
+ XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C note not checking for within j1,j2
50 CONTINUE
K = K - NSEG - 1
100 CONTINUE
ELSE
K = NPOS
DO 200 I=1,NOBJ
NSEG = POSNS(K)
DO 60 J=K-NSEG,K-1,2
CALL PLTBAR(REAL(POSNS(J)),REAL(POSNS(J+1)),
+ YF,BLIPB,BLIPT,
+ XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
60 CONTINUE
K = K - NSEG - 1
200 CONTINUE
END IF
IF((JSTRAN.EQ.2).AND.(ISTRAN.EQ.0)) THEN
ISTRAN = 1
REWIND(IDEVE)
GO TO 10
END IF
END
C PLOTMAP
SUBROUTINE PLTMAQ(IDEV,FILNAM,POSNS,MAXPOS,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IS,IE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
INTEGER POSNS(MAXPOS)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),OPRATR*20,KEYWRD*15
PARAMETER (IBLIPH=128)
CALL SHOWFU(KBOUT,
+'Display a map using a feature table file')
XMAX=IE
XMIN=IS
YMIN=0.
YMAX=ISYMAX
IY = IBLIPH
FILNAM = ' '
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+'Feature table file name',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
10 CONTINUE
REWIND IDEV
LIN = 3
CALL GTSTR('Feature code','CDS',KEYWRD,LIN,
+KBOUT,KBIN,INFLAG)
IF(LIN.EQ.0) KEYWRD(1:3) = 'CDS'
CALL CCASE(KEYWRD,1)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) GO TO 900
IF(KEYWRD.EQ.' ') GO TO 900
20 CONTINUE
LIN = 3
CALL GTSTR('Operator','all',OPRATR,LIN,
+KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.2) GO TO 900
CALL CCASE(OPRATR,1)
IF((LIN.EQ.0).OR.(OPRATR(1:3).EQ.'ALL')) OPRATR(1:1) = ' '
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 20
END IF
JSTRAN = 1
CALL GSTRND(JSTRAN,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
JSTRAN = JSTRAN - 1
IF(JSTRAN.LT.0) GO TO 900
MININ = 0
MAXIN = MARGT
CALL GETINT(MININ,MAXIN,IY,
+'level',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) GO TO 900
IY = IVAL
C YBASE=IBASE
YF=IY
CALL CLEARV
CALL VECTOM
C NEED BARS AT ENDS OF FEATURES FROM BLIPB TO BLIPT
BLIPB=YF-IBLIPH/2
BLIPT=YF+IBLIPH/2
CALL PLTEMB(JSTRAN,IDEV,
+POSNS,MAXPOS,KBOUT,KEYWRD,OPRATR,YF,BLIPB,BLIPT,
+ XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
GO TO 10
900 CONTINUE
CLOSE(UNIT=IDEV)
END
SUBROUTINE PLTBAR(POSNL,POSNR,YF,BLIPB,BLIPT,
+XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C plot a bar for a feature
CALL LINE(POSNL,POSNR,YF,YF,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(POSNL,POSNL,BLIPB,BLIPT,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(POSNR,POSNR,BLIPB,BLIPT,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
END
SUBROUTINE MBPRIM(FRAMEC,IDIMS,J1,J2,PRIME)
INTEGER FRAMEC(IDIMS),PRIME,FROTAT(0:2,0:2)
SAVE FROTAT
DATA FROTAT/3,2,1,1,3,2,2,1,3/
C AUTHOR RODGER STADEN
C multiplies array framec(i1-i2) by 2 or 3
C frame rotation depends on frame and sequence length
C on - strand only works for complete codons
IF(PRIME.LT.0) THEN
C special case for ft input
DO 1 I = J2-2,J1,-3
FRAMEC(I) = FRAMEC(I) * PRIME * (-1)
1 CONTINUE
RETURN
END IF
I1 = J1
I2 = J2
IF(J1.GT.J2) THEN
I2 = IDIMS
DO 5 I = I1,I2,3
FRAMEC(I) = FRAMEC(I) * PRIME
5 CONTINUE
LFRAME = MOD(IDIMS,3)
IFRAME = MOD(J1,3)
I1 = FROTAT(LFRAME,IFRAME)
I2 = J2
DO 10 I = I1,I2,3
FRAMEC(I) = FRAMEC(I) * PRIME
10 CONTINUE
ELSE
DO 20 I = I1,I2,3
FRAMEC(I) = FRAMEC(I) * PRIME
20 CONTINUE
END IF
END
SUBROUTINE SQTREE(SEQ,SEQN,IDIM,WORDP,IDE,TREE,MAXTRE,
+LEVELS,MAXLEV,IDM,KSTART,
+IDEV,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER SEQ(IDIM),HELPF*(*)
INTEGER SEQN(IDIM),WORDP(IDE),TREE(MAXTRE)
INTEGER LEVELS(MAXLEV)
PARAMETER (MAXLEN = 6,
+ MAXCON = 4*MAXLEN)
INTEGER IALPHA(5)
INTEGER CONSTS(0:MAXCON)
C INTEGER CONSTS(MAXLEN)
C PROGRAM TO REPRESENT A SEQUENCE AS A TREE
C THE FIRST LEVEL OF THE TREE IS THE SEQUENCE COMPOSITION
C THE SECOND LEVEL CONTAINS ALL THE TWO LETTER WORDS PRESENT
C AND SO ON. AS THE WORD LENGTH INCREASES THE NUMBER OF
C DIFFERENT WORDS PRESENT DECREASES, AS DOES THE NUMBER
C OF OCCURRENCES OF EACH. THE ROUTINE FOLLOWS WORDS UNTIL
C THEY ONLY OCCUR ONCE.
C IN ITS SIMPLEST FORM THE ALGORITHM STARTS WITH THE COMPOSITION
C AND FOLLOWS WORDS UNTIL THEY ONLY OCCUR ONCE. THE METHOD
C ENCODED BELOW STARTS WITH SOME MINIMUM WORD LENGTH
C USING THE HASHING METHOD OF THE SHOTGUN PROGRAMS. IT THEN
C CONVERTS TO THE DATA STRUCTURE FOR THE TREE METHOD. THE CURRENT
C DATA STRUCTURE IS AS FOLLOWS: FOR ANY WORD LENGTH THERE ARE
C NWORD DIFFERENT REPEATED WORDS, FOR EACH OF THOSE WORDS
C THERE ARE IWORD OCCURRENCES. THE TREE IS STORED IN AN ARRAY
C NAMED TREE. THE FIRST ELEMENT FOR EACH LENGTH OF WORD CONTAINS
C THE NUMBER OF DIFFERENT WORDS OF THAT LENGTH THAT ARE REPEATED,
C THE NEXT ELEMENT THE NUMBER OF OCCURRENCES OF THE FIRST WORD, THE
C NEXT ELEMENTS POINT TO THE STARTS OF THE INDIVIDUAL WORDS, UNTIL
C THE COUNT FOR THE NEXT REPEATED WORD. THE START POSITION IN THE
C TREE FOR EACH WORD LENGTH IS STORED IN ARRAY LEVEL.
C
C tree structure:
C
C word length = wordl
C number of different repeated words of length wordl = nword
C number of occurrences of each repeated word = iword
C positions of words in the sequence = pword
C
C for each length of word (level of the tree) store:
C
C nword(wordl(level)),iword(1),pword(iword(1),1),pword(iword(1),2)
C ,,, [iword(1) elements],iword(2),pword(iword(2),1)
C ,,,
C
C we also store in array levels a pointer into tree: levels(n) points to the
C start of info for words of length n
C
CALL CONNUM(SEQ,SEQN,IDIM)
C COUNT OCCURENCES OF ALPHABET
CALL FILLI(IALPHA,IDM,0)
DO 10 I=1,IDIM
IALPHA(SEQN(I))=IALPHA(SEQN(I))+1
10 CONTINUE
NALPHA=0
DO 11 I=1,IDM
IF(IALPHA(I).NE.0)NALPHA=NALPHA+1
11 CONTINUE
C CALC EXPECTED LONGEST DIRECT REPEAT
C lN = 2LOGN/LOG(1/V) - [1+LOG(1-V)/LOGV) + 0.5772/LOGV] + LOG2/LOGV
C WHERE V=SUM PI, I=1,NALPHA AND N=IDIM
V=0.
DO 12 I=1,NALPHA
V1=REAL(IALPHA(I))/REAL(IDIM)
V=V+V1*V1
12 CONTINUE
VLOG=LOG(V)
VLOGR=LOG(1.0/V)
VLOG1M=LOG(1.0-V)
REALLN=(2.*LOG(REAL(IDIM))/VLOGR)-(1+(VLOG1M/VLOG)+
+ 0.5772/VLOG)+LOG(2.)/VLOG
LENEXP = NINT(REALLN)
WRITE(KBOUT,1010)LENEXP
1010 FORMAT(' Expected length of longest repeat',I6)
LENGTH = MIN(MAXLEN,LENEXP)
MN = 1
MX = LENGTH
JLEVEL = MX
CALL GETINT(MN,MX,JLEVEL,'Minumim word length',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF (IOK.NE.0) RETURN
LENGTH = IVAL
CALL BUSY(KBOUT)
IF (LENGTH.EQ.1) THEN
CALL SQTRE1(SEQN,IDIM,TREE,MAXTRE,IALPHA,IDM,IENDT)
ELSE
CALL SETCN(CONSTS,LENGTH,IDM,MAXCON)
CALL ENCOND(SEQN,IDIM,WORDP,IDE,IDM,CONSTS,LENGTH,MAXCON)
CALL CONV(SEQN,IDIM,WORDP,IDE,TREE,MAXTRE,IENDT,LENGTH)
CALL CONNUM(SEQ,SEQN,IDIM)
END IF
C
LEVEL = LENGTH
LEVELS(LEVEL) = 1
IENDT = IENDT + 1
C
C
C
300 CONTINUE
C
C
LEVEL=LEVEL+1
IF(LEVEL.GT.MAXLEV)GO TO 650
LEVELS(LEVEL)=IENDT
IENDT=IENDT+1
NODES=TREE(LEVELS(LEVEL-1))
NBRAN=0
ITREE=LEVELS(LEVEL-1)+1
DO 400 I=1,NODES
NBRAN1=TREE(ITREE)
ITREE=ITREE+1
CALL NEWBRA(TREE,MAXTRE,IALPHA,NALPHA,NBRAN1,ITREE,
+ NBRAN2,SEQN,IDIM,IENDT,IOK)
IF(IOK.NE.0)GO TO 600
NBRAN=NBRAN+NBRAN2
ITREE=ITREE+NBRAN1
400 CONTINUE
TREE(LEVELS(LEVEL))=NBRAN
IF(NBRAN.LT.2) THEN
IF(NBRAN.EQ.0) LEVEL = LEVEL - 1
GO TO 700
END IF
GO TO 300
C
C Come here with problems
C
600 CONTINUE
CALL ERROM(KBOUT,'Not enough memory')
LEVEL = LEVEL - 1
GO TO 700
650 CONTINUE
CALL ERROM(KBOUT,'Maximum repeat length too long')
LEVEL = LEVEL - 1
C
C Come here to show results
C
700 CONTINUE
WRITE(KBOUT,1006)4*(IENDT-1),LEVEL
1006 FORMAT(' Memory used in bytes',I8,'. Length of longest repeat',I6)
MN = LENGTH
MX = LEVEL
JLEVEL = MX
CALL GETINT(MN,MX,JLEVEL,
+'Show repeat frequencies for words of at least length',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF (IOK.NE.0) RETURN
JLEVEL = IVAL
ITREE=LEVELS(JLEVEL)
DO 800 ILEVEL=JLEVEL,LEVEL
ITREE=LEVELS(ILEVEL)
WRITE(IDEV,1008)ILEVEL,TREE(ITREE)
1008 FORMAT(' For length',I6,
+ ' the number of different repeated words is',I6)
C find numbers of each word
INODES=TREE(ITREE)
ITREE=ITREE+1
IWORD=TREE(ITREE)
DO 750 NODE=1,INODES
ITREE=ITREE+IWORD+1
IWORD=TREE(ITREE)
750 CONTINUE
800 CONTINUE
C
C
801 CONTINUE
C
C
MN = LENGTH
MX = LEVEL
JLEVEL = MX
CALL GETINT(MN,MX,JLEVEL,'Show repeats for words of length',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF (IOK.NE.0) RETURN
JLEVEL = IVAL
MN = 2
MX = 9999
NOCCUR = MN
CALL GETINT(MN,MX,NOCCUR,
+'Show repeats for words occuring with frequency',
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF (IOK.NE.0) RETURN
NOCCUR = IVAL
Ccccccccccccccccccccccccccccccccccccccccccccc
ITREE=LEVELS(JLEVEL)
C find numbers of different words
INODES=TREE(ITREE)
ITREE=ITREE+1
C find number of occurences of first word
IWORD=TREE(ITREE)
C loop for each of the different words
DO 850 NODE=1,INODES
IF(IWORD.LT.NOCCUR)GO TO 876
C note this last line used to go to 850 not 876
C enough occurences so output
WRITE(IDEV,1014)(SEQ(K),K=TREE(ITREE+1)-(JLEVEL-1),
+TREE(ITREE+1))
1014 FORMAT(' ',50A1)
JJ=ITREE
DO 875 J=1,IWORD
JJ=JJ+1
WRITE(IDEV,1015)(TREE(JJ)-JLEVEL+1)+KSTART-1
1015 FORMAT(' occurs at ',I6)
875 CONTINUE
876 CONTINUE
C move tree pointer on to next word
ITREE=ITREE+IWORD+1
C point to next node
IWORD=TREE(ITREE)
850 CONTINUE
900 CONTINUE
GO TO 801
END
SUBROUTINE NEWBRA(TREE,IDTREE,IALPHA,NALPHA,NBRAN1,ITREE,
+NBRAN2,SEQ,IDSEQ,IENDT,IOK)
INTEGER IALPHA(NALPHA),TREE(IDTREE)
INTEGER SEQ(IDSEQ)
DO 10 I=1,NALPHA
IALPHA(I)=0
10 CONTINUE
C loop for all elements this node
DO 20 I=ITREE,ITREE+NBRAN1-1
C WRITE(*,*)'I',I
C point to seq
K=TREE(I)+1
C WRITE(*,*)'K',K
IF(K.LE.IDSEQ)IALPHA(SEQ(K))=IALPHA(SEQ(K))+1
20 CONTINUE
C zero number of branches next level
NBRAN2=0
C how many new branches?
DO 30 I=1,NALPHA
IF(IALPHA(I).GT.1)NBRAN2=NBRAN2+1
30 CONTINUE
IF(NBRAN2.LT.1)RETURN
C
C now store the values
DO 50 ICHAR=1,NALPHA
IF(IALPHA(ICHAR).LT.2)GO TO 50
C save thickness
IF(IENDT.GT.IDTREE)GO TO 100
TREE(IENDT)=IALPHA(ICHAR)
IENDT=IENDT+1
C now look thru sequence and save relevent pointers
DO 40 I=ITREE,ITREE+NBRAN1-1
K=TREE(I)+1
IF(K.GT.IDSEQ)GO TO 40
IF(SEQ(K).NE.ICHAR)GO TO 40
C save pointer
IF(IENDT.GT.IDTREE)GO TO 100
TREE(IENDT)=K
IENDT=IENDT+1
40 CONTINUE
50 CONTINUE
IOK = 0
RETURN
100 CONTINUE
IOK = 1
END
SUBROUTINE CONV(POSN,IDIM,WORDP,IDE,TREE,MAXTRE,IENDT,LENGTH)
INTEGER POSN(IDIM),WORDP(IDE),TREE(MAXTRE)
IENDT = 1
NWORD = 0
DO 20 I = 1,IDE
IF(WORDP(I).NE.0)THEN
IP = WORDP(I)
IF(POSN(IP).NE.0)THEN
IWORD = 1
IENDT = IENDT + 1
IT = IENDT
NWORD = NWORD + 1
K = IP
IENDT = IENDT + 1
TREE(IENDT) = K + LENGTH - 1
10 CONTINUE
IF(POSN(K).NE.0)THEN
K = POSN(K)
IENDT =IENDT + 1
TREE(IENDT) = K + LENGTH - 1
IWORD = IWORD + 1
GO TO 10
END IF
TREE(IT) = IWORD
END IF
END IF
20 CONTINUE
TREE(1) = NWORD
END
SUBROUTINE SQTRE1(SEQN,IDIM,TREE,MAXTRE,IALPHA,IDM,IENDT)
INTEGER SEQN(IDIM),TREE(MAXTRE),IALPHA(IDM)
IENDT = 2
TREE(1) = IDM - 1
DO 200 I = 1,IDM - 1
TREE(IENDT) = IALPHA(I)
IENDT = IENDT + 1
DO 100 J = 1,IDIM
IF(SEQN(J).EQ.I) THEN
TREE(IENDT) = J
IENDT = IENDT + 1
END IF
100 CONTINUE
200 CONTINUE
IENDT = IENDT - 1
END