staden-lg/src/staden/nipl.f

1208 lines
39 KiB
Fortran

C NIPL (Nucleotide interpretation program (library))
C 2-12-92 upped maxseq, maxwir to 360,000
C 9-7-92 added fasta format capability
C 29-3-1990 DSPLAZ thought repeat was class 5 corrected to 8
C 11-4-1990 Changed title to be string rather than array
C 18-4-1990 Changed to allow use of library index when appropriate
C 8-11-90 Changed call to getmf for compatibility with nip
C and replaced all calls to radio by radion
C 11-12-90 Changed library handling so added filnll and a parameter
C 3-7-91 set namlen = 60
C 4-7-91 inlined weight matrix search (copied from pipl). Should do rest!
C 18-7-91 Added titles to pattern files
C 16-12-91 Added access to pir library in codata form to pipl so make this the same
C 2-3-92 set filnam = ' ' for some calls to openf1
SUBROUTINE FMAIN()
INTEGER BOTOPT,TOPOPT
PARAMETER (NAMLEN = 60)
CHARACTER*(NAMLEN) FILNAM,HELPF,LIBLF
PARAMETER (
+ MAXSEQ=360000,
+ MXSPAN=603,
+ MAXWIN=MAXSEQ+MXSPAN,
+ MAXWIR=360000,
+ MAXD36=MAXWIR/36,
+ MAXD2=MAXWIR/2,
+ MAXD3=MAXWIR/3,
+ MAXSD2=MAXSEQ/2,
+ MAXSD3=MAXSEQ/3,
+ MAXDEV=10,
+ LIBLF = 'SEQUENCELIBRARIES',
+ LENNAM = 10)
PARAMETER (MAXMOT = 50,
+ MAXWTS = 5000,
+ IDM = 5,
+ IDME = 17)
INTEGER BESTP(MAXMOT),BESTQ(MAXMOT)
REAL BESTS(MAXMOT)
CHARACTER*(LENNAM) ENAMEL,ENAME,NAMSAV(MAXMOT)
PARAMETER (MAXDIV = 15)
INTEGER DIVDEV(MAXDIV),RSIZEN
C COMPATIBILITY WITH ANALYSEQ HELP STUFF
PARAMETER (BOTOPT=0,TOPOPT=1)
INTEGER HELPS(BOTOPT:TOPOPT),HELPE(BOTOPT:TOPOPT)
INTEGER DEVNOS(MAXDEV)
REAL WORKR(MAXWIR)
INTEGER WORKI(MAXWIR)
CHARACTER SEQ(MAXWIN),SEQW(MAXSEQ)
C MAT1 SIMPLE IDENTITY
C MAT2 IUB SCORES 0-1
C MAT3 IUB SCORES 0-36
C MAT4 INVERTED REPEAT
INTEGER MAT1(IDM,IDM),MAT2(IDME,IDME)
INTEGER MAT3(IDME,IDME),MAT4(IDM,IDM)
CHARACTER*8 KEYNS(MAXMOT)
CHARACTER LTYPE
EQUIVALENCE (WORKR,WORKI)
DATA MAT1/
+ 1,0,0,0,0,
+ 0,1,0,0,0,
+ 0,0,1,0,0,
+ 0,0,0,1,0,
+ 0,0,0,0,0/
DATA MAT2/
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
+1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,
+1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,
+0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,
+1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0/
DATA MAT3/
+ 36, 0, 0, 0, 9, 0,18,18, 0, 0,18,12,12, 0,12, 9, 0,
+ 0,36, 0, 0, 9, 0,18, 0,18,18, 0,12,12,12, 0, 9, 0,
+ 0, 0,36, 0, 9,18, 0,18, 0,18, 0,12, 0,12,12, 9, 0,
+ 0, 0, 0,36, 9,18, 0, 0,18, 0,18, 0,12,12,12, 9, 0,
+ 9, 9, 9, 9,36,18,18,18,18,18,18,27,27,27,27,36, 0,
+ 0, 0,18,18,18,36, 0, 9, 9, 9, 9, 6, 6,12,12,18, 0,
+ 18,18, 0, 0,18, 0,36, 9, 9, 9, 9,12,12, 6, 6,18, 0,
+ 18, 0,18, 0,18, 9, 9,36, 0, 9, 9,12, 6, 6,12,18, 0,
+ 0,18, 0,18,18, 9, 9, 0,36, 9, 9, 6,12,12, 6,18, 0,
+ 0,18,18, 0,18, 9, 9, 9, 9,36, 0,12, 6,12, 6,18, 0,
+ 18, 0, 0,18,18, 9, 9, 9, 9, 0,36, 6,12, 6,12,18, 0,
+ 12,12,12, 0,27, 6,12,12, 6,12, 6,36, 8, 8, 8,27, 0,
+ 12,12, 0,12,27, 6,12, 6,12, 6,12, 8,36, 8, 8,27, 0,
+ 0,12,12,12,27,12, 6, 6,12,12, 6, 8, 8,36, 8,27, 0,
+ 12, 0,12,12,27,12, 6,12, 6, 6,12, 8, 8, 8,36,27, 0,
+ 9, 9, 9, 9,36,18,18,18,18,18,18,27,27,27,27,36, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/
DATA MAT4/
+ 0,0,2,1,0,
+ 0,0,0,2,0,
+ 2,0,0,0,0,
+ 1,2,0,0,0,
+ 0,0,0,0,0/
C GET DEVICE NUMBERS
CALL UNITNO(KBIN,KBOUT,DEVNOS,MAXDEV)
WRITE(KBOUT,1000)
1000 FORMAT(/,
+' NIPL (Nucleotide interpretation program (Library))',
+' V4.1 Jul 1991',/,
+' Author: Rodger Staden',/,
+' Searches nucleotide libraries for patterns of motifs',/)
CALL INITLU(IDM)
FILNAM = ' '
CALL OPENF1(DEVNOS(1),FILNAM,1,IOK,KBIN,KBOUT,
+'Name for results file',
+HELPS(1),HELPE(1),HELPF,DEVNOS(2))
IF(IOK.NE.0)STOP
C OPEN LIBRARY
LIBIN = 1
IDEVNL = DEVNOS(7)
IDEVLL = DEVNOS(8)
IDEVEN = DEVNOS(9)
IDEVD = DEVNOS(MAXDEV)
CALL RDLIBL(FILNAM,KBIN,KBOUT,
+HELPS(1),HELPE(1),HELPF,DEVNOS(2),IDEVLL,IDEVEN,IDEVNL,
+LIBLF,LIBIN,DIVDEV,MAXDIV,IDEVD,
+LIST,ENAMEL,LIBTYP,LTYPE,NDIV,RSIZEN,NRECEN,IOK)
IF(IOK.NE.0)STOP
IDEVOT=DEVNOS(1)
J1 = 1
CALL PATTEO(SEQ(MXSPAN+1),MAXSEQ,SEQW,MAXSEQ,
+ WORKI(1),WORKI(MAXMOT+1),WORKI(2*MAXMOT+1),WORKI(3*MAXMOT+1),
+ WORKI(4*MAXMOT+1),WORKI(5*MAXMOT+1),WORKI(6*MAXMOT+1),
+ WORKI(7*MAXMOT+1),WORKI(8*MAXMOT+1),WORKI(9*MAXMOT+1),
+ WORKI(10*MAXMOT+1),WORKI(11*MAXMOT+1),WORKI(12*MAXMOT+1),
+ WORKI(13*MAXMOT+1),WORKI(14*MAXMOT+1),WORKI(15*MAXMOT+1),
+ WORKI(16*MAXMOT+1),WORKI(17*MAXMOT+1),WORKI(18*MAXMOT+1),
+ WORKI(19*MAXMOT+1),WORKI(20*MAXMOT+1),
+ WORKR(22*MAXMOT+1),WORKR(23*MAXMOT+1),
+ FILNAM,MAXMOT,MAXWTS,IDEVOT,DEVNOS(3),DEVNOS(4),J1,
+ KBIN,KBOUT,DEVNOS(5),IDM,SEQ(1),IDME,
+ MAT1,MAT2,MAT3,MAT4,NAMSAV,KEYNS,
+ WORKI(23*MAXMOT+1+MAXWTS),
+ WORKI(23*MAXMOT+MAXWTS+3001),LIST,
+ HELPS,HELPE,HELPF,DEVNOS(2),DEVNOS(6),ENAMEL,ENAME,LIBTYP,
+ IDEVEN,RSIZEN,NRECEN,IDEVNL,DIVDEV,NDIV,BESTP,BESTQ,BESTS,
+ LTYPE)
C NB HAVE SET MAXIMUM COMBINED STRING LENGTH TO 3000 ON PREVIOUS LINE
C AND MAX INTEGER VERSION OF SEQUENCE IS WHATEVER IS LEFT AND THIS IS NOT
C SENT OR CHECKED BY THE CODE !!!!!!!!!!!!!!!!!!!!!!!!!!
C
900 CONTINUE
END
SUBROUTINE PATTEO(SEQ,MAXSEQ,STRING,MAXSTR,
+LENGTH,CLASS,RELMOT,RANGES,RANGEL,RANGET,RANGEM,IENTRY,
+START2,IEND2,WTSTR,START,IEND,MATCHQ,RELEND,MATCHP,
+STRNGS,LAST5,LAST3S,LAST3E,MATCHS,CUTOFF,WEIGHT,FILNAM,
+MAXMOT,MAXWTS,
+IDEV1,IDEV2,IDEV3,
+KSTART,KBIN,KBOUT,
+IDEV4,IDM,COMBIN,IDME,
+ MAT1,MAT2,MAT3,MAT4,NAMSAV,KEYNS,STRNGI,SEQI,LIST,
+ IHELPS,IHELPE,HELPF,IDEVH,IDEVN,ENAMEL,NAMIN,LIBTYP,
+ IDEVEN,RSIZEN,NRECEN,IDEVNL,DIVDEV,NDIV,BESTP,BESTQ,BESTS,
+ LTYPE)
INTEGER DIVDEV(NDIV),RSIZEN
INTEGER LENGTH(MAXMOT),CLASS(MAXMOT),RELMOT(MAXMOT)
INTEGER RANGES(MAXMOT),RANGEL(MAXMOT)
INTEGER RANGET(MAXMOT),RANGEM(MAXMOT),IENTRY(MAXMOT)
INTEGER START2(MAXMOT),IEND2(MAXMOT)
CHARACTER SEQ(MAXSEQ),STRING(MAXSTR)
INTEGER WTSTR(MAXMOT),START(MAXMOT),IEND(MAXMOT)
INTEGER MATCHQ(MAXMOT),RELEND(MAXMOT)
INTEGER MATCHP(MAXMOT),STRNGS(MAXMOT)
INTEGER LAST5(MAXMOT),LAST3S(MAXMOT),LAST3E(MAXMOT)
REAL WEIGHT(MAXWTS),CUTOFF(MAXMOT),MATCHS(MAXMOT)
REAL MINSCR,MAXSCR
CHARACTER FILNAM*(*),NAMIN*(*),ENAMEL*(*),HELPF*(*)
CHARACTER*(*) KEYNS(MAXMOT),NAMSAV(MAXMOT)
CHARACTER COMBIN(MAXMOT),LTYPE
C NB PROBLEM ABOUT USING MAXSEQ AS DIMENSION!!!!!!!!!!!!
INTEGER STRNGI(MAXSTR),SEQI(MAXSEQ)
INTEGER BESTP(MAXMOT),BESTQ(MAXMOT),ENTRYN
REAL BESTS(MAXMOT)
CHARACTER TITLE*60,TITLEP*80
C MAT1 SIMPLE IDENTITY
C MAT2 IUB SCORES 0-1
C MAT3 IUB SCORES 0-36
C MAT4 INVERTED REPEAT
INTEGER MAT1(IDM,IDM),MAT2(IDME,IDME)
INTEGER MAT3(IDME,IDME),MAT4(IDM,IDM)
REAL EXPECC(4)
PARAMETER (MAXCLS = 8)
PARAMETER (MAXPRM = 25)
CHARACTER PROMPT(5)*(MAXPRM)
SAVE EXPECC
DATA EXPECC/.25,.25,.25,.25/
IDSEQ = 1000
C ZERO ARRAYS
C
CALL FILLI(LENGTH,MAXMOT,0)
CALL FILLI(CLASS,MAXMOT,0)
CALL FILLI(RELMOT,MAXMOT,0)
CALL FILLI(RANGES,MAXMOT,0)
CALL FILLI(RANGEL,MAXMOT,0)
CALL FILLI(RANGET,MAXMOT,0)
CALL FILLI(RANGEM,MAXMOT,0)
CALL FILLI(IENTRY,MAXMOT,0)
CALL FILLI(START2,MAXMOT,0)
CALL FILLI(IEND2,MAXMOT,0)
CALL FILLI(WTSTR,MAXMOT,0)
CALL FILLI(START,MAXMOT,0)
CALL FILLI(IEND,MAXMOT,0)
CALL FILLI(MATCHQ,MAXMOT,0)
CALL FILLI(RELEND,MAXMOT,0)
CALL FILLI(MATCHP,MAXMOT,0)
CALL FILLI(STRNGS,MAXMOT,0)
CALL FILLI(LAST5,MAXMOT,0)
CALL FILLI(LAST3S,MAXMOT,0)
CALL FILLI(LAST3E,MAXMOT,0)
CALL FILLR(CUTOFF,MAXMOT,0.0)
CALL FILLR(MATCHS,MAXMOT,0.0)
CALL FILLR(WEIGHT,MAXWTS,0.0)
CALL FILLC(COMBIN,MAXMOT,'A')
CALL FILLI(BESTP,MAXMOT,0)
CALL FILLC(BESTQ,MAXMOT,0)
CALL FILLC(BESTS,MAXMOT,0.)
ITOTAL = 0
CALL SETCMP(EXPECC,4)
PROMPT(1) = 'Motif by motif'
PROMPT(2) = 'Inclusive'
PROMPT(3) = 'Scores only'
PROMPT(4) = 'Complete padded sequences'
PROMPT(5) = 'Padded sections'
IOPT = 1
CALL RADION('Select results display mode',PROMPT,5,IOPT,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.1) RETURN
IF(IOPT.EQ.4)THEN
MININ = 1
MAXIN = 9999
JSTART = 1
WRITE(KBOUT,1020)
1020 FORMAT(
+' For output option 4, we need to position the first motifs',/,
+' in a pattern so that they are aligned with one another')
CALL GETINT(MININ,MAXIN,JSTART,'Position of first motif',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
JSTART = IVAL
END IF
JOPT = 0
CALL YESNO(JOPT,'Report all matches',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(JOPT.LT.0) RETURN
FILNAM = ' '
CALL OPENF1(IDEV3,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Pattern definition file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
JDEV = IDEV3
C
C GET MOTIF DEFINITIONS
C
C RETURN STRING LENGTH FOR COMPATIBILITY WITH LIB SEARCH
NSTRNG = MAXSTR
CALL GETMF(KBIN,KBOUT,STRING,NSTRNG,ISTRNG,
+ LENGTH,MAXMOT,CLASS,RELMOT,RANGES,RANGEL,
+ RANGET,RANGEM,
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,IDEV2,
+ WTSTR,JDEV,IOK,RELEND,IDSEQ,IDEV4,IDM,COMBIN,
+ MAXCLS,
+ MAT1,IDM,MAT2,IDME,MAT3,IDME,MAT4,IDM,
+ PMINT,PMAXT,PROBT,EXPTT,
+ IHELPS,IHELPE,HELPF,IDEVH,KEYNS,NAMSAV,FILNAM,0,TITLEP)
IF(IOK.NE.0)RETURN
IF(NMOT.LT.1)RETURN
C
C
C DISPLAY THE SIGNAL DESCRIPTION
C
RANGES(1) = 1
CALL DESSIG(
+ KBOUT,STRING,MAXSTR,
+ LENGTH,CLASS,RELMOT,RANGES,RANGEL,
+ RANGET,RANGEM,
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,
+ WTSTR,RELEND,COMBIN,KEYNS,TITLEP)
WRITE(KBOUT,2003)PROBT
2003 FORMAT(' Probability of finding pattern = ',E10.4)
WRITE(KBOUT,2004)EXPTT
2004 FORMAT(' Expected number of matches per 1000 residues = ',E10.4)
CALL GETRL(0.,1.,1.,'Maximum pattern probability',
+XP,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
PMINC = XP
CALL GETRL(-9999.,9999.,-9999.,'Minimum pattern score',
+XP,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CUTSCR = XP
MINSCR = 9999999.
MAXSCR = -9999999.
IF(NSTRNG.GT.0)CALL CONNUM(STRING,STRNGI,NSTRNG)
C
C
ICREC = 0
IFINEX = 0
NENTRY = 0
ENTRYN = 0
20 CONTINUE
IDSEQ = MAXSEQ
IF(LIBTYP.EQ.1) THEN
CALL CDROML(LIST,NAMIN,ENAMEL,
+ IDEVEN,RSIZEN,NRECEN,IDEVNL,SEQ,IDSEQ,
+ DIVDEV,NDIV,ICREC,IFINEX,TITLE,KBOUT,LTYPE,IOK)
IF(IOK.NE.0) GO TO 900
FILNAM = NAMIN
ELSE IF(LIBTYP.EQ.2) THEN
CALL RDPIRA(SEQ,IDSEQ,
+ IDEVEN,KBOUT,TITLE,FILNAM,LIST,ENAMEL,IDEVNL)
ELSE IF(LIBTYP.EQ.3) THEN
CALL RDFASA(SEQ,IDSEQ,
+ IDEVEN,KBOUT,TITLE,FILNAM,LIST,ENAMEL,IDEVNL,ENTRYN)
ELSE
RETURN
END IF
IF(IDSEQ.LT.1)GO TO 900
NENTRY = NENTRY + 1
C CONVERT TO INTEGER
IF(IDSEQ.GT.0)CALL CONNUM(SEQ,SEQI,IDSEQ)
C
RANGES(1) = 1
RANGEL(1) = IDSEQ
C
C DO THE SEARCH
C
CALL SRCSIH(KBIN,KBOUT,WTSTR,LENGTH,CLASS,
+RANGES,RANGEL,START,IEND,RELMOT,MATCHP,STRNGS,WEIGHT,
+MAXWTS,CUTOFF,MATCHS,NMOT,STRING,MAXSTR,SEQ,IDSEQ,
+RANGET,RANGEM,IENTRY,START2,IEND2,MATCHQ,RELEND,
+IDEV1,LAST5,LAST3S,LAST3E,IOPT,
+ITOTAL,KSTART,IDM,COMBIN,
+CUTSCR,MINSCR,MAXSCR,IDME,
+SEQI,STRNGI,TITLE,FILNAM,
+PMINT,PMAXT,PROBT,MAT1,MAT2,MAT3,
+MAT4,PMINC,IDEVN,KEYNS,BESTP,BESTQ,BESTS,JOPT)
C
C
GO TO 20
C
C
900 CONTINUE
WRITE(KBOUT,1002)ITOTAL
1002 FORMAT(' Total matches found',I7)
WRITE(KBOUT,1006)MINSCR,MAXSCR
1006 FORMAT(' Minimum and maximum observed scores=',2F12.2)
WRITE(KBOUT,1009)NENTRY
1009 FORMAT(' ',I7,' Entries processed')
END
SUBROUTINE SRCSIH(KBIN,KBOUT,WTSTR,LENGTH,CLASS,
+RANGES,RANGEL,START,IEND,RELMOT,MATCHP,STRNGS,WEIGHT,
+MAXWTS,CUTOFF,MATCHS,NMOT,STRING,MAXSTR,SEQ,IDSEQ,
+RANGET,RANGEM,IENTRY,START2,IEND2,MATCHQ,RELEND,
+IDEVOT,LAST5,LAST3S,LAST3E,
+IOPT,ITOTAL,KSTART,IDM,COMBIN,
+CUTSCR,MINSCR,MAXSCR,IDME,SEQI,STRNGI,TITLE,FILNAM,
+PMINT,PMAXT,PROBT,MAT1,MAT2,MAT3,
+MAT4,PMINC,IDEVN,KEYNS,BESTP,BESTQ,BESTS,JOPT)
C ROUTINE TO SEARCH FOR SIGNALS COMPOSED OF MOTIFS
C WEIGHT = WEIGHTS FOR MATRICES
C CUTOFF = CUTOFF SCORES
C LENGTH = MOTIF LENGTHS
C CLASS = MOTIF CLASS
C COMBIN = LOGICAL COMBINATION A, O, N
C WTSTR = POINTER TO WEIGHT STARTS
C RANGES = RANGES START
C RANGEL = RANGE LENGTH (A DISTANCE MEASURED FROM RANGES)
C RELMOT = MOTIF NUMBER THAT A RANGE IS RELATIVE TO IE THE
C FIRST MOTIF'S RANGE IS RELATIVE TO MOTIF 0, BUT
C ANY OTHER MOTIF MAY HAVE TO BE DEFINED RELATIVE
C TO ANY OTHER. THE MOST COMMON WOULD BE THE FIRST
C MOTIF OR THE LAST ONE SEARCHED FOR.
C RELEND = IS A SPECIAL CASE FLAG FOR STEMS. IT ALLOWS OTHER
C MOTIFS TO HAVE THEIR POSITIONS RELATIVE TO THE 3' SIDE
C OF A STEM. IT IS 5 FOR THE 5 PRIME SIDE, 3 FOR 3' SIDE
C START = RANGE START DURING SEARCH (SOME POSITIONS MAY HAVE
C BEEN TRIED)
C IEND = RANGE END POSITION FOR CURRENT INITIAL START (WHEN IT
C IS RESET DEPENDS ON WHETHER IT IS DEFINED RELATIVE TO
C THE FIRST OR THE PREVIOUS MOTIF. IF IT IS DEFINED RELATIVE
C TO THE FIRST MOTIF IT IS RESET WHEN WE FIND A MATCH FOR THE
C FIRST MOTIF. IF IT IS DEFINED RELATIVE TO THE PREVIOUS MOTIF
C WE MUST RESET WHEN WE MOVE FORWARD ONE MOTIF. I THINK THIS
C CAN BE TAKEN CARE OF BY UPDATING ALL THOSE MOTIFS THAT ARE
C DEFINED TO THE CURRENT MOTIF EVERY TIME WE MOVE FORWARD
C ONE MOTIF (IE IT INCLUDES THE FIRST MOTIF SO IT IS NOT A
C SPECIAL CASE). OTHERWISE WE UPDATE POSITIONS WHEN WE FIND
C A MATCH FOR THEM (WE SET TO THE MATCH POSITION PLUS 1)
C MATCHP = LIST OF CURRENT MATCH POSITIONS FOR EACH MOTIF
C MATCHS = LIST OF CURRENT MATCH SCORES FOR EACH MOTIF
C IFOUND = A FLAG TO INDICATE SUCCESS OR FAILURE OF A SEARCH ROUTINE
C 1 = SUCCESS, 0 = FAIL
C STRNGS = POINTER TO STRING STARTS IN CHARACTER ARRAY STRING
C TEMPORARY VALUES ARE:
C MOTIF = ACTUAL MOTIF NUMBER
C ICLASS = CLASS
C ILEN = LENGTH OF MOTIF
C CUT = CUTOFF
C WT = START OF WEIGHTS FOR THIS MOTIF
C ISTRST = START OF STRING
C RANGET = START OF 3' RANGE FOR STEM SEARCHES
C RANGEM = END OF 3' RANGE FOR STEM SEARCHES
C IENTRY = FLAG TO SIGNIFY MORE 3' STEM POSITIONS FOR LAST 5' START
C 0 = NONE, ON RETURN FROM MOTIF6 IT CONTAINS THE 3' MATCH
C POSITION
C MATCHQ = MATCH POSITION FOR STEM SEARCH
C COMB = LOGICAL COMBINATION A, O, N
INTEGER WTSTR(NMOT),LENGTH(NMOT),CLASS(NMOT)
INTEGER RANGES(NMOT),RANGEL(NMOT),START(NMOT),IEND(NMOT)
INTEGER RELMOT(NMOT),MATCHP(NMOT),STRNGS(NMOT)
INTEGER RANGET(NMOT),RANGEM(NMOT),IENTRY(NMOT),RELEND(NMOT)
INTEGER START2(NMOT),IEND2(NMOT),MATCHQ(NMOT)
INTEGER LAST5(NMOT),LAST3S(NMOT),LAST3E(NMOT)
REAL WEIGHT(MAXWTS),CUTOFF(NMOT),MATCHS(NMOT)
REAL MINSCR,MAXSCR
CHARACTER SEQ(IDSEQ),STRING(MAXSTR)
CHARACTER COMBIN(NMOT),COMB
INTEGER STRNGI(MAXSTR),SEQI(IDSEQ)
CHARACTER TITLE*(*),FILNAM*(*)
CHARACTER*(*) KEYNS(NMOT)
C MAT1 SIMPLE IDENTITY
C MAT2 IUB SCORES 0-1
C MAT3 IUB SCORES 0-36
C MAT4 INVERTED REPEAT
INTEGER MAT1(IDM,IDM),MAT2(IDME,IDME)
INTEGER MAT3(IDME,IDME),MAT4(IDM,IDM)
INTEGER BESTP(NMOT),BESTQ(NMOT)
REAL BESTS(NMOT),MAXSS,MINMSS
PARAMETER (MINMSS = -999999.)
C
C
C INITIALIZE
JMOT = 0
IRET = 0
5 CONTINUE
JMOT = JMOT + 1
IF(JMOT.LE.NMOT)THEN
IF(RELMOT(JMOT).EQ.0)THEN
START(JMOT) = RANGES(1)
IEND(JMOT) = RANGES(1) + RANGEL(1) -1
GO TO 5
END IF
END IF
MOTIF = 1
ICLASS = CLASS(1)
ILEN = LENGTH(1)
CUT = CUTOFF(1)
IWT = WTSTR(1)
ISTRST = STRNGS(1)
IENTRY(1) = 0
COMB = COMBIN(1)
DO 10 I = 1,NMOT
MATCHP(I) = 0
10 CONTINUE
MAXSS = MINMSS
C
C
C
C
C
100 CONTINUE
C
C
C THIS A CLASS CLASS MOTIF, PERFORM THE APPROPRIATE SEARCH IF THE START
C POSITION IS >0. (IF IT IS NOT THE CURRENT MOTIF IS A NOT THAT HAS
C ALREADY BEEN SEARCHED FOR
C
IFOUND = 0
IF(START(MOTIF).GT.0)THEN
C
C
IF(ICLASS.EQ.1)THEN
CALL MOTIF1(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,
+ CUTOFF(MOTIF),0)
ELSE IF(ICLASS.EQ.2)THEN
CALL MOTIF2(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),CUT,MATCHP(MOTIF),MATCHS(MOTIF),IFOUND)
ELSE IF(ICLASS.EQ.3)THEN
CALL MOTFI3(SEQI,IDSEQ,STRNGI(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),CUT,MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,MAT3,
+ IDME)
ELSE IF(ICLASS.EQ.4)THEN
CALL MOTFI4(SEQI,IDSEQ,ILEN,START(MOTIF),
+ IEND(MOTIF),WEIGHT(IWT),CUT,MATCHP(MOTIF),MATCHS(MOTIF),
+ IFOUND,IDM)
ELSE IF(ICLASS.EQ.5)THEN
CALL MOTFI4(SEQI,IDSEQ,ILEN,START(MOTIF),
+ IEND(MOTIF),WEIGHT(IWT),CUT,MATCHP(MOTIF),MATCHS(MOTIF),
+ IFOUND,IDM)
ELSE IF(ICLASS.EQ.6)THEN
CALL MOTIF6(SEQI,IDSEQ,MAT4,LENGTH(MOTIF),START(MOTIF),
+ IEND(MOTIF),RANGET(MOTIF),RANGEM(MOTIF),
+ CUTOFF(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),
+ IENTRY(MOTIF),IFOUND,MATCHQ(MOTIF),
+ LAST5(MOTIF),LAST3S(MOTIF),LAST3E(MOTIF),IDM)
ELSE IF(ICLASS.EQ.7)THEN
CALL MOTIF1(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,
+ CUTOFF(MOTIF),1)
ELSE IF(ICLASS.EQ.8)THEN
CALL MOTIF8(SEQI,IDSEQ,MAT1,LENGTH(MOTIF),START(MOTIF),
+ IEND(MOTIF),RANGET(MOTIF),RANGEM(MOTIF),
+ CUTOFF(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),
+ IENTRY(MOTIF),IFOUND,MATCHQ(MOTIF),
+ LAST5(MOTIF),LAST3S(MOTIF),LAST3E(MOTIF),IDM)
ELSE
WRITE(KBOUT,*)'UNKNOWN CLASS!!'
END IF
C
C
END IF
C
C
C MATCH FOUND WHEN MATCH WANTED ?
C
C
C
IF(((IFOUND.EQ.0).AND.(COMB.NE.'N')).OR.
+ ((IFOUND.GT.0).AND.(COMB.EQ.'N')))THEN
C
C NO SO GO BACK OR SIDEWAYS ONE MOTIF
C
C
CALL BAKSID(CLASS,LENGTH,CUTOFF,STRNGS,NMOT,
+ MOTIF,ICLASS,ILEN,CUT,IWT,ISTRST,WTSTR,
+ RELMOT,START,IEND,MATCHQ,RANGES,RANGEL,RELEND,IRET,MATCHP,
+ COMBIN,COMB)
C
C
C IF CANT GO BACK ANY FURTHER QUIT
IF(IRET.NE.0) THEN
IF(MAXSS.GT.MINMSS) THEN
CALL DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
+ CLASS,MATCHQ,IOPT,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
+ TITLE,FILNAM,
+ PMINT,PMAXT,PROBT,IDM,MAT1,IDM,MAT2,IDME,MAT3,IDM,
+ MAT4,IDM,WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,
+ IDEVN,KEYNS,BESTP,BESTQ,BESTS,2,MAXSS)
ITOTAL = ITOTAL + 1
END IF
RETURN
END IF
C
C
ELSE
C
C
C MATCH FOUND.
C
C
C PREPARE FOR NEXT SEARCH THIS MOTIF BY INCREMENTING POINTER
C TO SEARCH RANGE (NOT FOR CLASS 6 WHICH IS HANDLED BY MOTIF6)
C
C
IF(COMB.EQ.'N')THEN
START(MOTIF) = -9
ELSE
IF((ICLASS.NE.6).AND.(ICLASS.NE.8))
+ START(MOTIF) = MATCHP(MOTIF) + 1
IF(ICLASS.EQ.7)START(MOTIF) = MATCHP(MOTIF) + CUTOFF(MOTIF)
END IF
C
C TRY GOING FORWARD ONE MOTIF
C
C
CALL FORWAD(CLASS,LENGTH,CUTOFF,NMOT,
+ MOTIF,ICLASS,ILEN,CUT,IWT,RELMOT,START,IEND,
+ RANGES,RANGEL,STRNGS,ISTRST,WTSTR,IDSEQ,IENTRY,
+ RANGET,RANGEM,START2,IEND2,MATCHQ,RELEND,MATCHP,IDSPLY,
+ COMBIN,COMB)
C
C
C
C IS THIS THE LAST MOTIF? IF SO DISPLAY THE MATCH
C
C
IF(IDSPLY.EQ.1)THEN
IF(JOPT.NE.1) ITOTAL = ITOTAL + 1
CALL DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
+ CLASS,MATCHQ,IOPT,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
+ TITLE,FILNAM,
+ PMINT,PMAXT,PROBT,IDM,MAT1,IDM,MAT2,IDME,MAT3,IDM,
+ MAT4,IDM,WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,
+ IDEVN,KEYNS,BESTP,BESTQ,BESTS,JOPT,MAXSS)
C
C
C HORRIBLE SPECIAL CASE - IF LAST MOTIF IS NOTTED WE MUST MOVE BACK AFTER
C DISPLAY
IF(COMB.EQ.'N')THEN
CALL BAKSID(CLASS,LENGTH,CUTOFF,STRNGS,NMOT,
+ MOTIF,ICLASS,ILEN,CUT,IWT,ISTRST,WTSTR,
+ RELMOT,START,IEND,MATCHQ,RANGES,RANGEL,RELEND,IRET,MATCHP,
+ COMBIN,COMB)
C
C
C IF CANT GO BACK ANY FURTHER QUIT
IF(IRET.NE.0) THEN
IF(MAXSS.GT.MINMSS) THEN
CALL DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
+ CLASS,MATCHQ,IOPT,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
+ TITLE,FILNAM,
+ PMINT,PMAXT,PROBT,IDM,MAT1,IDM,MAT2,IDME,MAT3,IDM,
+ MAT4,IDM,WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,
+ IDEVN,KEYNS,BESTP,BESTQ,BESTS,2,MAXSS)
ITOTAL = ITOTAL + 1
END IF
RETURN
END IF
END IF
END IF
C
C
END IF
C
C
C GO BACK FOR NEXT SEARCH
C
C
GO TO 100
END
C*********************************************************************
SUBROUTINE MOTIF8(SEQ,IDSEQ,MATRIX,LENGTH,I5STAR,I5END,
+ I3STAR,I3END,CUTOFF,MATCHP,MATCHS,
+ IENTRY,IFOUND,MATCHQ,
+ LAST5,LAST3S,LAST3E,IDM)
C AUTHOR: RODGER STADEN
INTEGER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM),REPEET
REAL MATCHS
EXTERNAL REPEET
C WE HAVE A START POSITION FOR THE 5' END OF THE 5' END OF
C A POTENTIAL REPEAT I5STAR AND AN END DEFINED BY A RANGE I5END
C WE HAVE A REPEAT LENGTH LENGTH
C WE HAVE A RANGE OF POSITIONS FOR THE 3' STEM TO START
C I3STAR TO I3END
C TRY THE TIGHTEST LOOPS FIRST
C BUT FIRST WE MAY HAVE TO FINISH A PREVIOUS SEARCH
C THIS IS DENOTED BY IENTRY NE 0.
C NOTE IENTRY IS ALSO USED TO RETURN THE 3' MATCH POSITION
ICUT = CUTOFF
C WRITE(*,*)'ICUT',ICUT
IFOUND = 0
JENTRY = IENTRY
IENTRY = 0
IF(I5STAR.LT.1)I5STAR=1
IF((I5STAR+I3STAR+LENGTH-2).GT.IDSEQ)RETURN
IF(JENTRY.NE.0)THEN
I1 = LAST5
C WRITE(*,*)'I1,LAST3S,LAST3E',I1,LAST3S,LAST3E
DO 50 J=LAST3S+1,LAST3E
J1 = J
ISUM = REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
IF(ISUM.GE.ICUT)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
LAST3S = J1
RETURN
END IF
50 CONTINUE
C NOW MOVE 5' STEM START POSITION (WE HAVE JUST FINISHED THE LAST)
C TO THE LAST MATCH + 1
I5STAR = MATCHP + 1
END IF
C SET ENTRY FLAG TO ZERO TO SIGNIFY LAST SEARCH NOW COMPLETED
IENTRY = 0
ISUM = 0
LOOPI1 = I5STAR
IF((I5STAR+I3STAR+LENGTH-2).GT.IDSEQ)RETURN
LOOPI2 = MIN(IDSEQ-2*LENGTH+1,I5END)
C WRITE(*,*)'IDSEQ,LENGTH,I5STAR,I5END',
C +IDSEQ,LENGTH,I5STAR,I5END
C
C TRY ALL STEM STARTS FROM 5' START TO 5' END
C
C
DO 200 I = LOOPI1,LOOPI2
C
C
I1 = I
C
C TRY ALL LOOPS FROM 3' START TO 3' END
C
LOOPJ1 = I + I3STAR -1
IF((LOOPJ1+LENGTH-1).GT.IDSEQ)RETURN
LOOPJ2 = MIN(IDSEQ-LENGTH+1,I+I3END-1)
C WRITE(*,*)'I3STAR,I3END',I3STAR,I3END
C
C
C
DO 100 J = LOOPJ1,LOOPJ2
C
C
C
J1 = J
C IN REPEAT NOTE THAT
C THE 5' END POINTER I1 GOES FORWARDS
C THE 3' END POINTER J1 GOES FORWARDS
C
ISUM = REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
C WRITE(*,*)ISUM
IF(ISUM.GE.ICUT)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
C SAVE CURRENT POSITION FOR LATER ENTRIES
LAST5 = I1
LAST3S = J1
LAST3E = LOOPJ2
RETURN
END IF
100 CONTINUE
200 CONTINUE
END
C*********************************************************************
INTEGER FUNCTION REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I5P,I3P,IDM)
INTEGER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM)
C THE 5' END POINTER GOES FORWARDS
C THE 3' END POINTER GOES FORWARDS
L=0
I5=I5P-1
I3=I3P-1
DO 100 I=1,LENGTH
I5 = I5 + 1
I3 = I3 + 1
C WRITE(*,*)'I5,I3',I5,I3
L5 = SEQ(I5)
L3 = SEQ(I3)
L = L + MATRIX(L5,L3)
100 CONTINUE
REPEET = L
END
SUBROUTINE PADOUT(IDEV,CHAR,LINLEN,NCHAR)
CHARACTER CHAR
C HOW MANY LINES?
NLINE = 1 + (NCHAR-1)/LINLEN
K1 = 1
K2 = MIN(NCHAR,LINLEN)
DO 10 I = 1,NLINE
WRITE(IDEV,1000,ERR=20)(CHAR,K=K1,K2)
K1 = K2 + 1
K2 = K1 + LINLEN - 1
K2 = MIN(K2,NCHAR)
10 CONTINUE
20 CONTINUE
1000 FORMAT(' ',60A1)
END
C*********************************************************************
SUBROUTINE MOTFI3(SEQ,IDIM1,STRING,IDIM2,ISTART,IEND,CUTOFF,
+MATCHP,MATCHS,IFOUND,MATRIX,IDM)
INTEGER SEQ(IDIM1),STRING(IDIM2)
INTEGER MATRIX(IDM,IDM)
REAL MATCHS
IFOUND = 0
IF(ISTART.LT.1)ISTART=1
IF(ISTART.GT.IDIM1)RETURN
CALL SQFTI5(SEQ,IDIM1,STRING,IDIM2,ISTART,IEND,CUTOFF,MATCHS,
+IFOUND,MATRIX,IDM)
IF(IFOUND.EQ.0)RETURN
C SAVE MATCH POSITION
MATCHP = IFOUND
RETURN
END
C*********************************************************************
SUBROUTINE SQFTI5(SEQ,IDIM1,STRING,IDIM2,
1IS,IE,MINSC,MATCHS,IFOUND,MATRIX,IDM)
C AUTHOR: RODGER STADEN
INTEGER SEQ(IDIM1),STRING(IDIM2)
REAL MATCHS,MINSC
INTEGER MATRIX(IDM,IDM)
MINSCR = MINSC
C
IDIF=(IE-IS+2)-IDIM2
C IDIF IS THE NUMBER OF POSNS TO TRY
C IPSTR GOES FROM 1 TO IDIM2 IDIF TIMES
C TRY ALL POSSIBLE POSITIONS FOR MATCHING AND SCORE FOR EACH
C POINT TO ARRAY ELEMENT CORRESPONDING TO FIRST BASE
IPSEQ=IS
DO 200 I=1,IDIF
NTOT=0
IP=IPSEQ
DO 100 J=1,IDIM2
NTOT = NTOT + MATRIX(SEQ(IP),STRING(J))
IP=IP+1
100 CONTINUE
C END OF COUNTING FOR THIS POSITION.IS TOTAL HIGH ENOUGH?
IF(NTOT.GE.MINSCR)THEN
MATCHS = NTOT
IFOUND = IP-IDIM2
RETURN
END IF
IPSEQ=IPSEQ+1
200 CONTINUE
IFOUND = 0
RETURN
END
C*********************************************************************
SUBROUTINE MOTFI4(SEQ,IDIM1,LENGTH,ISTART,IEND,
+WEIGHT,CUTOFF,MATCHP,MATCHS,IFOUND,IDM)
REAL WEIGHT(IDM,LENGTH)
REAL MATCHS
INTEGER SEQ(IDIM1)
IFOUND = 0
IF(ISTART.LT.1)ISTART=1
L1 = IEND-ISTART+1
IF(ISTART.GT.IDIM1)RETURN
IF(L1.LT.LENGTH)RETURN
DO 10 I=ISTART,ISTART+L1-LENGTH
SUM = 0.
K = 0
DO 5 J=I,I+LENGTH-1
K = K + 1
SUM = SUM + WEIGHT(SEQ(J),K)
5 CONTINUE
IF(SUM.GE.CUTOFF) THEN
MATCHP = I
MATCHS = SUM
IFOUND = I
RETURN
END IF
10 CONTINUE
IFOUND = 0
END
C*********************************************************************
SUBROUTINE FMOTI4(SEQ,IDIM,WT,LENGTH,CUTOFF,SUM,IFOUND,IDM)
C AUTHOR: RODGER STADEN
INTEGER SEQ(IDIM)
REAL WT(IDM,LENGTH)
DO 10 I=1,IDIM-LENGTH+1
SUM=0.
K=0
DO 5 J=I,I+LENGTH-1
K=K+1
SUM=SUM+WT(SEQ(J),K)
5 CONTINUE
IF(SUM.GE.CUTOFF)THEN
IFOUND = I
RETURN
END IF
10 CONTINUE
IFOUND = 0
RETURN
END
C*********************************************************************
SUBROUTINE MOTIF6(SEQ,IDSEQ,MATRIX,LENGTH,I5STAR,I5END,
+ I3STAR,I3END,CUTOFF,MATCHP,MATCHS,
+ IENTRY,IFOUND,MATCHQ,
+ LAST5,LAST3S,LAST3E,IDM)
C AUTHOR: RODGER STADEN
INTEGER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM)
REAL MATCHS
EXTERNAL LOOP
C WE HAVE A START POSITION FOR THE 5' END OF THE 5' END OF
C A POTENTIAL STEM I5STAR AND AN END DEFINED BY A RANGE I5END
C WE HAVE A STEM LENGTH LENGTH
C WE HAVE A RANGE OF POSITIONS FOR THE 3' STEM TO START
C I3STAR TO I3END
C TRY THE TIGHTEST LOOPS FIRST
C BUT FIRST WE MAY HAVE TO FINISH A PREVIOUS SEARCH
C THIS IS DENOTED BY IENTRY NE 0.
C NOTE IENTRY IS ALSO USED TO RETURN THE 3' MATCH POSITION
ICUT = CUTOFF
C WRITE(*,*)'ICUT',ICUT
IFOUND = 0
JENTRY = IENTRY
IENTRY = 0
IF(I5STAR.LT.1)I5STAR=1
IF((I5STAR+I3STAR-1).GT.IDSEQ)RETURN
IF(JENTRY.NE.0)THEN
I1 = LAST5
C WRITE(*,*)'I1,LAST3S,LAST3E',I1,LAST3S,LAST3E
DO 50 J=LAST3S+1,LAST3E
J1 = J
ISUM = LOOP(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
IF(ISUM.GE.CUTOFF)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
LAST3S = J1
RETURN
END IF
50 CONTINUE
C NOW MOVE 5' STEM START POSITION (WE HAVE JUST FINISHED THE LAST)
C TO THE LAST MATCH + 1
I5STAR = MATCHP + 1
END IF
C SET ENTRY FLAG TO ZERO TO SIGNIFY LAST SEARCH NOW COMPLETED
IENTRY = 0
ISUM = 0
LOOPI1 = I5STAR
IF((I5STAR+I3STAR-1).GT.IDSEQ)RETURN
LOOPI2 = MIN(IDSEQ,I5END)
C WRITE(*,*)'IDSEQ,LENGTH,I5STAR,I5END',
C +IDSEQ,LENGTH,I5STAR,I5END
C
C TRY ALL STEM STARTS FROM 5' START TO 5' END
C
C
DO 200 I = LOOPI1,LOOPI2
C
C
I1 = I
C
C TRY ALL LOOPS FROM 3' START TO 3' END
C
LOOPJ1 = I+I3STAR-1
IF(LOOPJ1.GT.IDSEQ)RETURN
LOOPJ2 = MIN(IDSEQ,I+I3END-1)
C WRITE(*,*)'I3STAR,I3END',I3STAR,I3END
C
C
C
DO 100 J = LOOPJ1,LOOPJ2
C
C
C
J1 = J
C IN LOOP NOTE THAT
C THE 5' END POINTER I1 GOES FORWARDS
C THE 3' END POINTER J1 GOES BACKWARDS
C
ISUM = LOOP(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
C WRITE(*,*)ISUM
IF(ISUM.GE.ICUT)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
C SAVE CURRENT POSITION FOR LATER ENTRIES
LAST5 = I1
LAST3S = J1
LAST3E = LOOPJ2
RETURN
END IF
100 CONTINUE
200 CONTINUE
END
C*********************************************************************
INTEGER FUNCTION LOOP(SEQ,IDSEQ,MATRIX,LENGTH,I5P,I3P,IDM)
INTEGER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM)
C THE 5' END POINTER GOES FORWARDS
C THE 3' END POINTER GOES BACKWARDS
L=0
I5=I5P-1
I3=I3P+1
DO 100 I=1,LENGTH
I5 = I5 + 1
I3 = I3 - 1
L5 = SEQ(I5)
L3 = SEQ(I3)
L = L + MATRIX(L5,L3)
100 CONTINUE
LOOP = L
END
SUBROUTINE SETCMP(COMPIN,IDM)
REAL COMPIN(IDM)
PARAMETER (MAXCHR = 17)
COMMON /COMPC/COMP(MAXCHR)
SAVE /COMPC/
DO 10 I = 1,MAXCHR
COMP(I) = 0.0
10 CONTINUE
T = 0.
DO 20 I = 1,IDM
COMP(I) = COMPIN(I)
T = T + COMPIN(I)
20 CONTINUE
DO 30 I = 1,IDM
COMP(I) = COMP(I) / T
30 CONTINUE
END
C*****************************************************************
SUBROUTINE DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEV,
+CLASS,MATCHQ,IOPT,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
+TITLE,FILNAM,
+ PMINT,PMAXT,PROBT,IDM,MAT1,IDMAT1,MAT2,IDMAT2,MAT3,IDMAT3,
+ MAT4,IDMAT4,WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,
+ RANGES,RANGEL,IDEV1,KEYNS,BESTP,BESTQ,BESTS,JOPT,MAXSS)
INTEGER RANGES(NMOT),RANGEL(NMOT)
INTEGER MATCHP(NMOT),LENGTH(NMOT),CLASS(NMOT)
INTEGER MATCHQ(NMOT)
CHARACTER SEQ(IDSEQ),TITLE*(*),FILNAM*(*)
CHARACTER*(*) KEYNS(NMOT)
REAL MATCHS(NMOT),MINSCR,MAXSCR
INTEGER WTSTR(NMOT)
REAL WEIGHT(MAXWTS),CUTOFF(NMOT)
C MAT1 SIMPLE IDENTITY
C MAT2 IUB SCORES 0-1
C MAT3 IUB SCORES 0-36
C MAT4 INVERTED REPEAT
INTEGER MAT1(IDMAT1,IDMAT1),MAT2(IDMAT2,IDMAT2)
INTEGER MAT3(IDMAT3,IDMAT3),MAT4(IDMAT4,IDMAT4)
CHARACTER DASH
C stuff for best
INTEGER BESTP(NMOT),BESTQ(NMOT)
REAL BESTS(NMOT),MAXSS
EXTERNAL PSCORE
SAVE DASH
DATA DASH/'-'/
C jopt 1 get best match for any individual sequence, then
C display it. So check each match for being best (>maxss), if it is
C save its coords in bestp, bestq. Keep a note that a score
C has been recorded for this sequence (actually noted by bestp(1) ne.0)
C when we finish a sequence (denoted by jopt = 2) put all the
C saved values into matchp, matchq and process as normal.
IF(JOPT.NE.0) THEN
IF(JOPT.EQ.2) THEN
DO 5 I=1,NMOT
MATCHP(I) = BESTP(I)
MATCHQ(I) = BESTQ(I)
MATCHS(I) = BESTS(I)
5 CONTINUE
ELSE IF(JOPT.EQ.1) THEN
C Add scores
T = 0.
DO 6 I = 1,NMOT
IF(MATCHP(I).NE.0) T = T + MATCHS(I)
6 CONTINUE
IF(T.GT.MAXSS) THEN
DO 7 I=1,NMOT
BESTP(I) = MATCHP(I)
BESTQ(I) = MATCHQ(I)
BESTS(I) = MATCHS(I)
7 CONTINUE
MAXSS = T
END IF
RETURN
END IF
END IF
C Add scores
T = 0.
DO 10 I = 1,NMOT
IF(MATCHP(I).NE.0) T = T + MATCHS(I)
10 CONTINUE
C
C If a probability cut off set calc its value
C
POBS = 1.0
IF(PMINC.LT.1.0)THEN
DO 20 I = 1,NMOT
IF(MATCHP(I).NE.0)THEN
CALL GETP(CLASS(I),SEQ(MATCHP(I)+KSTART-1),LENGTH(I),
+ IDM,MAT1,IDMAT1,MAT2,IDMAT2,MAT3,IDMAT3,MAT4,IDMAT4,
+ WEIGHT(MAX(1,WTSTR(I))))
PROB = PSCORE(MATCHS(I))
POBS = POBS * PROB
END IF
20 CONTINUE
IF((PMINC.LT.1.0).AND.(POBS.GT.PMINC))RETURN
END IF
C
IF(T.GT.MAXSCR) MAXSCR = T
IF(T.LT.MINSCR) MINSCR = T
IF(T.LT.CUTSCR) RETURN
C
C Write out mofif by motif
C
IF(IOPT.EQ.1)THEN
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
1001 FORMAT(' >',A,' ',F10.3,' ',A)
DO 100 I=1,NMOT
J = I
C Check for no match (needed for ored motifs)
IF(MATCHP(J).NE.0)THEN
WRITE(IDEV,1000)MATCHP(J)+KSTART-1,MATCHS(J),KEYNS(I)
WRITE(IDEV,1002)(SEQ(K),K=MATCHP(J),MATCHP(J)+LENGTH(J)-1)
C Repeat ?
IF(CLASS(J).EQ.8)THEN
WRITE(IDEV,1002)(SEQ(K),K=MATCHQ(J),MATCHQ(J)+LENGTH(J)-1)
WRITE(IDEV,1000)MATCHQ(J)+KSTART-1
END IF
C Stem ?
IF(CLASS(J).EQ.6)THEN
WRITE(IDEV,1002)
+ (SEQ(K),K=MATCHQ(J),MATCHQ(J)-LENGTH(J)+1,-1)
WRITE(IDEV,1000)MATCHQ(J)+KSTART-1
END IF
END IF
100 CONTINUE
1000 FORMAT(' ',I7,' ',F10.3,' ',A8)
1002 FORMAT(' ',60A1)
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
1004 FORMAT(' Probability =',E10.4)
RETURN
END IF
C
C write only name, score, title
C
IF(IOPT.EQ.3)THEN
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
END IF
C
C write name, score, title and the inclusive match
C
IF(IOPT.EQ.2)THEN
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
MINP = 999999
MAXP = -999999
DO 300 I = 1, NMOT
K = MATCHP(I)
IF(K.NE.0)THEN
IF(K.LT.MINP)MINP = K
K = K + LENGTH(I) - 1
C Inverted repeat ?
IF(CLASS(I).EQ.6) K = MATCHQ(I)
C Repeat ?
IF(CLASS(I).EQ.8) K = MATCHQ(I) + LENGTH(I) - 1
IF(K.GT.MAXP)MAXP = K
END IF
300 CONTINUE
WRITE(IDEV,1000)MINP+KSTART-1
WRITE(IDEV,1002)
+ ((SEQ(K1),K1=K2,MIN(K2+59,MAXP)),K2=MINP,MAXP,60)
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
RETURN
END IF
C
C write out whole seq to a new file
C
IF(IOPT.EQ.4)THEN
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
CALL OPENRS(IDEV1,FILNAM,IOK,LLL,1)
IF(IOK.NE.0)THEN
WRITE(IDEV,*)' Error opening sequence file'
RETURN
END IF
C Write up to and including first motif
J1 = MATCHP(1)+LENGTH(1)-1
WRITE(IDEV1,1005,ERR=401)(SEQ(K),K=1,J1)
DO 400 I=2,NMOT
J = I
C Put dashes for next gap in now. what is max gap?
IMAXG = RANGES(J) - LENGTH(J-1) + RANGEL(J) - LENGTH(J)
C What is actual gap?
IG = MATCHP(J) - MATCHP(J-1) - LENGTH(J-1)
C Want to put in difference number of dashes
IDASH = IMAXG - IG
IF(IDASH.GT.0)CALL PADOUT(IDEV1,DASH,60,IDASH)
C Write up to and including next motif
J2 = MATCHP(J) + LENGTH(J) - 1
WRITE(IDEV1,1005,ERR=401)(SEQ(K),K=J1+1,J2)
J1 = J2
400 CONTINUE
C Write to end of sequence
WRITE(IDEV1,1005,ERR=401)(SEQ(K),K=J1+1,IDSEQ)
401 CONTINUE
CLOSE(UNIT=IDEV1)
1005 FORMAT(' ',60A1)
END IF
C
C write inclusive to new file
C
IF(IOPT.EQ.5)THEN
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
CALL OPENRS(IDEV1,FILNAM,IOK,LLL,1)
IF(IOK.NE.0)THEN
WRITE(IDEV,*)' Error opening sequence file'
RETURN
END IF
C Write first motif
J1 = MATCHP(1)+LENGTH(1)-1
WRITE(IDEV1,1005,ERR=501)(SEQ(K),K=MATCHP(1),J1)
DO 500 I=2,NMOT
J = I
C Put dashes for next gap in now. what is max gap?
IMAXG = RANGES(J) - LENGTH(J-1) + RANGEL(J) - LENGTH(J)
C What is actual gap?
IG = MATCHP(J) - MATCHP(J-1) - LENGTH(J-1)
C Want to put in difference number of dashes
IDASH = IMAXG - IG
IF(IDASH.GT.0)CALL PADOUT(IDEV1,DASH,60,IDASH)
C Write up to and including next motif
J2 = MATCHP(J) + LENGTH(J) - 1
WRITE(IDEV1,1005,ERR=501)(SEQ(K),K=J1+1,J2)
J1 = J2
500 CONTINUE
501 CONTINUE
CLOSE(UNIT=IDEV1)
END IF
END