staden-lg/src/staden/patternn.f

1152 lines
36 KiB
Fortran

C 24-9-90 Changed definition of xmax from xmax=idseq to xmax=idseq+kstart-1
C because a match with j1=53000, j2=59000 did not get plotted. Note
C that kstart is called j1 in main
C 5-11-90 Very many changes for addition of file of file names, plus
C replacement of calls to radio by radion
C 18-7-91 added titles to pattern files
C 2-3-92 set filnam = ' ' for calls to openf1
SUBROUTINE PATTEN(SEQ,IDSEQ,STRING,MAXSTR,
+LENGTH,CLASS,RELMOT,RANGES,RANGEL,RANGET,RANGEM,IENTRY,
+START2,IEND2,WTSTR,START,IEND,MATCHQ,RELEND,MATCHP,
+STRNGS,LAST5,LAST3S,LAST3E,MATCHS,CUTOFF,WEIGHT,FILNAM,
+MAXMOT,MAXWTS,
+IDEV1,IDEV2,IDEV3,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,KSTART,KBIN,KBOUT,
+IDEV4,IDM,COMBIN,IDME,MAT1,MAT2,MAT3,MAT4,NAMSAV,KEYNS,
+IHELPS,IHELPE,HELPF,IDEVH,FOFNAM,IDEVFF)
INTEGER LENGTH(MAXMOT),CLASS(MAXMOT),RELMOT(MAXMOT)
INTEGER RANGES(MAXMOT),RANGEL(MAXMOT)
INTEGER RANGET(MAXMOT),RANGEM(MAXMOT),IENTRY(MAXMOT)
INTEGER START2(MAXMOT),IEND2(MAXMOT)
CHARACTER SEQ(IDSEQ),STRING(MAXSTR)
INTEGER WTSTR(MAXMOT),START(MAXMOT),IEND(MAXMOT)
INTEGER MATCHQ(MAXMOT),RELEND(MAXMOT)
INTEGER MATCHP(MAXMOT),STRNGS(MAXMOT)
INTEGER LAST5(MAXMOT),LAST3S(MAXMOT),LAST3E(MAXMOT)
INTEGER MAT1(IDM,IDM),MAT2(IDME,IDME)
INTEGER MAT3(IDME,IDME),MAT4(IDM,IDM)
REAL WEIGHT(MAXWTS),CUTOFF(MAXMOT),MATCHS(MAXMOT)
REAL MINSCR,MAXSCR
CHARACTER FILNAM*(*),HELPF*(*),FOFNAM*(*),TITLE*80
CHARACTER COMBIN(MAXMOT)
CHARACTER*(*) NAMSAV(MAXMOT),KEYNS(MAXMOT)
PARAMETER (MAXCLS = 8)
PARAMETER (SMALL=1.0E-5)
C MAT1 SIMPLE IDENTITY
C MAT2 IUB SCORES 0-1
C MAT3 IUB SCORES 0-36
C MAT4 INVERTED REPEAT
C
PARAMETER (MAXPRM = 30)
CHARACTER PROMPT(4)*(MAXPRM)
WRITE(KBOUT,*)' Pattern searcher'
JDEV = KBIN
PROMPT(1) = 'Use keyboard'
PROMPT(2) = 'Use pattern file'
PROMPT(3) = 'Use file of pattern file names'
IN = 1
CALL RADION('Select pattern definition mode',PROMPT,3,IN,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IN.LT.1) RETURN
IF(IN.EQ.2)THEN
FILNAM = ' '
CALL OPENF1(IDEV3,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Pattern definition file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
JDEV = IDEV3
ELSE IF(IN.EQ.3) THEN
FOFNAM = ' '
CALL OPENF1(IDEVFF,FOFNAM,0,IOK,KBIN,KBOUT,
+ 'File of pattern file names',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
JDEV = IDEV3
END IF
IOPT = 1
PROMPT(1) = 'Motif by motif'
PROMPT(2) = 'Inclusive'
PROMPT(3) = 'Graphical'
PROMPT(4) = 'EMBL feature table'
CALL RADION('Select results display mode',PROMPT,4,IOPT,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.1) GO TO 201
IF((IN.EQ.3).AND.(IOPT.EQ.3)) IOPT = 2
PMINC = 1.
CUTSCR = -99999.
NPAT = 0
IPROB = 0
NTOTAL = 0
JTOTAL = 0
CALL SETCMP(SEQ,IDSEQ,IDME)
IF(IN.EQ.3) THEN
IPROB = 1
CALL BUSY(KBOUT)
END IF
10 CONTINUE
IF(IN.EQ.3) THEN
READ(IDEVFF,1000,ERR=100,END=200)FOFNAM
CALL OPENRS(IDEV3,FOFNAM,IOK,LRECL,2)
IF(IOK.NE.0) GO TO 200
NPAT = NPAT + 1
END IF
1000 FORMAT(A)
C ZERO ARRAYS
C
CALL FILLI(LENGTH,MAXMOT,0)
CALL FILLI(CLASS,MAXMOT,0)
CALL FILLI(RELMOT,MAXMOT,0)
CALL FILLI(RANGES,MAXMOT,0)
CALL FILLI(RANGEL,MAXMOT,0)
CALL FILLI(RANGET,MAXMOT,0)
CALL FILLI(RANGEM,MAXMOT,0)
CALL FILLI(IENTRY,MAXMOT,0)
CALL FILLI(START2,MAXMOT,0)
CALL FILLI(IEND2,MAXMOT,0)
CALL FILLI(WTSTR,MAXMOT,0)
CALL FILLI(START,MAXMOT,0)
CALL FILLI(IEND,MAXMOT,0)
CALL FILLI(MATCHQ,MAXMOT,0)
CALL FILLI(RELEND,MAXMOT,0)
CALL FILLI(MATCHP,MAXMOT,0)
CALL FILLI(STRNGS,MAXMOT,0)
CALL FILLI(LAST5,MAXMOT,0)
CALL FILLI(LAST3S,MAXMOT,0)
CALL FILLI(LAST3E,MAXMOT,0)
CALL FILLR(CUTOFF,MAXMOT,0.0)
CALL FILLR(MATCHS,MAXMOT,0.0)
CALL FILLR(WEIGHT,MAXWTS,0.0)
CALL FILLC(COMBIN,MAXMOT,'A')
ITOTAL = 0
C
C
C
C GET MOTIF DEFINITIONS
C
C RETURN STRING LENGTH FOR COMPATIBILITY WITH LIB SEARCH
NSTRNG = MAXSTR
IF(IN.EQ.1) THEN
CALL GETMK(KBIN,KBOUT,STRING,NSTRNG,ISTRNG,
+ LENGTH,MAXMOT,CLASS,RELMOT,RANGES,RANGEL,
+ RANGET,RANGEM,
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,IDEV2,
+ WTSTR,JDEV,IOK,RELEND,IDSEQ,IDEV4,IDM,COMBIN,
+ MAXCLS,
+ MAT1,IDM,MAT2,IDME,MAT3,IDME,MAT4,IDM,
+ PMINT,PMAXT,PROBT,EXPTT,
+ IHELPS,IHELPE,HELPF,IDEVH,KEYNS,NAMSAV,FILNAM,TITLE)
ELSE
CALL GETMF(KBIN,KBOUT,STRING,NSTRNG,ISTRNG,
+ LENGTH,MAXMOT,CLASS,RELMOT,RANGES,RANGEL,
+ RANGET,RANGEM,
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,IDEV2,
+ WTSTR,JDEV,IOK,RELEND,IDSEQ,IDEV4,IDM,COMBIN,
+ MAXCLS,
+ MAT1,IDM,MAT2,IDME,MAT3,IDME,MAT4,IDM,
+ PMINT,PMAXT,PROBT,EXPTT,
+ IHELPS,IHELPE,HELPF,IDEVH,KEYNS,NAMSAV,FILNAM,IPROB,TITLE)
END IF
IF(IOK.NE.0) GO TO 201
IF(NMOT.LT.1) GO TO 201
C
C
C DISPLAY THE SIGNAL DESCRIPTION
C
RANGES(1) = 1
IF(IN.NE.3) THEN
CALL DESSIG(
+ KBOUT,STRING,MAXSTR,
+ LENGTH,CLASS,RELMOT,RANGES,RANGEL,
+ RANGET,RANGEM,
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,
+ WTSTR,RELEND,COMBIN,KEYNS,TITLE)
C
C
WRITE(KBOUT,2003)PROBT
2003 FORMAT(' Probability of finding pattern = ',E10.4)
WRITE(KBOUT,2004)EXPTT
2004 FORMAT(' Expected number of matches = ',E10.4)
CALL GETRL(0.,1.,1.0,'Maximum pattern probability',
+ XP,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
PMINC = XP
CALL GETRL(-9999.,9999.,-9999.,'Minimum pattern score',
+ XP,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
CUTSCR = XP
MINSCR = 9999999.
MAXSCR = -9999999.
YMIN = LOG(1.-PMINT)
YMAX = LOG(1.-PMAXT)
YDEL = ABS(YMIN - YMAX)
IF(YDEL.LT.SMALL)THEN
YMAX = YMAX + SMALL
YMIN = YMIN - SMALL
END IF
YDEL = ABS(YMIN - YMAX) * 0.1
YMIN = YMIN - YDEL
YMAX = YMAX + YDEL
CALL BUSY(KBOUT)
END IF
IF(IOPT.EQ.3)THEN
XMAX = IDSEQ + KSTART - 1
XMIN = KSTART
CALL VECTOM
CALL FRAME(MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL CLEARV
END IF
C
C DO THE SEARCH
C
CALL SRCSIG(KBIN,KBOUT,WTSTR,LENGTH,CLASS,
+RANGES,RANGEL,START,IEND,RELMOT,MATCHP,STRNGS,WEIGHT,
+MAXWTS,CUTOFF,MATCHS,NMOT,STRING,MAXSTR,SEQ,IDSEQ,
+RANGET,RANGEM,IENTRY,START2,IEND2,MATCHQ,RELEND,
+IDEV1,LAST5,LAST3S,LAST3E,MARGL,MARGR,MARGB,MARGT,ISXMAX,
+ISYMAX,IOPT,XMAX,XMIN,YMAX,YMIN,ITOTAL,KSTART,IDM,COMBIN,
+CUTSCR,MINSCR,MAXSCR,IDME,PMINT,PMAXT,PROBT,MAT1,MAT2,MAT3,
+MAT4,PMINC,KEYNS)
CALL VT100M
IF(IN.EQ.3) THEN
IF(ITOTAL.NE.0) THEN
WRITE(IDEV1,1003)TITLE
1003 FORMAT(' ',A)
WRITE(IDEV1,1001)FOFNAM,ITOTAL
1001 FORMAT(' ',A,' ',I6,/)
NTOTAL = NTOTAL + ITOTAL
JTOTAL = JTOTAL + 1
END IF
GO TO 10
END IF
WRITE(KBOUT,1002)ITOTAL
1002 FORMAT(' Total matches found',I7)
IF((ITOTAL.GT.0).AND.(IN.NE.3))WRITE(KBOUT,1006)MINSCR,MAXSCR
1006 FORMAT(' Minimum and maximum observed scores',2F12.2)
RETURN
100 CONTINUE
WRITE(KBOUT,*)'Error reading file of file names'
200 CONTINUE
WRITE(KBOUT,*)'Of the ',NPAT,' patterns processed'
WRITE(KBOUT,*)JTOTAL,' gave a total of',NTOTAL,' matches'
201 CONTINUE
CLOSE(UNIT=IDEVFF)
END
SUBROUTINE GETMK(KBIN,KBOUT,STRING,MAXSTR,ISTRNG,
+LENGTH,MAXMOT,CLASS,RELMOT,RANGES,RANGEL,RANGET,RANGEM,
+STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,IDEV,WTSTR,JDEV,IOK,
+RELEND,IDSEQ,IDEVSV,IDM,COMBIN,MAXCLS,
+MAT1,IDMAT1,MAT2,IDMAT2,MAT3,IDMAT3,MAT4,IDMAT4,
+PMINT,PMAXT,PROBT,EXPTT,
+IHELPS,IHELPE,HELPF,IDEVH,KEYNS,NAMSAV,FILNAM,TITLE)
C MAT1 SIMPLE IDENTITY
C MAT2 IUB SCORES 0-1
C MAT3 IUB SCORES 0-36
C MAT4 INVERTED REPEAT
INTEGER LENGTH(MAXMOT),CLASS(MAXMOT),RELMOT(MAXMOT)
INTEGER RANGES(MAXMOT),RANGEL(MAXMOT),STRNGS(MAXMOT)
INTEGER WTSTR(MAXMOT),RANGET(MAXMOT),RANGEM(MAXMOT)
INTEGER RELEND(MAXMOT)
CHARACTER STRING(MAXSTR),FILNAM*(*),HELPF*(*)
REAL WEIGHT(MAXWTS),CUTOFF(MAXMOT)
CHARACTER COMBIN(MAXMOT),TCLASS
INTEGER MAT1(IDMAT1,IDMAT1),MAT2(IDMAT2,IDMAT2)
INTEGER MAT3(IDMAT3,IDMAT3),MAT4(IDMAT4,IDMAT4)
CHARACTER*(*) KEYNS(MAXMOT)
CHARACTER*(*) NAMSAV(MAXMOT),TITLE
PARAMETER (MAXPRM = 31)
CHARACTER PROMPT(9)*(MAXPRM)
EXTERNAL PSCORE
C GETS PATTERN DEFINITIONS IN TERMS OF MOTIFS
PMINT = 1.0
PMAXT = 1.0
PROBT = 1.0
EXPTS = 0.0
PROBS = 0.0
PMINS = 0.0
PMAXS = 0.0
EXPTT = 1.0
IOK = 0
IPROB = 0
DO 10 I=1,MAXMOT
RELEND(I) = 5
COMBIN(I) = 'A'
10 CONTINUE
RANGES(1) = 0
RANGEL(1) = IDSEQ
IREL = 0
C COUNT MOTIFS
NMOT = 0
MOTIF = 0
C SET POINTER TO SEARCH STRINGS ARRAY
ISTRNG = 1
C SET POINTER TO WEIGHT ARRAY
IWT = 1
TCLASS = 'A'
C SET CLASS TO EXACT MATCH
ICLASS = 1
100 CONTINUE
IOK = 1
PROMPT(1) = 'Exact match'
PROMPT(2) = 'Percentage match'
PROMPT(3) = 'Cut-off score and score matrix'
PROMPT(4) = 'Cut-off score and weight matrix'
PROMPT(5) = 'Complement of weight matrix'
PROMPT(6) = 'Inverted repeat or stem-loop'
PROMPT(7) = 'Exact match, defined step'
PROMPT(8) = 'Direct repeat'
PROMPT(9) = 'Pattern complete'
CALL RADION('Select motif definition mode',PROMPT,9,ICLASS,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(ICLASS.LT.1) RETURN
IF(ICLASS.EQ.9)GO TO 900
101 CONTINUE
LKEY = 0
CALL GTSTR('Motif name',' ',KEYNS(MOTIF+1),LKEY,
+KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 101
END IF
IF(MOTIF.GT.0) THEN
IF(COMBIN(MOTIF).NE.'N') THEN
PROMPT(1) = 'And'
PROMPT(2) = 'Or'
PROMPT(3) = 'Not'
IC = 1
CALL RADION('Select logical operator',PROMPT,3,IC,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IC.LT.1) RETURN
IF(IC.EQ.1) TCLASS = 'A'
IF(IC.EQ.2) TCLASS = 'O'
IF(IC.EQ.3) TCLASS = 'N'
ELSE
PROMPT(1) = 'And'
PROMPT(2) = 'Not'
IC = 1
CALL RADION('Select logical operator',PROMPT,2,IC,
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IC.LT.1) RETURN
IF(IC.EQ.1) TCLASS = 'A'
IF(IC.EQ.2) TCLASS = 'N'
END IF
END IF
NMOT = NMOT + 1
MOTIF = MOTIF + 1
IF(NMOT.GT.MAXMOT)THEN
WRITE(KBOUT,*)'Maximum number of motifs exceeded'
GO TO 901
END IF
CLASS(MOTIF) = ICLASS
COMBIN(MOTIF) = TCLASS
C IF NOT THE FIRST MOTIF, AND AN ANDED MOTIF OR THE FIRST IN A LIST OF ORS
C GET ITS RANGE ETC
IOK = 0
IF((MOTIF.GT.1).AND.(COMBIN(MOTIF).NE.'O'))THEN
C WRITE(KBOUT,1002)
C1002 FORMAT(' RANGES ARE DEFINED RELATIVE TO OTHER MOTIFS',/,
C +' AND BY STARTS AND DISTANCES.')
IOK = 1
MININ = 1
MAXIN = MOTIF
211 CONTINUE
MAXIN = MAXIN - 1
IF(COMBIN(MAXIN).NE.'A') GO TO 211
IREL = MAXIN
CALL GETINT(MININ,MAXIN,IREL,'Number of reference motif',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IREL = IVAL
RELMOT(MOTIF) = IREL
IOK = 1
IF(CLASS(IREL).EQ.6)THEN
CALL YESNO(MEND,'Relative to 5 prime end',
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(MEND.LT.0)RETURN
RELEND(MOTIF) = 5
IF(MEND.EQ.1)RELEND(MOTIF) = 3
END IF
MININ = -1000
MAXIN = 1000
ID = LENGTH(IREL) + 1
IF((CLASS(IREL).EQ.6).OR.(CLASS(IREL).EQ.8)) ID = 1
CALL GETINT(MININ,MAXIN,ID,'Relative start position',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
ID = IVAL
RANGES(MOTIF) = ID - 1
MININ = 0
MAXIN = 1000
IR = 0
CALL GETINT(MININ,MAXIN,IR,'Number of extra positions',
+ IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IR = IVAL
RANGEL(MOTIF) = IR
ELSE IF(COMBIN(MOTIF).EQ.'O')THEN
C NEED TO SET RANGES TO THOSE OF THE FIRST IN A SET OF ORED MOTIFS
C SET TO THOSE OF THE PREVIOUS MOTIF BECAUSE IT MUST BE THE SAME
RANGES(MOTIF) = RANGES(MOTIF-1)
RANGEL(MOTIF) = RANGEL(MOTIF-1)
RELEND(MOTIF) = RELEND(MOTIF-1)
RELMOT(MOTIF) = IREL
END IF
CALL GETMC(KBIN,KBOUT,STRING,MAXSTR,ISTRNG,
+LENGTH,MAXMOT,CLASS,RELMOT,RANGES,RANGEL,RANGET,RANGEM,
+STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,IDEV,WTSTR,JDEV,IOK,
+RELEND,IDSEQ,IDEVSV,IDM,COMBIN,MAXCLS,
+MAT1,IDMAT1,MAT2,IDMAT2,MAT3,IDMAT3,MAT4,IDMAT4,
+PMINT,PMAXT,PROBT,EXPTT,EXPTS,PROBS,PMINS,PMAXS,
+IHELPS,IHELPE,HELPF,IDEVH,MOTIF,IWT,NAMSAV,FILNAM,IPROB)
IF(IOK.EQ.0) GO TO 100
901 CONTINUE
WRITE(KBOUT,*)' Error in pattern definition'
IOK = 1
RETURN
900 CONTINUE
EXPTT = EXPTT * EXPTS
PROBT = PROBT * PROBS
PMINT = PMINT * PMINS
PMAXT = PMAXT * PMAXS
CALL YESNO(IN,'Save pattern in a file',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IN.LT.0)RETURN
IF(IN.EQ.0)THEN
FILNAM = ' '
CALL OPENF1(IDEVSV,FILNAM,1,IOK,KBIN,KBOUT,
+ 'Pattern definition file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
CALL SAVSIG(
+ IDEVSV,STRING,MAXSTR,
+ LENGTH,CLASS,RELMOT,RANGES,RANGEL,
+ RANGET,RANGEM,
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,
+ WTSTR,RELEND,NAMSAV,COMBIN,KEYNS,TITLE,KBIN,KBOUT)
CLOSE(UNIT=IDEVSV)
END IF
DO 899 I = 2,NMOT
IF((CLASS(I).EQ.6).OR.(CLASS(I).EQ.8)) THEN
RANGEL(I) = RANGEL(I) + 1
ELSE
RANGEL(I) = RANGEL(I) + LENGTH(I)
END IF
899 CONTINUE
C RETURN STRING LENGTH
MAXSTR = ISTRNG - 1
IOK = 0
END
SUBROUTINE SRCSIG(KBIN,KBOUT,WTSTR,LENGTH,CLASS,
+RANGES,RANGEL,START,IEND,RELMOT,MATCHP,STRNGS,WEIGHT,
+MAXWTS,CUTOFF,MATCHS,NMOT,STRING,MAXSTR,SEQ,IDSEQ,
+RANGET,RANGEM,IENTRY,START2,IEND2,MATCHQ,RELEND,
+IDEVOT,LAST5,LAST3S,LAST3E,MARGL,MARGR,MARGB,MARGT,ISXMAX,
+ISYMAX,IOPT,XMAX,XMIN,YMAX,YMIN,ITOTAL,KSTART,IDM,COMBIN,
+CUTSCR,MINSCR,MAXSCR,IDME,PMINT,PMAXT,PROBT,MAT1,MAT2,MAT3,MAT4,
+PMINC,KEYNS)
C ROUTINE TO SEARCH FOR SIGNALS COMPOSED OF MOTIFS
C WEIGHT = WEIGHTS FOR MATRICES
C CUTOFF = CUTOFF SCORES
C LENGTH = MOTIF LENGTHS
C CLASS = MOTIF CLASS
C COMBIN = LOGICAL COMBINATION A, O, N
C WTSTR = POINTER TO WEIGHT STARTS
C RANGES = RANGES START
C RANGEL = RANGE LENGTH (A DISTANCE MEASURED FROM RANGES)
C RELMOT = MOTIF NUMBER THAT A RANGE IS RELATIVE TO IE THE
C FIRST MOTIF'S RANGE IS RELATIVE TO MOTIF 0, BUT
C ANY OTHER MOTIF MAY HAVE TO BE DEFINED RELATIVE
C TO ANY OTHER. THE MOST COMMON WOULD BE THE FIRST
C MOTIF OR THE LAST ONE SEARCHED FOR.
C RELEND = IS A SPECIAL CASE FLAG FOR STEMS. IT ALLOWS OTHER
C MOTIFS TO HAVE THEIR POSITIONS RELATIVE TO THE 3' SIDE
C OF A STEM. IT IS 5 FOR THE 5 PRIME SIDE, 3 FOR 3' SIDE
C START = RANGE START DURING SEARCH (SOME POSITIONS MAY HAVE
C BEEN TRIED)
C IEND = RANGE END POSITION FOR CURRENT INITIAL START (WHEN IT
C IS RESET DEPENDS ON WHETHER IT IS DEFINED RELATIVE TO
C THE FIRST OR THE PREVIOUS MOTIF. IF IT IS DEFINED RELATIVE
C TO THE FIRST MOTIF IT IS RESET WHEN WE FIND A MATCH FOR THE
C FIRST MOTIF. IF IT IS DEFINED RELATIVE TO THE PREVIOUS MOTIF
C WE MUST RESET WHEN WE MOVE FORWARD ONE MOTIF. I THINK THIS
C CAN BE TAKEN CARE OF BY UPDATING ALL THOSE MOTIFS THAT ARE
C DEFINED TO THE CURRENT MOTIF EVERY TIME WE MOVE FORWARD
C ONE MOTIF (IE IT INCLUDES THE FIRST MOTIF SO IT IS NOT A
C SPECIAL CASE). OTHERWISE WE UPDATE POSITIONS WHEN WE FIND
C A MATCH FOR THEM (WE SET TO THE MATCH POSITION PLUS 1)
C MATCHP = LIST OF CURRENT MATCH POSITIONS FOR EACH MOTIF
C MATCHS = LIST OF CURRENT MATCH SCORES FOR EACH MOTIF
C IFOUND = A FLAG TO INDICATE SUCCESS OR FAILURE OF A SEARCH ROUTINE
C 1 = SUCCESS, 0 = FAIL
C STRNGS = POINTER TO STRING STARTS IN CHARACTER ARRAY STRING
C TEMPORARY VALUES ARE:
C MOTIF = ACTUAL MOTIF NUMBER
C ICLASS = CLASS
C ILEN = LENGTH OF MOTIF
C CUT = CUTOFF
C WT = START OF WEIGHTS FOR THIS MOTIF
C ISTRST = START OF STRING
C RANGET = START OF 3' RANGE FOR STEM SEARCHES
C RANGEM = END OF 3' RANGE FOR STEM SEARCHES
C IENTRY = FLAG TO SIGNIFY MORE 3' STEM POSITIONS FOR LAST 5' START
C 0 = NONE, ON RETURN FROM MOTIF6 IT CONTAINS THE 3' MATCH
C POSITION
C MATCHQ = MATCH POSITION FOR STEM SEARCH
C COMB = LOGICAL COMBINATION A, O, N
INTEGER WTSTR(NMOT),LENGTH(NMOT),CLASS(NMOT)
INTEGER RANGES(NMOT),RANGEL(NMOT),START(NMOT),IEND(NMOT)
INTEGER RELMOT(NMOT),MATCHP(NMOT),STRNGS(NMOT)
INTEGER RANGET(NMOT),RANGEM(NMOT),IENTRY(NMOT),RELEND(NMOT)
INTEGER START2(NMOT),IEND2(NMOT),MATCHQ(NMOT)
INTEGER LAST5(NMOT),LAST3S(NMOT),LAST3E(NMOT)
INTEGER MAT1(IDM,IDM),MAT2(IDME,IDME)
INTEGER MAT3(IDME,IDME),MAT4(IDM,IDM)
REAL WEIGHT(MAXWTS),CUTOFF(NMOT),MATCHS(NMOT)
REAL MINSCR,MAXSCR
CHARACTER SEQ(IDSEQ),STRING(MAXSTR)
CHARACTER COMBIN(NMOT),COMB
CHARACTER*(*) KEYNS(NMOT)
C
C
C INITIALIZE
JMOT = 0
IRET = 0
5 CONTINUE
JMOT = JMOT + 1
IF(JMOT.LE.NMOT)THEN
IF(RELMOT(JMOT).EQ.0)THEN
START(JMOT) = RANGES(1)
IEND(JMOT) = RANGES(1) + RANGEL(1) -1
GO TO 5
END IF
END IF
MOTIF = 1
ICLASS = CLASS(1)
ILEN = LENGTH(1)
CUT = CUTOFF(1)
IWT = WTSTR(1)
ISTRST = STRNGS(1)
IENTRY(1) = 0
COMB = COMBIN(1)
DO 10 I = 1,NMOT
MATCHP(I) = 0
10 CONTINUE
C
C
C
100 CONTINUE
C
C
C THIS A CLASS CLASS MOTIF, PERFORM THE APPROPRIATE SEARCH IF THE START
C POSITION IS >0. (IF IT IS NOT THE CURRENT MOTIF IS A NOT THAT HAS
C ALREADY BEEN SEARCHED FOR
C
IFOUND = 0
IF(START(MOTIF).GT.0)THEN
C
C
IF(ICLASS.EQ.1)THEN
C WRITE(*,*)'SRCSIG,MOTIF,ICLASS,START,END'
C WRITE(*,*)MOTIF,ICLASS,START(MOTIF),IEND(MOTIF)
CALL MOTIF1(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,
+ CUTOFF(MOTIF),0)
ELSE IF(ICLASS.EQ.2)THEN
CALL MOTIF2(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),CUT,MATCHP(MOTIF),MATCHS(MOTIF),IFOUND)
ELSE IF(ICLASS.EQ.3)THEN
CALL MOTIF3(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),CUT,MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,MAT3,
+ IDME)
ELSE IF(ICLASS.EQ.4)THEN
CALL MOTIF4(SEQ,IDSEQ,ILEN,START(MOTIF),
+ IEND(MOTIF),WEIGHT(IWT),CUT,MATCHP(MOTIF),MATCHS(MOTIF),
+ IFOUND,IDM)
ELSE IF(ICLASS.EQ.5)THEN
CALL MOTIF4(SEQ,IDSEQ,ILEN,START(MOTIF),
+ IEND(MOTIF),WEIGHT(IWT),CUT,MATCHP(MOTIF),MATCHS(MOTIF),
+ IFOUND,IDM)
ELSE IF(ICLASS.EQ.6)THEN
CALL MOTIF6(SEQ,IDSEQ,MAT4,LENGTH(MOTIF),START(MOTIF),
+ IEND(MOTIF),RANGET(MOTIF),RANGEM(MOTIF),
+ CUTOFF(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),
+ IENTRY(MOTIF),IFOUND,MATCHQ(MOTIF),
+ LAST5(MOTIF),LAST3S(MOTIF),LAST3E(MOTIF),IDM)
ELSE IF(ICLASS.EQ.7)THEN
CALL MOTIF1(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
+ IEND(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,
+ CUTOFF(MOTIF),1)
ELSE IF(ICLASS.EQ.8)THEN
CALL MOTIF8(SEQ,IDSEQ,MAT2,LENGTH(MOTIF),START(MOTIF),
+ IEND(MOTIF),RANGET(MOTIF),RANGEM(MOTIF),
+ CUTOFF(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),
+ IENTRY(MOTIF),IFOUND,MATCHQ(MOTIF),
+ LAST5(MOTIF),LAST3S(MOTIF),LAST3E(MOTIF),IDME)
ELSE
WRITE(KBOUT,*)'UNKNOWN CLASS!!'
END IF
C
C
END IF
C
C
C MATCH FOUND WHEN MATCH WANTED ?
C
C
C
IF(((IFOUND.EQ.0).AND.(COMB.NE.'N')).OR.
+ ((IFOUND.GT.0).AND.(COMB.EQ.'N')))THEN
C
C NO SO GO BACK OR SIDEWAYS ONE MOTIF
C
C
CALL BAKSID(CLASS,LENGTH,CUTOFF,STRNGS,NMOT,
+ MOTIF,ICLASS,ILEN,CUT,IWT,ISTRST,WTSTR,
+ RELMOT,START,IEND,MATCHQ,RANGES,RANGEL,RELEND,IRET,MATCHP,
+ COMBIN,COMB)
C
C
C IF CANT GO BACK ANY FURTHER QUIT
IF(IRET.NE.0)RETURN
C
C
ELSE
C
C
C MATCH FOUND.
C
C
C PREPARE FOR NEXT SEARCH THIS MOTIF BY INCREMENTING POINTER
C TO SEARCH RANGE (NOT FOR CLASS 6 WHICH IS HANDLED BY MOTIF6)
C
C
IF(COMB.EQ.'N')THEN
START(MOTIF) = -9
ELSE
C IF((ICLASS.NE.6).AND.(ICLASS.NE.8))
C REPLACE LAST LINE 7-7-89
IF(ICLASS.NE.6)START(MOTIF) = MATCHP(MOTIF) + 1
C ERROR?????????
IF(ICLASS.EQ.7)START(MOTIF) = MATCHP(MOTIF) + CUTOFF(MOTIF)
END IF
C
C TRY GOING FORWARD ONE MOTIF
C
C
CALL FORWAD(CLASS,LENGTH,CUTOFF,NMOT,
+ MOTIF,ICLASS,ILEN,CUT,IWT,RELMOT,START,IEND,
+ RANGES,RANGEL,STRNGS,ISTRST,WTSTR,IDSEQ,IENTRY,
+ RANGET,RANGEM,START2,IEND2,MATCHQ,RELEND,MATCHP,IDSPLY,
+ COMBIN,COMB)
C
C
C
C IS THIS THE LAST MOTIF? IF SO DISPLAY THE MATCH
C
C
IF(IDSPLY.EQ.1)THEN
ITOTAL = ITOTAL + 1
CALL DSPLAY(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
+ CLASS,MATCHQ,MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IOPT,
+ XMAX,XMIN,YMAX,YMIN,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
+ PMINT,PMAXT,PROBT,IDM,MAT1,IDM,MAT2,IDME,MAT3,IDM,
+ MAT4,IDM,WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,KEYNS)
C
C
C HORRIBLE SPECIAL CASE - IF LAST MOTIF IS NOTTED WE MUST MOVE BACK AFTER
C DISPLAY
IF(COMB.EQ.'N')THEN
CALL BAKSID(CLASS,LENGTH,CUTOFF,STRNGS,NMOT,
+ MOTIF,ICLASS,ILEN,CUT,IWT,ISTRST,WTSTR,
+ RELMOT,START,IEND,MATCHQ,RANGES,RANGEL,RELEND,IRET,MATCHP,
+ COMBIN,COMB)
C
C
C IF CANT GO BACK ANY FURTHER QUIT
IF(IRET.NE.0)RETURN
END IF
END IF
C
C
END IF
C
C
C GO BACK FOR NEXT SEARCH
C
C
GO TO 100
END
C*********************************************************************
SUBROUTINE DSPLAY(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEV,
+CLASS,MATCHQ,MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IOPT,
+XMAX,XMIN,YMAX,YMIN,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
+PMINT,PMAXT,PROBT,IDM,MAT1,IDMAT1,MAT2,IDMAT2,MAT3,IDMAT3,
+MAT4,IDMAT4,WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,KEYNS)
INTEGER MATCHP(NMOT),LENGTH(NMOT),CLASS(NMOT)
INTEGER MATCHQ(NMOT)
CHARACTER SEQ(IDSEQ)
REAL MATCHS(NMOT),MINSCR,MAXSCR
REAL WEIGHT(MAXWTS),CUTOFF(NMOT)
INTEGER MAT1(IDMAT1,IDMAT1),MAT2(IDMAT2,IDMAT2)
INTEGER MAT3(IDMAT3,IDMAT3),MAT4(IDMAT4,IDMAT4)
INTEGER WTSTR(NMOT)
CHARACTER*(*) KEYNS(NMOT)
EXTERNAL PSCORE
C ADD SCORES
T = 0.
DO 10 I = 1,NMOT
IF(MATCHP(I).NE.0) T = T + MATCHS(I)
10 CONTINUE
POBS = 1.0
IF((PMINC.LT.1.0).OR.(IOPT.EQ.3))THEN
C CALC PROB
DO 20 I = 1,NMOT
IF(MATCHP(I).NE.0)THEN
CALL GETP(CLASS(I),SEQ(MATCHP(I)+KSTART-1),LENGTH(I),
+ IDM,MAT1,IDMAT1,MAT2,IDMAT2,MAT3,IDMAT3,MAT4,IDMAT4,
+ WEIGHT(MAX(1,WTSTR(I))))
PROB = PSCORE(MATCHS(I))
POBS = POBS * PROB
END IF
20 CONTINUE
IF((PMINC.LT.1.0).AND.(POBS.GT.PMINC))RETURN
END IF
C
IF(T.GT.MAXSCR) MAXSCR = T
IF(T.LT.MINSCR) MINSCR = T
IF(T.LT.CUTSCR) RETURN
C
IF(IOPT.EQ.1)THEN
WRITE(IDEV,1001)
1001 FORMAT(' Match')
DO 100 I=1,NMOT
J = I
C CHECK FOR NO MATCH (NEEDED FOR ORED MOTIFS)
IF(MATCHP(J).NE.0)THEN
WRITE(IDEV,1000)MATCHP(J)+KSTART-1,KEYNS(I)
WRITE(IDEV,1002)(SEQ(K),K=MATCHP(J),MATCHP(J)+LENGTH(J)-1)
C STEM ?
IF(CLASS(J).EQ.6)THEN
WRITE(IDEV,1002)(SEQ(K),K=MATCHQ(J),MATCHQ(J)-LENGTH(J)+1,-1)
WRITE(IDEV,1000)MATCHQ(J)+KSTART-1
END IF
C REPEAT ?
IF(CLASS(J).EQ.8)THEN
WRITE(IDEV,1002)(SEQ(K),K=MATCHQ(J),MATCHQ(J)+LENGTH(J)-1)
WRITE(IDEV,1000)MATCHQ(J)+KSTART-1
END IF
END IF
100 CONTINUE
1000 FORMAT(' ',I7,' ',A8)
1002 FORMAT(' ',60A1)
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
1004 FORMAT(' Probability =',E10.4)
RETURN
END IF
IF(IOPT.EQ.3)THEN
C MARK THE POSITION OF THE FIRST NON-ZERO MATCH ONLY
DO 200 I = 1,NMOT
IF(MATCHP(I).NE.0)THEN
X = MATCHP(I) + KSTART - 1
Y = LOG(1.-POBS)
CALL VECTOM
CALL LINE(X,X,YMIN,Y,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL VT100M
GO TO 201
END IF
200 CONTINUE
201 CONTINUE
END IF
IF(IOPT.EQ.2)THEN
MINP = 999999
MAXP = -999999
DO 300 I = 1, NMOT
K = MATCHP(I)
IF(K.NE.0)THEN
IF(K.LT.MINP)MINP = K
K = K + LENGTH(I) - 1
C INVERTED REPEAT ?
IF(CLASS(I).EQ.6) K = MATCHQ(I)
C REPEAT ?
IF(CLASS(I).EQ.8) K = MATCHQ(I) + LENGTH(I) - 1
IF(K.GT.MAXP)MAXP = K
END IF
300 CONTINUE
WRITE(IDEV,1000)MINP+KSTART-1,KEYNS(1)
WRITE(IDEV,1002)
+ ((SEQ(K1),K1=K2,MIN(K2+59,MAXP)),K2=MINP,MAXP,60)
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
RETURN
END IF
IF(IOPT.EQ.4)THEN
MINP = 999999
MAXP = -999999
K1 = 0
DO 400 I = 1, NMOT
K = MATCHP(I)
IF(K.NE.0)THEN
IF(K1.EQ.0) INAM = I
K1 = 1
IF(K.LT.MINP)MINP = K
K = K + LENGTH(I) - 1
C INVERTED REPEAT ?
IF(CLASS(I).EQ.6) K = MATCHQ(I)
C REPEAT ?
IF(CLASS(I).EQ.8) K = MATCHQ(I) + LENGTH(I) - 1
IF(K.GT.MAXP)MAXP = K
END IF
400 CONTINUE
CALL EFTOUT(KEYNS(INAM),MINP+KSTART-1,MAXP+KSTART-1,IDEV)
END IF
END
C*********************************************************************
SUBROUTINE MOTIF3(SEQ,IDIM1,STRING,IDIM2,ISTART,IEND,CUTOFF,
+MATCHP,MATCHS,IFOUND,MATRIC,IDM)
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER MATRIC(IDM,IDM)
REAL MATCHS
IFOUND = 0
IF(ISTART.LT.1)ISTART=1
IF(ISTART.GT.IDIM1)RETURN
CALL SQFIT5(SEQ,IDIM1,STRING,IDIM2,ISTART,IEND,CUTOFF,MATCHS,
+IFOUND,MATRIC,IDM)
IF(IFOUND.EQ.0)RETURN
C SAVE MATCH POSITION
MATCHP = IFOUND
RETURN
END
C*********************************************************************
SUBROUTINE MOTIF4(SEQ,IDIM1,LENGTH,ISTART,IEND,
+WEIGHT,CUTOFF,MATCHP,MATCHS,IFOUND,IDM)
REAL MATCHS
CHARACTER SEQ(IDIM1)
IFOUND = 0
IF(ISTART.LT.1)ISTART=1
L1 = IEND-ISTART+1
IF(ISTART.GT.IDIM1)RETURN
IF(L1.LT.LENGTH)RETURN
CALL FMOT4(SEQ(ISTART),L1,WEIGHT,LENGTH,CUTOFF,SCORE,IFOUND,IDM)
IF(IFOUND.EQ.0)RETURN
C SAVE MATCH POSITION
MATCHP = ISTART+IFOUND-1
MATCHS = SCORE
RETURN
END
C*********************************************************************
SUBROUTINE FMOT4(SEQ,IDIM,WT,LENGTH,CUTOFF,SUM,IFOUND,IDM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM)
REAL WT(IDM,LENGTH)
INTEGER CTONUM
EXTERNAL CTONUM
DO 10 I=1,IDIM-LENGTH+1
SUM=0.
K=0
DO 5 J=I,I+LENGTH-1
K=K+1
IP = CTONUM(SEQ(J))
SUM=SUM+WT(IP,K)
5 CONTINUE
IF(SUM.GE.CUTOFF)THEN
IFOUND = I
RETURN
END IF
10 CONTINUE
IFOUND = 0
RETURN
END
C*********************************************************************
SUBROUTINE MOTIF6(SEQ,IDSEQ,MATRIX,LENGTH,I5STAR,I5END,
+ I3STAR,I3END,CUTOFF,MATCHP,MATCHS,
+ IENTRY,IFOUND,MATCHQ,
+ LAST5,LAST3S,LAST3E,IDM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM)
REAL MATCHS
EXTERNAL LOOP
C WE HAVE A START POSITION FOR THE 5' END OF THE 5' END OF
C A POTENTIAL STEM I5STAR AND AN END DEFINED BY A RANGE I5END
C WE HAVE A STEM LENGTH LENGTH
C WE HAVE A RANGE OF POSITIONS FOR THE 3' STEM TO START
C I3STAR TO I3END
C TRY THE TIGHTEST LOOPS FIRST
C BUT FIRST WE MAY HAVE TO FINISH A PREVIOUS SEARCH
C THIS IS DENOTED BY IENTRY NE 0.
C NOTE IENTRY IS ALSO USED TO RETURN THE 3' MATCH POSITION
ICUT = CUTOFF
C WRITE(*,*)'ICUT',ICUT
IFOUND = 0
JENTRY = IENTRY
IENTRY = 0
IF(I5STAR.LT.1)I5STAR=1
IF((I5STAR+I3STAR-1).GT.IDSEQ)RETURN
IF(JENTRY.NE.0)THEN
I1 = LAST5
C WRITE(*,*)'I5STAR,I5END',I5STAR,I5END
C WRITE(*,*)'I1,LAST3S,LAST3E',I1,LAST3S,LAST3E
DO 50 J=LAST3S+1,LAST3E
J1 = J
ISUM = LOOP(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
IF(ISUM.GE.CUTOFF)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
LAST3S = J1
CDEBUG
C WRITE(*,*)'MATCHP',MATCHP
RETURN
END IF
50 CONTINUE
C NOW MOVE 5' STEM START POSITION (WE HAVE JUST FINISHED THE LAST)
C TO THE LAST MATCH + 1
I5STAR = MATCHP + 1
END IF
C SET ENTRY FLAG TO ZERO TO SIGNIFY LAST SEARCH NOW COMPLETED
IENTRY = 0
ISUM = 0
LOOPI1 = I5STAR
IF((I5STAR+I3STAR-1).GT.IDSEQ)RETURN
LOOPI2 = MIN(IDSEQ,I5END)
C WRITE(*,*)'IDSEQ,LENGTH,I5STAR,I5END',
C +IDSEQ,LENGTH,I5STAR,I5END
C
C TRY ALL STEM STARTS FROM 5' START TO 5' END
C
C
DO 200 I = LOOPI1,LOOPI2
C
C
I1 = I
C
C TRY ALL LOOPS FROM 3' START TO 3' END
C
LOOPJ1 = I+I3STAR-1
IF(LOOPJ1.GT.IDSEQ)RETURN
LOOPJ2 = MIN(IDSEQ,I+I3END-1)
C WRITE(*,*)'I3STAR,I3END',I3STAR,I3END
C
C
C
DO 100 J = LOOPJ1,LOOPJ2
C
C
C
J1 = J
C IN LOOP NOTE THAT
C THE 5' END POINTER I1 GOES FORWARDS
C THE 3' END POINTER J1 GOES BACKWARDS
C
ISUM = LOOP(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
C WRITE(*,*)ISUM
IF(ISUM.GE.ICUT)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
C SAVE CURRENT POSITION FOR LATER ENTRIES
LAST5 = I1
LAST3S = J1
LAST3E = LOOPJ2
CDEBUG
C WRITE(*,*)'MATCHP',MATCHP
RETURN
END IF
100 CONTINUE
200 CONTINUE
CDEBUG
C WRITE(*,*)'MATCHPEND',MATCHP
END
C*********************************************************************
INTEGER FUNCTION LOOP(SEQ,IDSEQ,MATRIX,LENGTH,I5P,I3P,IDM)
CHARACTER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM),CTONUM
EXTERNAL CTONUM
C THE 5' END POINTER GOES FORWARDS
C THE 3' END POINTER GOES BACKWARDS
L=0
I5=I5P-1
I3=I3P+1
DO 100 I=1,LENGTH
I5 = I5 + 1
I3 = I3 - 1
C WRITE(*,*)'I5,I3',I5,I3
L5 = CTONUM(SEQ(I5))
L3 = CTONUM(SEQ(I3))
L = L + MATRIX(L5,L3)
100 CONTINUE
LOOP = L
END
SUBROUTINE MOTIF8(SEQ,IDSEQ,MATRIX,LENGTH,I5STAR,I5END,
+ I3STAR,I3END,CUTOFF,MATCHP,MATCHS,
+ IENTRY,IFOUND,MATCHQ,
+ LAST5,LAST3S,LAST3E,IDM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM),REPEET
REAL MATCHS
EXTERNAL REPEET
C WE HAVE A START POSITION FOR THE 5' END OF THE 5' END OF
C A POTENTIAL REPEAT I5STAR AND AN END DEFINED BY A RANGE I5END
C WE HAVE A REPEAT LENGTH LENGTH
C WE HAVE A RANGE OF POSITIONS FOR THE 3' STEM TO START
C I3STAR TO I3END
C TRY THE TIGHTEST LOOPS FIRST
C BUT FIRST WE MAY HAVE TO FINISH A PREVIOUS SEARCH
C THIS IS DENOTED BY IENTRY NE 0.
C NOTE IENTRY IS ALSO USED TO RETURN THE 3' MATCH POSITION
ICUT = CUTOFF
C WRITE(*,*)'ICUT',ICUT
IFOUND = 0
JENTRY = IENTRY
IENTRY = 0
IF(I5STAR.LT.1)I5STAR=1
IF((I5STAR+I3STAR+LENGTH-2).GT.IDSEQ)RETURN
IF(JENTRY.NE.0)THEN
I1 = LAST5
C WRITE(*,*)'I1,LAST3S,LAST3E',I1,LAST3S,LAST3E
DO 50 J=LAST3S+1,LAST3E
J1 = J
ISUM = REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
IF(ISUM.GE.ICUT)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
LAST3S = J1
RETURN
END IF
50 CONTINUE
C NOW MOVE 5' STEM START POSITION (WE HAVE JUST FINISHED THE LAST)
C TO THE LAST MATCH + 1
I5STAR = MATCHP + 1
END IF
C SET ENTRY FLAG TO ZERO TO SIGNIFY LAST SEARCH NOW COMPLETED
IENTRY = 0
ISUM = 0
LOOPI1 = I5STAR
IF((I5STAR+I3STAR+LENGTH-2).GT.IDSEQ)RETURN
LOOPI2 = MIN(IDSEQ-2*LENGTH+1,I5END)
C WRITE(*,*)'IDSEQ,LENGTH,I5STAR,I5END',
C +IDSEQ,LENGTH,I5STAR,I5END
C
C TRY ALL STEM STARTS FROM 5' START TO 5' END
C
C
DO 200 I = LOOPI1,LOOPI2
C
C
I1 = I
C
C TRY ALL LOOPS FROM 3' START TO 3' END
C
LOOPJ1 = I + I3STAR -1
IF((LOOPJ1+LENGTH-1).GT.IDSEQ)RETURN
LOOPJ2 = MIN(IDSEQ-LENGTH+1,I+I3END-1)
C WRITE(*,*)'I3STAR,I3END',I3STAR,I3END
C
C
C
DO 100 J = LOOPJ1,LOOPJ2
C
C
C
J1 = J
C IN REPEAT NOTE THAT
C THE 5' END POINTER I1 GOES FORWARDS
C THE 3' END POINTER J1 GOES FORWARDS
C
ISUM = REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
C RETURN IF GOOD ENOUGH
C WRITE(*,*)ISUM
IF(ISUM.GE.ICUT)THEN
MATCHP = I1
IENTRY = J1
MATCHQ = J1
MATCHS = ISUM
IFOUND = MATCHP
C SAVE CURRENT POSITION FOR LATER ENTRIES
LAST5 = I1
LAST3S = J1
LAST3E = LOOPJ2
RETURN
END IF
100 CONTINUE
200 CONTINUE
END
C*********************************************************************
INTEGER FUNCTION REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I5P,I3P,IDM)
CHARACTER SEQ(IDSEQ)
INTEGER MATRIX(IDM,IDM),DTONUM
EXTERNAL DTONUM
C THE 5' END POINTER GOES FORWARDS
C THE 3' END POINTER GOES FORWARDS
L=0
I5=I5P-1
I3=I3P-1
DO 100 I=1,LENGTH
I5 = I5 + 1
I3 = I3 + 1
C WRITE(*,*)'I5,I3',I5,I3
L5 = DTONUM(SEQ(I5))
L3 = DTONUM(SEQ(I3))
L = L + MATRIX(L5,L3)
100 CONTINUE
REPEET = L
END
SUBROUTINE SQFIT5(SEQ,IDIM1,STRING,IDIM2,
1IS,IE,MINSC,MATCHS,IFOUND,MATRIC,IDM)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
REAL MATCHS,MINSC
INTEGER MATRIC(IDM,IDM),DTONUM
EXTERNAL DTONUM
MINSCR = MINSC
IDIF=(IE-IS+2)-IDIM2
IPSEQ=IS
DO 200 I=1,IDIF
NTOT=0
IP=IPSEQ
DO 100 J=1,IDIM2
NTOT = NTOT + MATRIC(DTONUM(SEQ(IP)),DTONUM(STRING(J)))
IP=IP+1
100 CONTINUE
C END OF COUNTING FOR THIS POSITION.IS TOTAL HIGH ENOUGH?
IF(NTOT.GE.MINSCR)THEN
MATCHS = NTOT
IFOUND = IP-IDIM2
RETURN
END IF
IPSEQ=IPSEQ+1
200 CONTINUE
IFOUND = 0
RETURN
END
SUBROUTINE SETCMP(SEQ,IDIM,IDM)
CHARACTER SEQ(IDIM)
PARAMETER (MAXCHR = 17)
INTEGER DTONUM
EXTERNAL DTONUM
COMMON /COMPC/COMP(MAXCHR)
SAVE /COMPC/
DO 10 I = 1,IDM
COMP(I) = 0.0
10 CONTINUE
DO 20 I = 1,IDIM
J = DTONUM(SEQ(I))
COMP(J) = COMP(J) + 1.
20 CONTINUE
T = MAX(1,IDIM)
DO 30 I = 1,IDM
COMP(I) = COMP(I) / T
30 CONTINUE
END