1016 lines
32 KiB
Fortran
1016 lines
32 KiB
Fortran
C PIPL (Protein interpretation program (library))
|
|
C 9-7-92 added fasta format capability
|
|
C 11-4-90 Changed title to be string rather than array
|
|
C corresponding change made to rdpira
|
|
C 18-4-90 Changed to allow use of library index when appropriate
|
|
C so using opir1 instead of opir, and rdpirb as well as
|
|
C rdpira
|
|
C 8-11-90 Changed call to getmf for compatibility with pip
|
|
C and replaced all calls to radio by radion
|
|
C 11-12-90 Changed library opening, added filnll and paramter
|
|
C 18-7-91 Added titles to pattern files
|
|
C 16-12-91 Added access to pir library in codata format and cdrom indices
|
|
C 2-3-92 set filnam = ' ' for calls to openf1
|
|
C
|
|
C author: Rodger Staden, Medical Research Council Centre,
|
|
C Laboratory of Molecular Biology, Hills Road,
|
|
C Cambridge, England
|
|
C
|
|
SUBROUTINE FMAIN()
|
|
PARAMETER (NAMLEN = 60)
|
|
CHARACTER*(NAMLEN) FILE1,FILNAM,HELPF,LIBLF
|
|
INTEGER BOTOPT,TOPOPT
|
|
PARAMETER (
|
|
+ MAXSEQ=180000,
|
|
+ MXSPAN=603,
|
|
+ MAXWIN=MAXSEQ+MXSPAN,
|
|
+ MAXWIR=180000,
|
|
+ MAXD36=MAXWIR/36,
|
|
+ MAXD2=MAXWIR/2,
|
|
+ MAXD3=MAXWIR/3,
|
|
+ MAXSD2=MAXSEQ/2,
|
|
+ MAXSD3=MAXSEQ/3,
|
|
+ MAXDEV=10,
|
|
+ LIBLF = 'SEQUENCELIBRARIES',
|
|
+ LENNAM = 10)
|
|
PARAMETER (FILE1='PROTMAT')
|
|
PARAMETER (MAXMOT = 100,
|
|
+ MAXWTS = 5000,
|
|
+ IDM = 26)
|
|
INTEGER BESTP(MAXMOT),BESTQ(MAXMOT)
|
|
REAL BESTS(MAXMOT)
|
|
CHARACTER*(LENNAM) ENAMEL,ENAME,NAMSAV(MAXMOT)
|
|
PARAMETER (MAXDIV = 15)
|
|
INTEGER DIVDEV(MAXDIV),RSIZEN
|
|
INTEGER DEVNOS(MAXDEV)
|
|
REAL WORKR(MAXWIR)
|
|
INTEGER WORKI(MAXWIR)
|
|
INTEGER MAT1(IDM,IDM),MATRIX(IDM,IDM)
|
|
CHARACTER SEQ(MAXWIN),SEQW(MAXSEQ),CHRSET(IDM)
|
|
C COMPATIBILITY WITH ANALYSEQ HELP STUFF
|
|
PARAMETER (BOTOPT=0,TOPOPT=1)
|
|
INTEGER HELPS(BOTOPT:TOPOPT),HELPE(BOTOPT:TOPOPT)
|
|
CHARACTER*8 KEYNS(MAXMOT)
|
|
CHARACTER LTYPE
|
|
EQUIVALENCE (WORKR,WORKI)
|
|
C GET DEVICE NUMBERS
|
|
CALL UNITNO(KBIN,KBOUT,DEVNOS,MAXDEV)
|
|
CALL GIDMAT(MAT1,IDM,22)
|
|
WRITE(KBOUT,1000)
|
|
1000 FORMAT(/,
|
|
+' PIPL (Protein interpretation program (library))',
|
|
+' V4.1 Jul 1991',/,
|
|
+' Author: Rodger Staden',/,
|
|
+' Searches protein libraries for patterns of motifs',/)
|
|
CALL INITLU(IDM)
|
|
CALL GETMAT(DEVNOS(1),FILE1,MATRIX,IDM,CHRSET,KBOUT,IOK)
|
|
IF(IOK.NE.0)STOP
|
|
FILNAM = ' '
|
|
CALL OPENF1(DEVNOS(1),FILNAM,1,IOK,KBIN,KBOUT,
|
|
+'Name for results file',
|
|
+HELPS(1),HELPE(1),HELPF,DEVNOS(2))
|
|
IF(IOK.NE.0)STOP
|
|
C OPEN LIBRARY
|
|
LIBIN = 2
|
|
IDEVNL = DEVNOS(7)
|
|
IDEVLL = DEVNOS(8)
|
|
IDEVEN = DEVNOS(9)
|
|
IDEVD = DEVNOS(MAXDEV)
|
|
CALL RDLIBL(FILNAM,KBIN,KBOUT,
|
|
+HELPS(1),HELPE(1),HELPF,DEVNOS(2),IDEVLL,IDEVEN,IDEVNL,
|
|
+LIBLF,LIBIN,DIVDEV,MAXDIV,IDEVD,
|
|
+LIST,ENAMEL,LIBTYP,LTYPE,NDIV,RSIZEN,NRECEN,IOK)
|
|
IF(IOK.NE.0)STOP
|
|
IDEVOT=DEVNOS(1)
|
|
J1 = 1
|
|
OPT = 1
|
|
CALL PATTEO(SEQ(MXSPAN+1),MAXSEQ,SEQW,MAXSEQ,
|
|
+WORKI(1),WORKI(MAXMOT+1),WORKI(2*MAXMOT+1),WORKI(3*MAXMOT+1),
|
|
+WORKI(4*MAXMOT+1),WORKI(5*MAXMOT+1),WORKI(6*MAXMOT+1),
|
|
+WORKI(7*MAXMOT+1),WORKI(8*MAXMOT+1),WORKI(9*MAXMOT+1),
|
|
+WORKI(10*MAXMOT+1),WORKI(11*MAXMOT+1),WORKI(12*MAXMOT+1),
|
|
+WORKI(13*MAXMOT+1),WORKI(14*MAXMOT+1),WORKI(15*MAXMOT+1),
|
|
+WORKI(16*MAXMOT+1),WORKI(17*MAXMOT+1),WORKI(18*MAXMOT+1),
|
|
+WORKI(19*MAXMOT+1),WORKI(20*MAXMOT+1),
|
|
+WORKR(22*MAXMOT+1),WORKR(23*MAXMOT+1),
|
|
+FILNAM,MAXMOT,MAXWTS,IDEVOT,DEVNOS(3),DEVNOS(4),J1,
|
|
+KBIN,KBOUT,MATRIX,DEVNOS(5),IDM,SEQ(1),
|
|
+WORKI(23*MAXMOT+1+MAXWTS),
|
|
+WORKI(23*MAXMOT+MAXWTS+3001),LIST,MAT1,
|
|
+NAMSAV,KEYNS,CHRSET,
|
|
+ HELPS,HELPE,HELPF,DEVNOS(2),DEVNOS(6),ENAMEL,ENAME,LIBTYP,
|
|
+ IDEVEN,RSIZEN,NRECEN,IDEVNL,DIVDEV,NDIV,BESTP,BESTQ,BESTS,
|
|
+ LTYPE)
|
|
C NB HAVE SET MAXIMUM COMBINED STRING LENGTH TO 3000 ON PREVIOUS LINE
|
|
C AND MAX INTEGER VERSION OF SEQUENCE IS WHATEVER IS LEFT AND THIS IS NOT
|
|
C SENT OR CHECKED BY THE CODE !!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
C
|
|
900 CONTINUE
|
|
END
|
|
SUBROUTINE PATTEO(SEQ,MAXSEQ,STRING,MAXSTR,
|
|
+LENGTH,CLASS,RELMOT,RANGES,RANGEL,RANGET,RANGEM,IENTRY,
|
|
+START2,IEND2,WTSTR,START,IEND,MATCHQ,RELEND,MATCHP,
|
|
+STRNGS,LAST5,LAST3S,LAST3E,MATCHS,CUTOFF,WEIGHT,FILNAM,
|
|
+MAXMOT,MAXWTS,
|
|
+IDEV1,IDEV2,IDEV3,KSTART,KBIN,KBOUT,
|
|
+MATRIX,IDEV4,IDM,COMBIN,STRNGI,SEQI,LIST,MAT1,
|
|
+NAMSAV,KEYNS,CHRSET,
|
|
+IHELPS,IHELPE,HELPF,IDEVH,IDEVN,ENAMEL,NAMIN,LIBTYP,
|
|
+ IDEVEN,RSIZEN,NRECEN,IDEVNL,DIVDEV,NDIV,BESTP,BESTQ,BESTS,
|
|
+ LTYPE)
|
|
INTEGER DIVDEV(NDIV),RSIZEN
|
|
INTEGER LENGTH(MAXMOT),CLASS(MAXMOT),RELMOT(MAXMOT)
|
|
INTEGER RANGES(MAXMOT),RANGEL(MAXMOT)
|
|
INTEGER RANGET(MAXMOT),RANGEM(MAXMOT),IENTRY(MAXMOT)
|
|
INTEGER START2(MAXMOT),IEND2(MAXMOT),MATRIX(IDM,IDM)
|
|
CHARACTER SEQ(MAXSEQ),STRING(MAXSTR)
|
|
INTEGER WTSTR(MAXMOT),START(MAXMOT),IEND(MAXMOT)
|
|
INTEGER MATCHQ(MAXMOT),RELEND(MAXMOT)
|
|
INTEGER MATCHP(MAXMOT),STRNGS(MAXMOT)
|
|
INTEGER LAST5(MAXMOT),LAST3S(MAXMOT),LAST3E(MAXMOT)
|
|
REAL WEIGHT(MAXWTS),CUTOFF(MAXMOT),MATCHS(MAXMOT)
|
|
REAL MINSCR,MAXSCR
|
|
CHARACTER FILNAM*(*),HELPF*(*),NAMIN*(*),ENAMEL*(*),LTYPE
|
|
CHARACTER COMBIN(MAXMOT),CHRSET(IDM)
|
|
INTEGER BESTP(MAXMOT),BESTQ(MAXMOT),ENTRYN
|
|
REAL BESTS(MAXMOT)
|
|
C NB PROBLEM ABOUT USING MAXSEQ AS DIMENSION!!!!!!!!!!!!
|
|
INTEGER STRNGI(MAXSTR),SEQI(MAXSEQ),MAT1(IDM,IDM)
|
|
CHARACTER TITLE*60,TITLEP*80
|
|
REAL EXPECC(26)
|
|
CHARACTER*(*) NAMSAV(MAXMOT),KEYNS(MAXMOT)
|
|
PARAMETER (MAXCLS = 6)
|
|
PARAMETER (MAXPRM = 25)
|
|
CHARACTER PROMPT(5)*(MAXPRM)
|
|
SAVE EXPECC
|
|
DATA EXPECC/2.9,7.0,6.1,5.2,8.6,8.4,4.3,5.5,6.0,3.9,0.0,0.0,
|
|
+2.0,4.9,6.6,1.7,4.5,7.4,6.6,3.6,3.4,1.3,0.0,0.0,0.0,0.0/
|
|
C
|
|
C ZERO ARRAYS
|
|
C
|
|
IDSEQ = 1000
|
|
CALL FILLI(LENGTH,MAXMOT,0)
|
|
CALL FILLI(CLASS,MAXMOT,0)
|
|
CALL FILLI(RELMOT,MAXMOT,0)
|
|
CALL FILLI(RANGES,MAXMOT,0)
|
|
CALL FILLI(RANGEL,MAXMOT,0)
|
|
CALL FILLI(RANGET,MAXMOT,0)
|
|
CALL FILLI(RANGEM,MAXMOT,0)
|
|
CALL FILLI(IENTRY,MAXMOT,0)
|
|
CALL FILLI(START2,MAXMOT,0)
|
|
CALL FILLI(IEND2,MAXMOT,0)
|
|
CALL FILLI(WTSTR,MAXMOT,0)
|
|
CALL FILLI(START,MAXMOT,0)
|
|
CALL FILLI(IEND,MAXMOT,0)
|
|
CALL FILLI(MATCHQ,MAXMOT,0)
|
|
CALL FILLI(RELEND,MAXMOT,0)
|
|
CALL FILLI(MATCHP,MAXMOT,0)
|
|
CALL FILLI(STRNGS,MAXMOT,0)
|
|
CALL FILLI(LAST5,MAXMOT,0)
|
|
CALL FILLI(LAST3S,MAXMOT,0)
|
|
CALL FILLI(LAST3E,MAXMOT,0)
|
|
CALL FILLR(CUTOFF,MAXMOT,0.0)
|
|
CALL FILLR(MATCHS,MAXMOT,0.0)
|
|
CALL FILLR(WEIGHT,MAXWTS,0.0)
|
|
CALL FILLC(COMBIN,MAXMOT,'A')
|
|
CALL FILLI(BESTP,MAXMOT,0)
|
|
CALL FILLC(BESTQ,MAXMOT,0)
|
|
CALL FILLC(BESTS,MAXMOT,0.)
|
|
ITOTAL = 0
|
|
CALL SETCMP(EXPECC,IDM)
|
|
PROMPT(1) = 'Motif by motif'
|
|
PROMPT(2) = 'Inclusive'
|
|
PROMPT(3) = 'Scores only'
|
|
PROMPT(4) = 'Complete padded sequences'
|
|
PROMPT(5) = 'Padded sections'
|
|
IOPT = 1
|
|
CALL RADION('Select results display mode',PROMPT,5,IOPT,
|
|
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
|
|
IF(IOPT.LT.1) RETURN
|
|
JOPT = 0
|
|
CALL YESNO(JOPT,'Report all matches',
|
|
+ IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
|
|
IF(JOPT.LT.0) RETURN
|
|
IF(IOPT.EQ.4)THEN
|
|
MININ = 1
|
|
MAXIN = 9999
|
|
JSTART = 1
|
|
WRITE(KBOUT,1020)
|
|
1020 FORMAT(
|
|
+' For output option 4, we need to position the first motifs',/,
|
|
+' in a pattern so that they are aligned with one another')
|
|
CALL GETINT(MININ,MAXIN,JSTART,'Position of first motif',
|
|
+IVAL,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
|
|
IF(IOK.NE.0) RETURN
|
|
JSTART = IVAL
|
|
END IF
|
|
FILNAM = ' '
|
|
CALL OPENF1(IDEV3,FILNAM,0,IOK,KBIN,KBOUT,
|
|
+ 'Pattern definition file',
|
|
+ IHELPS,IHELPE,HELPF,IDEVH)
|
|
IF(IOK.NE.0)RETURN
|
|
JDEV = IDEV3
|
|
C
|
|
C GET MOTIF DEFINITIONS
|
|
C
|
|
C RETURN STRING LENGTH FOR COMPATIBILITY WITH LIB SEARCH
|
|
NSTRNG = MAXSTR
|
|
CALL GETMF(KBIN,KBOUT,STRING,NSTRNG,ISTRNG,
|
|
+ LENGTH,MAXMOT,CLASS,RELMOT,RANGES,RANGEL,
|
|
+ RANGET,RANGEM,
|
|
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,IDEV2,
|
|
+ WTSTR,JDEV,IOK,RELEND,IDSEQ,IDEV4,IDM,COMBIN,
|
|
+ MAXCLS,MATRIX,MAT1,
|
|
+ PMINT,PMAXT,PROBT,EXPTT,CHRSET,
|
|
+ IHELPS,IHELPE,HELPF,IDEVH,KEYNS,NAMSAV,FILNAM,0,TITLEP)
|
|
IF(IOK.NE.0)RETURN
|
|
IF(NMOT.LT.1)RETURN
|
|
C
|
|
C
|
|
C DISPLAY THE SIGNAL DESCRIPTION
|
|
C
|
|
RANGES(1) = 1
|
|
CALL DESSIG(
|
|
+ KBOUT,STRING,MAXSTR,
|
|
+ LENGTH,CLASS,RELMOT,RANGES,RANGEL,
|
|
+ RANGET,RANGEM,
|
|
+ STRNGS,NMOT,WEIGHT,MAXWTS,CUTOFF,
|
|
+ WTSTR,RELEND,COMBIN,KEYNS,TITLEP)
|
|
C
|
|
13 CONTINUE
|
|
WRITE(KBOUT,2003)PROBT
|
|
2003 FORMAT(' Probability of finding pattern = ',E10.4)
|
|
WRITE(KBOUT,2004)EXPTT
|
|
2004 FORMAT(' Expected number of matches per 1000 residues = ',E10.4)
|
|
CALL GETRL(0.,1.,1.,'Maximum pattern probability',
|
|
+XP,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
|
|
IF(IOK.NE.0) RETURN
|
|
PMINC = XP
|
|
CALL GETRL(-9999.,9999.,-9999.,'Minimum pattern score',
|
|
+XP,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
|
|
IF(IOK.NE.0) RETURN
|
|
CUTSCR = XP
|
|
MINSCR = 9999999.
|
|
MAXSCR = -9999999.
|
|
IF(NSTRNG.GT.0)CALL CONNUM(STRING,STRNGI,NSTRNG)
|
|
C
|
|
C
|
|
ICREC = 0
|
|
IFINEX = 0
|
|
NENTRY = 0
|
|
ENTRYN = 0
|
|
20 CONTINUE
|
|
IDSEQ = MAXSEQ
|
|
IF(LIBTYP.EQ.1) THEN
|
|
CALL CDROML(LIST,NAMIN,ENAMEL,
|
|
+ IDEVEN,RSIZEN,NRECEN,IDEVNL,SEQ,IDSEQ,
|
|
+ DIVDEV,NDIV,ICREC,IFINEX,TITLE,KBOUT,LTYPE,IOK)
|
|
IF(IOK.NE.0) GO TO 900
|
|
FILNAM = NAMIN
|
|
ELSE IF(LIBTYP.EQ.2) THEN
|
|
CALL RDPIRA(SEQ,IDSEQ,
|
|
+ IDEVEN,KBOUT,TITLE,FILNAM,LIST,ENAMEL,IDEVNL)
|
|
ELSE IF(LIBTYP.EQ.3) THEN
|
|
CALL RDFASA(SEQ,IDSEQ,
|
|
+ IDEVEN,KBOUT,TITLE,FILNAM,LIST,ENAMEL,IDEVNL,ENTRYN)
|
|
ELSE
|
|
RETURN
|
|
END IF
|
|
IF(IDSEQ.LT.1)GO TO 900
|
|
C CONVERT TO INTEGER
|
|
IF(IDSEQ.GT.0)CALL CONNUM(SEQ,SEQI,IDSEQ)
|
|
C
|
|
NENTRY = NENTRY + 1
|
|
RANGES(1) = 1
|
|
RANGEL(1) = IDSEQ
|
|
C
|
|
C DO THE SEARCH
|
|
C
|
|
CALL SRCSIH(KBIN,KBOUT,WTSTR,LENGTH,CLASS,
|
|
+RANGES,RANGEL,START,IEND,RELMOT,MATCHP,STRNGS,WEIGHT,
|
|
+MAXWTS,CUTOFF,MATCHS,NMOT,STRING,MAXSTR,SEQ,IDSEQ,
|
|
+RANGET,RANGEM,IENTRY,START2,IEND2,MATRIX,MATCHQ,RELEND,
|
|
+IDEV1,LAST5,LAST3S,LAST3E,
|
|
+IOPT,ITOTAL,KSTART,IDM,COMBIN,
|
|
+CUTSCR,MINSCR,MAXSCR,SEQI,STRNGI,TITLE,FILNAM,
|
|
+MAT1,PMINT,PMAXT,PROBT,PMINC,IDEVN,JSTART,KEYNS,
|
|
+BESTP,BESTQ,BESTS,JOPT)
|
|
C
|
|
C
|
|
GO TO 20
|
|
C
|
|
C
|
|
900 CONTINUE
|
|
WRITE(KBOUT,1002)ITOTAL
|
|
1002 FORMAT(' Total matches found',I7)
|
|
WRITE(KBOUT,1006)MINSCR,MAXSCR
|
|
1006 FORMAT(' Minimum and maximum observed scores=',2F12.2)
|
|
WRITE(KBOUT,1009)NENTRY
|
|
1009 FORMAT(' ',I7,' Entries processed')
|
|
END
|
|
C***************************************************************
|
|
SUBROUTINE SRCSIH(KBIN,KBOUT,WTSTR,LENGTH,CLASS,
|
|
+RANGES,RANGEL,START,IEND,RELMOT,MATCHP,STRNGS,WEIGHT,
|
|
+MAXWTS,CUTOFF,MATCHS,NMOT,STRING,MAXSTR,SEQ,IDSEQ,
|
|
+RANGET,RANGEM,IENTRY,START2,IEND2,MATRIX,MATCHQ,RELEND,
|
|
+IDEVOT,LAST5,LAST3S,LAST3E,
|
|
+IOPT,ITOTAL,KSTART,IDM,COMBIN,
|
|
+CUTSCR,MINSCR,MAXSCR,SEQI,STRNGI,TITLE,FILNAM,
|
|
+MAT1,PMINT,PMAXT,PROBT,PMINC,IDEVN,JSTART,KEYNS,
|
|
+BESTP,BESTQ,BESTS,JOPT)
|
|
C ROUTINE TO SEARCH FOR SIGNALS COMPOSED OF MOTIFS
|
|
C WEIGHT = WEIGHTS FOR MATRICES
|
|
C CUTOFF = CUTOFF SCORES
|
|
C LENGTH = MOTIF LENGTHS
|
|
C CLASS = MOTIF CLASS
|
|
C COMBIN = LOGICAL COMBINATION A, O, N
|
|
C WTSTR = POINTER TO WEIGHT STARTS
|
|
C RANGES = RANGES START
|
|
C RANGEL = RANGE LENGTH (A DISTANCE MEASURED FROM RANGES)
|
|
C RELMOT = MOTIF NUMBER THAT A RANGE IS RELATIVE TO IE THE
|
|
C FIRST MOTIF'S RANGE IS RELATIVE TO MOTIF 0, BUT
|
|
C ANY OTHER MOTIF MAY HAVE TO BE DEFINED RELATIVE
|
|
C TO ANY OTHER. THE MOST COMMON WOULD BE THE FIRST
|
|
C MOTIF OR THE LAST ONE SEARCHED FOR.
|
|
C RELEND = IS A SPECIAL CASE FLAG FOR STEMS. IT ALLOWS OTHER
|
|
C MOTIFS TO HAVE THEIR POSITIONS RELATIVE TO THE 3' SIDE
|
|
C OF A STEM. IT IS 5 FOR THE 5 PRIME SIDE, 3 FOR 3' SIDE
|
|
C START = RANGE START DURING SEARCH (SOME POSITIONS MAY HAVE
|
|
C BEEN TRIED)
|
|
C IEND = RANGE END POSITION FOR CURRENT INITIAL START (WHEN IT
|
|
C IS RESET DEPENDS ON WHETHER IT IS DEFINED RELATIVE TO
|
|
C THE FIRST OR THE PREVIOUS MOTIF. IF IT IS DEFINED RELATIVE
|
|
C TO THE FIRST MOTIF IT IS RESET WHEN WE FIND A MATCH FOR THE
|
|
C FIRST MOTIF. IF IT IS DEFINED RELATIVE TO THE PREVIOUS MOTIF
|
|
C WE MUST RESET WHEN WE MOVE FORWARD ONE MOTIF. I THINK THIS
|
|
C CAN BE TAKEN CARE OF BY UPDATING ALL THOSE MOTIFS THAT ARE
|
|
C DEFINED TO THE CURRENT MOTIF EVERY TIME WE MOVE FORWARD
|
|
C ONE MOTIF (IE IT INCLUDES THE FIRST MOTIF SO IT IS NOT A
|
|
C SPECIAL CASE). OTHERWISE WE UPDATE POSITIONS WHEN WE FIND
|
|
C A MATCH FOR THEM (WE SET TO THE MATCH POSITION PLUS 1)
|
|
C MATCHP = LIST OF CURRENT MATCH POSITIONS FOR EACH MOTIF
|
|
C MATCHS = LIST OF CURRENT MATCH SCORES FOR EACH MOTIF
|
|
C IFOUND = A FLAG TO INDICATE SUCCESS OR FAILURE OF A SEARCH ROUTINE
|
|
C 1 = SUCCESS, 0 = FAIL
|
|
C STRNGS = POINTER TO STRING STARTS IN CHARACTER ARRAY STRING
|
|
C TEMPORARY VALUES ARE:
|
|
C MOTIF = ACTUAL MOTIF NUMBER
|
|
C ICLASS = CLASS
|
|
C ILEN = LENGTH OF MOTIF
|
|
C CUT = CUTOFF
|
|
C WT = START OF WEIGHTS FOR THIS MOTIF
|
|
C ISTRST = START OF STRING
|
|
C RANGET = START OF 3' RANGE FOR STEM SEARCHES
|
|
C RANGEM = END OF 3' RANGE FOR STEM SEARCHES
|
|
C IENTRY = FLAG TO SIGNIFY MORE 3' STEM POSITIONS FOR LAST 5' START
|
|
C 0 = NONE, ON RETURN FROM MOTIF6 IT CONTAINS THE 3' MATCH
|
|
C POSITION
|
|
C MATCHQ = MATCH POSITION FOR STEM SEARCH
|
|
C COMB = LOGICAL COMBINATION A, O, N
|
|
INTEGER WTSTR(NMOT),LENGTH(NMOT),CLASS(NMOT)
|
|
INTEGER RANGES(NMOT),RANGEL(NMOT),START(NMOT),IEND(NMOT)
|
|
INTEGER RELMOT(NMOT),MATCHP(NMOT),STRNGS(NMOT)
|
|
INTEGER RANGET(NMOT),RANGEM(NMOT),IENTRY(NMOT),RELEND(NMOT)
|
|
INTEGER START2(NMOT),IEND2(NMOT),MATRIX(IDM,IDM),MATCHQ(NMOT)
|
|
INTEGER LAST5(NMOT),LAST3S(NMOT),LAST3E(NMOT)
|
|
REAL WEIGHT(MAXWTS),CUTOFF(NMOT),MATCHS(NMOT)
|
|
REAL MINSCR,MAXSCR
|
|
CHARACTER SEQ(IDSEQ),STRING(MAXSTR)
|
|
CHARACTER COMBIN(NMOT),COMB
|
|
INTEGER STRNGI(MAXSTR),SEQI(IDSEQ),MAT1(IDM,IDM)
|
|
CHARACTER TITLE*(*),FILNAM*(*)
|
|
CHARACTER*(*) KEYNS(NMOT)
|
|
INTEGER BESTP(NMOT),BESTQ(NMOT)
|
|
REAL BESTS(NMOT),MAXSS,MINMSS
|
|
PARAMETER (MINMSS = -999999.)
|
|
C
|
|
C
|
|
C INITIALIZE
|
|
JMOT = 0
|
|
IRET = 0
|
|
5 CONTINUE
|
|
JMOT = JMOT + 1
|
|
IF(JMOT.LE.NMOT)THEN
|
|
IF(RELMOT(JMOT).EQ.0)THEN
|
|
START(JMOT) = RANGES(1)
|
|
IEND(JMOT) = RANGES(1) + RANGEL(1) -1
|
|
GO TO 5
|
|
END IF
|
|
END IF
|
|
MOTIF = 1
|
|
ICLASS = CLASS(1)
|
|
ILEN = LENGTH(1)
|
|
CUT = CUTOFF(1)
|
|
IWT = WTSTR(1)
|
|
ISTRST = STRNGS(1)
|
|
IENTRY(1) = 0
|
|
COMB = COMBIN(1)
|
|
DO 10 I = 1,NMOT
|
|
MATCHP(I) = 0
|
|
10 CONTINUE
|
|
MAXSS = MINMSS
|
|
C
|
|
C
|
|
C
|
|
C
|
|
C
|
|
100 CONTINUE
|
|
C
|
|
C
|
|
C THIS A CLASS CLASS MOTIF, PERFORM THE APPROPRIATE SEARCH IF THE START
|
|
C POSITION IS >0. (IF IT IS NOT THE CURRENT MOTIF IS A NOT THAT HAS
|
|
C ALREADY BEEN SEARCHED FOR
|
|
C
|
|
IFOUND = 0
|
|
IF(START(MOTIF).GT.0)THEN
|
|
C
|
|
C
|
|
IF(ICLASS.EQ.1)THEN
|
|
CALL MOTIF1(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
|
|
+ IEND(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,
|
|
+ CUTOFF(MOTIF),0)
|
|
ELSE IF(ICLASS.EQ.2)THEN
|
|
CALL MOTIF2(SEQ,IDSEQ,STRING(ISTRST),ILEN,START(MOTIF),
|
|
+ IEND(MOTIF),CUT,MATCHP(MOTIF),MATCHS(MOTIF),IFOUND)
|
|
ELSE IF(ICLASS.EQ.3)THEN
|
|
CALL MOTFI3(SEQI,IDSEQ,STRNGI(ISTRST),ILEN,START(MOTIF),
|
|
+ IEND(MOTIF),CUT,MATCHP(MOTIF),MATCHS(MOTIF),IFOUND,MATRIX,IDM)
|
|
ELSE IF(ICLASS.EQ.4)THEN
|
|
CALL MOTFI4(SEQI,IDSEQ,ILEN,START(MOTIF),
|
|
+ IEND(MOTIF),WEIGHT(IWT),CUT,MATCHP(MOTIF),MATCHS(MOTIF),
|
|
+ IFOUND,IDM)
|
|
ELSE IF(ICLASS.EQ.5)THEN
|
|
CALL MOTIF8(SEQI,IDSEQ,MATRIX,LENGTH(MOTIF),START(MOTIF),
|
|
+ IEND(MOTIF),RANGET(MOTIF),RANGEM(MOTIF),
|
|
+ CUTOFF(MOTIF),MATCHP(MOTIF),MATCHS(MOTIF),
|
|
+ IENTRY(MOTIF),IFOUND,MATCHQ(MOTIF),
|
|
+ LAST5(MOTIF),LAST3S(MOTIF),LAST3E(MOTIF),IDM)
|
|
ELSE IF(ICLASS.EQ.6)THEN
|
|
CALL MOTFI4(SEQI,IDSEQ,ILEN,START(MOTIF),
|
|
+ IEND(MOTIF),WEIGHT(IWT),CUT,MATCHP(MOTIF),MATCHS(MOTIF),
|
|
+ IFOUND,IDM)
|
|
ELSE
|
|
WRITE(KBOUT,*)'UNKNOWN CLASS!!'
|
|
END IF
|
|
C
|
|
C
|
|
END IF
|
|
C
|
|
C
|
|
C MATCH FOUND WHEN MATCH WANTED ?
|
|
C
|
|
C
|
|
C
|
|
IF(((IFOUND.EQ.0).AND.(COMB.NE.'N')).OR.
|
|
+ ((IFOUND.GT.0).AND.(COMB.EQ.'N')))THEN
|
|
C
|
|
C NO SO GO BACK OR SIDEWAYS ONE MOTIF
|
|
C
|
|
C
|
|
CALL BAKSID(CLASS,LENGTH,CUTOFF,STRNGS,NMOT,
|
|
+ MOTIF,ICLASS,ILEN,CUT,IWT,ISTRST,WTSTR,
|
|
+ RELMOT,START,IEND,MATCHQ,RANGES,RANGEL,RELEND,IRET,MATCHP,
|
|
+ COMBIN,COMB)
|
|
C
|
|
C
|
|
C IF CANT GO BACK ANY FURTHER QUIT
|
|
IF(IRET.NE.0) THEN
|
|
IF(MAXSS.GT.MINMSS) THEN
|
|
CALL DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
|
|
+ CLASS,MATCHQ,IOPT,
|
|
+ KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
|
|
+ TITLE,FILNAM,
|
|
+ MATRIX,MAT1,IDM,PMINT,PMAXT,PROBT,
|
|
+ WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,IDEVN,
|
|
+ JSTART,KEYNS,BESTP,BESTQ,BESTS,2,MAXSS)
|
|
ITOTAL = ITOTAL + 1
|
|
END IF
|
|
RETURN
|
|
END IF
|
|
C
|
|
C
|
|
ELSE
|
|
C
|
|
C
|
|
C MATCH FOUND.
|
|
C
|
|
C
|
|
C PREPARE FOR NEXT SEARCH THIS MOTIF BY INCREMENTING POINTER
|
|
C TO SEARCH RANGE (NOT FOR CLASS 6 WHICH IS HANDLED BY MOTIF6)
|
|
C
|
|
C
|
|
IF(COMB.EQ.'N')THEN
|
|
START(MOTIF) = -9
|
|
ELSE
|
|
IF(ICLASS.NE.5) START(MOTIF) = MATCHP(MOTIF) + 1
|
|
END IF
|
|
C
|
|
C TRY GOING FORWARD ONE MOTIF
|
|
C
|
|
C
|
|
CALL FORWAD(CLASS,LENGTH,CUTOFF,NMOT,
|
|
+ MOTIF,ICLASS,ILEN,CUT,IWT,RELMOT,START,IEND,
|
|
+ RANGES,RANGEL,STRNGS,ISTRST,WTSTR,IDSEQ,IENTRY,
|
|
+ RANGET,RANGEM,START2,IEND2,MATCHQ,RELEND,MATCHP,IDSPLY,
|
|
+ COMBIN,COMB)
|
|
C
|
|
C
|
|
C
|
|
C IS THIS THE LAST MOTIF? IF SO DISPLAY THE MATCH
|
|
C
|
|
C
|
|
IF(IDSPLY.EQ.1)THEN
|
|
IF(JOPT.NE.1) ITOTAL = ITOTAL + 1
|
|
CALL DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
|
|
+ CLASS,MATCHQ,IOPT,
|
|
+ KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
|
|
+ TITLE,FILNAM,
|
|
+ MATRIX,MAT1,IDM,PMINT,PMAXT,PROBT,
|
|
+ WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,IDEVN,
|
|
+ JSTART,KEYNS,BESTP,BESTQ,BESTS,JOPT,MAXSS)
|
|
C
|
|
C
|
|
C HORRIBLE SPECIAL CASE - IF LAST MOTIF IS NOTTED WE MUST MOVE BACK AFTER
|
|
C DISPLAY
|
|
IF(COMB.EQ.'N')THEN
|
|
CALL BAKSID(CLASS,LENGTH,CUTOFF,STRNGS,NMOT,
|
|
+ MOTIF,ICLASS,ILEN,CUT,IWT,ISTRST,WTSTR,
|
|
+ RELMOT,START,IEND,MATCHQ,RANGES,RANGEL,RELEND,IRET,MATCHP,
|
|
+ COMBIN,COMB)
|
|
C
|
|
C
|
|
C IF CANT GO BACK ANY FURTHER QUIT
|
|
IF(IRET.NE.0) THEN
|
|
IF(MAXSS.GT.MINMSS) THEN
|
|
CALL DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEVOT,
|
|
+ CLASS,MATCHQ,IOPT,
|
|
+ KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
|
|
+ TITLE,FILNAM,
|
|
+ MATRIX,MAT1,IDM,PMINT,PMAXT,PROBT,
|
|
+ WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,IDEVN,
|
|
+ JSTART,KEYNS,BESTP,BESTQ,BESTS,2,MAXSS)
|
|
ITOTAL = ITOTAL + 1
|
|
END IF
|
|
RETURN
|
|
END IF
|
|
END IF
|
|
END IF
|
|
C
|
|
C
|
|
END IF
|
|
C
|
|
C
|
|
C GO BACK FOR NEXT SEARCH
|
|
C
|
|
C
|
|
GO TO 100
|
|
END
|
|
C*********************************************************************
|
|
SUBROUTINE MOTFI3(SEQ,IDIM1,STRING,IDIM2,ISTART,IEND,CUTOFF,
|
|
+MATCHP,MATCHS,IFOUND,MATRIC,IDM)
|
|
INTEGER SEQ(IDIM1),STRING(IDIM2)
|
|
INTEGER MATRIC(IDM,IDM)
|
|
REAL MATCHS
|
|
IFOUND = 0
|
|
IF(ISTART.LT.1)ISTART=1
|
|
IF(ISTART.GT.IDIM1)RETURN
|
|
CALL SQFTI5(SEQ,IDIM1,STRING,IDIM2,ISTART,IEND,CUTOFF,MATCHS,
|
|
+IFOUND,MATRIC,IDM)
|
|
IF(IFOUND.EQ.0)RETURN
|
|
C SAVE MATCH POSITION
|
|
MATCHP = IFOUND
|
|
RETURN
|
|
END
|
|
C*********************************************************************
|
|
SUBROUTINE SQFTI5(SEQ,IDIM1,STRING,IDIM2,
|
|
1IS,IE,MINSC,MATCHS,IFOUND,MATRIC,IDM)
|
|
C AUTHOR: RODGER STADEN
|
|
INTEGER SEQ(IDIM1),STRING(IDIM2)
|
|
REAL MATCHS,MINSC
|
|
INTEGER MATRIC(IDM,IDM)
|
|
MINSCR = MINSC
|
|
C
|
|
IDIF=(IE-IS+2)-IDIM2
|
|
C IDIF IS THE NUMBER OF POSNS TO TRY
|
|
C IPSTR GOES FROM 1 TO IDIM2 IDIF TIMES
|
|
C TRY ALL POSSIBLE POSITIONS FOR MATCHING AND SCORE FOR EACH
|
|
C POINT TO ARRAY ELEMENT CORRESPONDING TO FIRST BASE
|
|
IPSEQ=IS
|
|
DO 200 I=1,IDIF
|
|
NTOT=0
|
|
IP=IPSEQ
|
|
DO 100 J=1,IDIM2
|
|
NTOT = NTOT + MATRIC(SEQ(IP),STRING(J))
|
|
IP=IP+1
|
|
100 CONTINUE
|
|
C END OF COUNTING FOR THIS POSITION.IS TOTAL HIGH ENOUGH?
|
|
IF(NTOT.GE.MINSCR)THEN
|
|
MATCHS = NTOT
|
|
IFOUND = IP-IDIM2
|
|
RETURN
|
|
END IF
|
|
IPSEQ=IPSEQ+1
|
|
200 CONTINUE
|
|
IFOUND = 0
|
|
RETURN
|
|
END
|
|
C*********************************************************************
|
|
SUBROUTINE MOTFI4(SEQ,IDIM1,LENGTH,ISTART,IEND,
|
|
+WEIGHT,CUTOFF,MATCHP,MATCHS,IFOUND,IDM)
|
|
REAL WEIGHT(IDM,LENGTH)
|
|
REAL MATCHS
|
|
INTEGER SEQ(IDIM1)
|
|
IFOUND = 0
|
|
IF(ISTART.LT.1)ISTART=1
|
|
L1 = IEND-ISTART+1
|
|
IF(ISTART.GT.IDIM1)RETURN
|
|
IF(L1.LT.LENGTH)RETURN
|
|
DO 10 I=ISTART,ISTART+L1-LENGTH
|
|
SUM = 0.
|
|
K = 0
|
|
DO 5 J=I,I+LENGTH-1
|
|
K = K + 1
|
|
SUM = SUM + WEIGHT(SEQ(J),K)
|
|
5 CONTINUE
|
|
IF(SUM.GE.CUTOFF) THEN
|
|
MATCHP = I
|
|
MATCHS = SUM
|
|
IFOUND = I
|
|
RETURN
|
|
END IF
|
|
10 CONTINUE
|
|
IFOUND = 0
|
|
END
|
|
C*********************************************************************
|
|
SUBROUTINE FMOTI4(SEQ,IDIM,WT,LENGTH,CUTOFF,SUM,IFOUND,IDM)
|
|
C AUTHOR: RODGER STADEN
|
|
INTEGER SEQ(IDIM)
|
|
REAL WT(IDM,LENGTH)
|
|
DO 10 I=1,IDIM-LENGTH+1
|
|
SUM=0.
|
|
K=0
|
|
DO 5 J=I,I+LENGTH-1
|
|
K=K+1
|
|
SUM=SUM+WT(SEQ(J),K)
|
|
5 CONTINUE
|
|
IF(SUM.GE.CUTOFF)THEN
|
|
IFOUND = I
|
|
RETURN
|
|
END IF
|
|
10 CONTINUE
|
|
IFOUND = 0
|
|
END
|
|
C*********************************************************************
|
|
SUBROUTINE MOTIF8(SEQ,IDSEQ,MATRIX,LENGTH,I5STAR,I5END,
|
|
+ I3STAR,I3END,CUTOFF,MATCHP,MATCHS,
|
|
+ IENTRY,IFOUND,MATCHQ,
|
|
+ LAST5,LAST3S,LAST3E,IDM)
|
|
C AUTHOR: RODGER STADEN
|
|
INTEGER SEQ(IDSEQ)
|
|
INTEGER MATRIX(IDM,IDM),REPEET
|
|
REAL MATCHS
|
|
EXTERNAL REPEET
|
|
C WE HAVE A START POSITION FOR THE 5' END OF THE 5' END OF
|
|
C A POTENTIAL REPEAT I5STAR AND AN END DEFINED BY A RANGE I5END
|
|
C WE HAVE A REPEAT LENGTH LENGTH
|
|
C WE HAVE A RANGE OF POSITIONS FOR THE 3' STEM TO START
|
|
C I3STAR TO I3END
|
|
C TRY THE TIGHTEST LOOPS FIRST
|
|
C BUT FIRST WE MAY HAVE TO FINISH A PREVIOUS SEARCH
|
|
C THIS IS DENOTED BY IENTRY NE 0.
|
|
C NOTE IENTRY IS ALSO USED TO RETURN THE 3' MATCH POSITION
|
|
ICUT = CUTOFF
|
|
C WRITE(*,*)'ICUT',ICUT
|
|
IFOUND = 0
|
|
JENTRY = IENTRY
|
|
IENTRY = 0
|
|
IF(I5STAR.LT.1)I5STAR=1
|
|
IF((I5STAR+I3STAR+LENGTH-2).GT.IDSEQ)RETURN
|
|
IF(JENTRY.NE.0)THEN
|
|
I1 = LAST5
|
|
C WRITE(*,*)'I1,LAST3S,LAST3E',I1,LAST3S,LAST3E
|
|
DO 50 J=LAST3S+1,LAST3E
|
|
J1 = J
|
|
ISUM = REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
|
|
C RETURN IF GOOD ENOUGH
|
|
IF(ISUM.GE.ICUT)THEN
|
|
MATCHP = I1
|
|
IENTRY = J1
|
|
MATCHQ = J1
|
|
MATCHS = ISUM
|
|
IFOUND = MATCHP
|
|
LAST3S = J1
|
|
RETURN
|
|
END IF
|
|
50 CONTINUE
|
|
C NOW MOVE 5' STEM START POSITION (WE HAVE JUST FINISHED THE LAST)
|
|
C TO THE LAST MATCH + 1
|
|
I5STAR = MATCHP + 1
|
|
END IF
|
|
C SET ENTRY FLAG TO ZERO TO SIGNIFY LAST SEARCH NOW COMPLETED
|
|
IENTRY = 0
|
|
ISUM = 0
|
|
LOOPI1 = I5STAR
|
|
IF((I5STAR+I3STAR+LENGTH-2).GT.IDSEQ)RETURN
|
|
LOOPI2 = MIN(IDSEQ-2*LENGTH+1,I5END)
|
|
C WRITE(*,*)'IDSEQ,LENGTH,I5STAR,I5END',
|
|
C +IDSEQ,LENGTH,I5STAR,I5END
|
|
C
|
|
C TRY ALL STEM STARTS FROM 5' START TO 5' END
|
|
C
|
|
C
|
|
DO 200 I = LOOPI1,LOOPI2
|
|
C
|
|
C
|
|
I1 = I
|
|
C
|
|
C TRY ALL LOOPS FROM 3' START TO 3' END
|
|
C
|
|
LOOPJ1 = I + I3STAR -1
|
|
IF((LOOPJ1+LENGTH-1).GT.IDSEQ)RETURN
|
|
LOOPJ2 = MIN(IDSEQ-LENGTH+1,I+I3END-1)
|
|
|
|
C WRITE(*,*)'I3STAR,I3END',I3STAR,I3END
|
|
C
|
|
C
|
|
C
|
|
DO 100 J = LOOPJ1,LOOPJ2
|
|
C
|
|
C
|
|
C
|
|
J1 = J
|
|
C IN REPEAT NOTE THAT
|
|
C THE 5' END POINTER I1 GOES FORWARDS
|
|
C THE 3' END POINTER J1 GOES FORWARDS
|
|
C
|
|
ISUM = REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I1,J1,IDM)
|
|
C RETURN IF GOOD ENOUGH
|
|
C WRITE(*,*)ISUM
|
|
IF(ISUM.GE.ICUT)THEN
|
|
MATCHP = I1
|
|
IENTRY = J1
|
|
MATCHQ = J1
|
|
MATCHS = ISUM
|
|
IFOUND = MATCHP
|
|
C SAVE CURRENT POSITION FOR LATER ENTRIES
|
|
LAST5 = I1
|
|
LAST3S = J1
|
|
LAST3E = LOOPJ2
|
|
RETURN
|
|
END IF
|
|
100 CONTINUE
|
|
200 CONTINUE
|
|
END
|
|
C*********************************************************************
|
|
INTEGER FUNCTION REPEET(SEQ,IDSEQ,MATRIX,LENGTH,I5P,I3P,IDM)
|
|
INTEGER SEQ(IDSEQ)
|
|
INTEGER MATRIX(IDM,IDM)
|
|
C THE 5' END POINTER GOES FORWARDS
|
|
C THE 3' END POINTER GOES FORWARDS
|
|
L=0
|
|
I5=I5P-1
|
|
I3=I3P-1
|
|
DO 100 I=1,LENGTH
|
|
I5 = I5 + 1
|
|
I3 = I3 + 1
|
|
C WRITE(*,*)'I5,I3',I5,I3
|
|
L5 = SEQ(I5)
|
|
L3 = SEQ(I3)
|
|
L = L + MATRIX(L5,L3)
|
|
100 CONTINUE
|
|
REPEET = L
|
|
END
|
|
SUBROUTINE DSPLAZ(MATCHP,LENGTH,NMOT,SEQ,IDSEQ,IDEV,
|
|
+CLASS,MATCHQ,IOPT,KSTART,MATCHS,CUTSCR,MINSCR,MAXSCR,
|
|
+TITLE,FILNAM,
|
|
+ MATRIX,MAT1,IDM,PMINT,PMAXT,PROBT,
|
|
+ WEIGHT,MAXWTS,WTSTR,CUTOFF,PMINC,RANGES,RANGEL,
|
|
+ IDEV1,JSTART,KEYNS,BESTP,BESTQ,BESTS,JOPT,MAXSS)
|
|
INTEGER MATCHP(NMOT),LENGTH(NMOT),CLASS(NMOT)
|
|
INTEGER MATCHQ(NMOT)
|
|
CHARACTER SEQ(IDSEQ),TITLE*(*),FILNAM*(*)
|
|
REAL MATCHS(NMOT),MINSCR,MAXSCR
|
|
INTEGER MATRIX(IDM,IDM),MAT1(IDM,IDM),WTSTR(NMOT)
|
|
REAL WEIGHT(MAXWTS),CUTOFF(NMOT)
|
|
INTEGER RANGES(NMOT),RANGEL(NMOT)
|
|
CHARACTER DASH
|
|
CHARACTER*(*) KEYNS(NMOT)
|
|
C stuff for best
|
|
INTEGER BESTP(NMOT),BESTQ(NMOT)
|
|
REAL BESTS(NMOT),MAXSS
|
|
EXTERNAL PSCORE
|
|
SAVE DASH
|
|
DATA DASH/'-'/
|
|
C jopt 1 get best match for any individual sequence, then
|
|
C display it. So check each match for being best (>maxss), if it is
|
|
C save its coords in bestp, bestq. Keep a note that a score
|
|
C has been recorded for this sequence (actually noted by bestp(1) ne.0)
|
|
C when we finish a sequence (denoted by jopt = 2) put all the
|
|
C saved values into matchp, matchq and process as normal.
|
|
IF(JOPT.NE.0) THEN
|
|
IF(JOPT.EQ.2) THEN
|
|
DO 5 I=1,NMOT
|
|
MATCHP(I) = BESTP(I)
|
|
MATCHQ(I) = BESTQ(I)
|
|
MATCHS(I) = BESTS(I)
|
|
5 CONTINUE
|
|
ELSE IF(JOPT.EQ.1) THEN
|
|
C Add scores
|
|
T = 0.
|
|
DO 6 I = 1,NMOT
|
|
IF(MATCHP(I).NE.0) T = T + MATCHS(I)
|
|
6 CONTINUE
|
|
IF(T.GT.MAXSS) THEN
|
|
DO 7 I=1,NMOT
|
|
BESTP(I) = MATCHP(I)
|
|
BESTQ(I) = MATCHQ(I)
|
|
BESTS(I) = MATCHS(I)
|
|
7 CONTINUE
|
|
MAXSS = T
|
|
END IF
|
|
RETURN
|
|
END IF
|
|
END IF
|
|
C Add scores
|
|
T = 0.
|
|
DO 10 I = 1,NMOT
|
|
IF(MATCHP(I).NE.0) T = T + MATCHS(I)
|
|
10 CONTINUE
|
|
POBS = 1.0
|
|
IF(PMINC.LT.1.0)THEN
|
|
C Calc prob
|
|
DO 20 I = 1,NMOT
|
|
IF(MATCHP(I).NE.0)THEN
|
|
CALL GETP(CLASS(I),SEQ(MATCHP(I)+KSTART-1),LENGTH(I),
|
|
+ IDM,MATRIX,MAT1,WEIGHT(MAX(1,WTSTR(I))))
|
|
PROB = PSCORE(MATCHS(I))
|
|
POBS = POBS * PROB
|
|
END IF
|
|
20 CONTINUE
|
|
IF((PMINC.LT.1.0).AND.(POBS.GT.PMINC))RETURN
|
|
END IF
|
|
C
|
|
IF(T.GT.MAXSCR) MAXSCR = T
|
|
IF(T.LT.MINSCR) MINSCR = T
|
|
IF(T.LT.CUTSCR) RETURN
|
|
C
|
|
C Motif by motif
|
|
C
|
|
IF(IOPT.EQ.1)THEN
|
|
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
|
|
1001 FORMAT(' >',A,' ',F10.3,' ',A)
|
|
DO 100 I=1,NMOT
|
|
J = I
|
|
C
|
|
C Check for no match (needed for ored motifs)
|
|
C
|
|
IF(MATCHP(J).NE.0)THEN
|
|
WRITE(IDEV,1000)MATCHP(J)+KSTART-1,MATCHS(J),KEYNS(I)
|
|
WRITE(IDEV,1002)
|
|
+ (SEQ(K),K=MATCHP(J),MATCHP(J)+LENGTH(J)-1)
|
|
C Repeat ?
|
|
IF(CLASS(J).EQ.5)THEN
|
|
WRITE(IDEV,1002)
|
|
+ (SEQ(K),K=MATCHQ(J),MATCHQ(J)+LENGTH(J)-1)
|
|
WRITE(IDEV,1000)MATCHQ(J)+KSTART-1
|
|
END IF
|
|
END IF
|
|
100 CONTINUE
|
|
1000 FORMAT(' ',I7,' ',F10.3,' ',A8)
|
|
1002 FORMAT(' ',60A1)
|
|
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
|
|
1004 FORMAT(' Probability =',E10.4)
|
|
RETURN
|
|
END IF
|
|
C
|
|
C Title,score only
|
|
C
|
|
IF(IOPT.EQ.3)THEN
|
|
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
|
|
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
|
|
1003 FORMAT(' ',F12.5)
|
|
END IF
|
|
C
|
|
C Inclusive
|
|
C
|
|
IF(IOPT.EQ.2)THEN
|
|
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
|
|
MINP = 999999
|
|
MAXP = -999999
|
|
DO 300 I = 1, NMOT
|
|
K = MATCHP(I)
|
|
IF(K.NE.0)THEN
|
|
IF(K.LT.MINP)MINP = K
|
|
K = K + LENGTH(I) - 1
|
|
C Repeat ?
|
|
IF(CLASS(I).EQ.5) K = MATCHQ(I) + LENGTH(I) - 1
|
|
IF(K.GT.MAXP)MAXP = K
|
|
END IF
|
|
300 CONTINUE
|
|
WRITE(IDEV,1000)MINP+KSTART-1
|
|
WRITE(IDEV,1002)
|
|
+ ((SEQ(K1),K1=K2,MIN(K2+59,MAXP)),K2=MINP,MAXP,60)
|
|
IF(PMINC.LT.1.0)WRITE(IDEV,1004)POBS
|
|
RETURN
|
|
END IF
|
|
C
|
|
C write file of whole seq
|
|
C
|
|
IF(IOPT.EQ.4)THEN
|
|
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
|
|
CALL OPENRS(IDEV1,FILNAM,IOK,LLL,1)
|
|
IF(IOK.NE.0)THEN
|
|
WRITE(IDEV,*)' Error opening sequence file'
|
|
RETURN
|
|
END IF
|
|
C Want first character from sequence to start at jstart
|
|
C So how many dashes required?
|
|
IDASH = JSTART - MATCHP(1)
|
|
IF(IDASH.GT.0)CALL PADOUT(IDEV1,DASH,60,IDASH)
|
|
C Write up to and including first motif
|
|
J1 = MATCHP(1)+LENGTH(1)-1
|
|
WRITE(IDEV1,1005,ERR=401)(SEQ(K),K=1,J1)
|
|
DO 400 I=2,NMOT
|
|
J = I
|
|
C Put dashes for next gap in now. what is max gap?
|
|
IMAXG = RANGES(J) - LENGTH(J-1) + RANGEL(J) - LENGTH(J)
|
|
C What is actual gap?
|
|
IG = MATCHP(J) - MATCHP(J-1) - LENGTH(J-1)
|
|
C Want to put in difference number of dashes
|
|
IDASH = IMAXG - IG
|
|
IF(IDASH.GT.0)CALL PADOUT(IDEV1,DASH,60,IDASH)
|
|
C Write up to and including next motif
|
|
J2 = MATCHP(J) + LENGTH(J) - 1
|
|
WRITE(IDEV1,1005,ERR=401)(SEQ(K),K=J1+1,J2)
|
|
J1 = J2
|
|
400 CONTINUE
|
|
C Write to end of sequence
|
|
WRITE(IDEV1,1005,ERR=401)(SEQ(K),K=J1+1,IDSEQ)
|
|
401 CONTINUE
|
|
CLOSE(UNIT=IDEV1)
|
|
1005 FORMAT(' ',60A1)
|
|
END IF
|
|
C
|
|
C write file inclusive
|
|
C
|
|
IF(IOPT.EQ.5)THEN
|
|
WRITE(IDEV,1001)FILNAM(1:10),T,TITLE
|
|
CALL OPENRS(IDEV1,FILNAM,IOK,LLL,1)
|
|
IF(IOK.NE.0)THEN
|
|
WRITE(IDEV,*)' Error opening sequence file'
|
|
RETURN
|
|
END IF
|
|
C Write first motif
|
|
J1 = MATCHP(1)+LENGTH(1)-1
|
|
WRITE(IDEV1,1005,ERR=501)(SEQ(K),K=MATCHP(1),J1)
|
|
DO 500 I=2,NMOT
|
|
J = I
|
|
C Put dashes for next gap in now. what is max gap?
|
|
IMAXG = RANGES(J) - LENGTH(J-1) + RANGEL(J) - LENGTH(J)
|
|
C What is actual gap?
|
|
IG = MATCHP(J) - MATCHP(J-1) - LENGTH(J-1)
|
|
C Want to put in difference number of dashes
|
|
IDASH = IMAXG - IG
|
|
IF(IDASH.GT.0)CALL PADOUT(IDEV1,DASH,60,IDASH)
|
|
C Write up to and including next motif
|
|
J2 = MATCHP(J) + LENGTH(J) - 1
|
|
WRITE(IDEV1,1005,ERR=501)(SEQ(K),K=J1+1,J2)
|
|
J1 = J2
|
|
500 CONTINUE
|
|
501 CONTINUE
|
|
CLOSE(UNIT=IDEV1)
|
|
END IF
|
|
END
|
|
SUBROUTINE PADOUT(IDEV,CHAR,LINLEN,NCHAR)
|
|
CHARACTER CHAR
|
|
C HOW MANY LINES?
|
|
NLINE = 1 + (NCHAR-1)/LINLEN
|
|
K1 = 1
|
|
K2 = MIN(NCHAR,LINLEN)
|
|
DO 10 I = 1,NLINE
|
|
WRITE(IDEV,1000,ERR=20)(CHAR,K=K1,K2)
|
|
K1 = K2 + 1
|
|
K2 = K1 + LINLEN - 1
|
|
K2 = MIN(K2,NCHAR)
|
|
10 CONTINUE
|
|
20 CONTINUE
|
|
1000 FORMAT(' ',60A1)
|
|
END
|
|
SUBROUTINE SETCMP(COMPIN,IDM)
|
|
PARAMETER (MAXCHR = 26)
|
|
REAL COMPIN(IDM)
|
|
COMMON /COMPC/COMP(MAXCHR)
|
|
SAVE /COMPC/
|
|
DO 10 I = 1,MAXCHR
|
|
COMP(I) = 0.0
|
|
10 CONTINUE
|
|
T = 0.
|
|
DO 20 I = 1,IDM
|
|
COMP(I) = COMPIN(I)
|
|
T = T + COMPIN(I)
|
|
20 CONTINUE
|
|
DO 30 I = 1,IDM
|
|
COMP(I) = COMP(I) / T
|
|
30 CONTINUE
|
|
END
|