staden-lg/src/staden/asubs89.f

1056 lines
30 KiB
Fortran

C ASUBS89
C SUBROUTINES FOR ANALYSIS PROGRAMS ANALYSEQ AND ANALYSEP
C AUTHOR RODGER STADEN
C 11-1-90 GETNAM changed use of inflag=2 to set idnlst to 0
C Added sqpf7, sepf7 sqpf5 and removed them from anals89,
C analps89
C 6-7-90 Added showfu
C 5-11-90 Changed rdwmt to show/not show title, changed mkwt accordingly
C 7-11-90 Changed iopt in mkwt to fit with changes in analps89 and
C anals89 (means adding 1 to old values)
C added new routine gstrnd
C 18-4-91 added new weight matrix routines (initially for splice search)
C BUBDEL
C GETNAM
C PSRCHX
C BUBBLE
C PLTMAP
C IEMBL
C SEPFIT
C SPFIT
C GETSCR
C EDITSQ ETC
SUBROUTINE SQPF7(SEQNCE,IDIM1,STRING,IDIM2,MATCH,
+ITOT,ITOTEL,ITOTID,
+ITOTP,KSTART,J1,J2,ISS,PR,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
CHARACTER SEQNCE(IDIM1),STRING(IDIM2),MATCH(IDIM2),HELPF*(*)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
IOK = 1
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
WRITE(KBOUT,1013)PR,ITOTP
1013 FORMAT(/,' Total scoring positions above',F7.3,' percent =',I4)
IF(ITOTP.GT.0) THEN
IF(ITOTP.GT.1)CALL BUBDEL(ITOT,ITOTEL,ITOTP)
WRITE(KBOUT,1002)(ITOT(K),K=1,MIN(10,ITOTP))
WRITE(KBOUT,1006)(ITOTEL(K),K=1,MIN(10,ITOTP))
1002 FORMAT( ' Scores ',10I7)
1006 FORMAT( ' Positions',10I7)
NSEE = 0
CALL GETINT(0,ITOTP,NSEE,'Display',IVAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
NSEE = IVAL
IF(NSEE.GT.0)THEN
DO 300 I=1,NSEE
K=ITOTEL(I)-KSTART+1
WRITE(IDEV,1008)
1008 FORMAT( )
CALL SQMTCH( SEQNCE(K),STRING,MATCH,IDIM2)
CALL FMT4LN(SEQNCE(K),STRING,MATCH,IDIM2,K,ISS,IDEV)
300 CONTINUE
END IF
END IF
IOK = 0
END
SUBROUTINE SEPF7(SEQNCE,IDIM1,STRING,IDIM2,MATCH,
+ITOT,ITOTEL,ITOTID,
+ITOTP,MINP,KSTART,J1,J2,ISS,IDEV,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT,IOK)
CHARACTER SEQNCE(IDIM1),STRING(IDIM2),MATCH(IDIM2),HELPF*(*)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
IOK = 1
CALL BPAUSE(KBIN,KBOUT,IOK)
IF(IOK.NE.0) RETURN
WRITE(KBOUT,1013)MINP,ITOTP
1013 FORMAT(/,' For score',I6,' the number of matches=',I6)
IF(ITOTP.GT.0) THEN
IF(ITOTP.GT.1)CALL BUBDEL(ITOT,ITOTEL,ITOTP)
WRITE(KBOUT,1002)(ITOT(K),K=1,MIN(10,ITOTP))
WRITE(KBOUT,1006)(ITOTEL(K),K=1,MIN(10,ITOTP))
1002 FORMAT( ' Scores ',10I7)
1006 FORMAT( ' Positions',10I7/)
NSEE = 0
CALL GETINT(0,ITOTP,NSEE,'Display',IVAL,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
NSEE = IVAL
IF(NSEE.GT.0)THEN
DO 300 I=1,NSEE
K=ITOTEL(I)-KSTART+1
WRITE(IDEV,1008)
1008 FORMAT( )
CALL SQMTCH( SEQNCE(K),STRING,MATCH,IDIM2)
CALL FMT4LN(SEQNCE(K),STRING,MATCH,IDIM2,K,ISS,IDEV)
300 CONTINUE
END IF
END IF
IOK = 0
END
SUBROUTINE SQPF5(IDIM2,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,PR,MINP,IOK)
CHARACTER HELPF*(*)
REAL MININ,MAXIN
IOK = 1
MININ = 1.
MAXIN = 100.
CALL GETRL(MININ,MAXIN,PR,'Percent match',VALUE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
PR = VALUE
PRR = PR/100.
XIDIM2 = REAL(IDIM2)
XIDIM2 = XIDIM2 * PRR
MINP = NINT(XIDIM2)
IOK = 0
END
SUBROUTINE GETRKB(SEQ,ID,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,
+IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*)
CHARACTER SEQ(ID),A
IOK = 1
WRITE(KBOUT,1004)
1004 FORMAT(' Define search strings by typing a string name',
+/,' followed by the string(s)')
JD = 1
1 CONTINUE
L = 0
CALL GETSTR('Name',A,SEQ(JD),20,L,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 1
END IF
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.3) GO TO 50
JD = JD + L
SEQ(JD) = '/'
JD = JD + 1
2 CONTINUE
L = 0
CALL GETSTR('String(s)',A,SEQ(JD),75,L,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 2
END IF
IF(INFLAG.EQ.2) RETURN
IF(INFLAG.EQ.3) GO TO 50
C IF(L.EQ.0) GO TO 2
DO 40 I = JD, JD + L - 1
IF(SEQ(I).EQ.' ') SEQ(I) = '/'
40 CONTINUE
JD = JD + L
SEQ(JD) = '/'
SEQ(JD+1) = '/'
JD = JD + 2
GO TO 1
50 CONTINUE
ID = JD - 1
IOK = 0
END
SUBROUTINE BUBDEL(LIST,LISTEL,IDIM)
C AUTHOR: RODGER STADEN
INTEGER LIST(IDIM),LISTEL(IDIM)
I=0
J=0
10 CONTINUE
IF(J.GT.I)I=J
I=I+1
IF(I.EQ.IDIM)RETURN
20 CONTINUE
IF(LIST(I).GE.LIST(I+1))GO TO 10
IF(J.LT.I)J=I
ITEMP=LIST(I)
LIST(I)=LIST(I+1)
LIST(I+1)=ITEMP
ITEMP=LISTEL(I)
LISTEL(I)=LISTEL(I+1)
LISTEL(I+1)=ITEMP
IF(I.GT.1)I=I-1
GO TO 20
END
SUBROUTINE GETNAM(NAMLST,IDNLST,NAMES,IDNAML,NAMEP,NAMLEN,
+MAXEN,KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH)
C AUTHOR: RODGER STADEN
C 2-9-91 Added tupper to deal with lower case enzyme names
C routine to return namlst with numbers of wanted enzymes
CHARACTER NAMES(IDNAML),NAME*20,NEWNAM*20,HELPF*(*),TUPPER
INTEGER NAMLST(IDNLST),NAMEP(MAXEN),NAMLEN(MAXEN)
EXTERNAL TUPPER
NENZ=0
10 CONTINUE
LENNAM = 0
CALL GTSTR('Name',NAME,NEWNAM,LENNAM,KBOUT,KBIN,INFLAG)
IF(LENNAM.EQ.0) GO TO 50
CALL CCASE(NEWNAM,1)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) THEN
IDNLST = 0
RETURN
END IF
NAME = NEWNAM
C search for it in names
DO 20 I=1,MAXEN
IF(NAMLEN(I).NE.LENNAM)GO TO 20
C name of correct length, do its chars match?
DO 19 J=1,LENNAM
IF(TUPPER(NAMES(NAMEP(I)+J-1)).NE.NAME(J:J))GO TO 20
19 CONTINUE
C must match
NENZ=NENZ+1
IF(NENZ.GT.IDNLST)THEN
WRITE(KBOUT,1003)
1003 FORMAT(' Too many names selected')
RETURN
END IF
NAMLST(NENZ)=I
GO TO 10
20 CONTINUE
C no name match
WRITE(KBOUT,1004)NAME(1:LENNAM)
1004 FORMAT(' ',A,' not found in file')
GO TO 10
50 CONTINUE
IDNLST=NENZ
END
SUBROUTINE PLSRCH(J1,J2,
+PSAVE,IFOUND,IBH,LEVEL,NAME,NAMLEN,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
C AUTHOR: RODGER STADEN
CHARACTER NAME*(*)
INTEGER PSAVE(IFOUND)
CALL VECTOM
XMAX=J2
XMIN=J1
YMAX=MARGT
YMIN=0.
Y=LEVEL
YP=Y+IBH
CALL TEXT(XMIN,Y,NAME,NAMLEN,0
+,XMAX,XMIN,YMAX,YMIN,8,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX)
CALL LINE(XMIN,XMAX,Y,Y,XMAX,XMIN,YMAX,YMIN,
+MARGL,MARGR,MARGB,MARGT,
+ISXMAX,ISYMAX)
DO 10 I=1,IFOUND
X=PSAVE(I)
CALL LINE(X,X,Y,YP,XMAX,XMIN,YMAX,YMIN,MARGL,MARGR,MARGB,
+MARGT,ISXMAX,ISYMAX)
10 CONTINUE
C now increase level ready for next entry
LEVEL=LEVEL+IBH
END
C PSRCHX
SUBROUTINE PSRCHX(LEVEL1,LEVEL,KBIN,KBOUT,IQUIT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CALL VT100M
CALL BPAUSE(KBIN,KBOUT,IQUIT)
IF(IQUIT.NE.0) RETURN
CALL YESNO(IQUIT,'Restart plotting from bottom of frame',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
CALL VECTOM
CALL CLEARV
IF(IQUIT.EQ.1)RETURN
CALL BPAUSE(KBIN,KBOUT,IQUIT)
IF(IQUIT.NE.0) RETURN
CALL CLEARG
LEVEL=LEVEL1
END
SUBROUTINE BUBBLE(LIST,IDIM)
C AUTHOR: RODGER STADEN
INTEGER LIST(IDIM)
I=0
J=0
10 CONTINUE
IF(J.GT.I)I=J
I=I+1
IF(I.EQ.IDIM)RETURN
20 CONTINUE
IF(LIST(I).LE.LIST(I+1))GO TO 10
IF(J.LT.I)J=I
ITEMP=LIST(I)
LIST(I)=LIST(I+1)
LIST(I+1)=ITEMP
IF(I.GT.1)I=I-1
GO TO 20
END
C PLOTMAP
SUBROUTINE PLTMAP(IDEV,FILNAM,IDIM,
+MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX,IS,IE,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER FILNAM*(*),FEATUR*8,STRAND,DESCRP(38)
CHARACTER ATOS*38
EXTERNAL IEMBL
EXTERNAL ATOS
PARAMETER (IBLIPH=256)
CALL SHOWFU(KBOUT,
+'Display a map using an EMBL feature table file')
XMAX=IE
XMIN=IS
YMIN=0.
YMAX=ISYMAX
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+'Map file name',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
10 CONTINUE
REWIND IDEV
LIN = 3
CALL GTSTR('Feature code','CDS',FEATUR,LIN,
+KBOUT,KBIN,INFLAG)
IF(LIN.EQ.0) FEATUR(1:3) = 'CDS'
CALL CCASE(FEATUR,1)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) GO TO 900
1006 FORMAT(A)
IF(FEATUR.EQ.' ') GO TO 900
ISTRND = 1
CALL GSTRND(ISTRND,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
ISTRND = ISTRND - 1
IF(ISTRND.LT.0) GO TO 900
STRAND=' '
IF(ISTRND.EQ.1)STRAND='C'
IF(ISTRND.EQ.2)STRAND='B'
C7 WRITE(KBOUT,1001)
C1001 FORMAT(' ? SEQUENCE LEVEL IN DRAWING BOARD UNITS(DEF=0)=',$)
C READ(KBIN,1002,ERR=7)IBASE
C IF(IBASE.EQ.-99)THEN
C CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
C GO TO 7
C END IF
C IF(IBASE.LT.0)RETURN
C IF(IBASE.GT.MARGT)IBASE=0
IY = IBLIPH
MININ = 0
MAXIN = MARGT
CALL GETINT(MININ,MAXIN,IY,
+'level',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) GO TO 900
IY = IVAL
C YBASE=IBASE
YF=IY
CALL CLEARV
CALL VECTOM
C NEED BARS AT ENDS OF FEATURES FROM BLIPB TO BLIPT
BLIPB=YF-IBLIPH/2
BLIPT=YF+IBLIPH/2
C FEATURE NAME PUT AT YT
YT=YF+IBLIPH/2
C CALL LINE(XMIN,XMAX,YBASE,YBASE,XMAX,XMIN,YMAX,YMIN,
C +MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
20 CONTINUE
LEMBL=IEMBL(IDEV,'FT',FEATUR,STRAND,IS,IE,IPOSNL,IPOSNR,0,
+DESCRP,KBOUT,JSTRAN)
IF(LEMBL.EQ.0)THEN
POSNL=IPOSNL
POSNR=IPOSNR
XMID=POSNL+(POSNR-POSNL)/2.
CALL LINE(POSNL,POSNR,YF,YF,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(POSNL,POSNL,BLIPB,BLIPT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL LINE(POSNR,POSNR,BLIPB,BLIPT,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
CALL TEXT(XMID,YT,ATOS(DESCRP,1),1,
+ 0,XMAX,XMIN,YMAX,YMIN,
+ MARGL,MARGR,MARGB,MARGT,ISXMAX,ISYMAX)
GO TO 20
END IF
CALL VT100M
GO TO 10
900 CONTINUE
CLOSE(UNIT=IDEV)
END
C IEMBL
INTEGER FUNCTION IEMBL(IDEV,SCODE,FEATUR,STRAND,IS,IE,
+POSNL,POSNR,IWRITE,DESCR,KBOUT,JSTRAN)
C AUTHOR: RODGER STADEN
CHARACTER LINE(80),FEATUR*(*),STRAND,DESCR(38)
CHARACTER KEYNAM(8),LFTEND(6),RITEND(6),DIR(3),DESCRP(38)
CHARACTER*2 CODE,SCODE
INTEGER POSNL,POSNR,IWRITE
EQUIVALENCE (LINE(1),CODE)
EQUIVALENCE (LINE(6),KEYNAM)
EQUIVALENCE (LINE(15),LFTEND)
EQUIVALENCE (LINE(22),RITEND)
EQUIVALENCE (LINE(29),DIR)
EQUIVALENCE (LINE(35),DESCRP)
C
C READS UNTIL FINDS WANTED LINE TYPE (SCODE)
C OR END OF TEXT THIS SEQUENCE, OR END OF FILE
C RETURN CODES ARE 0,1,2 RESPECTIVELY
C WRITES ALL LINES, WANTED LINE, OR NONE CODE IWRITE=2,1,0 RESPECTIVELY
C JSTRAN IS A NUMBER DENOTING THE STRAND FOUND (0,1, (3=ERROR IN LIB))
C SET FLAG FOR MORE DATA
IEMBL=0
10 CONTINUE
READ(IDEV,1000,END=600)LINE
1000 FORMAT(80A1)
IF(IWRITE.EQ.2)WRITE(KBOUT,2000)LINE
2000 FORMAT(' ',80A1)
IF(CODE.EQ.'SQ')GO TO 550
IF(CODE.NE.SCODE)GO TO 10
IF(IWRITE.EQ.1)WRITE(KBOUT,2000)LINE
IF(SCODE.NE.'FT')RETURN
50 CONTINUE
DO 51 J=1,8
IF(KEYNAM(J).NE.FEATUR(J:J))GO TO 10
51 CONTINUE
IF(STRAND.NE.'B')THEN
IF(DIR(2).NE.STRAND)GO TO 10
END IF
C SET FOUND STRAND
JSTRAN=3
IF(DIR(2).EQ.'C')JSTRAN=1
IF(DIR(2).EQ.' ')JSTRAN=0
DO 52 J=1,38
DESCR(J)=DESCRP(J)
52 CONTINUE
C GET POSITIONS
C <1 ?
IF(LFTEND(5).EQ.'<')THEN
POSNL=1
ELSE
POSNL=IFROMC(LFTEND,6,KBOUT)
END IF
C MAY CONTAIN > SO NEED TO CHECK EACH POSITION!
DO 56 K=1,6
KK=7-K
IF(RITEND(KK).EQ.'>')GO TO 57
56 CONTINUE
KK=0
57 CONTINUE
C MAY CONTAIN < !
IF(RITEND(5).EQ.'<')THEN
POSNR=1
ELSE
C THE > IS AT POSITION KK
POSNR=IFROMC(RITEND(KK+1),6-KK,KBOUT)
END IF
C COMPLEMENTARY STRAND?
IF(DIR(2).EQ.'C')THEN
I=POSNL
POSNL=POSNR
POSNR=I
END IF
C in range?
IF(POSNR.LT.IS)GO TO 10
IF(POSNL.GT.IE)GO TO 10
RETURN
550 CONTINUE
C SET FLAG FOR END OF THIS SEQUENCE'S HEADING
IEMBL=1
RETURN
600 CONTINUE
C SET FLAG FOR END OF THIS LIBRARY
IEMBL=2
END
C SQFIT
SUBROUTINE SPFIT(SEQ,IDIM1,STRING,IDIM2,ITOT,ITOTEL,ITOTID,
+IS,IE,MINP,ITOTP,SCORES,IDSCOR,KSTART)
C AUTHOR: RODGER STADEN
CHARACTER SEQ(IDIM1),STRING(IDIM2)
INTEGER ITOT(ITOTID),ITOTEL(ITOTID)
INTEGER SCORES(IDSCOR,IDSCOR)
INTEGER DTONUM
EXTERNAL DTONUM
C
IDIF=(IE-IS+2)-IDIM2
C IDIF IS THE NUMBER OF POSNS TO TRY
C IPSTR GOES FROM 1 TO IDIM2 IDIF TIMES
IPSEQ=IS-KSTART+1
ITOTP=0
DO 200 I=1,IDIF
NTOT=0
IP=IPSEQ
DO 100 J=1,IDIM2
NTOT=NTOT+SCORES(DTONUM(SEQ(IP)),DTONUM(STRING(J)))
IP=IP+1
100 CONTINUE
IF(NTOT.GE.MINP)THEN
ITOTP=ITOTP+1
IF(ITOTP.GT.ITOTID)RETURN
ITOT(ITOTP)=NTOT
ITOTEL(ITOTP)=IP-IDIM2+KSTART-1
END IF
IPSEQ=IPSEQ+1
200 CONTINUE
END
SUBROUTINE GETSCR(STRING,IDIM2,SCORES,IDSCOR,SMAX,SMIN)
C AUTHOR: RODGER STADEN
CHARACTER STRING(IDIM2)
INTEGER SCORES(IDSCOR,IDSCOR),SMAX,SMIN,DTONUM
EXTERNAL DTONUM
SMAX=0
SMIN=IDIM2*SCORES(IDSCOR,IDSCOR)
DO 2 I=1,IDIM2
SMAX=SMAX+SCORES(DTONUM(STRING(I)),DTONUM(STRING(I)))
2 CONTINUE
END
SUBROUTINE GETC(TOT,SUM,LINE,IDM,MAXLEN,IDEV,KBOUT,LENGTH,
+IOK)
INTEGER TOT(MAXLEN),SUM(IDM,MAXLEN)
CHARACTER LINE(MAXLEN)
INTEGER CTONUM
EXTERNAL CTONUM
IOK = 1
DO 2 I=1,MAXLEN
TOT(I)=0
DO 1 J=1,IDM
SUM(J,I)=0
1 CONTINUE
2 CONTINUE
N=0
10 CONTINUE
1003 FORMAT(1X,120A1)
1004 FORMAT(' ',I6,' ',120A1)
READ(IDEV,1003,END=100)LINE
N=N+1
WRITE(KBOUT,1004)N,LINE
DO 20 I=1,MAXLEN
IF(LINE(I).EQ.' ')GO TO 10
SUM(CTONUM(LINE(I)),I)=SUM(CTONUM(LINE(I)),I)+1
20 CONTINUE
GO TO 10
100 CONTINUE
IF(N.EQ.0)THEN
WRITE(KBOUT,*)' Empty file of aligned sequences'
RETURN
END IF
C NOW FIND LENGTH OF MOTIF
DO 40 I=1,MAXLEN
K=0
L=I
DO 30 J=1,IDM
K=K+SUM(J,I)
30 CONTINUE
IF(K.EQ.0)GO TO 50
TOT(I)=TOT(I)+K
40 CONTINUE
50 CONTINUE
LENGTH=L-1
IOK = 0
END
SUBROUTINE GETW(TOT,SUM,FREQ,LENGTH,MAXCHR,MAXLEN)
INTEGER TOT(LENGTH),SUM(MAXCHR,MAXLEN)
REAL FREQ(MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
DO 60 J=1,MAXCHR
FREQ(J,I)=LOG((REAL(SUM(J,I)+1)/REAL(TOT(I)+MAXCHR)))
60 CONTINUE
70 CONTINUE
END
SUBROUTINE GETW2(SUM,FREQ,LENGTH,MAXCHR,MAXLEN)
INTEGER SUM(MAXCHR,MAXLEN)
REAL FREQ(MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
DO 60 J=1,MAXCHR
FREQ(J,I)=REAL(SUM(J,I))
60 CONTINUE
70 CONTINUE
END
SUBROUTINE WRTSCM(TITLE,LENGTH,MIDDLE,BOT,TOP,IDM,
+TOT,SUM,CHRSET,IDEV,MAXLEN)
INTEGER TOT(LENGTH),SUM(IDM,MAXLEN)
CHARACTER CHRSET(IDM),TITLE*(*)
C PROTEIN MATRICES DONT WRITE ROWS FOR -X? AND SPACE SO SET DIMENSION
C TO IDM-4
MINUS = 1
IF(IDM.EQ.26)MINUS = 4
WRITE(IDEV,1018)TITLE
1018 FORMAT(' ',A)
1019 FORMAT(' P',20I4)
1020 FORMAT(' N',20I4)
1021 FORMAT(' ',A,20I4)
1022 FORMAT(' ',2I6,2F10.3)
WRITE(IDEV,1022)LENGTH,MIDDLE,BOT,TOP
NLINES=1+(LENGTH-1)/20
K1=1
DO 400 J=1,NLINES
K2=MIN((K1+19),LENGTH)
WRITE(IDEV,1019)(K,K=K1-MIDDLE,K2-MIDDLE)
WRITE(IDEV,1020)(TOT(K),K=K1,K2)
DO 390 I=1,IDM-MINUS
WRITE(IDEV,1021)CHRSET(I),(SUM(I,K),K=K1,K2)
390 CONTINUE
K1=K1+20
IF(K1.GT.LENGTH)K1=LENGTH
400 CONTINUE
CLOSE(UNIT=IDEV)
END
SUBROUTINE RDWMTN(TOT,WT,MIDDLE,
+LENGTH,MAXLEN,YMIN,YMAX,IDEV,IFAIL,IDM,KBOUT,IPROB)
C AUTHOR: RODGER STADEN
C Same as rdwmt except for file closing
INTEGER WT(IDM,MAXLEN),TOT(MAXLEN)
CHARACTER LINE*79
C PROTEIN MATRICES DONT READ ROWS FOR -X? AND SPACE SO SET DIMENSION
C TO IDM-4
MINUS = 1
IF(IDM.EQ.26)MINUS = 4
C SET FAIL FLAG
IFAIL=1
1000 FORMAT( )
1001 FORMAT(2X,20I4)
1003 FORMAT(A)
1002 FORMAT(2X,2I6,2F10.3)
1004 FORMAT(' ',A)
DO 10 I = 1,MAXLEN
TOT(I) = 0
DO 5 J = 1,IDM
WT(J,I) = 0
5 CONTINUE
10 CONTINUE
C READ TITLE
READ(IDEV,1003,END=100,ERR=100)LINE
IF(IPROB.EQ.0) WRITE(KBOUT,1004)LINE
C READ PLOT VALUES ETC
READ(IDEV,1002,ERR=100,END=100)
+LENGTH,MIDDLE,YMIN,YMAX
C HOW MANY LINES TO READ?
NLINES=1+(LENGTH-1)/20
K1=1
DO 50 J=1,NLINES
C READ POSITION
READ(IDEV,1000,END=100,ERR=100)
K2=MIN((K1+19),LENGTH)
C READ TOTALS
C READ(IDEV,1001,ERR=100,END=100)(TOT(K),K=K1,K2)
READ(IDEV,1001,ERR=100,END=100)
C READ COUNTS
DO 25 I=1,IDM-MINUS
READ(IDEV,1001,ERR=100,END=100)(WT(I,K),K=K1,K2)
C ALLOW TOTALS IN FILE TO BE WRONG!
DO 24 K = K1,K2
TOT(K) = TOT(K) + WT(I,K)
24 CONTINUE
25 CONTINUE
K1=K1+20
IF(K1.GT.LENGTH)K1=LENGTH
50 CONTINUE
C SET FAIL FLAG TO GOOD
IFAIL=0
RETURN
100 CONTINUE
END
SUBROUTINE GETWC(TOT,SUM,LENGTH,MAXCHR,MAXLEN,
+CEXACT,PEXACT,IEXACT)
INTEGER TOT(LENGTH),SUM(MAXCHR,MAXLEN)
INTEGER PEXACT(MAXLEN),CEXACT(MAXLEN)
C routine to find 100% conserved residues in wt matrices
IEXACT = 0
DO 70 I=1,LENGTH
IF(TOT(I).NE.0) THEN
DO 60 J=1,MAXCHR
IF(TOT(I).EQ.SUM(J,I)) THEN
IEXACT = IEXACT + 1
CEXACT(IEXACT) = J
PEXACT(IEXACT) = I
GO TO 61
END IF
60 CONTINUE
61 CONTINUE
END IF
70 CONTINUE
END
INTEGER FUNCTION MATWTC(SEQ,IDSEQ,J1,J2,K1,
+CEXACT,PEXACT,IEXACT)
C find first position in seq where the chars (in index form)
C contained in cexact, and with relative positions pexact, is found
C else return end +1
CHARACTER SEQ(IDSEQ)
INTEGER PEXACT(IEXACT),CEXACT(IEXACT)
INTEGER CTONUM
EXTERNAL CTONUM
MATWTC = J2 + 1
DO 100 I=K1,J2-PEXACT(IEXACT)+1
DO 50 J=1,IEXACT
IF(CTONUM(SEQ(I+PEXACT(J)-1)).NE.CEXACT(J)) GO TO 51
50 CONTINUE
MATWTC = I
RETURN
51 CONTINUE
100 CONTINUE
END
SUBROUTINE RDWMT(TOT,WT,MIDDLE,
+LENGTH,MAXLEN,YMIN,YMAX,IDEV,IFAIL,IDM,KBOUT,IPROB)
C AUTHOR: RODGER STADEN
INTEGER WT(IDM,MAXLEN),TOT(MAXLEN)
CHARACTER LINE*79
C PROTEIN MATRICES DONT READ ROWS FOR -X? AND SPACE SO SET DIMENSION
C TO IDM-4
MINUS = 1
IF(IDM.EQ.26)MINUS = 4
C SET FAIL FLAG
IFAIL=1
1000 FORMAT( )
1001 FORMAT(2X,20I4)
1003 FORMAT(A)
1002 FORMAT(2X,2I6,2F10.3)
1004 FORMAT(' ',A)
DO 10 I = 1,MAXLEN
TOT(I) = 0
DO 5 J = 1,IDM
WT(J,I) = 0
5 CONTINUE
10 CONTINUE
C READ TITLE
READ(IDEV,1003,END=100,ERR=100)LINE
IF(IPROB.EQ.0) WRITE(KBOUT,1004)LINE
C READ PLOT VALUES ETC
READ(IDEV,1002,ERR=100,END=100)
+LENGTH,MIDDLE,YMIN,YMAX
C HOW MANY LINES TO READ?
NLINES=1+(LENGTH-1)/20
K1=1
DO 50 J=1,NLINES
C READ POSITION
READ(IDEV,1000,END=100,ERR=100)
K2=MIN((K1+19),LENGTH)
C READ TOTALS
C READ(IDEV,1001,ERR=100,END=100)(TOT(K),K=K1,K2)
READ(IDEV,1001,ERR=100,END=100)
C READ COUNTS
DO 25 I=1,IDM-MINUS
READ(IDEV,1001,ERR=100,END=100)(WT(I,K),K=K1,K2)
C ALLOW TOTALS IN FILE TO BE WRONG!
DO 24 K = K1,K2
TOT(K) = TOT(K) + WT(I,K)
24 CONTINUE
25 CONTINUE
K1=K1+20
IF(K1.GT.LENGTH)K1=LENGTH
50 CONTINUE
CLOSE(UNIT=IDEV)
C SET FAIL FLAG TO GOOD
IFAIL=0
RETURN
100 CONTINUE
CLOSE(UNIT=IDEV)
END
SUBROUTINE MKWT(FREQ,SUM,TOT,CHRSET,IDM,MAXLEN,
+IDEV2,IDEV3,KBIN,KBOUT,LINE,
+FILNAM,IOPT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR RODGER STADEN
CHARACTER FILNAM*(*),HELPF*(*)
CHARACTER LINE(MAXLEN),TITLE*60,CHRSET(IDM)
INTEGER SUM(IDM,MAXLEN),TOT(MAXLEN)
REAL FREQ(IDM,MAXLEN)
IOK = 1
IF(IOPT.EQ.3)THEN
CALL OPENF1(IDEV2,FILNAM,0,IOK,KBIN,KBOUT,
+ 'Name of existing weight matrix file',
+ IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL RDWMT(TOT,SUM,MIDDLE,LENGTH,MAXLEN,
+ CUTMIN,CUTMAX,IDEV2,
+ IOK,IDM,KBOUT,0)
IF(IOK.NE.0) RETURN
END IF
CALL OPENF1(IDEV2,FILNAM,0,IOK,KBIN,KBOUT,
+'Name of aligned sequences file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0)RETURN
IF(IOPT.EQ.2) THEN
CALL GETC(TOT,SUM,LINE,IDM,MAXLEN,IDEV2,
+ KBOUT,LENGTH,IOK)
IF(IOK.NE.0)RETURN
END IF
WRITE(KBOUT,1006)LENGTH
1006 FORMAT(' Length of motif',I6)
IOK = 1
CALL YESNO(IOPT,'Sum logs of weights',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.0) RETURN
CUTMIN = -10.0
IF(IOPT.EQ.1)CUTMIN = 10.
CALL MASKW(SUM,LENGTH,IDM,MAXLEN,KBIN,KBOUT,TITLE,
+IOPT,IHELPS,IHELPE,HELPF,IDEVH)
IF(IOPT.LT.0) RETURN
C NOW CALC WEIGHTS
IF(CUTMIN.LT.0.0)CALL GETW(TOT,SUM,FREQ,LENGTH,IDM,MAXLEN)
IF(CUTMIN.GE.0.0)CALL GETW2(SUM,FREQ,LENGTH,IDM,MAXLEN)
C NOW APPLY THE WEIGHTS
REWIND IDEV2
CALL APPLWT(FREQ,IDM,LENGTH,IDEV2,IDEV3,KBIN,KBOUT,LINE,MAXLEN,
+BOT,TOP,TITLE,MIDDLE,IOK)
CLOSE(UNIT=IDEV2)
IF(IOK.NE.0) RETURN
CALL OPENF1(IDEV2,FILNAM,1,IOK,KBIN,KBOUT,
+'Name for new weight matrix file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
CALL WRTSCM(TITLE,LENGTH,MIDDLE,BOT,TOP,IDM,
+TOT,SUM,CHRSET,IDEV2,MAXLEN)
CLOSE(UNIT=IDEV2)
RETURN
END
SUBROUTINE MASKW(SUM,LENGTH,IDM,MAXLEN,KBIN,KBOUT,MASK,
+IOPT,IHELPS,IHELPE,HELPF,IDEVH)
INTEGER SUM(IDM,MAXLEN)
CHARACTER MASK*(*),HELPF*(*)
CALL YESNO(IOPT,'Use all motif positions',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(IOPT.LT.0) RETURN
5 CONTINUE
IF(IOPT.EQ.1)THEN
WRITE(KBOUT,1002)
1002 FORMAT(' x means use, - means ignore',/,
+ ' e.g. xx-x---x-x means use positions 1,2,4,8,10')
LIN = 0
CALL GTSTR('Mask',' ',MASK,LIN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 5
END IF
IF(INFLAG.EQ.2) RETURN
IF(LIN.EQ.0) RETURN
DO 70 I=1,LENGTH
IF(MASK(I:I).EQ.'-')THEN
DO 60 J=1,IDM
SUM(J,I) = 0
60 CONTINUE
END IF
70 CONTINUE
END IF
END
SUBROUTINE APPLWT(FREQ,IDM,LENGTH,IDEV,IDEV3,KBIN,KBOUT,
+LINE,MAXLEN,BOT,TOP,TITLE,MIDDLE,IOK)
REAL FREQ(IDM,MAXLEN)
CHARACTER LINE(MAXLEN),TITLE*(*)
INTEGER CTONUM
EXTERNAL CTONUM
IOK = 1
N=0
TOP=-99999.
BOT=9999999.
WRITE(KBOUT,*)' Applying weights to input sequences'
SMEAN = 0.
SUMSQ = 0.
1003 FORMAT(1X,120A1)
1004 FORMAT(' ',I4,' ',F12.3,' ',120A1)
200 CONTINUE
READ(IDEV,1003,END=300)LINE
N=N+1
SCORE=0.
DO 210 I=1,LENGTH
SCORE=SCORE+FREQ(CTONUM(LINE(I)),I)
210 CONTINUE
WRITE(IDEV3,1004)N,SCORE,(LINE(K),K=1,LENGTH)
IF(SCORE.GT.TOP)TOP=SCORE
IF(SCORE.LT.BOT)BOT=SCORE
SMEAN=SMEAN+SCORE
SUMSQ=SUMSQ+SCORE*SCORE
GO TO 200
300 CONTINUE
IF(N.LT.1)THEN
WRITE(KBOUT,*)' Error: empty sequence file'
RETURN
END IF
SMEAN=SMEAN/N
SM=SMEAN
SMEAN=SMEAN*SMEAN
SUMSQ=SUMSQ/N
SD = 0.
T = SUMSQ - SMEAN
IF(T.GT.0.)SD = SQRT(T)
SMM3=SM-3*SD
SMP3=SM+3*SD
WRITE(KBOUT,1000)TOP,BOT
1000 FORMAT(' Top score',F12.3,' Bottom score',F12.3)
WRITE(KBOUT,1001)SM,SD
1001 FORMAT(' Mean',F12.3,' Standard deviation',F12.3)
WRITE(KBOUT,1002)SMM3,SMP3
1002 FORMAT(' Mean minus 3.sd',F12.3,' Mean plus 3.sd',F12.3)
BOT=SMM3
TOP=SMP3
XMN = -999.
XMX = 9999.
CALL GETRL(XMN,XMX,BOT,'Cutoff score',VAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
BOT = VAL
XMN = BOT
XMX = 999.
CALL GETRL(XMN,XMX,TOP,'Top score for scaling plots',
+VAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
TOP = VAL
MN = 0
MX = LENGTH
MIDDLE = 1
CALL GETINT(MN,MX,MIDDLE,'Position to identify',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
MIDDLE = IVAL
305 CONTINUE
LIN = 0
CALL GTSTR('Title',' ',TITLE,LIN,KBOUT,KBIN,INFLAG)
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 305
END IF
IF(INFLAG.EQ.2) RETURN
IOK = 0
END
SUBROUTINE GETWM(TOT,SUM,FREQ,LENGTH,MATRIX,MAXCHR,MAXLEN)
INTEGER TOT(LENGTH),SUM(MAXCHR,MAXLEN),MATRIX(MAXCHR,MAXCHR)
REAL FREQ(MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
IPROD = 0
DO 60 J=1,MAXCHR
DO 50 K = 1,MAXCHR
IPROD = IPROD + MATRIX(J,K) * (SUM(J,I)+1)
50 CONTINUE
60 CONTINUE
TOT(I) = IPROD
70 CONTINUE
DO 170 I=1,LENGTH
DO 160 J=1,MAXCHR
IPROD = 0
DO 150 K = 1,MAXCHR
IPROD = IPROD + MATRIX(J,K) * (SUM(J,I)+1)
150 CONTINUE
FREQ(J,I)=LOG(REAL(IPROD)/REAL(TOT(I)))
160 CONTINUE
170 CONTINUE
END
SUBROUTINE GETW1(SUM,FREQ,LENGTH,MAXCHR,MAXLEN)
INTEGER SUM(MAXCHR,MAXLEN)
REAL FREQ(MAXCHR,MAXLEN)
DO 70 I=1,LENGTH
DO 60 J=1,MAXCHR
FREQ(J,I) = 0.
IF(SUM(J,I).GT.0)FREQ(J,I) = 1.
60 CONTINUE
70 CONTINUE
END
SUBROUTINE MARGC(ISXMAX,ISYMAX,MARGL,MARGR,MARGB,MARGT,
+HELPS,HELPE,MAXOPT,HELPF,IDEVH,KBIN,KBOUT)
C AUTHOR RODGER STADEN
INTEGER MARGB(MAXOPT),MARGT(MAXOPT),HELPS,HELPE
CHARACTER HELPF*(*)
CALL SHOWFU(KBOUT,'Reset plot positions')
NOPT = 0
CALL GETINT(0,MAXOPT,NOPT,'Number of option to reposition',
+IVAL,KBIN,KBOUT,HELPS,HELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IF(IVAL.EQ.0) RETURN
NOPT = IVAL
CALL MARGC1(ISXMAX,ISYMAX,MARGL,MARGR,MARGB(NOPT),MARGT(NOPT),
+HELPS,HELPE,HELPF,IDEVH,KBIN,KBOUT)
RETURN
END
SUBROUTINE SQPF3(STRING,NEW,MAXSTR,LENGTH,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER STRING(MAXSTR),NEW(MAXSTR)
IOK = 1
10 CONTINUE
LIN = LENGTH
CALL GETSTR('String',STRING,NEW,MAXSTR,LIN,KBOUT,KBIN,INFLAG)
IF((LIN.LT.1).AND.(LENGTH.LT.1)) RETURN
IF(INFLAG.EQ.1) THEN
CALL HELP2(IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
GO TO 10
END IF
IF(INFLAG.EQ.2) RETURN
IF(LIN.GT.0)THEN
CALL SQCOPY(NEW,STRING,LIN)
LENGTH = LIN
END IF
IOK = 0
END
SUBROUTINE SQPF2(SEQ2,IDIM3,STRING,IDIM2I,IDIM2,I1,I2,
+KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ2(IDIM3),STRING(IDIM2I)
IOK = 1
MININ = 1
MAXIN = IDIM3
WRITE(KBOUT,1000)
1000 FORMAT(' Define string ends')
CALL GETINT(MININ,MAXIN,I1,'Start',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
IF(IVAL.NE.I1) THEN
I2 = IVAL + 10
END IF
I1 = IVAL
MININ = I1 + 1
MAXIN = I1 + IDIM2I - 1
CALL GETINT(MININ,MAXIN,I2,'End',IVAL,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
I2 = IVAL
IDIM2 =I2 - I1 + 1
CALL SQCOPY(SEQ2(I1),STRING,IDIM2)
WRITE(KBOUT,1001)(STRING(K),K=1,IDIM2)
1001 FORMAT(' string=',50A1)
IOK = 0
END
SUBROUTINE SQPF1(SEQ2,IDIM3I,IDIM3,
+IDEV,FILNAM,KBIN,KBOUT,
+IHELPS,IHELPE,HELPF,IDEVH,IOK)
CHARACTER HELPF*(*)
C AUTHOR: RODGER STADEN
CHARACTER SEQ2(IDIM3I),FILNAM*(*)
IDIM3 = IDIM3I
CALL OPENF1(IDEV,FILNAM,0,IOK,KBIN,KBOUT,
+'String file',
+IHELPS,IHELPE,HELPF,IDEVH)
IF(IOK.NE.0) RETURN
IOK = 1
CALL ARRFIL(IDEV,SEQ2,IDIM3,KBOUT)
CLOSE(UNIT=IDEV)
IF(IDIM3.LT.1)RETURN
IOK = 0
END
SUBROUTINE SQPFD1(SEQ2,IDIM3I,IDIM3,ANSTY,IDEVIN,FILE2,
+KBIN,KBOUT,IHELPS,IHELPE,HELPF,IDEVH,IOK)
C AUTHOR: RODGER STADEN
CHARACTER HELPF*(*),FILE2*(*)
CHARACTER SEQ2(IDIM3I)
INTEGER ANSTY,CHOICE
CHOICE = ANSTY
IOK = 1
CALL YESONO(CHOICE,'Type in string','Extract string from file',
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
IF(CHOICE.LT.0) RETURN
ANSTY = CHOICE
IF(ANSTY.EQ.1)THEN
CALL SQPF1(SEQ2,IDIM3I,IDIM3,
+ IDEVIN,FILE2,KBIN,KBOUT,
+ IHELPS,IHELPE,HELPF,IDEVH,IOK)
IF(IOK.NE.0) RETURN
END IF
IOK = 0
END
SUBROUTINE GSTRND(IVAL,IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
CHARACTER HELPF*(*)
PARAMETER (MAXPRM = 13)
CHARACTER PROMPT(3)*(MAXPRM)
PROMPT(1) = '+ strand only'
PROMPT(2) = '- strand only'
PROMPT(3) = 'Both strands'
CALL RADION('Select strands',PROMPT,3,IVAL,
+IHELPS,IHELPE,HELPF,IDEVH,KBIN,KBOUT)
END