C PIP (protein interpretation program) C C author: Rodger Staden, Medical Research Council Centre, C Laboratory of Molecular Biology, Hills Road, C Cambridge, England C 16-3-90 Removed escape after bpause C 5-4-90 Changed getdev to redir and all that implies (see redir) C 27-6-90 added hopp and woods hydrophilicity table. Not used yet! C but could be sent to compc C 6-7-90 added showfi C 9-7-90 Renamed menu routine C 5-11-90 Very many changes for addition of file of file names C search for patterns - affects patternp,patternpc,pipl, C pip,nip,nipl,patternn,patternnc,analps89 and subs89 (rdwmt) C 4-12-90 CHANGED NAME OF ROUTINE COMPC TO COMPCP because the sun C linker cannot cope with subroutines and common blocks C having the same name C 11-12-90 Changed handling of sequence libraries. New logical name C set here and passed to rdseq C 13-04-91 removed initial CLEARV C The maximum sequence length is defined by the parameter MAXSEQ C and MAXWIN. C C Either Staden (ie. no heading and effectively free format with C lines of any length <81 and allowing gaps that will be removed) C or EMBL or GENBANK format files can be read. C C 7-6-91 New sequence library routines for cdrom format. PIR stuff junked C 3-7-91 set namlen = 60 C 18-7-91 added titles to pattern files. Fixed graphics output for patterns C 25-2-92 changed call to rdseq SUBROUTINE FMAIN() INTEGER BOTOPT,TOPOPT PARAMETER (NAMLEN = 60) CHARACTER*(NAMLEN) HELPF,POINTF,FILMAR,FILNAM,FILEP,FILEA,FILEAA CHARACTER*(NAMLEN) FILEIN,FILE12,FOFNAM CHARACTER*(NAMLEN) LIBLF PARAMETER (BOTOPT=0,TOPOPT=28, + MAXSEQ=100000, + MXSPAN=603, + MAXWIN=MAXSEQ+MXSPAN, + MAXSD2=MAXSEQ/2, + MAXSD3=MAXSEQ/3, + MAXWIR=100000, + MAXD36=MAXWIR/36, + MAXD2=MAXWIR/2, + MAXD3=MAXWIR/3, + MAXMEN=-5, + MAXOPT=28, + MAXDEV=9, + MXWTLN = 120, + IDM = 26) PARAMETER ( + HELPF='PIPHELP', + POINTF='PIPHPNT', + FILMAR='PIPMARG', + FILEA='PROTGRP', + FILEAA='PROTALL', + FILEP='PROTMAT', + FILE12='ROBSON', + LIBLF='SEQUENCELIBRARIES') PARAMETER ( MAXMOT = 50,MAXWTS = 4000) INTEGER HELPS(BOTOPT:TOPOPT),HELPE(BOTOPT:TOPOPT),DEVNOS(MAXDEV) INTEGER WORKI(MAXWIR),OPT,MARGB(MAXOPT),MARGT(MAXOPT) INTEGER MATRIX(IDM,IDM),MAT1(IDM,IDM) REAL WORKR(MAXWIR) CHARACTER*(NAMLEN) NAMSAV(MAXMOT) CHARACTER*8 KEYNS(MAXMOT) CHARACTER SEQ(MAXWIN),SEQW(MAXSEQ),CHRSET(IDM) REAL HYDRO(IDM),ISOP(IDM),HYDRE(IDM),HYDHW(IDM) CHARACTER*3 CODONS(26) EQUIVALENCE (WORKI,WORKR) C 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6 C c,s,t,p,a,g,n,d,e,q,?,?,h,r,k,m,i,l,v,f,y,w DATA HYDHW/-1.0,0.3,-0.4,0.0,-0.5,0.0,0.2,3.0,3.0,0.2,0.0,0.0, +-0.5,3.0,3.0,-1.3,-1.8,-1.8,-1.5,-2.5,-2.3,-3.4,0.0,0.0,0.0,0.0/ DATA HYDRO/70.,36.,38.,29.,63.,41.,10.,10.,10.,10.,10.,10., +13.,0.,6.,64.,90.,82.,87.,72.,32.,36.,41.,41.,41.,41./ DATA HYDRE/0.29,-0.18,-0.05,0.12,0.62,0.48,-0.78,-0.9,-0.74, +-0.85,-0.74,-0.85,-0.4,-2.53,-1.5,0.64,1.38,1.06,1.08,1.19, +0.26,0.81,0.0,0.0,0.0,0.0/ C DATA ISOP/5.02,6.68,6.53,6.10,6.02,5.97,5.41,2.87,3.22, C +5.65,5.00,5.00,7.58,10.76,9.74,5.75,6.02,5.98,5.97,5.98, C +5.65,5.88,5.00,5.00,5.00,5.00/ DATA ISOP/0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0, +0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0, +0.0,0.0,0.0,0.0,0.0,0.0/ DATA CODONS/'TGY','WSN','ACN','CCN', +'GCN','GGN','AAY','GAY','GAR','CAR', +'RAY','SAR','CAY','MGN','AAR','ATG', +'ATH','YTN','GTN','TTY','TAY','TGG', +'---','---','---','---'/ C Initialise help CALL INTHLP('pip', TOPOPT) C GET DEVICE NUMBERS CALL UNITNO(KBIN,KBOUT,DEVNOS,MAXDEV) CALL OPENGR(DEVNOS(3)) CALL GIDMAT(MAT1,IDM,22) IGORT = 0 IFORNO = 0 LIBNO = 2 WRITE(KBOUT,1000) 1000 FORMAT( +' PIP (Protein interpretation program) V5.0 Feb 1992',/, +' Author: Rodger Staden'/) C READ IN THE POINTERS TO THE HELP FILE CALL SETHLP(HELPS,HELPE,BOTOPT,TOPOPT,POINTF,DEVNOS(4),KBOUT) CALL INITGR(KBIN,KBOUT,HELPS(0),HELPE(0),HELPF,DEVNOS(4)) CALL INITLU(IDM) IOK=0 C IF(MACTYP.EQ.MACSHT)CALL OPENB(DEVNOS(5),FILBUF,LREC,NREC,IOK) C IF(IOK.NE.0)STOP C GET SCREEN AND MARGIN SIZES CALL GETMRG(ISXMAX,ISYMAX,MARGL,MARGR,MARGB,MARGT, +MAXOPT,DEVNOS(1),FILMAR) IIIIX=0 IIIIY=0 IDEV=KBOUT MOPT=0 IDIMT = 0 C READ IN THE SCORE MATRIX (USUALLY MDM78) CALL GETMAT(DEVNOS(1),FILEP,MATRIX,IDM,CHRSET,KBOUT,IOK) IF(IOK.NE.0) GO TO 9999 CALL SETPAR(IOK) IF(IOK.NE.0) GO TO 9999 2 CONTINUE IDEVLL = DEVNOS(5) IDEVEN = DEVNOS(6) IDEVAN = DEVNOS(7) IDEVDL = DEVNOS(8) IDEVLF = DEVNOS(9) CALL RDSEQ( +SEQ(2+MXSPAN/2),MAXSEQ,IDIMT,J1,J2,ISTART,IEND,IDIM1,IDIMB, +DEVNOS(1),FILNAM,KBIN,KBOUT, +HELPS(3),HELPE(3),HELPF,DEVNOS(4),IDEV,IFORNO, +IDEVLL,IDEVEN,IDEVAN,IDEVDL, +IDEVLF,LIBNO,LIBLF,WORKI,MAXWIR,IOK) IF(IOK.NE.0)GO TO 1 FILEIN=FILNAM CALL SHOWFI(KBOUT,FILEIN) C GIVE COMPOSITION AS A CHECK IF(IDIMB.GT.0)CALL MWCALC(SEQ(2+MXSPAN/2),IDIMB, +J1-ISTART+1,J2-ISTART+1,KBOUT,KBOUT) C set pointers to sequence ******** C MAXSEQ = THE DIMENSION OF THE RAM BUFFER SEQ C IDIMT = THE ACTUAL SEQUENCE LENGTH (AND THEREFORE THE NUMBER OF ELEMENTS C IN THE DISK BUFFER) C ISTART = THE SEQUENCE NUMBER OF THE CHARACTER OCCUPYING SEQ(1) C J1 = THE SEQUENCE NUMBER OF THE FIRST CHARACTER IN THE ACTIVE REGION C J2 = THE SEQUENCE NUMBER OF THE LAST CHARACTER IN THE ACTIVE REGION C IDIM1 = J2-J1+1 I.E. THE NUMBER OF ELEMENTS IN THE ACTIVE REGION C IEND = THE SEQUENCE NUMBER OF THE LAST ELEMENT OF SEQ C IDIMB = IEND-ISTART+1 I.E. THE NUMBER OF ELEMENTS IN THE RAM BUFFER ******** IDIM1P=IDIMB+MXSPAN 1 CONTINUE CALL BPAUSE(KBIN,KBOUT,IOK) C IF(IOK.NE.0) GO TO 9999 C give menu, get option C CALL MENU(OPT,KOPT,MOPT,MAXOPT,MAXMEN,KBIN,KBOUT, + HELPS(0),HELPE(0),HELPF,DEVNOS(4)) C change region IF((OPT.EQ.4).AND.(IDIMT.GT.0))THEN C CALL REDEFA C + (SEQ(2+MXSPAN/2),IDIMT,J1,J2,MAXSEQ,IDIM1,ISTART,IEND,IDIMB, C + DEVNOS(5),KBIN,KBOUT,IOK,SEQW,LREC,NREC,HELPS(OPT), C + HELPE(OPT),HELPF,DEVNOS(4),MACTYP,MACSHT,MACLNG) CALL GTREG(KBIN,KBOUT,ISTART,IEND,J1,J2, + 'Define active region', + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),IOK) IDIM1 = J2 - J1 + 1 GO TO 1 END IF C STOP IF(OPT.EQ.2)GO TO 9999 C C LIST C IF((OPT.EQ.5).AND.(IDIMB.GT.0))THEN CALL LSTSEP(SEQ(2+MXSPAN/2),IDIMB,IDEV,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C WRITE OUT ACTIVE SEQUENCE C IF((OPT.EQ.8).AND.(IDIM1.GT.0))THEN CALL WRTACT(DEVNOS(1),FILNAM,KBIN,KBOUT, + SEQ(J1+1-ISTART+1+MXSPAN/2),IDIM1, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4)) GO TO 1 END IF C C ruler C IF((OPT.EQ.12).AND.(IDIM1.GT.0))THEN CALL RULER(J1,J2,MARGL,MARGR, + MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX,KBIN,KBOUT,1, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4)) GO TO 1 END IF C C composition C IF((OPT.EQ.22).AND.(IDIM1.GT.0))THEN CALL COMPH(SEQ,IDIM1P,MXSPAN, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX, + J1,J2,KBIN,KBOUT,HYDRO,IDM, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C clear all C IF(OPT.EQ.10)THEN CALL CLEARG GO TO 1 END IF C C xhairs C IF((OPT.EQ.13).AND.(IDIM1.GT.0))THEN XMAX=J2 XMIN=J1 YMAX=ISYMAX YMIN=0. CALL CLEARV CALL XHAIRN(XMAX,XMIN,YMAX,YMIN, + MARGL,MARGR,MARGB(OPT),MARGT(OPT), + ISXMAX,ISYMAX,IIIIX,IIIIY,N,KBOUT, + SEQ(2+MXSPAN/2),ISTART,IDIMB, + SEQ(2+MXSPAN/2),ISTART,IDIMB,1) GO TO 1 END IF C C clear vt100 C IF(OPT.EQ.11)THEN CALL CLEARV GO TO 1 END IF C C CHANGE MARGINS C IF(OPT.EQ.14)THEN CALL MARGC(ISXMAX,ISYMAX,MARGL,MARGR,MARGB,MARGT, + HELPS(OPT),HELPE(OPT),MAXOPT,HELPF,DEVNOS(4),KBIN,KBOUT) GO TO 1 END IF C C plot map C IF((OPT.EQ.16).AND.(IDIM1.GT.0))THEN CALL PLTMAP(DEVNOS(1),FILNAM,IDIM1,MARGL,MARGR,MARGB(OPT), + MARGT(OPT),ISXMAX,ISYMAX,J1,J2,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4)) GO TO 1 END IF C C new file C IF(OPT.EQ.3)GO TO 2 C C type text C IF(OPT.EQ.6)THEN CALL TTEXT(DEVNOS(1),FILNAM,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4)) GO TO 1 END IF C C help C IF(OPT.EQ.1)THEN CALL HELP(HELPS,HELPE,BOTOPT,TOPOPT,HELPF,DEVNOS(4), + KBIN,KBOUT) GO TO 1 END IF C C search C IF((OPT.EQ.17).AND.(IDIM1.GT.0))THEN KOPT = 1 CALL SERCHP(SEQ(2+MXSPAN/2),IDIMB,J1,J2,ISTART,SEQW,MAXSD3, + SEQW(1+MAXSD3),MAXSD3, + SEQW(1+2*MAXSD3),MAXSD3, + WORKI,WORKI(1+10*MAXD36),WORKI(1+20*MAXD36),10*MAXD36, + WORKI(1+30*MAXD36),WORKI(1+31*MAXD36),WORKI(1+32*MAXD36), + WORKI(1+33*MAXD36), + WORKI(1+34*MAXD36),WORKI(1+35*MAXD36),MAXD36, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX, + IDEV,DEVNOS(1),FILNAM,FILEA,FILEAA,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C direct output to disk C IF(OPT.EQ.7)THEN CALL REDIR(IDEV,DEVNOS(2),DEVNOS(3),IGORT,FILNAM,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C signal searches from named plot files C IF((OPT.EQ.20).AND.(IDIM1.GT.0))THEN CALL SIGNLP(SEQ(2+MXSPAN/2),IDIMB, + MARGL,MARGR,MARGB(OPT),MARGT(OPT), + ISXMAX,ISYMAX,J1-ISTART+1,J2-ISTART+1, + DEVNOS(1),FILNAM,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),IDEV, + WORKI,WORKR(1+IDM*MXWTLN),WORKI(1+2*IDM*MXWTLN),IDM, + MXWTLN,SEQW,CHRSET,KOPT) GO TO 1 END IF C C SEQFIT C IF((OPT.EQ.18).AND.(IDIM1.GT.0))THEN CALL SQPFIT(SEQ(2+MXSPAN/2),IDIMB,SEQW,MAXSEQ, + WORKI,WORKI(1+MAXD2),MAXD2, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX, + J1,J2,ISTART,IDEV, + DEVNOS(1),FILNAM,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C calc mol wt C IF((OPT.EQ.21).AND.(IDIM1.GT.0))THEN CALL MWCALC(SEQ(2+MXSPAN/2),IDIMB, + J1-ISTART+1,J2-ISTART+1,KBOUT,IDEV) GO TO 1 END IF C C plot charge C IF((OPT.EQ.23).AND.(IDIM1.GT.0))THEN CALL COMPCP(SEQ,IDIM1P,MXSPAN, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX, + J1-ISTART+1,J2-ISTART+1,KBIN,KBOUT,ISOP,IDM, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C sepfit C IF((OPT.EQ.19).AND.(IDIM1.GT.0))THEN CALL SEPFIT(SEQ(2+MXSPAN/2),IDIMB,SEQW,MAXSEQ, + WORKI,WORKI(1+MAXD2),MAXD2, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX,J1,J2, + ISTART,IDEV, + DEVNOS(1),FILNAM,KBIN,KBOUT,MATRIX,IDM, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C write labels C IF(OPT.EQ.15)THEN CALL LABLER(KBIN,KBOUT,ISXMAX,ISYMAX, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4)) GO TO 1 END IF C C show settings and edit C IF(OPT.EQ.9)THEN WRITE(KBOUT,4000)FILEIN,J1,J2,IDIM1 4000 FORMAT(' Sequence=',A,/,' Start=',I7,' End=',I7,' Length=',I7) C THIS NEXT LINE IS BECAUSE SOMETIMES THE FILE IS NOT READY TO EDIT! CLOSE(UNIT=DEVNOS(2)) CALL SEQEDT(SEQ(2+MXSPAN/2),MAXSEQ,IDIMB,KBIN, + KBOUT,HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),DEVNOS(1), + FILNAM,FILEIN,IDM,IOK) IF(IOK.NE.0) GO TO 1 C CALL EDITSQ(SEQ(2+MXSPAN/2),MAXSEQ,SEQW,MAXSEQ,IDIMB,KBIN, C + KBOUT,HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),1,DEVNOS(1), C + FILNAM,PAA,IDM) ******** C MAXSEQ = THE DIMENSION OF THE RAM BUFFER SEQ C IDIMT = THE ACTUAL SEQUENCE LENGTH (AND THEREFORE THE NUMBER OF ELEMENTS C IN THE DISK BUFFER) C ISTART = THE SEQUENCE NUMBER OF THE CHARACTER OCCUPYING SEQ(1) C J1 = THE SEQUENCE NUMBER OF THE FIRST CHARACTER IN THE ACTIVE REGION C J2 = THE SEQUENCE NUMBER OF THE LAST CHARACTER IN THE ACTIVE REGION C IDIM1 = J2-J1+1 I.E. THE NUMBER OF ELEMENTS IN THE ACTIVE REGION C IEND = THE SEQUENCE NUMBER OF THE LAST ELEMENT OF SEQ C IDIMB = IEND-ISTART+1 I.E. THE NUMBER OF ELEMENTS IN THE RAM BUFFER C THIS IS A FUDGE AND ONLY WORKS FOR UNBUFFERED SEQUENCES!!!!!!!!!!! J1 = 1 J2 = IDIMB IDIMT = IDIMB IEND = IDIMB IDIM1 = IDIMB WRITE(KBOUT,4000)FILEIN,J1,J2,IDIM1 GO TO 1 END IF C C robson C IF((OPT.EQ.24).AND.(IDIM1.GT.0))THEN CALL ROBSON(SEQ(2+MXSPAN/2),IDIMB, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX,J1-ISTART+1, + J2-ISTART+1,KBIN,KBOUT, + DEVNOS(1),FILE12, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),IDEV,KOPT) GO TO 1 END IF C C hydrophobic moment C IF((OPT.EQ.25).AND.(IDIM1.GT.0))THEN CALL HYDMOM(SEQ(J1+1-ISTART+1+MXSPAN/2),IDIM1,HYDRE,IDM, + MARGL,MARGR,MARGB(OPT),MARGT(OPT), + ISXMAX,ISYMAX,KBIN,KBOUT, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C helix wheel C IF((OPT.EQ.26).AND.(IDIM1.GT.0))THEN CALL HELIXW(SEQ(2+MXSPAN/2),IDIMB,ISTART,J1,J2, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX,KBIN,KBOUT, + HYDRE,IDM, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C back translate C IF((OPT.EQ.27).AND.(IDIM1.GT.0))THEN CALL BACK(SEQ(J1+1-ISTART+1+MXSPAN/2),IDIM1, + CODONS,KBIN,KBOUT,FILNAM, + DEVNOS(1),SEQW,MAXSEQ,WORKR,IDIM1P,MXSPAN, + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),KOPT) GO TO 1 END IF C C pattern search C IF((OPT.EQ.28).AND.(IDIM1.GT.0))THEN CALL PATTEN(SEQ(J1-ISTART+2+MXSPAN/2),IDIM1,SEQW,MAXSEQ, + WORKI(1),WORKI(MAXMOT+1),WORKI(2*MAXMOT+1),WORKI(3*MAXMOT+1), + WORKI(4*MAXMOT+1),WORKI(5*MAXMOT+1),WORKI(6*MAXMOT+1), + WORKI(7*MAXMOT+1),WORKI(8*MAXMOT+1),WORKI(9*MAXMOT+1), + WORKI(10*MAXMOT+1),WORKI(11*MAXMOT+1),WORKI(12*MAXMOT+1), + WORKI(13*MAXMOT+1),WORKI(14*MAXMOT+1),WORKI(15*MAXMOT+1), + WORKI(16*MAXMOT+1),WORKI(17*MAXMOT+1),WORKI(18*MAXMOT+1), + WORKI(19*MAXMOT+1),WORKI(20*MAXMOT+1), + WORKR(22*MAXMOT+1),WORKR(23*MAXMOT+1), + FILNAM,MAXMOT,MAXWTS,MATRIX,IDEV,DEVNOS(6),DEVNOS(7), + MARGL,MARGR,MARGB(OPT),MARGT(OPT),ISXMAX,ISYMAX,J1, + KBIN,KBOUT,DEVNOS(8),IDM,SEQ(1),MAT1,NAMSAV,KEYNS,CHRSET, + HELPS(OPT),HELPE(OPT),HELPF,DEVNOS(4),FOFNAM,DEVNOS(1)) C NB IVE SENT THE START OF SEQ TO BE USED IN PATTEN. THIS IS OK C AS LONG AS MAXMOT