gde_linux/ZUKER/rna.f

354 lines
12 KiB
Fortran
Executable File

c MFOLD - Prediction of RNA secondary structure by free energy
c minimization.
c - Version 2.0
c - Michael Zuker and John Jaeger
c - LRNA : folds linear RNA sequences
c - CRNA : folds circular RNA sequences
c
c The original version (1.0) was designed by Michael Zuker and
c programmed by Eric Nelson in the summer of 1987 in the Division of
c Biological Sciences at the National Research Council of Canada. John
c Jaeger added the tetraloop bonus energy feature and created the
c BATGEN program for batch file generation.
c
c Version 2.0 corrects a number of small bugs from the original
c program. These were added to version 1 and itemized in the
c ERRATA.LIST file that was distributed along with version 1. The major
c improvements of version 2 are : 1. During the generation of
c suboptimal foldings, the number of new base pairs that are
c sufficiently different from base pairs that have already been found
c must be greater than the WINDOW parameter. This feature was added
c during the summer of 1989, and was made part of version 1 (item 11 in
c the ERRATA.LIST file distributed with this version). The effect is to
c eliminate structures that contain just a few new base pairs. 2.
c Temperature dependent folding. This was added in the fall of 1989 and
c was never a feature of version 1.
c
c METHOD : A dynamic programming algorithm is used to find optimal and
c suboptimal foldings of an RNA molecule starting from linear sequence
c data. Auxiliary information can be used to constrain the folding.
c
c NB : Base pairs are forced by giving them a bonus energy (EPARAM(9)
c in the program code). These energies are subtracted during the
c traceback algorithm so that the computed sturctures have the correct
c energies. Unfortunately, there is no way to subtract the bonus
c energies from the energy dot plots. Moreover, each forced base pair
c contain two bonus energies because of the nature of the algorithm.
c For example, suppose that an optimal folding of an RNA contains 3
c forced base pairs ( default bonus energy is 50.0 kcal per forced base
c pair ) and that the correct folding energy is -180.0 kcal/mole.
c Internally, the energy will be -180.0 - (3+1) x 50.0 = -380.0
c kcal/mole. To find foldings within 10% of the correct energy, one
c needs to compute foldings to within 18.0 kcal of -180.0 - 3 x 50.0 =
c -330.0 kcal/mole. This comes out to -312.0 kcal/mole. The ratio of
c -312.0 to -380.0 is 82%, so that one would request the 18% level of
c suboptimality! This confustion only exists when base pairs are
c forced. Each closed excision counts as one forced base pair.
c
c Energy data from :
c S.M. Freier et al., Proc. Natl. Acad. Sci. USA, 83, 9373-9377, 1986.
c D.H. Turner et al., Cold Spring Harbor Symposia on Quantitative Biology,
c 52, 123-133, 1987.
c D.H. Turner et al., Annu. Rev. Biophys. Biophys. Chem 17, 167-192 (1988).
c This last reference has all the dangling end and terminal mismatch data.
c
c References :
c M. Zuker
c On Finding All Suboptimal Foldings of an RNA Molecule.
c Science, 244, 48-52, (1989)
c
c J. A. Jaeger, D. H. Turner and M. Zuker
c Improved Predictions of Secondary Structures for RNA.
c Proc. Natl. Acad. Sci. USA, BIOCHEMISTRY, 86, 7706-7710, (1989)
c
c J. A. Jaeger, D. H. Turner and M. Zuker
c Predicting Optimal and Suboptimal Secondary Structure for RNA.
c in "Molecular Evolution: Computer Analysis of Protein and
c Nucleic Acid Sequences", R. F. Doolittle ed.
c Methods in Enzymology, 183, 281-306 (1989)
c
include 'rfd.inc'
real energy
logical flag,mark
c Fill screen with author and reference data.
call begin
c Initial setup for run.
5 call device
if (cntrl(1).ne.2) then
c Read energy information if this is not a continuation run.
call enefiles
call erg(1,0,0,0,0)
else
c dump out information read in from continuation file
call cdump
endif
c Determine output specifications if this is not a save run.
if (cntrl(1).ne.1) call outputs
c Call the menu if this is not a continuation run.
if (cntrl(1).ne.2) call menu
mrep = 1
c CNTRL(7) = 0 - suboptimal dot plot
c 1 - N best sorted by energy
c 2 - best folding for all sequences in a file
10 if (cntrl(7).eq.2) call mseq(mrep)
c Process sequence before folding.
do i = 1,mxbits
marks(i) = 0
force2(i) = 0
enddo
call process
if (n*2.gt.fldmax) then
tt = fldmax/2
c Fragment is too long. Try again.
write (6,*) 'Segment larger than ',tt
stop
endif
if (cntrl(1).ne.2) then
c Fill the optimal energy arrays except in a continuation run.
call fill
endif
if (cntrl(1).eq.1) then
c Save the results from FILL in a SAVE run and then stop.
call putcont
stop
endif
rep = 1
jump = 1
flag = .true.
err = 0
do while (flag)
if (cntrl(7).eq.0) then
c Interactive dot plot returns IRET, JRET (new numbering).
c Zuker comments out call to dotplt : do not choose this option
C If you do, the program stops dead here.
c call dotplt(iret,jret,jump)
if(1.eq.1) stop ' Energy dot plot disabled.'
jump = 2
else
c Automatic sort returns IRET,JRET (new numbering).
print *,'traceback'
call sortout(iret,jret,rep,err)
if (err.eq.30) then
flag = .false.
call errmsg(err,rep-1,0)
err = 0
endif
rep = rep + 1
endif
c First traceback yields the best structure on the included fragment
c from IRET to JRET.
if (flag) call trace(iret,jret,nforc1,err)
if (err.ne.0) call errmsg(err,iret,jret)
if (flag) then
it = iret+n
c Second traceback yields the best structure on the excluded fragment
c from IRET to JRET.
call trace(jret,it,nforc2,err)
if (err.ne.0) then
call errmsg(err,jret,it)
else
c The energy of the best structure containing the base-pair IRET,
c JRET is the sum of the energies of the optimal foldings on
c the included and excluded fragments. A correction is made for
c forced base-pairs.
c* CALL EFN(ENE,1,N)
c* WRITE (6,*) 'NEW ENERGY ',ENE
ene = v(iret,jret) + v(jret,iret+n) - eparam(9) * (nforc1+nforc2)
energy = float(ene) / 10.0
c Count the number of new base pairs not within WINDOW
c of existing base pairs.
numbp = 0
do k = 1,n
if(k.lt.basepr(k)) then
if(.not.mark(k,basepr(k))) numbp = numbp + 1
endif
enddo
do k = 1,n
if(k.lt.basepr(k)) then
c Mark "traced-back" base pairs and also base-pairs
c which are close (within WINDOW = CNTRL(9) ).
call smark(k,basepr(k))
if(cntrl(9).gt.0) then
do k1 = -cntrl(9),cntrl(9)
do k2 = -cntrl(9),cntrl(9)
if(k+k1.gt.0.and.k+k1.lt.basepr(k)+k2.and.
1 basepr(k)+k2.le.n) call smark(k+k1,basepr(k)+k2)
enddo
enddo
endif
endif
enddo
if(numbp.le.cntrl(9)) then
rep = rep - 1
go to 900
endif
write (6,1010) rep - 1
c 1010 format('+',i5$)
1010 format('+',i5)
if (cntrl(2).ne.2.and.cntrl(2).ne.3.and.cntrl(2).ne.6) then
c Line printer output.
call linout(1,n,energy,iret,jret,err)
endif
if (err.ne.0) then
call errmsg(err,iret,jret)
else
if (cntrl(2).ge.3.and.cntrl(2).ne.4) then
c Region table output.
call regtab
endif
if (mod(cntrl(2),2).eq.0.or.cntrl(2).eq.7) then
c CT file output.
call ct(energy)
endif
endif
endif
endif
if (cntrl(7).eq.1.and.rep.gt.cntrl(6)) flag = .false.
900 continue
enddo
c
c Multiple sequence option (CNTRL(7) = 2)
c If sequence number (MREP) is < total number of sequences
c (CNTRL(5)), go get another sequence.
c
if (cntrl(7).eq.2.and.mrep.lt.cntrl(5)) then
mrep = mrep + 1
goto 10
endif
stop
end
c Marks a base-pair I,J.
c Assumes that 1 <= I <= J <= N.
c The information is stored in a single bit in the MARKS
c array.
c The conversion from double dimension to single is through the
c transformation I,J ==> (J-1)*J/2 + I .
subroutine smark(i,j)
include 'rfd.inc'
integer*2 bit
posn = (((j-1)*j)/2) + i
word = (posn+15) / 16
bit = mod(posn,16)
c marks(word) = iibset(marks(word),bit)
marks(word) = ibset(marks(word),bit)
return
end
c Marks a forced base-pair I,J.
c The incoming base-pair II,JI is processed to an I,J
c base-pair satisfying 1 <= I <= J <= N.
c The information is stored in a single bit in the FORCE2
c array.
c The conversion from double dimension to single is through the
c transformation I,J ==> (J-1)*J/2 + I .
subroutine sfce(ii,ji)
include 'rfd.inc'
integer*2 bit
if (ii.gt.n) then
i = ii - n
j = ji - n
elseif (ji.gt.n) then
i = ji-n
j = ii
else
i = ii
j = ji
endif
posn = (((j-1)*j)/2) + i
word = (posn+15) / 16
bit = mod(posn,16)
c force2(word) = iibset(force2(word),bit)
force2(word) = ibset(force2(word),bit)
return
end
c Retrieves information on whether or not the base-pair I,J
c has been marked by a traceback passing through or close to
c this pair.
logical function mark(i,j)
include 'rfd.inc'
integer*2 bit,one
one = 1
posn = (((j-1)*j)/2) + i
word = (posn+15) / 16
bit = mod(posn,16)
c set = iibits(marks(word),bit,one)
set = ibits(marks(word),bit,one)
mark = .false.
if (set.ne.0) mark = .true.
return
end
c Retrieves information on whether or not the base-pair I,J
c has been forced.
logical function fce(ii,ji)
include 'rfd.inc'
integer*2 bit,one
if (ii.gt.n) then
i = ii - n
j = ji - n
elseif (ji.gt.n) then
i = ji-n
j = ii
else
i = ii
j = ji
endif
one = 1
posn = (((j-1)*j)/2) + i
word = (posn+15) / 16
bit = mod(posn,16)
c set = iibits(force2(word),bit,one)
set = ibits(force2(word),bit,one)
fce = .false.
if (set.ne.0) fce = .true.
return
end
c fills screen with author and reference information
subroutine begin
character*1 ans
character*80 record
open(3,file='begin.dat',status='old',err=5)
write(6,1010)
1010 format(' ')
1 read(3,1020,end=2) record
1020 format(a80)
write(6,1030) record
c1030 format(' ',a80)
1030 format(a80)
go to 1
2 write(6,1040)
c 1040 format(' Press <return> to continue ...'$)
c 1040 format(' Press <return> to continue ...'$)
1040 format(' Press <return> to continue ...',$)
read(5,1050) ans
1050 format(a1)
return
5 write(6,1060)
1060 format(//' Author and reference file not available.'//)
c
c C.Wang copied the next 2 lines here. 6/12/91
c to keep consistency.
3 write(6,1040)
read(5,1050) ans
return
end