staden-lg/src/indexseqlibs/emblfreetext.script

96 lines
2.3 KiB
Tcsh

#! /bin/csh -f
#
# script to make a free text index for the embl newdata library
#
# Steps (any step returning a non-zero status aborts the whole script):
#   1. emblfreetext  <div>.dat  -> <div>.list   for every division below
#   2. sort | uniq | excludewords | sort        *.list -> freetext.sorted
#   3. freetext2     freetext.sorted  -> freetext.entry
#   4. sort          freetext.entry   -> freetext.sorted2
#   5. freetext4     freetext.sorted2 freetext.trg freetext.hit
# Each intermediate file is removed once the step that consumes it succeeded.
#
# Exit status: 0 on success, 2 when a step failed.
#
echo "emblfreetext.script Version 1.0"
echo ""
set RM = "/bin/rm -f"
set SORT = sort
set UNIQ = uniq
#
# Argument handed to excludewords (presumably the stop-word list file).
#
set WORDS = stopwords
#
# Divisions to process: each <div>.dat is turned into <div>.list.
#
set DIVISIONS = ( bb fun inv mam org patent phg pln pri pro rod syn una vrl vrt )
#
# extract the free text of every division, collecting the list file names
#
set LISTS = ()
foreach div ( $DIVISIONS )
    emblfreetext ${div}.dat ${div}.list
    if ( $status ) goto abort
    set LISTS = ( $LISTS ${div}.list )
end
# sort on words, remove duplicates, remove stopwords, resort on entry name
#
# The list files are sorted exactly once, and deleted only after this
# pipeline has consumed them; deleting them any earlier leaves the pipeline
# with no input.
# "-k 2" is the POSIX spelling of the obsolete "+1" key (start at field 2).
${SORT} -b -k 2 $LISTS | ${UNIQ} | excludewords ${WORDS} | ${SORT} >! freetext.sorted
if ( $status ) goto abort
${RM} $LISTS
# include entry numbers in the file
freetext2 freetext.sorted freetext.entry
if ( $status ) goto abort
${RM} freetext.sorted
# sort on text
# "-k 3" is the POSIX spelling of the obsolete "+2" key (start at field 3).
${SORT} -b -k 3 freetext.entry >! freetext.sorted2
if ( $status ) goto abort
${RM} freetext.entry
# create the index
freetext4 freetext.sorted2 freetext.trg freetext.hit
if ( $status ) goto abort
${RM} freetext.sorted2
echo "Completion successful"
echo ""
exit 0
#
# Abort: reached via "goto abort" whenever a step reports a non-zero status.
# Intermediate files are left in place so the failure can be inspected.
#
abort:
echo "emblfreetext.script: aborting due to serious error"
exit 2