2022-03-08 04:38:24 +08:00
/*************************************************************************************
* OBLONG
* a program for finding most parsimonious
* trees with minimum RAM requirements
*
* Copyright April 2013 by Pablo A . Goloboff
*
* This program is free software ; you may redistribute it and / or modify its
* under the terms of the GNU General Public License as published by the Free
* Software Foundation ; either version 2 of the License , or ( at your option )
* any later version .
*
* This program is distributed in the hope that it will be useful , but
* WITHOUT ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License
* for more details .
*
* If publishing results based on this program , please cite Goloboff ( forthcoming )
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <time.h>
# include <unistd.h>
# include <ctype.h>
# include <math.h>
# define MAXNAMELEN 100
# define CHUNKSZ 1000
# define MAXMULTITAXA 1000
FILE * inp = NULL , * outp = NULL , * tmpp , * partsfile , * treeinp = NULL , * multinp ;
# ifdef WATCOM
long long * fpos ;
# else
fpos_t * fpos ;
# endif
unsigned long long bestgloballen , lenlag = 0 , rearrangs = 0 ;
unsigned long int * * mat , * wtchg , * movelist , lenlimit ;
unsigned long int gup , gmix , gdn , nitdn ;
long int nc2 = 0 , nc3 = 0 , nc4 = 0 , trashed = 0 , totnc , maxnc , piecesz , totnc4 ;
double * gbrem , * tbrem ;
long int * abrem , fasta = 0 ;
int nt , nc , nodz , nstord , together = 1 , use_spr = 0 , reportreplication = 0 ,
numreplications = 0 , reportperiod = 1 , ratcycles = 0 , ratcyclesdone = 0 ,
dojak = 0 , saveall = 0 , swapcollapse = 0 , recordgoodmoves = 0 , usenexus = 0 ;
int ranseed = 1 , do_branch_swapping = 1 , random_starts = 0 , numtrees = 0 , numhits ,
ratcheting = 0 , changewts = 0 , numsubs , jakprob = 36 , numcolmoves , numcolroots ,
savejusttrees = 0 , felipe = 0 , multifiles = 0 , nummultifiles , recorddn = 0 ,
equal_taxon_sequence = 0 , numparts , quickbremer = 0 , doingbremer = 0 ,
bremertype = 2 , shownegbremers = 0 , verbose = 0 ;
short int * nodlist , * anx , * lef , * rig , * sis , * rlist ,
* tgtlist , * lassis , * nodfork , nodtoskip = 0 , * ladd , * nodmatch ,
* blist , * * memtrees , * unsuprt , * distoroot , * colmoves , * colroots ;
char strsp [ MAXNAMELEN ] , maskin [ 256 ] , bitct [ 65536 ] , reportheader ,
reporttoscreen = 1 ;
char legindic [ ] = { ' = ' , 0 } ;
unsigned char * * matbuf , * tmpstates , * isabs = NULL ;
double maxram = - 1 ;
typedef struct { unsigned long int up , dn ; int root , tgt ; } BremTyp ;
BremTyp * bremdif ;
void transfer_bremer ( int , int , BremTyp * ) ;
unsigned long long downpass_all ( void ) ;
unsigned long long downpass_split ( void ) ;
unsigned long long ( * downpass ) ( void ) ;
unsigned long long wagner_all ( void ) ;
unsigned long long wagner_split ( void ) ;
unsigned long long ( * wagner ) ( void ) ;
unsigned long long bswap_all ( void ) ;
unsigned long long bswap_split ( void ) ;
unsigned long long ( * bswap ) ( void ) ;
# define VERYBIG 10000000000000000
# ifdef BITS64
# define FLDS2 32
# define FLDS3 21
# define FLDS4 16
# define HI4 9838263505978427528UL
# define LO4 8608480567731124087UL
# define HI3 5270498306774157604UL
# define LO3 3952873730080618203UL
# define HI2 12297829382473034410UL
# define LO2 6148914691236517205UL
# else
# define FLDS2 16
# define FLDS3 10
# define FLDS4 8
# define HI4 2290649224
# define LO4 2004318071
# define HI3 613566756
# define LO3 460175067
# define HI2 2863311530
# define LO2 1431655765
# endif
/*********** TRIVIAL BUT USEFUL FUNCTIONS ******************/
void display_help ( int kind )
{
if ( kind = = 0 )
fprintf ( stdout ,
" \n "
" OBLONG (Merriam-Webster): \" a rectangle with one of its \n "
" sides much longer than the other \" \n "
" \n "
" by P.A. Goloboff (2013) \n "
" \n "
" For phylogenetic analysis of wide matrices, with low memory \n "
" requirements. Recommended for data sets with many characters \n "
" and few taxa (not advised for hundreds of taxa or more, as \n "
" it is optimized for memory, not speed) \n "
" Uses temporary disk buffers, \" oblong.tmp \" and/or \" oblong.pts \" \n "
" to avoid holding all data in RAM. As much disk space as the size \n "
" of the data set may be required. \n "
" Screen output can be redirected to a file with \" 2> file \" \n "
" \n "
" Use \" oblong -h \" for help on program options \n "
" \n "
) ;
else
fprintf ( stdout ,
" \n "
" Input/Output: \n "
" \n "
" -ifilename use \" filename \" as data file. \n "
" -p data files in Phylip/RAxML instead of TNT format. \n "
" -f data files in Fasta instead of TNT format. \n "
" -@list get from file \" list \" the list of data files, must \n "
" be in either Phylip (no \" -p \" switch required), or \n "
" Fasta format ( \" -f \" switch required). By default, \n "
" taxon names are matched with identical strings; a \n "
" taxon absent from a data set gets all \" ? \" . \n "
" -e for file lists, instead of matching taxa based on name, \n "
" assume all data files have the same taxon sequence. \n "
" -ooutfile output results to \" outfile \" ; this is a TNT-ready \n "
" data set with no characters (just tree topologies). \n "
" \n "
" Searches: \n "
" \n "
" -w use random trees as starting points, not Wagner trees \n "
" -rN use N starting points. \n "
" -spr use SPR branch swapping instead of the default TBR. \n "
" -b no branch swapping. \n "
" -a save the tree for each replication (even if suboptimal) \n "
" -jN jackknife the data, with N = P(del) (no N = 36). A \n "
" single (possibly collapsed) tree saved per replicate. \n "
" Note: uninformative characters/states are never \n "
" deactivated (they don't influence supports). \n "
" -mX use no more than X MBytes of RAM. If not using \" -j \" or \n "
" \" -x \" , results are identical to the default but slower; \n "
" otherwise, randomization sequences may differ. \n "
" -RN use N as random seed. \n "
" -xN use N iterations of ratchet (no N = 10). Note that for \n "
" C informative characters, \" -x \" requires C/2 additional \n "
" bytes of RAM (even under \" -m \" ). \n "
" -k if jackknifing, do TBR/SPR tree collapsing (recommended). \n "
" \n "
" Other: \n "
" \n "
" -N output Nexus format (i.e. tree-file is TreeView-ready) \n "
" -t save only the trees (no \" xread \" ). \n "
" -Ttreefile read trees from \" treefile \" and use them as starting \n "
" points (if \" -b \" and no \" -x \" , it just calculates \n "
" tree-scores). Treefile must be the output of Oblong \n "
" (i.e. what is in \" outfile \" ), with number of trees in \n "
" tree-title. \n "
" -q use a quick approximmation for group supports. Default is \n "
" S = 0.64 * R ^ 1/A (where R = relative bremer support, \n "
" and A = absolute bremer support); this is also indicated \n "
" with \" -q2 \" . You can also indicate \" -q0 \" , absolute bremer \n "
" support, or \" -q1 \" , relative. Supports are obtained via \n "
" branch-swapping, recording score difference for each move. \n "
" You can use this to measure strength of contradiction, starting \n "
" from suboptimal trees ( \" -T \" ) and skipping swapping ( \" -b \" ). \n "
" -v verbose mode: report rearrangements examined as they are done \n "
" (note: repeatedly writing to screen slows down program). \n "
" \n "
" You can also view this info with \" oblong -h | less \" or \" oblong -h | more \" \n "
" \n "
) ;
}
void errout ( int cond , char * msg )
{
if ( ! cond ) return ;
if ( msg ! = NULL )
2022-03-13 07:44:36 +08:00
fprintf ( stderr , " \n %s \n " , msg ) ; //output to the screen, usually error
exit ( 0 ) ; //exit(0)正常运行程序并退出程序; exit(1)非正常运行导致退出程序
2022-03-08 04:38:24 +08:00
}
void * myalloc ( unsigned long int size )
{
void * vd = malloc ( size ) ;
errout ( vd = = NULL , " Sorry, not enough memory available --try with the \" -m \" switch " ) ;
return vd ;
}
int stringis ( char * a , char * b )
{
2022-03-13 07:44:36 +08:00
while ( * a & & tolower ( * a ) = = tolower ( * b ) ) { + + a ; + + b ; } //tolower: conver letters from uppercase to lowercase
if ( * a | | * b ) return 0 ; // or || ,return fake
return 1 ; //return true
2022-03-08 04:38:24 +08:00
}
2022-03-13 07:44:36 +08:00
void procargs ( int nargs , char * * arg ) // int nargs =2 for [oblong -f] **arg = *arg [] 字符串数组
2022-03-08 04:38:24 +08:00
{
int i ;
char * cp ;
2022-03-13 07:44:36 +08:00
if ( nargs = = 1 ) { // no arguments if nargs == 1
display_help ( 0 ) ; //not show the help ouput just no argument output
2022-03-08 04:38:24 +08:00
exit ( 0 ) ; }
for ( i = 1 ; i < nargs ; + + i ) {
cp = arg [ i ] ;
2022-03-13 07:44:36 +08:00
if ( * cp ! = ' - ' ) { // if arguments not start with -
fprintf ( stderr , " \n Wrong argument: %s \n " , arg [ i ] ) ; //show which argument is wrong
2022-03-08 04:38:24 +08:00
exit ( 0 ) ; }
switch ( * + + cp ) {
2022-03-13 07:44:36 +08:00
case ' a ' : saveall = 1 ; break ; //save all trees
case ' b ' : do_branch_swapping = 0 ; break ; //no branch swapping
2022-03-08 04:38:24 +08:00
case ' e ' : equal_taxon_sequence = 1 ; break ;
2022-03-13 07:44:36 +08:00
case ' f ' : fasta = 1 ; break ; //fasta format
case ' h ' : display_help ( 1 ) ; exit ( 0 ) ; //help
case ' i ' : //use the filename -i${filename}
2022-03-08 04:38:24 +08:00
case ' @ ' :
2022-03-13 07:44:36 +08:00
if ( * cp = = ' @ ' ) multifiles = 1 ; //there are multi files
2022-03-08 04:38:24 +08:00
+ + cp ;
errout ( stringis ( cp , " oblong.tmp " ) , " Cannot use \" oblong.tmp \" as input file; rename it and try again " ) ;
errout ( stringis ( cp , " oblong.pts " ) , " Cannot use \" oblong.pts \" as input file; rename it and try again " ) ;
errout ( inp ! = NULL , " Can't open TWO input files " ) ;
2022-03-13 07:44:36 +08:00
errout ( ( inp = fopen ( cp , " rb " ) ) = = NULL , " Can't open input file " ) ; //input file, if the input list can't be readable in the bianry mode, on windows, binary is different
2022-03-08 04:38:24 +08:00
break ;
2022-03-13 07:44:36 +08:00
case ' j ' : //j${NumberofJakknife}
2022-03-08 04:38:24 +08:00
+ + cp ;
2022-03-13 07:44:36 +08:00
if ( isdigit ( * cp ) ) //to find if the character is the numer
jakprob = atoi ( cp ) ; //atoi: convert character to the int
2022-03-08 04:38:24 +08:00
dojak = changewts = 1 ;
break ;
2022-03-13 07:44:36 +08:00
case ' k ' : swapcollapse = 1 ; break ; //do TBR/SPR collapsing
case ' m ' : errout ( ( ( maxram = atof ( + + cp ) ) < = 0 ) , " You must indicate RAM to use " ) ; together = 0 ; break ; //atof: transfer chracter into double
case ' N ' : usenexus = 1 ; break ; //output the nexus file
case ' o ' : //-o${outfile}
2022-03-08 04:38:24 +08:00
+ + cp ;
errout ( stringis ( cp , " oblong.tmp " ) , " Cannot use \" oblong.tmp \" as output file; use a different name " ) ;
errout ( stringis ( cp , " oblong.pts " ) , " Cannot use \" oblong.pts \" as output file; use a different name " ) ;
errout ( outp ! = NULL , " Can't open TWO output files " ) ;
errout ( ( outp = fopen ( cp , " wb " ) ) = = NULL , " Can't open output file " ) ;
break ;
2022-03-13 07:44:36 +08:00
case ' p ' : felipe = 1 ; break ; //file in Phylip style
case ' q ' : //a quick approximmation
2022-03-08 04:38:24 +08:00
+ + cp ;
quickbremer = 1 ;
if ( * cp = = ' 0 ' ) bremertype = 0 ;
if ( * cp = = ' 1 ' ) bremertype = 1 ;
if ( * cp = = ' 2 ' ) bremertype = 2 ;
break ;
2022-03-13 07:44:36 +08:00
case ' r ' : numreplications = atoi ( + + cp ) ; break ; //use N starting points. -r${Number}
case ' R ' : ranseed = atoi ( + + cp ) ; break ; //random seed
case ' s ' : //spr use spr instead of tbr
2022-03-08 04:38:24 +08:00
if ( ! strcmp ( cp , " spr " ) ) { use_spr = 1 ; break ; }
2022-03-13 07:44:36 +08:00
case ' t ' : savejusttrees = 1 ; break ; //output standrad tre format
case ' T ' : //set start tree -T${treefile}
2022-03-08 04:38:24 +08:00
+ + cp ;
errout ( treeinp ! = NULL , " Can't open TWO tree files " ) ;
errout ( ( treeinp = fopen ( cp , " rb " ) ) = = NULL , " Can't open tree file " ) ;
break ;
2022-03-13 07:44:36 +08:00
case ' v ' : verbose = 1 ; break ; //report rearrangements
case ' w ' : random_starts = 1 ; break ; //use random trees as start tree
case ' x ' : //-x${Numer of ratcher}
2022-03-08 04:38:24 +08:00
+ + cp ;
2022-03-13 07:44:36 +08:00
if ( isdigit ( * cp ) ) ratcycles = atoi ( cp ) ; //if the char is number, make ratecycles=
2022-03-08 04:38:24 +08:00
else ratcycles = 10 ;
break ;
default : fprintf ( stderr , " \n Unrecognized option: %s \n " , arg [ i ] ) ; exit ( 0 ) ; } }
2022-03-13 07:44:36 +08:00
errout ( ( fasta & & felipe ) , " Cannot use both Phylip and Fasta formats " ) ; //fasta = 1 and felipe = 1, it can't be identified as different type
2022-03-08 04:38:24 +08:00
errout ( ( inp = = NULL ) , " No data set specified " ) ;
errout ( ( outp = = NULL ) , " No output file specified " ) ;
if ( quickbremer & & dojak ) {
2022-03-13 07:44:36 +08:00
dojak = changewts = 0 ; //if quick approximmation and jack both are set, the jack won't be done
2022-03-08 04:38:24 +08:00
fprintf ( stderr , " \n NOTE: Cannot do both jackknifing and bremer at the same time! \n " ) ;
if ( bremertype = = 2 )
fprintf ( stderr , " deletion prob P=%.2f will be used to rescale supports (1-P) \n " , ( double ) jakprob / 100 ) ;
else
if ( bremertype = = 0 ) fprintf ( stderr , " will do bremer supports instead \n " ) ;
else fprintf ( stderr , " will do relative bremer supports instead \n " ) ;
errout ( ( ! jakprob & & bremertype = = 2 ) , " ...but cannot use a deletion prob. P=0 to rescale! " ) ;
fprintf ( stderr , " \n " ) ; }
2022-03-13 07:44:36 +08:00
if ( quickbremer & & usenexus ) legindic [ 0 ] = 0 ; // *** left here temp
if ( ! do_branch_swapping & & ! ratcycles ) shownegbremers = 1 ; // *** left here temp
2022-03-08 04:38:24 +08:00
if ( treeinp ! = NULL & & numreplications )
fprintf ( stderr , " \n Note: \" -T \" overrides \" -r \" : number of trees in file determines starting points \n " ) ;
if ( ! numreplications ) numreplications = 1 ;
if ( maxram > 0 ) {
fprintf ( stderr , " \n Using %.2f MBytes of RAM for matrix buffers \n " , maxram ) ;
2022-04-01 08:18:58 +08:00
fflush ( stderr ) ; //清空缓存区
maxram * = 1024 * 1024 ; } // maxram = maxram * 1024 * 1024;
2022-03-08 04:38:24 +08:00
reportheader = ' \r ' ;
if ( ! isatty ( fileno ( stderr ) ) ) {
errout ( verbose , " Verbose allowed only when STDERR goes to screen " ) ;
reporttoscreen = 0 ;
reportheader = ' \n ' ;
reportperiod = 600 ;
fprintf ( stderr , " \n \n Program options: \n \n oblong " ) ;
for ( i = 1 ; i < nargs ; + + i )
fprintf ( stderr , " %s " , arg [ i ] ) ;
fprintf ( stderr , " \n \n " ) ; }
return ;
}
void init_bit_counters ( void )
{
2022-04-01 08:18:58 +08:00
int a , b , c ; // bitct[65536]
for ( a = 0 ; a < 65536 ; + + a ) { // a 0 -> 65535
for ( b = 1 , c = 0 ; b < = a ; b < < = 1 ) // <= 是 < 或 =;
if ( ( b & a ) ) + + c ; // c++;
2022-03-08 04:38:24 +08:00
bitct [ a ] = c ; }
}
void maskinit ( void )
{
int a ;
for ( a = 0 ; a < 256 ; + + a ) maskin [ a ] = 0 ;
maskin [ ' a ' ] = maskin [ ' A ' ] = maskin [ ' 0 ' ] = 1 ;
maskin [ ' g ' ] = maskin [ ' G ' ] = maskin [ ' 1 ' ] = 2 ;
maskin [ ' c ' ] = maskin [ ' C ' ] = maskin [ ' 2 ' ] = 4 ;
maskin [ ' t ' ] = maskin [ ' T ' ] = maskin [ ' 3 ' ] = 8 ;
maskin [ ' R ' ] = maskin [ ' r ' ] = maskin [ ' a ' ] | maskin [ ' g ' ] ;
maskin [ ' Y ' ] = maskin [ ' y ' ] = maskin [ ' t ' ] | maskin [ ' c ' ] ;
maskin [ ' W ' ] = maskin [ ' w ' ] = maskin [ ' a ' ] | maskin [ ' t ' ] ;
maskin [ ' S ' ] = maskin [ ' s ' ] = maskin [ ' c ' ] | maskin [ ' g ' ] ;
maskin [ ' M ' ] = maskin [ ' m ' ] = maskin [ ' a ' ] | maskin [ ' c ' ] ;
maskin [ ' K ' ] = maskin [ ' k ' ] = maskin [ ' g ' ] | maskin [ ' t ' ] ;
maskin [ ' B ' ] = maskin [ ' b ' ] = maskin [ ' c ' ] | maskin [ ' g ' ] | maskin [ ' t ' ] ;
maskin [ ' D ' ] = maskin [ ' d ' ] = maskin [ ' a ' ] | maskin [ ' g ' ] | maskin [ ' t ' ] ;
maskin [ ' H ' ] = maskin [ ' h ' ] = maskin [ ' a ' ] | maskin [ ' c ' ] | maskin [ ' t ' ] ;
maskin [ ' V ' ] = maskin [ ' v ' ] = maskin [ ' a ' ] | maskin [ ' c ' ] | maskin [ ' g ' ] ;
maskin [ ' N ' ] = maskin [ ' n ' ] = maskin [ ' - ' ] = maskin [ ' ? ' ] = 1 | 2 | 4 | 8 ;
maskin [ ' . ' ] = 127 ;
return ;
}
/********* CHECKING AND SORTING INPUT *************************/
void prepare ( int off , unsigned char intwo )
{
int a , b , c , totbytes ;
char msk , shif ;
unsigned short int * pt ;
unsigned long int * lp , written ;
void * vd ;
int trsf ;
char mybits = bitct [ intwo ] ;
char convert [ 16 ] , goesto [ 4 ] ;
if ( mybits = = 2 ) { msk = 3 ; + + nc2 ; }
if ( mybits = = 3 ) { msk = 7 ; + + nc3 ; }
if ( mybits = = 4 ) { msk = 15 ; + + nc4 ; }
for ( a = b = 1 , c = 0 ; a < 16 ; a < < = 1 , + + c )
if ( ( a & intwo ) ) {
goesto [ c ] = b ;
b < < = 1 ; }
else goesto [ c ] = 0 ;
convert [ 0 ] = msk ;
for ( a = 1 ; a < 16 ; + + a ) {
convert [ a ] = 0 ;
for ( b = 1 , c = 0 ; b < = a ; b < < = 1 , + + c )
if ( ( a & b ) ) convert [ a ] | = goesto [ c ] ;
if ( ! convert [ a ] ) convert [ a ] = msk ; }
for ( a = 0 ; a < nt ; + + a )
matbuf [ a ] [ off ] = convert [ matbuf [ a ] [ off ] ] ;
fprintf ( tmpp , " %c " , mybits ) ;
pt = ( unsigned short int * ) tmpstates ;
for ( a = ( nt + 1 ) / 2 ; a - - > 0 ; )
pt [ a ] = 0 ;
vd = ( void * ) tmpstates ;
for ( a = shif = 0 ; a < nt ; + + a ) {
lp = ( unsigned long int * ) pt ;
trsf = matbuf [ a ] [ off ] ;
trsf < < = shif ;
* lp | = trsf ;
shif + = mybits ;
if ( shif > = 16 ) {
+ + pt ;
shif - = 16 ; } }
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
written = fwrite ( vd , 1 , totbytes + 1 , tmpp ) ;
errout ( written ! = totbytes + 1 , " Sorry, cannot write to temporary file (disk full?) " ) ;
}
int domin ( int a )
{
int best = 10 ;
int b , c , misinsome ;
for ( c = 16 ; c - - ; ) {
for ( b = misinsome = 0 ; b < nt & & ! misinsome ; + + b )
if ( ! ( matbuf [ b ] [ a ] & c ) ) misinsome = 1 ;
if ( ! misinsome )
if ( best > bitct [ c ] ) best = bitct [ c ] ; }
return best - 1 ;
}
int dotrueextra ( long int a , unsigned long int intwo )
{
int st , ret = 0 , b ;
for ( b = 0 ; b < nt ; + + b ) {
st = matbuf [ b ] [ a ] ;
if ( st = = 15 ) continue ;
if ( ( st & ~ intwo ) & & ! ( st & intwo ) ) ret + + ; }
return ret ;
}
void process_chunk ( int start )
{
long int a , b , d ;
unsigned char c ;
unsigned char * cp ;
unsigned char refval ;
unsigned char intwo , inone ;
nstord = CHUNKSZ ;
if ( start + nstord > nc ) nstord = nc - start ;
for ( d = 0 ; d < nt ; + + d )
for ( b = nstord ; b - - ; )
matbuf [ d ] [ b ] = 0 ;
for ( d = 0 ; d < nt ; + + d ) {
cp = matbuf [ d ] ;
if ( isabs ! = NULL )
if ( isabs [ d ] ) { // This can only happen when reading multiple files and taxon is absent from this matrix...
for ( b = 0 ; b < nstord ; + + b )
* cp + + = 15 ;
continue ; }
# ifdef WATCOM
_fseeki64 ( inp , fpos [ d ] , SEEK_SET ) ;
# else
fsetpos ( inp , & fpos [ d ] ) ;
# endif
for ( b = 0 ; b < nstord ; + + b ) {
fscanf ( inp , " %c " , & c ) ;
if ( c = = ' [ ' ) {
refval = 0 ;
while ( 1 ) {
fscanf ( inp , " %c " , & c ) ;
if ( c = = ' ] ' ) break ;
refval | = maskin [ c ] ; } }
else
if ( c = = ' . ' ) {
errout ( ! d , " Cannot use \" . \" for the first taxon " ) ;
refval = matbuf [ d ] [ b ] ; }
else refval = maskin [ c ] ;
* cp + + = refval ; }
# ifdef WATCOM
fpos [ d ] = _ftelli64 ( inp ) ;
# else
fgetpos ( inp , & fpos [ d ] ) ;
# endif
}
for ( a = 0 ; a < nstord ; + + a ) {
inone = intwo = 0 ;
for ( b = 0 ; b < nt ; + + b ) {
if ( matbuf [ b ] [ a ] = = 15 ) continue ;
intwo | = inone & matbuf [ b ] [ a ] ;
inone | = matbuf [ b ] [ a ] ; }
if ( bitct [ intwo ] > 1 ) {
if ( inone ! = intwo )
if ( bitct [ inone ] = = 1 )
lenlag + = bitct [ ( inone & ~ intwo ) ] ;
else lenlag + = dotrueextra ( a , intwo ) ;
prepare ( a , intwo ) ; }
else {
lenlag + = domin ( a ) ;
+ + trashed ; } }
}
void popmat ( int part ) // populate matrix, for split optimization
{
unsigned long int a , b , c , shif , mybits , totbytes ;
unsigned long int inmsk , msk , ent , inshif , val , numelems ;
unsigned long int * lp ;
unsigned short int * pt ;
unsigned long int sz = piecesz * FLDS4 , numnc = 0 ;
void * vd ;
for ( b = 0 ; b < piecesz ; + + b )
for ( a = 0 ; a < nt ; + + a )
mat [ a ] [ b ] = - 1 ;
vd = ( void * ) tmpstates ;
for ( a = 0 , c = part * FLDS4 ; a < sz & & c < totnc ; + + a , + + c ) {
mybits = getc ( tmpp ) ;
if ( mybits = = 2 ) msk = 3 ;
if ( mybits = = 3 ) msk = 7 ;
if ( mybits = = 4 ) msk = 15 ;
ent = numnc / FLDS4 ;
inshif = 4 * ( numnc % FLDS4 ) ;
inmsk = ( unsigned long int ) 15 < < inshif ;
+ + numnc ;
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
numelems = fread ( vd , 1 , totbytes + 1 , tmpp ) ;
errout ( numelems ! = totbytes + 1 , " OOOPS, unexpected error loading matrix!. \n Sorry, abandoning run... " ) ;
pt = ( unsigned short int * ) tmpstates ;
for ( b = shif = 0 ; b < nt ; + + b ) {
lp = ( unsigned long int * ) pt ;
val = ( * lp > > shif ) & msk ;
mat [ b ] [ ent ] & = ~ inmsk ;
mat [ b ] [ ent ] | = val < < inshif ;
shif + = mybits ;
if ( shif > = 16 ) {
+ + pt ;
shif - = 16 ; } } }
}
void create_parts_file ( void )
{
unsigned long int atnc ;
unsigned long int written ;
void * vd ;
unsigned long int * at ;
errout ( ( partsfile = fopen ( " oblong.pts " , " wb " ) ) = = NULL , " Cannot open temporary file \" oblong.pts \" " ) ;
for ( atnc = 0 ; atnc < nc4 ; atnc + = piecesz ) {
popmat ( atnc ) ;
at = mat [ 0 ] ;
vd = ( void * ) at ;
written = fwrite ( vd , 1 , sizeof ( unsigned long int ) * nt * piecesz , partsfile ) ;
errout ( written ! = sizeof ( unsigned long int ) * nt * piecesz , " Sorry, cannot write to temporary file (disk full?) " ) ; }
fputc ( 32 , partsfile ) ;
fclose ( partsfile ) ;
partsfile = fopen ( " oblong.pts " , " rb " ) ;
return ;
}
void tmpparse ( void )
{
int atnc2 = 0 , atnc3 = 0 , atnc4 = 0 , totbytes ;
unsigned char mybits ;
int a , b , shif , inshif ;
unsigned short int * pt ;
unsigned long int * lp , inmsk , ent ;
signed long int sizis , toalloc , subtract ;
unsigned long int j , * segposit , msk , val ;
double prop , prevprop ;
totnc = nc2 + nc3 + nc4 ;
nc2 = ( nc2 + ( FLDS2 - 1 ) ) / FLDS2 ;
nc3 = ( nc3 + ( FLDS3 - 1 ) ) / FLDS3 ;
nc4 = ( nc4 + ( FLDS4 - 1 ) ) / FLDS4 ;
nc3 + = nc2 ;
nc4 + = nc3 ;
nodz = 2 * nt - 1 ;
nodlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
anx = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
lef = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
rig = ( short int * ) myalloc ( nt * sizeof ( short int ) ) ;
rig - = nt ;
sis = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
rlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
tgtlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
lassis = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
nodfork = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
ladd = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
blist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
mat = ( unsigned long int * * ) myalloc ( nodz * sizeof ( long int * ) ) ;
if ( swapcollapse | | quickbremer ) {
unsuprt = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
distoroot = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
colmoves = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
colroots = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
if ( quickbremer ) {
nodmatch = ( short int * ) myalloc ( nt * sizeof ( short int ) ) ;
nodmatch - = nt ;
abrem = ( long int * ) myalloc ( nt * sizeof ( long int ) ) ;
abrem - = nt ;
gbrem = ( double * ) myalloc ( nt * sizeof ( double ) ) ;
gbrem - = nt ;
tbrem = ( double * ) myalloc ( nt * sizeof ( double ) ) ;
tbrem - = nt ;
bremdif = ( BremTyp * ) myalloc ( nt * nt * sizeof ( BremTyp ) ) ; } }
b = numreplications ;
if ( ratcycles ) b = ( numreplications * ratcycles ) + 1 ;
memtrees = ( short int * * ) myalloc ( b * sizeof ( short int * ) ) ;
for ( a = 0 ; a < b ; + + a )
memtrees [ a ] = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
toalloc = nc4 ;
downpass = downpass_all ;
bswap = bswap_all ;
wagner = wagner_all ;
if ( ! together ) {
sizis = ( signed long int ) maxram ;
subtract = ( 12 * nodz * sizeof ( short int ) ) + // tree-buffers...
( nt * nt * sizeof ( long int ) ) + // buffer to store moves...
( b * sizeof ( short int * * ) ) + ( b * nodz * sizeof ( short int ) ) ; // trees
subtract + = nodz * sizeof ( long int * ) ;
if ( subtract > = sizis ) {
fprintf ( stderr , " \n Sorry: %.0f Bytes are insufficient to run this data set. Increase \" -m \" \n " , maxram ) ;
exit ( 0 ) ; }
toalloc = ( sizis - subtract ) / ( nodz * sizeof ( long int ) ) ;
j = ( totnc + ( FLDS4 - 1 ) ) / FLDS4 ; // needed total
numparts = 1 + ( j / toalloc ) ;
toalloc = j / numparts ;
while ( toalloc * numparts * FLDS4 < totnc )
toalloc + + ;
if ( toalloc < 1 ) {
fprintf ( stderr , " \n Sorry: %.0f Bytes are insufficient to run this data set. Increase -m \n " , maxram ) ;
exit ( 0 ) ; }
if ( toalloc * FLDS4 < totnc ) {
piecesz = toalloc ;
nc2 = nc3 = 0 ;
nc4 = totnc4 = numparts * piecesz ;
fprintf ( stderr , " %cWill process the chars in %i chunks of %li \n " , reportheader , numparts , toalloc * FLDS4 ) ; fflush ( stderr ) ;
movelist = ( unsigned long int * ) myalloc ( nt * nt * sizeof ( long int ) ) ;
downpass = downpass_split ;
wagner = wagner_split ;
bswap = bswap_split ; }
else {
fprintf ( stderr , " %cLucky you! All data can be held in RAM! No disk swaps needed... \n " , reportheader ) ; fflush ( stderr ) ;
toalloc = nc4 ;
together = 1 ; } }
if ( ratcycles | | ( ( dojak | | quickbremer ) & & together ) )
wtchg = ( unsigned long int * ) myalloc ( nc4 * sizeof ( long int ) ) ;
else if ( ( dojak | | quickbremer ) & & ! together )
wtchg = ( unsigned long int * ) myalloc ( piecesz * sizeof ( long int ) ) ;
tmpp = fopen ( " oblong.tmp " , " rb " ) ;
if ( ! together ) {
/** Alloc matrix for terms in a contiguous memory segment, for faster loading *******/
segposit = ( unsigned long int * ) myalloc ( nt * toalloc * sizeof ( long int ) ) ;
for ( a = 0 ; a < nt ; + + a ) {
mat [ a ] = segposit ;
segposit + = toalloc ; }
for ( a = nt ; a < nodz ; + + a )
mat [ a ] = ( unsigned long int * ) myalloc ( toalloc * sizeof ( long int ) ) ;
create_parts_file ( ) ;
fclose ( tmpp ) ;
remove ( " oblong.tmp " ) ;
return ; }
/** Just alloc matrix as normally done ****/
for ( a = 0 ; a < nodz ; + + a ) {
mat [ a ] = ( unsigned long int * ) myalloc ( nc4 * sizeof ( long int ) ) ;
if ( a < nt )
for ( b = 0 ; b < nc4 ; + + b ) mat [ a ] [ b ] = - 1 ; }
prevprop = 0 ;
for ( a = 0 ; a < totnc ; + + a ) {
if ( reporttoscreen & & ( a % 327 ) = = 0 ) {
prop = ( double ) a / totnc ;
if ( prop - prevprop > 0.015 ) {
fprintf ( stderr , " \r Reading matrix... %.1f%% " , 50 + ( prop * 100 / 2 ) ) ;
prevprop = prop ; } }
mybits = getc ( tmpp ) ;
if ( mybits = = 2 ) {
msk = 3 ; ent = atnc2 / FLDS2 ; inshif = 2 * ( atnc2 % FLDS2 ) ;
inmsk = ( unsigned long int ) 3 < < inshif ; + + atnc2 ; }
if ( mybits = = 3 ) {
msk = 7 ; ent = nc2 + ( atnc3 / FLDS3 ) ; inshif = 3 * ( atnc3 % FLDS3 ) ;
inmsk = ( unsigned long int ) 7 < < inshif ; + + atnc3 ; }
if ( mybits = = 4 ) {
msk = 15 ; ent = nc3 + ( atnc4 / FLDS4 ) ; inshif = 4 * ( atnc4 % FLDS4 ) ;
inmsk = ( unsigned long int ) 15 < < inshif ; + + atnc4 ; }
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
for ( b = 0 ; b < = totbytes ; + + b )
tmpstates [ b ] = getc ( tmpp ) ;
pt = ( unsigned short int * ) tmpstates ;
for ( b = shif = 0 ; b < nt ; + + b ) {
lp = ( unsigned long int * ) pt ;
val = ( * lp > > shif ) & msk ;
mat [ b ] [ ent ] & = ~ inmsk ;
mat [ b ] [ ent ] | = val < < inshif ;
shif + = mybits ;
if ( shif > = 16 ) {
+ + pt ;
shif - = 16 ; } } }
fclose ( tmpp ) ;
remove ( " oblong.tmp " ) ;
}
void get_fasta_dims ( int * mync , int * mynt )
{
int i ;
char c ;
int curnt = 0 , curnc = 0 , atnc ;
char * nampt = strsp ;
2022-04-12 08:32:27 +08:00
while ( ! feof ( inp ) & & i ! = ' > ' ) i = getc ( inp ) ; //没读到 EOF 并且不是 > 继续,直到读到 >
if ( i ! = ' > ' ) { //读到最后没有 > 报错
2022-03-08 04:38:24 +08:00
fprintf ( stderr , " \n Error reading fasta file --expected taxon name \n " ) ;
errout ( 1 , NULL ) ; }
2022-04-12 08:32:27 +08:00
while ( ! feof ( inp ) ) { //没读到 EOF 不停
fscanf ( inp , " %s " , nampt ) ; //从 inp 读到 nampt
2022-03-08 04:38:24 +08:00
i = 0 ;
+ + curnt ;
2022-04-12 08:32:27 +08:00
if ( curnt = = 1 ) //第一次
2022-03-08 04:38:24 +08:00
while ( 1 ) {
fscanf ( inp , " %c " , & c ) ;
2022-04-12 08:32:27 +08:00
if ( c = = ' > ' | | feof ( inp ) ) break ; //读到 > 或者 EOF
+ + curnc ; } // 读到 > 或者 EOF curnc ++
else { //第二次及之后
2022-03-08 04:38:24 +08:00
atnc = 0 ;
while ( 1 ) {
fscanf ( inp , " %c " , & c ) ;
if ( c = = ' > ' | | feof ( inp ) ) break ;
+ + atnc ; }
2022-04-12 08:32:27 +08:00
if ( atnc ! = curnc ) { //到最后比较
2022-03-08 04:38:24 +08:00
fprintf ( stderr , " \n Error reading fasta file --it seems not to be aligned \n " ) ;
errout ( 1 , NULL ) ; } } }
* mync = curnc ;
* mynt = curnt ;
rewind ( inp ) ;
}
void read_multifiles ( void )
{
char * nampt = strsp ;
unsigned char state ;
long int a , b , i , curnt = 0 , curnc = 0 , matched , prvnt ;
int atfile = 0 , numfiles = 0 ;
char * * taxnam ;
double prop , secs , prevprop ;
2022-04-12 08:32:27 +08:00
clock_t beg , end ; // 定义起始时间
beg = clock ( ) ; //计算时间
2022-03-08 04:38:24 +08:00
errout ( ( tmpp = fopen ( " oblong.tmp " , " wb " ) ) = = NULL , " Cannot open temporary file, \" oblong.tmp \" " ) ;
// Find out total NT and total NC
multinp = inp ;
2022-04-12 08:32:27 +08:00
maskinit ( ) ; //maskinit 函数
taxnam = ( char * * ) myalloc ( MAXMULTITAXA * sizeof ( char * ) ) ; //default MAXMULTITAXA 1000 L32
fscanf ( inp , " %s " , nampt ) ; //FILE * inp = NULL 读入文件到 nampt
while ( ! feof ( multinp ) ) { //feof 如果 EOF 返回非 0, 没有EOF 返回 0,
+ + numfiles ; //numfiles 初始为 0
if ( ( inp = fopen ( nampt , " rb " ) ) = = NULL ) { // r readbale b binary 二进制可以防止 Windows下文件问题
2022-03-08 04:38:24 +08:00
fprintf ( stderr , " \n Cannot open file %s (from list of multiple files) \n " , nampt ) ;
exit ( 0 ) ; }
2022-04-12 08:32:27 +08:00
if ( reporttoscreen ) // L60: default 1 L309:0
2022-03-08 04:38:24 +08:00
fprintf ( stderr , " %cChecking input file %3i %-40s " , reportheader , numfiles , nampt ) ;
2022-04-12 08:32:27 +08:00
if ( fasta ) get_fasta_dims ( & nc , & nt ) ; //get nc and nt
2022-03-08 04:38:24 +08:00
else {
i = fscanf ( inp , " %i %i " , & nt , & nc ) ;
if ( i < 2 ) {
fprintf ( stderr , " \n Error reading number of taxa/characters in data set %s \n Make sure it is in proper Phylip format \n " , bitct ) ;
exit ( 0 ) ; } }
if ( numfiles = = 1 ) prvnt = nt ;
errout ( ( equal_taxon_sequence & & nt ! = prvnt ) , " Under \" -e \" all data files must have the same number of taxa " ) ;
curnc + = nc ;
for ( a = 0 ; a < nt ; + + a ) {
fscanf ( inp , " %s " , nampt ) ;
if ( fasta ) + + nampt ;
if ( equal_taxon_sequence )
if ( numfiles = = 1 ) matched = - 1 ;
else matched = a ;
else
for ( i = 0 , matched = - 1 ; i < curnt & & matched < 0 ; + + i )
if ( ! strcmp ( nampt , taxnam [ i ] ) ) matched = i ;
if ( matched < 0 ) {
errout ( curnt > = MAXMULTITAXA , " Sorry, cannot deal with so many distinct taxon names (change MAXMULTITAXA and recompile) " ) ;
2022-04-12 08:32:27 +08:00
taxnam [ curnt ] = ( char * ) myalloc ( MAXNAMELEN * sizeof ( char ) ) ; //MAXNAMELEN = 1000 default
2022-03-08 04:38:24 +08:00
strcpy ( taxnam [ curnt + + ] , nampt ) ; }
if ( fasta ) - - nampt ;
for ( b = 0 ; b < nc ; + + b ) {
fscanf ( inp , " %c " , & state ) ;
if ( ! maskin [ state ] ) {
fprintf ( stderr , " \n Unrecognized symbol: %c \n " , state ) ;
exit ( 0 ) ; } } }
fclose ( inp ) ;
fscanf ( multinp , " %s " , nampt ) ; }
// Now do some initializations and process
rewind ( multinp ) ;
# ifdef WATCOM
fpos = ( long long * ) myalloc ( curnt * sizeof ( long long ) ) ;
# else
fpos = ( fpos_t * ) myalloc ( curnt * sizeof ( fpos_t ) ) ;
# endif
matbuf = ( unsigned char * * ) myalloc ( curnt * sizeof ( char * ) ) ;
isabs = ( unsigned char * ) myalloc ( curnt * sizeof ( char ) ) ;
for ( a = 0 ; a < curnt ; + + a )
if ( curnc < CHUNKSZ )
matbuf [ a ] = ( char * ) myalloc ( curnc * sizeof ( char ) ) ;
else matbuf [ a ] = ( char * ) myalloc ( CHUNKSZ * sizeof ( char ) ) ;
a = curnt ;
if ( ( a & 1 ) ) + + a ;
tmpstates = ( unsigned char * ) myalloc ( a * sizeof ( char ) ) ;
fscanf ( multinp , " %s " , nampt ) ;
while ( ! feof ( multinp ) ) {
if ( ( inp = fopen ( nampt , " rb " ) ) = = NULL ) { // it was just opened ok, but re-check just in case...
fprintf ( stderr , " \n Cannot open file %s (from list of multiple files) \n " , nampt ) ;
exit ( 0 ) ; }
if ( ! fasta ) fscanf ( inp , " %i %i " , & nt , & nc ) ;
for ( a = 0 ; a < curnt ; + + a ) isabs [ a ] = 1 ;
for ( a = 0 ; a < nt ; + + a ) {
fscanf ( inp , " %s " , nampt ) ;
if ( fasta ) + + nampt ;
if ( equal_taxon_sequence )
i = a ;
else
for ( i = 0 ; i < curnt ; + + i )
if ( ! strcmp ( nampt , taxnam [ i ] ) ) break ; // there has to be a match...
if ( fasta ) - - nampt ;
# ifdef WATCOM
fpos [ i ] = _ftelli64 ( inp ) ;
# else
fgetpos ( inp , & fpos [ i ] ) ;
# endif
isabs [ i ] = 0 ;
for ( b = 0 ; b < nc ; + + b ) fscanf ( inp , " %c " , & state ) ; }
nt = curnt ;
atfile + + ;
prevprop = 0 ;
for ( a = 0 ; a < nc ; a + = CHUNKSZ ) {
if ( reporttoscreen ) {
prop = ( double ) a / nc ;
if ( ( prop - prevprop ) > 0.009 ) {
fprintf ( stderr , " %cReading matrix %i of %i... %.1f%% " , reportheader , atfile , numfiles , prop * 100 ) ;
prevprop = prop ; } }
process_chunk ( a ) ; }
fclose ( inp ) ;
fscanf ( multinp , " %s " , nampt ) ; }
free ( matbuf ) ;
fclose ( tmpp ) ;
// Assign bufers and (if appropriate), read data in, from tmp file...
nt = curnt ;
tmpparse ( ) ;
fprintf ( stderr , " %cRead all data (%i files), %i taxa, %li chars. (%li informative) \n " , reportheader , numfiles , nt , curnc , curnc - trashed ) ;
end = clock ( ) ;
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
fprintf ( stderr , " Time to read data: %.2f secs. \n \n " , secs ) ;
// Begin writing to output file...
if ( ! usenexus ) {
if ( ! savejusttrees )
fprintf ( outp , " xread \n 'Results from oblong' \n 0 %i \n " , nt ) ; }
else fprintf ( outp , " #NEXUS \n begin trees; \n Translate \n " ) ;
for ( a = 0 ; a < nt ; + + a )
if ( ! usenexus ) {
if ( ! savejusttrees )
fprintf ( outp , " %s " , taxnam [ a ] ) ; }
else {
fprintf ( outp , " %li %s " , a + 1 , taxnam [ a ] ) ;
if ( a < nt - 1 ) fprintf ( outp , " , " ) ;
fprintf ( outp , " \n " ) ; }
if ( ! usenexus ) {
if ( ! savejusttrees )
fprintf ( outp , " ; \n collapse none; \n " ) ; }
else fprintf ( outp , " ; \n " ) ;
return ;
}
void read_input ( void )
{
char * nampt = strsp ;
unsigned char state ;
int a , b ;
char c ;
double prop , secs , prevprop ;
clock_t beg , end ;
2022-04-01 08:18:58 +08:00
strcpy ( nampt , " " ) ; // nampt = " "
if ( treeinp ! = NULL ) { // int is NULL (L34), but can specific the tree file (L273)
while ( strcmp ( nampt , " tread " ) & & ! feof ( treeinp ) ) //feof 如果 EOF 返回非 0, 没有EOF 返回 0, strcmp 0 相同, 非0 则执行,即 非 tread 并且 没有 EOF 且执行
fscanf ( treeinp , " %7s " , nampt ) ; // 输入长度为 7 的 string
2022-03-08 04:38:24 +08:00
errout ( feof ( treeinp ) , " String \" tread \" not found in tree file " ) ;
fscanf ( treeinp , " %c " , & c ) ;
errout ( c ! = 39 , " Number of trees should be indicated in the title of tree-file " ) ;
errout ( ! fscanf ( treeinp , " %i " , & numreplications ) , " Number of trees should be indicated in the title of tree-file " ) ;
while ( getc ( treeinp ) ! = 39 ) ; }
if ( multifiles ) {
read_multifiles ( ) ;
return ; }
beg = clock ( ) ;
if ( ! felipe & & ! fasta ) {
while ( strcmp ( nampt , " xread " ) ) {
fscanf ( inp , " %7s " , nampt ) ;
if ( feof ( inp ) ) break ; }
errout ( feof ( inp ) , " String \" xread \" not found in data set \n \n Maybe it's in Phylip ( \" -p \" ) or Fasta ( \" -f \" ) format? " ) ;
fscanf ( inp , " %c " , & c ) ;
if ( c = = 39 )
while ( getc ( inp ) ! = 39 & & ! feof ( inp ) ) ;
else ungetc ( c , inp ) ; }
if ( fasta ) get_fasta_dims ( & nc , & nt ) ;
else fscanf ( inp , " %i %i " , & nc , & nt ) ;
if ( felipe ) {
a = nc ;
nc = nt ;
nt = a ; }
errout ( ! nc , " Illegal number of characters " ) ;
errout ( nt < 4 , " Illegal number of taxa " ) ;
# ifdef WATCOM
fpos = ( long long * ) myalloc ( nt * sizeof ( long long ) ) ;
# else
fpos = ( fpos_t * ) myalloc ( nt * sizeof ( fpos_t ) ) ;
# endif
matbuf = ( unsigned char * * ) myalloc ( nt * sizeof ( char * ) ) ;
a = nt ;
if ( ( a & 1 ) ) + + a ;
tmpstates = ( unsigned char * ) myalloc ( a * sizeof ( char ) ) ;
maskinit ( ) ;
// Get file positions for first character for each taxon...
if ( ! usenexus ) {
if ( ! savejusttrees )
fprintf ( outp , " xread \n 'Results from oblong' \n 0 %i \n " , nt ) ; }
else fprintf ( outp , " #NEXUS \n begin trees; \n Translate \n " ) ;
for ( a = 0 ; a < nt ; + + a ) {
fscanf ( inp , " %s " , nampt ) ;
if ( fasta ) + + nampt ;
if ( ! usenexus ) {
if ( ! savejusttrees )
fprintf ( outp , " %s " , nampt ) ; }
else {
fprintf ( outp , " %i %s " , a + 1 , nampt ) ;
if ( a < nt - 1 ) fprintf ( outp , " , " ) ;
fprintf ( outp , " \n " ) ; }
if ( fasta ) - - nampt ;
# ifdef WATCOM
fpos [ a ] = _ftelli64 ( inp ) ;
# else
fgetpos ( inp , & fpos [ a ] ) ;
# endif
if ( nc < CHUNKSZ )
matbuf [ a ] = ( char * ) myalloc ( nc * sizeof ( char ) ) ;
else matbuf [ a ] = ( char * ) myalloc ( CHUNKSZ * sizeof ( char ) ) ;
for ( b = 0 ; b < nc ; + + b ) {
fscanf ( inp , " %c " , & state ) ;
if ( state = = ' [ ' ) {
while ( 1 ) {
fscanf ( inp , " %c " , & state ) ;
if ( state = = ' ] ' ) break ;
if ( ! maskin [ state ] ) {
fprintf ( stderr , " \n Unrecognized symbol in matrix: %c \n " , state ) ;
exit ( 0 ) ; } } }
else
if ( ! maskin [ state ] ) {
fprintf ( stderr , " \n Unrecognized symbol in matrix: %c \n " , state ) ;
exit ( 0 ) ; } } }
if ( ! usenexus ) {
if ( ! savejusttrees )
fprintf ( outp , " ; \n collapse none; \n " ) ; }
else fprintf ( outp , " ; \n " ) ;
// Read pieces, and output temporary file as you go...
errout ( ( tmpp = fopen ( " oblong.tmp " , " wb " ) ) = = NULL , " Cannot open temporary file, \" oblong.tmp \" " ) ;
prevprop = 0 ;
for ( a = 0 ; a < nc ; a + = CHUNKSZ ) {
if ( reporttoscreen ) {
prop = ( double ) a / nc ;
if ( together ) prop / = 2 ;
if ( ( prop - prevprop ) > 0.009 ) {
fprintf ( stderr , " \r Reading matrix... %.1f%% " , prop * 100 ) ;
prevprop = prop ; } }
process_chunk ( a ) ; }
free ( matbuf ) ;
fclose ( tmpp ) ;
// Assign bufers and (if appropriate), read data in, from tmp file...
tmpparse ( ) ;
fprintf ( stderr , " %cReading matrix, %i taxa, %i chars. (%li informative) \n " , reportheader , nt , nc , nc - trashed ) ;
fclose ( inp ) ;
end = clock ( ) ;
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
fprintf ( stderr , " Time to read data: %.2f secs. \n \n " , secs ) ;
return ;
}
/************ GENERAL TREE-HANDLING STUFF ************/
void reset_nodz ( short int * anpt )
{
short int * bu = rlist ;
short int * tbu = tgtlist ;
int a , b , atno , lg ;
for ( a = nodz ; a - - > nt ; ) tbu [ a ] = - 1 ;
for ( a = nt ; a - - ; ) tbu [ a ] = a ;
atno = nt ;
lg = anpt [ nt ] ;
tbu [ nt ] = nt ;
tbu [ lg ] = lg ;
for ( a = 0 ; a < nt ; + + a ) {
b = anpt [ a ] ;
while ( tbu [ b ] < 0 ) {
tbu [ b ] = + + atno ;
b = anpt [ b ] ; } }
for ( a = lg ; a - - ; )
bu [ tbu [ a ] ] = tbu [ anpt [ a ] ] ;
for ( a = lg ; a - - ; ) anpt [ a ] = bu [ a ] ;
anpt [ nt ] = + + atno ;
return ;
}
int equal_trees ( short int * a , short int * b )
{
int c ;
for ( c = 0 ; c < nodz ; + + c )
if ( * a + + ! = * b + + ) return 0 ;
return 1 ;
}
void storetree ( void )
{
int b , isdup = 0 ;
for ( b = 0 ; b < nodz ; + + b ) memtrees [ numtrees ] [ b ] = anx [ b ] ;
reset_nodz ( memtrees [ numtrees ] ) ;
if ( ! dojak & & ! saveall )
for ( b = 0 ; b < numtrees & & ! isdup ; + + b )
if ( equal_trees ( memtrees [ b ] , memtrees [ numtrees ] ) ) isdup = 1 ;
if ( ! isdup )
+ + numtrees ;
}
void randomlist ( int num , short int * dali , int firstis )
{ short int * bakrand ;
int a , todo , i , n ;
if ( ranseed ) srand ( ranseed ) ;
for ( a = 0 ; a < num ; + + a ) dali [ a ] = a ;
if ( firstis = = 0 ) {
todo = num - 1 ;
+ + dali ; // skip 0
bakrand = dali + todo ;
for ( a = 1 ; a < num ; + + a ) {
i = rand ( ) % todo - - ;
n = * - - bakrand ;
* bakrand = dali [ i ] ;
dali [ i ] = n ; } }
else {
todo = num ;
bakrand = dali + todo ;
for ( a = 0 ; a < num ; + + a ) {
i = rand ( ) % todo - - ;
n = * - - bakrand ;
* bakrand = dali [ i ] ;
dali [ i ] = n ; } }
}
void nodzert ( int what , int where )
{
int a , b , c ;
a = anx [ where ] ;
c = sis [ where ] ;
b = anx [ what ] = anx [ where ] = anx [ nt ] + + ;
anx [ b ] = a ;
sis [ what ] = where ;
sis [ where ] = what ;
sis [ c ] = b ;
sis [ b ] = c ;
lef [ a ] = c ;
rig [ a ] = b ;
lef [ b ] = where ;
rig [ b ] = what ;
}
int savebremervals = 0 ;
void savant ( int which )
{
int a , didsome = 0 ;
if ( savebremervals & & which ! = nt ) {
if ( gbrem [ which ] > 0 | | shownegbremers ) fprintf ( outp , " ( " ) ; }
else
fprintf ( outp , " ( " ) ;
for ( a = 0 ; a < anx [ nt ] ; + + a )
if ( anx [ a ] = = which ) {
if ( usenexus & & didsome )
fprintf ( outp , " , " ) ;
didsome = 1 ;
if ( a < nt )
if ( usenexus ) fprintf ( outp , " %i " , a + 1 ) ;
else fprintf ( outp , " %i " , a ) ;
else savant ( a ) ; }
if ( savebremervals & & which ! = nt & & anx [ which ] ! = nt ) {
if ( gbrem [ which ] > 0 )
if ( bremertype = = 0 ) fprintf ( outp , " )%s%.0f " , legindic , gbrem [ which ] ) ;
else fprintf ( outp , " ) % s % .0f " , legindic , gbrem [ which ] * 100 ) ;
else if ( shownegbremers )
if ( bremertype = = 0 ) fprintf ( outp , " )%s%.0f " , legindic , gbrem [ which ] ) ;
else fprintf ( outp , " ) % s ( % .0f ) " , legindic , gbrem [ which ] * 100 ) ; }
else
fprintf ( outp , " ) " ) ;
}
void savetrees ( void )
{
short int * anxwas = anx ;
int i ;
if ( quickbremer )
fprintf ( stderr , " \n Saving supports and %i trees to output file... \n \n " , numtrees ) ;
else
fprintf ( stderr , " \n Saving %i trees to output file... \n \n " , numtrees ) ;
if ( ! usenexus )
fprintf ( outp , " tread '%i Oblong trees' \n " , numtrees ) ;
for ( i = 0 ; i < numtrees ; + + i ) {
if ( quickbremer & & ! i ) savebremervals = 1 ;
if ( ! usenexus ) {
if ( i ) fprintf ( outp , " * \n " ) ; }
else fprintf ( outp , " tree OBLONG_%i = [&U] " , i + 1 ) ;
anx = memtrees [ i ] ;
savant ( nt ) ;
if ( usenexus )
fprintf ( outp , " ; \n " ) ;
if ( quickbremer & & ! i ) {
savebremervals = quickbremer = 0 ;
if ( ! usenexus ) fprintf ( outp , " * \n " ) ;
i = - 1 ; } }
anx = anxwas ;
if ( ! usenexus )
fprintf ( outp , " ; \n proc/; \n " ) ;
else fprintf ( outp , " End ; \n " ) ;
}
int binoptlist ( short int * list , short int * tt )
{
short int * x , * y ;
x = y = list ;
* y + + = nt ;
while ( x < y ) {
if ( lef [ * x ] > = nt ) * y + + = lef [ * x ] ;
if ( rig [ * x ] > = nt ) * y + + = rig [ * x ] ;
+ + x ; }
return y - list ;
}
void initnet ( int un , int dos , int tres )
{
anx [ un ] = anx [ nt + 1 ] = nt ;
anx [ dos ] = anx [ tres ] = nt + 1 ;
anx [ nt ] = nt + 2 ;
lef [ nt ] = un ;
rig [ nt ] = sis [ un ] = nt + 1 ;
lef [ nt + 1 ] = dos ;
rig [ nt + 1 ] = sis [ dos ] = tres ;
sis [ nt + 1 ] = un ;
sis [ tres ] = dos ;
}
/************ OPTIMIZATION STUFF *********************/
void dowtfilter ( int prob , int keepprevious , int dalim )
{
int i , j ;
unsigned long int msk ;
for ( i = 0 ; i < nc2 ; + + i ) {
if ( keepprevious )
wtchg [ i ] | = wtchg [ i ] < < 1 ;
else wtchg [ i ] = LO2 ;
for ( j = 0 , msk = 1 ; j < FLDS2 ; + + j , msk < < = 2 )
if ( ( rand ( ) % 100 ) < prob )
wtchg [ i ] & = ~ msk ; }
for ( ; i < nc3 ; + + i ) {
if ( keepprevious )
wtchg [ i ] | = wtchg [ i ] > > 1 ;
else wtchg [ i ] = HI3 ;
for ( j = 0 , msk = 4 ; j < FLDS3 ; + + j , msk < < = 3 )
if ( ( rand ( ) % 100 ) < prob )
wtchg [ i ] & = ~ msk ; }
for ( ; i < dalim ; + + i ) {
if ( keepprevious )
wtchg [ i ] | = wtchg [ i ] > > 1 ;
else wtchg [ i ] = HI4 ;
for ( j = 0 , msk = 8 ; j < FLDS4 ; + + j , msk < < = 4 )
if ( ( rand ( ) % 100 ) < prob )
wtchg [ i ] & = ~ msk ; }
}
void resetwtfilter ( void )
{
int i ;
for ( i = 0 ; i < nc2 ; + + i )
wtchg [ i ] = ( wtchg [ i ] > > 1 ) & LO2 ;
for ( ; i < nc3 ; + + i )
wtchg [ i ] = ( wtchg [ i ] < < 1 ) & HI3 ;
for ( ; i < nc4 ; + + i )
wtchg [ i ] = ( wtchg [ i ] < < 1 ) & HI4 ;
}
void storewtfilter ( void )
{
int i ;
for ( i = 0 ; i < nc2 ; + + i )
wtchg [ i ] | = ( wtchg [ i ] < < 1 ) ;
for ( ; i < nc3 ; + + i )
wtchg [ i ] | = ( wtchg [ i ] > > 1 ) ;
for ( ; i < nc4 ; + + i )
wtchg [ i ] | = ( wtchg [ i ] > > 1 ) ;
}
unsigned long long downpass_all ( void )
{
int nunos , len , a , nod , c ;
unsigned long long totlen ;
unsigned long int * lp , * rp , * np , x , y , z ;
unsigned long int * wts ;
nunos = binoptlist ( nodlist , anx ) ;
totlen = lenlag ;
for ( a = nunos ; a - - ; ) {
nod = nodlist [ a ] ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
wts = wtchg - 1 ;
len = 0 ;
for ( c = 0 ; c < nc2 ; + + c ) {
x = * lp + + & * rp + + ;
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) > > 1 ) ) ;
if ( changewts ) y & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( y | ( y > > 15 ) ) ] + bitct [ 65535 & ( ( y > > 32 ) | ( y > > 47 ) ) ] ;
# else
len + = bitct [ 65535 & ( y | ( y > > 15 ) ) ] ;
# endif
y | = y < < 1 ;
* np + + = x | y ; }
for ( ; c < nc3 ; + + c ) {
x = * lp & * rp ;
y = * lp + + | * rp + + ;
z = HI3 & ~ ( x | ( ( x & LO3 ) + LO3 ) ) ;
if ( changewts ) z & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( z | ( z > > 16 ) ) ] + bitct [ 65535 & ( z > > 32 | ( z > > 49 ) ) ] ;
# else
len + = bitct [ 65535 & ( z | ( z > > 17 ) ) ] ;
# endif
z | = z - ( z > > 2 ) ;
* np + + = x | ( z & y ) ; }
for ( ; c < nc4 ; + + c ) {
x = * lp & * rp ;
y = * lp + + | * rp + + ;
z = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts ) z & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( z | ( z > > 17 ) | ( z > > 34 ) | ( z > > 51 ) ) ] ;
# else
len + = bitct [ 65535 & ( z | ( z > > 17 ) ) ] ;
# endif
z | = z - ( z > > 3 ) ;
* np + + = x | ( z & y ) ; }
totlen + = len ; }
return totlen ;
}
void loadmat ( int part )
{
void * vd ;
unsigned long int * seg ;
unsigned long int numelems ;
if ( ! part ) rewind ( partsfile ) ;
seg = mat [ 0 ] ;
vd = ( void * ) seg ;
numelems = fread ( vd , 1 , sizeof ( unsigned long int ) * nt * piecesz , partsfile ) ;
errout ( numelems ! = sizeof ( unsigned long int ) * nt * piecesz , " OOOPS, unexpected error loading matrix!. \n Sorry, abandoning run... " ) ;
if ( dojak & & ! ratcycles & & ! together ) {
if ( ! part ) srand ( ranseed ) ;
dowtfilter ( jakprob , 0 , piecesz ) ; }
}
unsigned long long downpass_split ( void )
{
int nunos , len , a , nod , c ;
unsigned long long totlen ;
unsigned long int * lp , * rp , * np , x , y , z ;
unsigned long int atnc ;
unsigned long int * wts ;
nunos = binoptlist ( nodlist , anx ) ;
totlen = lenlag ;
for ( atnc = 0 ; atnc < nc4 ; atnc + = piecesz ) {
loadmat ( atnc ) ;
for ( a = nunos ; a - - ; ) {
nod = nodlist [ a ] ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
if ( ratcycles ) wts = wtchg + atnc - 1 ;
else wts = wtchg - 1 ;
len = 0 ;
for ( c = 0 ; c < piecesz ; + + c ) {
x = * lp & * rp ;
y = * lp + + | * rp + + ;
z = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts ) z & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( z | ( z > > 17 ) | ( z > > 34 ) | ( z > > 51 ) ) ] ;
# else
len + = bitct [ 65535 & ( z | ( z > > 17 ) ) ] ;
# endif
z | = z - ( z > > 3 ) ;
* np + + = x | ( z & y ) ; }
totlen + = len ;
} }
return totlen ;
}
/********* SUPPORT FUNCTIONS FOR WAGNER AND BRANCH-SWAPPING ************************/
void zert ( int whatp , int wherep )
{
int a , b , c ;
if ( wherep = = 0 ) {
anx [ lef [ nt ] = sis [ whatp ] = 0 ] = nt ;
anx [ rig [ nt ] = sis [ 0 ] = whatp ] = nt ;
return ; }
a = anx [ wherep ] ;
c = sis [ wherep ] ;
b = anx [ wherep ] = anx [ whatp ] ;
anx [ b ] = a ;
sis [ sis [ wherep ] = rig [ b ] = whatp ] = lef [ b ] = wherep ;
sis [ lef [ a ] = c ] = b ;
sis [ rig [ a ] = b ] = c ;
}
void unzert ( int whatp )
{
int a , b , c ;
a = anx [ whatp ] ;
if ( a = = nt ) {
lef [ anx [ 0 ] = nt ] = rig [ nt ] = sis [ 0 ] = 0 ;
anx [ whatp ] = whatp ;
return ; }
b = sis [ whatp ] ;
c = sis [ a ] ;
a = anx [ a ] ;
lef [ a ] = b ;
rig [ anx [ b ] = anx [ c ] = a ] = sis [ sis [ c ] = b ] = c ;
}
void tbreroot ( int new , int old )
{
short int * an = anx ;
int i , j , k , m , n , nx , first ;
if ( an [ new ] = = old | | old < nt ) return ;
j = nx = first = an [ i = new ] ;
n = sis [ new ] ;
while ( nx ! = old ) {
k = i ;
i = j ;
j = nx ;
m = n ;
n = sis [ j ] ;
if ( k = = lef [ i ] ) { lef [ i ] = j ; sis [ sis [ rig [ i ] ] = j ] = rig [ i ] ; }
else { rig [ i ] = j ; sis [ sis [ lef [ i ] ] = j ] = lef [ i ] ; }
nx = an [ j ] ;
an [ j ] = i ; }
an [ lef [ j ] = m ] = an [ rig [ j ] = n ] = j ;
sis [ sis [ n ] = m ] = n ;
an [ lef [ old ] = new ] = an [ rig [ old ] = first ] = old ;
sis [ sis [ new ] = first ] = new ;
return ;
}
void binmksis ( short int * tt )
{
int a , b , c , lownod = tt [ nt ] ;
for ( a = nodz ; a - - ; ) nodfork [ a ] = 0 ;
for ( a = nt ; a - - ; ) {
b = tt [ c = a ] ;
while ( b ! = lownod ) {
if ( ! nodfork [ b ] )
lef [ b ] = lassis [ b ] = c ;
else
sis [ sis [ c ] = lassis [ b ] ] = rig [ b ] = c ;
if ( + + nodfork [ b ] > 1 ) break ;
c = b ;
b = tt [ b ] ; } }
return ;
}
int divoptlist ( short int * list , int clip , int sisis )
{
short int * x , * y ;
x = y = list ;
if ( sisis ) * y + + = nt ;
if ( clip > nt ) * y + + = clip ;
while ( x < y ) {
if ( lef [ * x ] > = nt ) * y + + = lef [ * x ] ;
if ( rig [ * x ] > = nt ) * y + + = rig [ * x ] ;
+ + x ; }
return y - list ;
}
/*********** HANDLE SWAP-COLLAPSING FOR JACKKNIFING *************/
void delnodz ( void )
{ short int * tmp , * cs ;
int a , b , at , last ;
char stopped ;
short int * an = anx ;
short int * dellist = unsuprt ;
tmp = nodlist ;
cs = lef ;
dellist [ nt ] = 0 ;
at = nt + 1 ;
for ( a = nt ; a - - ; ) cs [ a ] = a ;
for ( a = nt ; a < nodz ; + + a ) cs [ a ] = 0 ;
cs [ nt ] = nt ;
for ( a = nodz ; a - - ; ) tmp [ a ] = 0 ;
for ( a = nt ; a - - ; ) {
b = last = a ;
stopped = 0 ;
while ( b ! = nt ) {
b = an [ b ] ;
if ( cs [ b ] ) { tmp [ cs [ last ] ] = cs [ b ] ; stopped = 1 ; break ; }
else
if ( ! dellist [ b ] ) {
tmp [ cs [ last ] ] = at ;
cs [ last = b ] = at + + ; }
}
if ( ! stopped ) tmp [ cs [ last ] ] = nt ; }
for ( a = nodz ; a - - ; ) an [ a ] = tmp [ a ] ;
an [ nt ] = at ;
}
void filldistoroot ( void )
{
short int * x = nodlist , * y ;
binmksis ( anx ) ;
distoroot [ nt ] = 0 ;
nodlist [ 0 ] = lef [ nt ] ;
nodlist [ 1 ] = rig [ nt ] ;
x = nodlist - 1 ;
y = nodlist + 2 ;
while ( + + x < y ) {
distoroot [ * x ] = distoroot [ anx [ * x ] ] + 1 ;
if ( * x > nt ) {
* y + + = lef [ * x ] ;
* y + + = rig [ * x ] ; } }
return ;
}
void collapse_with_swaps ( int clip , int sisis )
{
int a , b , c , comnod ;
int da , db , dest , rootis ;
zert ( clip , sisis ) ;
for ( c = 0 ; c < numcolmoves ; + + c ) {
dest = colmoves [ c ] ;
if ( dest = = sisis ) continue ;
da = distoroot [ a = clip ] ;
db = distoroot [ b = dest ] ;
while ( distoroot [ a ] > db ) a = anx [ a ] ;
while ( distoroot [ b ] > da ) b = anx [ b ] ;
while ( a ! = b ) { a = anx [ a ] ; b = anx [ b ] ; }
comnod = a ;
if ( comnod ! = dest )
for ( a = anx [ dest ] ; a ! = comnod ; a = anx [ a ] ) unsuprt [ a ] = 1 ;
for ( b = anx [ clip ] ; b ! = comnod ; b = anx [ b ] ) unsuprt [ b ] = 1 ; }
if ( ! use_spr & & clip > nt )
for ( c = 0 ; c < numcolroots ; + + c ) {
rootis = colroots [ c ] ;
if ( anx [ rootis ] ! = clip ) {
a = anx [ rootis ] ;
while ( a ! = clip ) {
unsuprt [ a ] = 1 ;
a = anx [ a ] ; } } }
unzert ( clip ) ;
return ;
}
void collapse_the_tree ( void )
{
int i ;
if ( ! swapcollapse ) return ;
for ( i = 0 ; i < nodz ; + + i ) unsuprt [ i ] = 0 ;
filldistoroot ( ) ;
recordgoodmoves = 1 ;
bswap ( ) ;
recordgoodmoves = 0 ;
delnodz ( ) ;
}
/*************** WAGNER AND BRANCH-SWAPPING FOR SINGLE BUFFER ****************/
void fullpass_all ( int clip , int sisis )
{
int nunos , a , nod , c ;
unsigned long int * lp , * rp , * np , * ap , x , y , z , bak ;
nunos = divoptlist ( nodlist , clip , sisis ) ;
for ( a = nunos ; a - - ; ) {
nod = nodlist [ a ] ;
if ( nodtoskip = = nod ) continue ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
for ( c = 0 ; c < nc2 ; + + c ) {
x = * lp + + & * rp + + ;
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) > > 1 ) ) ;
y | = y < < 1 ;
* np + + = x | y ; }
for ( ; c < nc3 ; + + c ) {
x = * lp & * rp ;
y = * lp + + | * rp + + ;
z = HI3 & ~ ( x | ( ( x & LO3 ) + LO3 ) ) ;
z | = z - ( z > > 2 ) ;
* np + + = x | ( z & y ) ; }
for ( ; c < nc4 ; + + c ) {
x = * lp & * rp ;
y = * lp + + | * rp + + ;
z = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
z | = z - ( z > > 3 ) ;
* np + + = x | ( z & y ) ; } }
for ( a = 0 ; + + a < nunos ; ) {
nod = nodlist [ a ] ;
if ( nod = = clip | | anx [ nod ] = = nodtoskip | | nod = = nodtoskip ) continue ;
lp = mat [ lef [ nod ] ] - 1 ;
rp = mat [ rig [ nod ] ] - 1 ;
np = mat [ nod ] - 1 ;
ap = mat [ anx [ nod ] ] - 1 ;
for ( c = 0 ; c < nc2 ; + + c ) {
bak = * + + lp | * + + rp ;
x = * + + ap & ~ * + + np ;
x = ( x & LO2 ) | ( ( x & HI2 ) > > 1 ) ;
x | = ( x < < 1 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }
for ( ; c < nc3 ; + + c ) {
x = * + + lp & * + + rp ;
z = * lp | * rp ;
y = HI3 & ~ ( x | ( ( x & LO3 ) + LO3 ) ) ;
y | = y - ( y > > 2 ) ;
bak = z | y ;
x = * + + ap & ~ * + + np ;
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
x | = x - ( x > > 2 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }
for ( ; c < nc4 ; + + c ) {
x = * + + lp & * + + rp ;
z = * lp | * rp ;
y = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
y | = y - ( y > > 3 ) ;
bak = z | y ;
x = * + + ap & ~ * + + np ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x | = x - ( x > > 3 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; } }
nodtoskip = 0 ;
}
unsigned long int scorediff_all ( short int clipis , short int srcis , short int tgtis )
{
unsigned long int len = 0 ;
unsigned long int * tgt , * tgta , * src , * srca ;
unsigned long int untgt , unsrc ;
unsigned long int x , y , z ;
unsigned long int * wts ;
long int a ;
if ( clipis < nt ) srca = mat [ clipis ] - 1 ;
else srca = mat [ anx [ srcis ] ] - 1 ;
src = mat [ srcis ] - 1 ;
if ( ! tgtis ) tgta = mat [ 0 ] - 1 ;
else tgta = mat [ anx [ tgtis ] ] - 1 ;
tgt = mat [ tgtis ] - 1 ;
wts = wtchg - 1 ;
for ( a = 0 ; a < nc2 ; + + a ) {
unsrc = * + + src | * + + srca ;
if ( srcis < nt & & clipis > nt ) {
x = * srca & ~ * src ;
x = ( x & LO2 ) | ( ( x & HI2 ) > > 1 ) ;
x | = ( x < < 1 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * + + tgt | * + + tgta ;
if ( tgtis & & tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x & LO2 ) | ( ( x & HI2 ) > > 1 ) ;
x | = ( x < < 1 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) > > 1 ) ) ;
if ( changewts )
if ( recorddn ) {
+ + wts ;
if ( recorddn = = 1 ) * wts = y ;
x = * wts & y ;
z = x ^ y ;
# ifdef BITS64
gmix + = bitct [ 65535 & ( x | ( x > > 15 ) ) ] + bitct [ 65535 & ( ( x > > 32 ) | ( x > > 47 ) ) ] ;
gup + = bitct [ 65535 & ( z | ( z > > 15 ) ) ] + bitct [ 65535 & ( ( z > > 32 ) | ( z > > 47 ) ) ] ;
# else
gmix + = bitct [ 65535 & ( x | ( x > > 15 ) ) ] ;
gup + = bitct [ 65535 & ( z | ( z > > 15 ) ) ] ;
# endif
continue ; }
else
y & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( y | ( y > > 15 ) ) ] + bitct [ 65535 & ( ( y > > 32 ) | ( y > > 47 ) ) ] ;
# else
len + = bitct [ 65535 & ( y | ( y > > 15 ) ) ] ;
# endif
if ( len > lenlimit ) return len ; }
for ( ; a < nc3 ; + + a ) {
unsrc = * + + src | * + + srca ;
if ( srcis < nt & & clipis > nt ) {
x = * srca & ~ * src ;
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
x | = x - ( x > > 2 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * + + tgt | * + + tgta ;
if ( tgtis & & tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
x | = x - ( x > > 2 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
z = HI3 & ~ ( x | ( ( x & LO3 ) + LO3 ) ) ;
if ( changewts )
if ( recorddn ) {
+ + wts ;
if ( recorddn = = 1 ) * wts = z ;
y = * wts & z ;
x = y ^ z ;
# ifdef BITS64
gmix + = bitct [ 65535 & ( y | ( y > > 16 ) ) ] + bitct [ 65535 & ( y > > 32 | ( y > > 49 ) ) ] ;
gup + = bitct [ 65535 & ( x | ( x > > 16 ) ) ] + bitct [ 65535 & ( x > > 32 | ( x > > 49 ) ) ] ;
# else
gmix + = bitct [ 65535 & ( y | ( y > > 17 ) ) ] ;
gup + = bitct [ 65535 & ( x | ( x > > 17 ) ) ] ;
# endif
continue ; }
else
z & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( z | ( z > > 16 ) ) ] + bitct [ 65535 & ( z > > 32 | ( z > > 49 ) ) ] ;
# else
len + = bitct [ 65535 & ( z | ( z > > 17 ) ) ] ;
# endif
if ( len > lenlimit ) return len ; }
for ( ; a < nc4 ; + + a ) {
unsrc = * + + src | * + + srca ;
if ( srcis < nt & & clipis > nt ) {
x = * srca & ~ * src ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x | = x - ( x > > 3 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * + + tgt | * + + tgta ;
if ( tgtis & & tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x | = x - ( x > > 3 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
z = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts )
if ( recorddn ) {
+ + wts ;
if ( recorddn = = 1 ) * wts = z ;
y = * wts & z ;
x = y ^ z ;
# ifdef BITS64
gmix + = bitct [ 65535 & ( y | ( y > > 17 ) | ( y > > 34 ) | ( y > > 51 ) ) ] ;
gup + = bitct [ 65535 & ( x | ( x > > 17 ) | ( x > > 34 ) | ( x > > 51 ) ) ] ;
# else
gmix + = bitct [ 65535 & ( y | ( y > > 17 ) ) ] ;
gup + = bitct [ 65535 & ( x | ( x > > 17 ) ) ] ;
# endif
continue ; }
else
z & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( z | ( z > > 17 ) | ( z > > 34 ) | ( z > > 51 ) ) ] ;
# else
len + = bitct [ 65535 & ( z | ( z > > 17 ) ) ] ;
# endif
if ( len > lenlimit ) return len ; }
return len ;
}
unsigned long long wagner_all ( void )
{
int a , added , fac , basnod , bestpos , clip , tgt ;
unsigned long int lim , diff ;
unsigned long long len ;
short int * inlist = nodfork ;
unsigned long int seedwas = ranseed ;
randomlist ( nt , ladd , 0 ) ;
initnet ( ladd [ 0 ] , ladd [ 1 ] , ladd [ 2 ] ) ;
blist [ 0 ] = ladd [ 1 ] ;
blist [ 1 ] = ladd [ 2 ] ;
blist [ 2 ] = nt + 1 ;
added = 2 ;
fac = 3 ;
basnod = nt + 2 ;
ranseed = 0 ;
while ( + + added < nt ) {
if ( random_starts ) {
nodzert ( ladd [ added ] , blist [ rand ( ) % fac ] ) ;
blist [ fac + + ] = ladd [ added ] ;
blist [ fac + + ] = basnod + + ;
continue ; }
randomlist ( fac , inlist , 1 ) ;
clip = ladd [ added ] ;
fullpass_all ( clip , clip ) ;
lim = lenlimit = VERYBIG ;
if ( verbose )
fprintf ( stderr , " \r Rep %3i %4i taxa added " , reportreplication , added ) ;
for ( a = 0 ; a < fac ; + + a ) {
tgt = blist [ inlist [ a ] ] ;
diff = scorediff_all ( clip , clip , tgt ) ;
if ( diff < lim ) {
lim = diff ;
lenlimit = lim - 1 ;
bestpos = tgt ; } }
nodzert ( clip , bestpos ) ;
blist [ fac + + ] = clip ;
blist [ fac + + ] = basnod + + ; }
len = downpass_all ( ) ;
ranseed = seedwas ;
return len ;
}
unsigned long long bswap_all ( void )
{
unsigned long int diff , lim , nod ;
unsigned long long len , achk ;
int clip , bestpos , begpos , sisis , nunos , bestroot , begroot , i , isdone ;
int curroot , tgt , rounds , maxrounds = 0 , subsdone = 0 ;
short int * rdo , * rdone , * tgtdo , * tgtdone ;
double prop = 0 , secs ;
static int clockinit = 0 ;
static clock_t last , now ;
BremTyp * brempt ;
anx [ nt ] = nodz ;
binmksis ( anx ) ;
if ( ! rig [ nt ] ) {
rig [ nt ] = lef [ nt ] ;
lef [ nt ] = 0 ; }
nunos = binoptlist ( nodlist , anx ) ;
len = downpass_all ( ) ;
clip = 0 ;
if ( ! clockinit ) {
clockinit = 1 ;
last = clock ( ) ; }
if ( ratcycles & & verbose )
prop = ( double ) ratcyclesdone / ratcycles ;
for ( rounds = 1 ; rounds < nodz ; + + rounds ) {
if ( + + clip > = nodz ) clip = 1 ;
if ( clip = = nt ) continue ;
if ( rounds > maxrounds ) maxrounds = rounds ;
now = clock ( ) ;
secs = ( double ) ( now - last ) / CLOCKS_PER_SEC ;
if ( secs > reportperiod & & ! ratcheting & & ! doingbremer ) {
last = clock ( ) ;
prop = ( double ) ( maxrounds + 1 ) / nodz ;
if ( ratcycles )
prop = ( double ) ratcyclesdone / ratcycles ;
if ( recordgoodmoves )
fprintf ( stderr , " %cRep %3i collapse LEN %-10lli %12lli rearrangs " , reportheader , reportreplication , len , rearrangs ) ;
else
fprintf ( stderr , " %cRep %3i %6.2f%% LEN %-10lli %12lli rearrangs " , reportheader , reportreplication , prop * 100 , len , rearrangs ) ; }
unzert ( clip ) ;
begpos = bestpos = sisis = sis [ clip ] ;
if ( sisis ) nodtoskip = anx [ clip ] ;
fullpass_all ( clip , sisis ) ;
lenlimit = VERYBIG ;
if ( doingbremer ) {
recorddn = changewts = 1 ;
gup = gmix = 0 ; }
if ( clip < nt )
lim = scorediff_all ( clip , clip , sisis ) ;
else
if ( lef [ clip ] > nt )
lim = scorediff_all ( clip , lef [ clip ] , sisis ) ;
else lim = scorediff_all ( clip , rig [ clip ] , sisis ) ;
if ( doingbremer ) {
brempt = bremdif ;
recorddn = 2 ;
nitdn = gmix ; }
numcolmoves = numcolroots = 0 ;
if ( ! doingbremer )
if ( ratcheting | | recordgoodmoves )
lenlimit = lim ;
else lenlimit = lim - 1 ;
if ( lim = = 0 & & ! ( ratcheting | | recordgoodmoves | | doingbremer ) ) {
zert ( clip , bestpos ) ;
continue ; }
tgtdo = tgtdone = tgtlist ;
if ( ! sisis ) * tgtdo + + = 0 ;
else {
* tgtdo + + = sis [ 0 ] ;
while ( tgtdone < tgtdo ) {
nod = * tgtdone + + ;
if ( nod > nt ) {
* tgtdo + + = lef [ nod ] ;
* tgtdo + + = rig [ nod ] ; } } }
rdo = rdone = rlist ;
if ( clip < nt ) * rdo + + = begroot = bestroot = clip ;
else {
* rdo + + = begroot = bestroot = rig [ clip ] ;
if ( lef [ clip ] > nt ) {
* rdo + + = lef [ lef [ clip ] ] ;
* rdo + + = rig [ lef [ clip ] ] ; } }
while ( rdone < rdo ) {
curroot = * rdone ;
if ( curroot > nt ) {
* rdo + + = lef [ curroot ] ;
* rdo + + = rig [ curroot ] ; }
for ( tgtdone = tgtlist ; tgtdone < tgtdo ; + + tgtdone ) {
tgt = * tgtdone ;
+ + rearrangs ;
if ( verbose )
if ( ! ratcheting & & ! recordgoodmoves & & ! doingbremer )
fprintf ( stderr , " \r Rep %3i %6.2f%% LEN %-10lli %12lli rearrangs " , reportreplication , prop * 100 , len , rearrangs ) ;
gmix = gup = 0 ;
diff = scorediff_all ( clip , curroot , tgt ) ;
if ( doingbremer ) {
gdn = nitdn - gmix ;
brempt - > up = gup ;
brempt - > dn = gdn ;
brempt - > root = curroot ;
brempt - > tgt = tgt ;
+ + brempt ;
continue ; }
if ( recordgoodmoves & & diff = = lim ) {
if ( rdone > rlist ) {
for ( i = isdone = 0 ; i < numcolroots & & ! isdone ; + + i )
if ( colroots [ i ] = = curroot ) isdone = 1 ;
if ( ! isdone ) colroots [ numcolroots + + ] = curroot ; }
for ( i = isdone = 0 ; i < numcolmoves & & ! isdone ; + + i )
if ( colmoves [ i ] = = tgt ) isdone = 1 ;
if ( ! isdone ) colmoves [ numcolmoves + + ] = tgt ; }
if ( ratcheting & & diff = = lim ) {
bestpos = tgt ;
bestroot = curroot ; }
if ( diff < lim ) {
len - = lim - diff ;
lim = diff ;
lenlimit = lim - 1 ;
if ( ratcheting ) + + lenlimit ;
bestpos = tgt ;
bestroot = curroot ; }
} // targets
if ( use_spr ) break ;
+ + rdone ; } // rootings
if ( doingbremer )
transfer_bremer ( clip , sisis , brempt ) ;
if ( recordgoodmoves )
collapse_with_swaps ( clip , sisis ) ;
if ( clip > nt )
if ( ! use_spr & & bestroot ! = begroot ) tbreroot ( bestroot , clip ) ;
zert ( clip , bestpos ) ;
if ( bestpos ! = begpos | | bestroot ! = begroot ) {
rounds = 0 ;
if ( ratcheting & & + + subsdone = = numsubs ) break ; } } // clippings
achk = len ;
len = downpass_all ( ) ;
if ( len ! = achk & & ! doingbremer ) {
fprintf ( stderr , " \n OOPS!!! \n An internal error occured!!! \n length is %lli, not %lli \n " , len , achk ) ;
exit ( 0 ) ; }
return len ;
}
/********** WAGNER AND BRANCH-SWAPPING WITH DISK BUFFER ****************/
void fullpass_split ( int clip , int sisis )
{
int nunos , a , nod , c ;
unsigned long int * lp , * rp , * np , * ap , x , y , z , bak ;
nunos = divoptlist ( nodlist , clip , sisis ) ;
for ( a = nunos ; a - - ; ) {
nod = nodlist [ a ] ;
if ( nodtoskip = = nod ) continue ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
for ( c = 0 ; c < piecesz ; + + c ) {
x = * lp & * rp ;
y = * lp + + | * rp + + ;
z = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
z | = z - ( z > > 3 ) ;
* np + + = x | ( z & y ) ; } }
for ( a = 0 ; + + a < nunos ; ) {
nod = nodlist [ a ] ;
if ( nod = = clip | | anx [ nod ] = = nodtoskip | | nod = = nodtoskip ) continue ;
lp = mat [ lef [ nod ] ] - 1 ;
rp = mat [ rig [ nod ] ] - 1 ;
np = mat [ nod ] - 1 ;
ap = mat [ anx [ nod ] ] - 1 ;
for ( c = 0 ; c < piecesz ; + + c ) {
x = * + + lp & * + + rp ;
z = * lp | * rp ;
y = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
y | = y - ( y > > 3 ) ;
bak = z | y ;
x = * + + ap & ~ * + + np ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x | = x - ( x > > 3 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; } }
nodtoskip = 0 ;
}
unsigned long int scorediff_split ( short int clipis , short int srcis , short int tgtis , unsigned long int atnc )
{
int len = 0 ;
unsigned long int * tgt , * tgta , * src , * srca ;
unsigned long int untgt , unsrc ;
unsigned long int x , z , y ;
unsigned long int * wts ;
long int a ;
if ( clipis < nt ) srca = mat [ clipis ] - 1 ;
else srca = mat [ anx [ srcis ] ] - 1 ;
src = mat [ srcis ] - 1 ;
if ( ! tgtis ) tgta = mat [ 0 ] - 1 ;
else tgta = mat [ anx [ tgtis ] ] - 1 ;
tgt = mat [ tgtis ] - 1 ;
if ( ratcycles ) wts = wtchg + atnc - 1 ;
else wts = wtchg - 1 ;
for ( a = 0 ; a < piecesz ; + + a ) {
unsrc = * + + src | * + + srca ;
if ( srcis < nt & & clipis > nt ) {
x = * srca & ~ * src ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x | = x - ( x > > 3 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * + + tgt | * + + tgta ;
if ( tgtis & & tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x | = x - ( x > > 3 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
z = HI4 & ~ ( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts )
if ( recorddn ) {
+ + wts ;
if ( recorddn = = 1 ) * wts = z ;
x = z & * wts ;
y = x ^ z ;
# ifdef BITS64
gmix + = bitct [ 65535 & ( x | ( x > > 17 ) | ( x > > 34 ) | ( x > > 51 ) ) ] ;
gup + = bitct [ 65535 & ( y | ( y > > 17 ) | ( y > > 34 ) | ( y > > 51 ) ) ] ;
# else
gmix + = bitct [ 65535 & ( x | ( x > > 17 ) ) ] ;
gup + = bitct [ 65535 & ( y | ( y > > 17 ) ) ] ;
# endif
continue ; }
else
z & = * + + wts ;
# ifdef BITS64
len + = bitct [ 65535 & ( z | ( z > > 17 ) | ( z > > 34 ) | ( z > > 51 ) ) ] ;
# else
len + = bitct [ 65535 & ( z | ( z > > 17 ) ) ] ;
# endif
}
return len ;
}
unsigned long long wagner_split ( void )
{
int a , added , fac , basnod , bestpos , clip , tgt ;
unsigned long int lim , diff , atnc , dapos ;
unsigned long long len ;
short int * inlist = nodfork ;
unsigned long int * mp ;
unsigned long int seedwas = ranseed ;
int lastpartdone = - 1 , atpart = 0 ;
randomlist ( nt , ladd , 0 ) ;
initnet ( ladd [ 0 ] , ladd [ 1 ] , ladd [ 2 ] ) ;
blist [ 0 ] = ladd [ 1 ] ;
blist [ 1 ] = ladd [ 2 ] ;
blist [ 2 ] = nt + 1 ;
added = 2 ;
fac = 3 ;
basnod = nt + 2 ;
ranseed = 0 ;
while ( + + added < nt ) {
if ( random_starts ) {
nodzert ( ladd [ added ] , blist [ rand ( ) % fac ] ) ;
blist [ fac + + ] = ladd [ added ] ;
blist [ fac + + ] = basnod + + ;
continue ; }
if ( verbose )
fprintf ( stderr , " \r Rep %3i %4i taxa added " , reportreplication , added ) ;
randomlist ( fac , inlist , 1 ) ;
clip = ladd [ added ] ;
for ( a = 0 , mp = movelist ; a < fac ; + + a , + + mp ) * mp = 0 ;
for ( atnc = 0 ; atnc < nc4 ; atnc + = piecesz ) {
if ( atpart ! = lastpartdone ) // if loaded already, don't load again...
loadmat ( atpart ) ;
lastpartdone = dapos = atpart ;
atpart + = piecesz ;
if ( atpart > = nc4 ) atpart = 0 ;
fullpass_split ( clip , clip ) ;
for ( a = 0 , mp = movelist ; a < fac ; + + a , + + mp ) {
tgt = blist [ inlist [ a ] ] ;
* mp + = scorediff_split ( clip , clip , tgt , dapos ) ; } }
atpart = lastpartdone ;
lim = VERYBIG ;
for ( a = 0 , mp = movelist ; a < fac ; + + a , + + mp ) {
diff = * mp ;
if ( diff < lim ) {
lim = diff ;
bestpos = blist [ inlist [ a ] ] ; } }
nodzert ( clip , bestpos ) ;
blist [ fac + + ] = clip ;
blist [ fac + + ] = basnod + + ; }
len = downpass_split ( ) ;
ranseed = seedwas ;
return len ;
}
unsigned long long bswap_split ( void )
{
unsigned long int diff , lim , nod , atnc , j , initrearrangs , dapos ;
unsigned long long len , achk ;
int clip , bestpos , begpos , sisis , nunos , bestroot , begroot , i , isdone ;
int curroot , tgt , rounds , maxrounds = 0 , subsdone = 0 ;
short int * rdo , * rdone , * tgtdo , * tgtdone ;
double prop = 0 , secs , curprop ;
static int clockinit = 0 ;
unsigned long int * mp ;
static clock_t last , now ;
int lastpartdone = - 1 , atpart = 0 ;
BremTyp * brempt ;
anx [ nt ] = nodz ;
binmksis ( anx ) ;
if ( ! rig [ nt ] ) {
rig [ nt ] = lef [ nt ] ;
lef [ nt ] = 0 ; }
nunos = binoptlist ( nodlist , anx ) ;
len = downpass_split ( ) ;
clip = 0 ;
if ( ! clockinit ) {
clockinit = 1 ;
last = clock ( ) ; }
if ( ratcycles & & verbose )
prop = ( double ) ratcyclesdone / ratcycles ;
for ( rounds = 1 ; rounds < nodz ; + + rounds ) {
if ( + + clip > = nodz ) clip = 1 ;
if ( clip = = nt ) continue ;
if ( rounds > maxrounds ) maxrounds = rounds ;
now = clock ( ) ;
secs = ( double ) ( now - last ) / CLOCKS_PER_SEC ;
if ( secs > reportperiod & & ! ratcheting & & ! doingbremer ) {
last = clock ( ) ;
prop = ( double ) ( maxrounds + 1 ) / nodz ;
if ( ratcycles )
prop = ( double ) ratcyclesdone / ratcycles ;
if ( recordgoodmoves )
fprintf ( stderr , " %cRep %3i collapse LEN %-10lli %12lli rearrangs " , reportheader , reportreplication , len , rearrangs ) ;
else
fprintf ( stderr , " %cRep %3i %6.2f%% LEN %-10lli %12lli rearrangs " , reportheader , reportreplication , prop * 100 , len , rearrangs ) ; }
unzert ( clip ) ;
begpos = bestpos = sisis = sis [ clip ] ;
if ( sisis ) nodtoskip = anx [ clip ] ;
initrearrangs = rearrangs ;
/*** Make lists of rootings and targets *********/
tgtdo = tgtdone = tgtlist ;
if ( ! sisis ) * tgtdo + + = 0 ;
else {
* tgtdo + + = sis [ 0 ] ;
while ( tgtdone < tgtdo ) {
nod = * tgtdone + + ;
if ( nod > nt ) {
* tgtdo + + = lef [ nod ] ;
* tgtdo + + = rig [ nod ] ; } } }
rdo = rdone = rlist ;
if ( clip < nt ) * rdo + + = begroot = bestroot = clip ;
else {
* rdo + + = begroot = bestroot = rig [ clip ] ;
if ( lef [ clip ] > nt ) {
* rdo + + = lef [ lef [ clip ] ] ;
* rdo + + = rig [ lef [ clip ] ] ; } }
while ( rdone < rdo ) {
curroot = * rdone + + ;
if ( curroot > nt ) {
* rdo + + = lef [ curroot ] ;
* rdo + + = rig [ curroot ] ; } }
/*** Calculate scores for each move, one matrix piece at a time *****/
lim = 0 ;
for ( atnc = 0 ; atnc < nc4 ; atnc + = piecesz ) {
if ( atpart ! = lastpartdone ) // if loaded already, don't load again...
loadmat ( atpart ) ;
lastpartdone = dapos = atpart ;
atpart + = piecesz ;
if ( atpart > = nc4 ) atpart = 0 ;
fullpass_split ( clip , sisis ) ;
if ( doingbremer ) {
recorddn = changewts = 1 ;
gup = gmix = 0 ; }
lim + = scorediff_split ( clip , * rlist , sisis , dapos ) ;
if ( doingbremer ) {
brempt = bremdif ;
recorddn = 2 ;
nitdn = gmix ; }
mp = movelist ;
rdone = rlist ;
if ( verbose )
if ( ! ratcheting & & ! recordgoodmoves & & ! doingbremer ) {
i = ( rdo - rlist ) * ( tgtdo - tgtlist ) ;
curprop = ( double ) atnc / nc4 ;
j = ( i * curprop ) + initrearrangs ;
fprintf ( stderr , " \r Rep %3i %6.2f%% LEN %-10lli %12li rearrangs " , reportreplication , prop * 100 , len , j ) ; }
while ( rdone < rdo ) {
curroot = * rdone + + ;
for ( tgtdone = tgtlist ; tgtdone < tgtdo ; + + tgtdone ) {
tgt = * tgtdone ;
if ( ! atnc ) {
+ + rearrangs ;
* mp = 0 ;
if ( doingbremer ) {
brempt - > up = 0 ;
brempt - > dn = 0 ; } }
gmix = gup = 0 ;
* mp + = scorediff_split ( clip , curroot , tgt , dapos ) ;
if ( doingbremer ) {
gdn = nitdn - gmix ;
brempt - > up + = gup ;
brempt - > dn + = gdn ;
brempt - > root = curroot ;
brempt - > tgt = tgt ;
+ + brempt ; }
+ + mp ; } // targets
if ( use_spr ) break ; } // rootings
} // matrix pieces
atpart = lastpartdone ;
/*** now repeat rootings & targets, using total scores... *********/
numcolmoves = numcolroots = 0 ;
mp = movelist ;
rdone = rlist ;
if ( doingbremer ) rdone = rdo ;
while ( rdone < rdo ) {
curroot = * rdone + + ;
for ( tgtdone = tgtlist ; tgtdone < tgtdo ; + + tgtdone ) {
tgt = * tgtdone ;
diff = * mp + + ;
if ( recordgoodmoves & & diff = = lim ) {
if ( rdone > rlist ) {
for ( i = isdone = 0 ; i < numcolroots & & ! isdone ; + + i )
if ( colroots [ i ] = = curroot ) isdone = 1 ;
if ( ! isdone ) colroots [ numcolroots + + ] = curroot ; }
for ( i = isdone = 0 ; i < numcolmoves & & ! isdone ; + + i )
if ( colmoves [ i ] = = tgt ) isdone = 1 ;
if ( ! isdone ) colmoves [ numcolmoves + + ] = tgt ; }
if ( ratcheting & & diff = = lim ) {
bestpos = tgt ;
bestroot = curroot ; }
if ( diff < lim ) {
len - = lim - diff ;
lim = diff ;
bestpos = tgt ;
bestroot = curroot ; } } // targets
if ( use_spr ) break ; } // rootings
if ( doingbremer )
transfer_bremer ( clip , sisis , brempt ) ;
if ( recordgoodmoves )
collapse_with_swaps ( clip , sisis ) ;
if ( clip > nt )
if ( ! use_spr & & bestroot ! = begroot ) tbreroot ( bestroot , clip ) ;
zert ( clip , bestpos ) ;
if ( bestpos ! = begpos | | bestroot ! = begroot ) {
rounds = 0 ;
if ( ratcheting & & + + subsdone = = numsubs ) break ; } } // clippings
achk = len ;
if ( ! doingbremer ) {
len = downpass_split ( ) ;
if ( len ! = achk ) {
fprintf ( stderr , " \n OOPS!!! \n An internal error occured!!! \n length is %lli, not %lli \n " , len , achk ) ;
exit ( 0 ) ; } }
return len ;
}
/******* CALCULATE (AND POSSIBLY COMBINE) BREMER SUPPORTS *****************/
void calcusizes ( short int * to , short int * daprvtree , short int * inthtulist )
{
int a , b , nodn , i ;
short int * lp ;
for ( a = nt ; a - - ; ) {
inthtulist [ a ] = a ;
daprvtree [ a ] = 1 ; }
binmksis ( to ) ;
binoptlist ( nodlist , to ) ;
nodn = nodz - nt ;
lp = nodlist + nodn ;
for ( i = nodn ; i - - ; ) {
a = * ( - - lp ) ;
daprvtree [ a ] = 0 ;
inthtulist [ a ] = - 1 ;
b = lef [ a ] ;
daprvtree [ a ] + = daprvtree [ b ] ;
if ( inthtulist [ b ] > = 0 )
inthtulist [ a ] = inthtulist [ b ] ;
b = rig [ a ] ;
daprvtree [ a ] + = daprvtree [ b ] ;
if ( inthtulist [ b ] > = 0 )
inthtulist [ a ] = inthtulist [ b ] ; }
}
void do_node_equivalences ( short int * from , short int * to , short int * save )
{
int b , c , d , e , i ;
int mysz ;
short int * dolist = ladd , * done , * todo , * lp , * nodok = lassis ;
int isitok ;
short int * conslist = tgtlist , consnodz , * tmpsize = rlist ;
short int * bufprvtree = blist , * thet = from ;
short int * waglist = unsuprt , * htulist = colmoves ;
calcusizes ( to , bufprvtree , htulist ) ;
consnodz = nodz - nt ;
for ( i = 0 ; i < consnodz ; + + i ) conslist [ i ] = nodlist [ i ] ;
calcusizes ( thet , tmpsize , waglist ) ;
for ( i = nt ; i < nodz ; + + i )
nodok [ i ] = save [ i ] = - 1 ;
lp = conslist + consnodz ;
for ( i = consnodz ; i - - ; ) {
b = * ( - - lp ) ;
c = htulist [ b ] ;
if ( c < 0 ) continue ;
if ( ! thet [ c ] ) continue ;
mysz = bufprvtree [ b ] ;
while ( tmpsize [ c ] < mysz & & c ! = nt ) c = thet [ c ] ;
if ( tmpsize [ c ] ! = mysz ) continue ;
* ( todo = dolist ) = c ;
done = todo + + ;
isitok = 1 ;
while ( done < todo ) {
d = * done + + ;
if ( d > = nt ) {
if ( nodok [ d ] > = 0 ) {
for ( e = nodok [ d ] ; e ! = nt & & bufprvtree [ e ] < = mysz & & e ! = b ; e = to [ e ] ) ;
if ( e ! = b ) {
isitok = 0 ;
break ; } }
else {
* todo + + = lef [ d ] ;
* todo + + = rig [ d ] ; } }
else {
for ( e = d ; e ! = nt & & bufprvtree [ e ] < = mysz & & e ! = b ; e = to [ e ] ) ;
if ( e ! = b ) {
isitok = 0 ;
break ; } } }
if ( isitok ) {
save [ b ] = c ;
nodok [ c ] = htulist [ b ] ; } }
}
void transfer_bremer ( int clip , int sisis , BremTyp * hidest )
{
BremTyp * at = bremdif - 1 ;
int a , b , comnod , rootis ;
int da , db , valisneg ;
unsigned long int mid ;
double abs , rel , eval , jprob ;
eval = ( double ) jakprob / 100 ;
jprob = 1 - eval ;
zert ( clip , sisis ) ;
while ( + + at < hidest ) {
valisneg = 0 ;
if ( shownegbremers )
if ( at - > up < at - > dn ) {
mid = at - > up ;
at - > up = at - > dn ;
at - > dn = mid ;
valisneg = 1 ; }
abs = at - > up - at - > dn ;
if ( abs > 0 ) {
rel = abs / at - > up ;
if ( bremertype = = 2 )
eval = pow ( jprob * rel , 1 / abs ) ;
else if ( bremertype = = 1 ) eval = rel ;
else eval = abs ; }
else eval = 0 ;
if ( valisneg ) eval = - eval ;
rootis = at - > root ;
if ( ! use_spr & & clip > nt & & anx [ rootis ] ! = clip ) {
a = anx [ rootis ] ;
while ( a ! = clip ) {
if ( tbrem [ a ] > eval ) tbrem [ a ] = eval ;
a = anx [ a ] ; } }
if ( at - > tgt = = sisis ) continue ;
da = distoroot [ a = clip ] ;
db = distoroot [ b = at - > tgt ] ;
while ( distoroot [ a ] > db ) a = anx [ a ] ;
while ( distoroot [ b ] > da ) b = anx [ b ] ;
while ( a ! = b ) { a = anx [ a ] ; b = anx [ b ] ; }
comnod = a ;
if ( comnod ! = at - > tgt )
for ( a = anx [ at - > tgt ] ; a ! = comnod ; a = anx [ a ] ) {
if ( abrem [ a ] > abs ) abrem [ a ] = abs ;
if ( abrem [ a ] < abs ) continue ; // only consider moves within absolute BS
if ( tbrem [ a ] > eval ) tbrem [ a ] = eval ; }
for ( b = anx [ clip ] ; b ! = comnod ; b = anx [ b ] ) {
if ( abrem [ a ] > abs ) abrem [ a ] = abs ;
if ( abrem [ a ] < abs ) continue ; // ditto
if ( tbrem [ b ] > eval ) tbrem [ b ] = eval ; } }
unzert ( clip ) ;
}
void do_quick_bremers ( void )
{
int i , j , is ;
short int * anxwas = anx ;
long long thisdiff ;
doingbremer = 1 ;
rearrangs = 0 ;
if ( reporttoscreen )
fprintf ( stderr , " \n Calculating supports from %i trees... " , numtrees ) ;
for ( i = 0 ; i < numtrees ; + + i ) {
for ( j = nt ; j < nodz ; + + j ) {
tbrem [ j ] = VERYBIG ;
abrem [ j ] = 2000000000 ; }
anx = memtrees [ i ] ;
filldistoroot ( ) ;
bswap ( ) ;
if ( bremertype = = 0 )
thisdiff = downpass ( ) - bestgloballen ;
if ( ! i ) {
for ( j = nt + 1 ; j < nodz ; + + j )
gbrem [ j ] = tbrem [ j ] ; }
else {
do_node_equivalences ( memtrees [ i ] , memtrees [ 0 ] , nodmatch ) ;
for ( j = nt + 1 ; j < nodz ; + + j ) {
is = nodmatch [ j ] ;
if ( is < 0 ) {
if ( bremertype = = 0 ) {
if ( gbrem [ j ] > thisdiff ) gbrem [ j ] = thisdiff ; }
else
gbrem [ j ] = 0 ;
continue ; }
if ( gbrem [ j ] > tbrem [ is ] )
gbrem [ j ] = tbrem [ is ] ; } } }
fprintf ( stderr , " %cCalculated supports, %lli additional rearrangs. \n " , reportheader , rearrangs ) ;
anx = anxwas ;
}
/**************** HANDLING TREE SEARCHES ***********************/
char getatree ( void )
{
int htu , atnode , i ;
char car = ' ( ' ;
for ( htu = nt ; htu - - ; ) anx [ htu ] = 0 ;
htu = atnode = nt - 1 ;
while ( car ! = ' * ' & & car ! = ' ; ' ) {
if ( car = = ' ( ' )
{ errout ( htu > nodz , " Tree contains too many open parenths ... can't read it " ) ;
anx [ htu + 1 ] = atnode ;
atnode = + + htu ; }
else if ( car = = ' ) ' ) {
errout ( atnode = = nodz , " Tree contains too many close parenths ... can't read it " ) ;
atnode = anx [ atnode ] ; }
else {
ungetc ( car , treeinp ) ;
fscanf ( treeinp , " %i " , & i ) ;
errout ( anx [ i ] , " Duplicate taxa in tree " ) ;
anx [ i ] = atnode ; }
while ( isspace ( car = getc ( treeinp ) ) ) ; }
for ( i = nt ; i - - ; )
errout ( anx [ i ] = = 0 , " Tree is incomplete ... can't read it \n Are you sure it corresponds to the same matrix? " ) ;
anx [ nt ] = + + htu ;
errout ( htu ! = nodz , " Tree is not binary ... can't process it " ) ;
binmksis ( anx ) ;
return car ;
}
unsigned long long doratchet ( void )
{
int i , j , numtreeswas = numtrees ;
unsigned long long len , beslen = downpass ( ) ;
numsubs = nt / 2 ;
for ( i = 0 ; i < nodz ; + + i )
memtrees [ numtrees ] [ i ] = anx [ i ] ;
ratcyclesdone = 0 ;
if ( dojak ) storewtfilter ( ) ;
else
if ( beslen < = bestgloballen | | saveall ) {
bestgloballen = beslen ;
storetree ( ) ; }
for ( j = 0 ; j < ratcycles ; + + j ) {
dowtfilter ( 5 , dojak , nc4 ) ;
ratcheting = changewts = 1 ;
ratcyclesdone = j + 1 ;
bswap ( ) ;
if ( dojak ) resetwtfilter ( ) ;
ratcheting = 0 ;
changewts = dojak ;
len = bswap ( ) ;
if ( ! dojak ) {
if ( len < beslen ) beslen = len ;
if ( len < bestgloballen ) {
if ( ! saveall ) numtrees = 0 ;
else numtrees = numtreeswas ;
numhits = 0 ;
bestgloballen = len ; }
if ( len = = bestgloballen ) {
+ + numhits ;
storetree ( ) ; } }
else
if ( len < beslen ) {
beslen = len ;
for ( i = 0 ; i < nodz ; + + i )
memtrees [ numtrees ] [ i ] = anx [ i ] ; } }
ratcyclesdone = 0 ;
if ( dojak )
for ( i = 0 ; i < nodz ; + + i )
anx [ i ] = memtrees [ numtrees ] [ i ] ;
return beslen ;
}
void dosearches ( void )
{
int a , i , exists , nxt ;
unsigned long long len ;
clock_t beg , end ;
double secs ;
beg = clock ( ) ;
bestgloballen = VERYBIG ;
for ( a = 0 ; a + + < numreplications ; ) {
srand ( ranseed ) ;
ranseed = rand ( ) ;
reportreplication = a ;
if ( dojak & & ( together | | ratcycles ) )
dowtfilter ( jakprob , 0 , nc4 ) ;
if ( treeinp = = NULL )
len = wagner ( ) ;
else {
while ( isspace ( i = getc ( treeinp ) ) ) ;
errout ( i ! = ' ( ' , " Expected an opening parent in tree-file " ) ;
nxt = getatree ( ) ;
len = downpass ( ) ; }
if ( do_branch_swapping ) {
reset_nodz ( anx ) ;
exists = 0 ;
if ( ! dojak )
for ( i = 0 ; i < numtrees & & ! exists ; + + i )
if ( equal_trees ( anx , memtrees [ i ] ) ) exists = 1 ;
if ( ! exists )
len = bswap ( ) ; }
if ( ratcycles )
len = doratchet ( ) ;
fprintf ( stderr , " %cRep %3i LEN %-10lli %12lli rearrangs " , reportheader , reportreplication , len , rearrangs ) ;
if ( reporttoscreen ) fprintf ( stderr , " \n " ) ;
if ( ! dojak )
if ( len < bestgloballen ) {
if ( ! saveall ) numtrees = 0 ;
numhits = 0 ;
bestgloballen = len ; }
if ( dojak ) collapse_the_tree ( ) ;
if ( ! ratcycles | | dojak ) // jackknifing saves a single tree per ratchet set (sorry!)
if ( len = = bestgloballen | | dojak | | saveall ) {
+ + numhits ;
storetree ( ) ; }
if ( treeinp ! = NULL & & nxt = = ' ; ' ) break ; }
if ( numreplications > 1 & & ! dojak )
fprintf ( stderr , " \n BEST LEN %lli, HIT %i TIMES \n " , bestgloballen , numhits ) ;
else fprintf ( stderr , " \n " ) ;
if ( quickbremer )
do_quick_bremers ( ) ;
if ( ! together ) {
fclose ( partsfile ) ;
remove ( " oblong.pts " ) ; }
end = clock ( ) ;
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
fprintf ( stderr , " \n Time used in tree calculations: %.2f secs. \n " , secs ) ;
}
/******* MAIN --where it all begins... *****************/
int main ( int argc , char * * argv )
{
procargs ( argc , argv ) ;
init_bit_counters ( ) ;
read_input ( ) ;
dosearches ( ) ;
savetrees ( ) ;
fclose ( outp ) ;
return 1 ;
}