2456 lines
98 KiB
C
2456 lines
98 KiB
C
/*************************************************************************************
|
||
* OBLONG
|
||
* a program for finding most parsimonious
|
||
* trees with minimum RAM requirements
|
||
*
|
||
* Copyright April 2013 by Pablo A. Goloboff
|
||
*
|
||
* This program is free software; you may redistribute it and/or modify its
|
||
* under the terms of the GNU General Public License as published by the Free
|
||
* Software Foundation; either version 2 of the License, or (at your option)
|
||
* any later version.
|
||
*
|
||
* This program is distributed in the hope that it will be useful, but
|
||
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
* for more details.
|
||
*
|
||
* If publishing results based on this program, please cite Goloboff (forthcoming)
|
||
*
|
||
***********************************************************************************
|
||
*/
|
||
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <time.h>
|
||
#include <unistd.h>
|
||
#include <ctype.h>
|
||
#include <math.h>
|
||
#define MAXNAMELEN 100
|
||
#define CHUNKSZ 1000
|
||
#define MAXMULTITAXA 1000
|
||
|
||
FILE * inp = NULL , * outp = NULL , * tmpp , * partsfile , * treeinp = NULL , * multinp ;
|
||
|
||
#ifdef WATCOM
|
||
long long * fpos ;
|
||
#else
|
||
fpos_t * fpos ;
|
||
#endif
|
||
|
||
unsigned long long bestgloballen , lenlag = 0 , rearrangs = 0 ;
|
||
unsigned long int ** mat , * wtchg , * movelist , lenlimit ;
|
||
unsigned long int gup , gmix , gdn , nitdn ;
|
||
long int nc2 = 0 , nc3 = 0 , nc4 = 0 , trashed = 0 , totnc , maxnc , piecesz , totnc4 ;
|
||
double * gbrem , * tbrem ;
|
||
long int * abrem , fasta = 0 ;
|
||
int nt , nc , nodz , nstord , together = 1 , use_spr = 0 , reportreplication = 0 ,
|
||
numreplications = 0 , reportperiod = 1 , ratcycles = 0 , ratcyclesdone = 0 ,
|
||
dojak = 0 , saveall = 0 , swapcollapse = 0 , recordgoodmoves = 0 , usenexus = 0 ;
|
||
int ranseed = 1 , do_branch_swapping = 1 , random_starts = 0 , numtrees = 0 , numhits ,
|
||
ratcheting = 0 , changewts = 0 , numsubs , jakprob = 36 , numcolmoves , numcolroots ,
|
||
savejusttrees = 0 , felipe = 0 , multifiles = 0 , nummultifiles , recorddn = 0 ,
|
||
equal_taxon_sequence = 0 , numparts , quickbremer = 0 , doingbremer = 0 ,
|
||
bremertype = 2 , shownegbremers = 0 , verbose = 0 ;
|
||
short int * nodlist , * anx , * lef , * rig , * sis , * rlist ,
|
||
* tgtlist , * lassis , * nodfork , nodtoskip = 0 , * ladd , * nodmatch ,
|
||
* blist , ** memtrees , * unsuprt , * distoroot , * colmoves , * colroots ;
|
||
char strsp[ MAXNAMELEN ] , maskin[ 256 ] , bitct [ 65536 ] , reportheader ,
|
||
reporttoscreen = 1 ;
|
||
char legindic [ ] = { '=' , 0 } ;
|
||
unsigned char ** matbuf , * tmpstates , * isabs = NULL ;
|
||
double maxram = -1 ;
|
||
|
||
typedef struct { unsigned long int up , dn ; int root , tgt ; } BremTyp ;
|
||
BremTyp * bremdif ;
|
||
|
||
void transfer_bremer ( int , int , BremTyp * ) ;
|
||
|
||
unsigned long long downpass_all ( void ) ;
|
||
unsigned long long downpass_split ( void ) ;
|
||
unsigned long long ( * downpass ) ( void ) ;
|
||
|
||
unsigned long long wagner_all ( void ) ;
|
||
unsigned long long wagner_split ( void ) ;
|
||
unsigned long long ( * wagner ) ( void ) ;
|
||
|
||
unsigned long long bswap_all ( void ) ;
|
||
unsigned long long bswap_split ( void ) ;
|
||
unsigned long long ( * bswap ) ( void ) ;
|
||
|
||
#define VERYBIG 10000000000000000
|
||
|
||
#ifdef BITS64
|
||
#define FLDS2 32
|
||
#define FLDS3 21
|
||
#define FLDS4 16
|
||
#define HI4 9838263505978427528UL
|
||
#define LO4 8608480567731124087UL
|
||
#define HI3 5270498306774157604UL
|
||
#define LO3 3952873730080618203UL
|
||
#define HI2 12297829382473034410UL
|
||
#define LO2 6148914691236517205UL
|
||
#else
|
||
#define FLDS2 16
|
||
#define FLDS3 10
|
||
#define FLDS4 8
|
||
#define HI4 2290649224
|
||
#define LO4 2004318071
|
||
#define HI3 613566756
|
||
#define LO3 460175067
|
||
#define HI2 2863311530
|
||
#define LO2 1431655765
|
||
#endif
|
||
|
||
/*********** TRIVIAL BUT USEFUL FUNCTIONS ******************/
|
||
|
||
void display_help ( int kind )
|
||
{
|
||
if ( kind == 0 )
|
||
fprintf ( stdout ,
|
||
"\n"
|
||
" OBLONG (Merriam-Webster): \"a rectangle with one of its\n"
|
||
" sides much longer than the other\"\n"
|
||
"\n"
|
||
" by P.A. Goloboff (2013)\n"
|
||
"\n"
|
||
" For phylogenetic analysis of wide matrices, with low memory\n"
|
||
" requirements. Recommended for data sets with many characters\n"
|
||
" and few taxa (not advised for hundreds of taxa or more, as\n"
|
||
" it is optimized for memory, not speed)\n"
|
||
" Uses temporary disk buffers, \"oblong.tmp\" and/or \"oblong.pts\"\n"
|
||
" to avoid holding all data in RAM. As much disk space as the size\n"
|
||
" of the data set may be required.\n"
|
||
" Screen output can be redirected to a file with \"2> file\"\n"
|
||
"\n"
|
||
" Use \"oblong -h\" for help on program options\n"
|
||
"\n"
|
||
) ;
|
||
else
|
||
fprintf ( stdout ,
|
||
"\n"
|
||
" Input/Output:\n"
|
||
"\n"
|
||
" -ifilename use \"filename\" as data file.\n"
|
||
" -p data files in Phylip/RAxML instead of TNT format.\n"
|
||
" -f data files in Fasta instead of TNT format.\n"
|
||
" -@list get from file \"list\" the list of data files, must\n"
|
||
" be in either Phylip (no \"-p\" switch required), or\n"
|
||
" Fasta format (\"-f\" switch required). By default, \n"
|
||
" taxon names are matched with identical strings; a \n"
|
||
" taxon absent from a data set gets all \"?\".\n"
|
||
" -e for file lists, instead of matching taxa based on name,\n"
|
||
" assume all data files have the same taxon sequence.\n"
|
||
" -ooutfile output results to \"outfile\"; this is a TNT-ready\n"
|
||
" data set with no characters (just tree topologies).\n"
|
||
"\n"
|
||
" Searches:\n"
|
||
"\n"
|
||
" -w use random trees as starting points, not Wagner trees\n"
|
||
" -rN use N starting points.\n"
|
||
" -spr use SPR branch swapping instead of the default TBR.\n"
|
||
" -b no branch swapping.\n"
|
||
" -a save the tree for each replication (even if suboptimal)\n"
|
||
" -jN jackknife the data, with N = P(del) (no N = 36). A \n"
|
||
" single (possibly collapsed) tree saved per replicate.\n"
|
||
" Note: uninformative characters/states are never \n"
|
||
" deactivated (they don't influence supports).\n"
|
||
" -mX use no more than X MBytes of RAM. If not using \"-j\" or\n"
|
||
" \"-x\", results are identical to the default but slower;\n"
|
||
" otherwise, randomization sequences may differ.\n"
|
||
" -RN use N as random seed.\n"
|
||
" -xN use N iterations of ratchet (no N = 10). Note that for\n"
|
||
" C informative characters, \"-x\" requires C/2 additional\n"
|
||
" bytes of RAM (even under \"-m\").\n"
|
||
" -k if jackknifing, do TBR/SPR tree collapsing (recommended).\n"
|
||
"\n"
|
||
" Other:\n"
|
||
"\n"
|
||
" -N output Nexus format (i.e. tree-file is TreeView-ready)\n"
|
||
" -t save only the trees (no \"xread\").\n"
|
||
" -Ttreefile read trees from \"treefile\" and use them as starting\n"
|
||
" points (if \"-b\" and no \"-x\", it just calculates\n"
|
||
" tree-scores). Treefile must be the output of Oblong\n"
|
||
" (i.e. what is in \"outfile\"), with number of trees in\n"
|
||
" tree-title.\n"
|
||
" -q use a quick approximmation for group supports. Default is\n"
|
||
" S = 0.64 * R ^ 1/A (where R = relative bremer support,\n"
|
||
" and A = absolute bremer support); this is also indicated\n"
|
||
" with \"-q2\". You can also indicate \"-q0\", absolute bremer\n"
|
||
" support, or \"-q1\", relative. Supports are obtained via\n"
|
||
" branch-swapping, recording score difference for each move.\n"
|
||
" You can use this to measure strength of contradiction, starting\n"
|
||
" from suboptimal trees (\"-T\") and skipping swapping (\"-b\").\n"
|
||
" -v verbose mode: report rearrangements examined as they are done\n"
|
||
" (note: repeatedly writing to screen slows down program).\n"
|
||
"\n"
|
||
" You can also view this info with \"oblong -h | less\" or \"oblong -h | more\"\n"
|
||
"\n"
|
||
) ;
|
||
}
|
||
|
||
void errout ( int cond , char * msg )
|
||
{
|
||
if ( !cond ) return ;
|
||
if ( msg != NULL )
|
||
fprintf ( stderr , "\n%s\n" , msg ) ; //output to the screen, usually error
|
||
exit ( 0 ) ; //exit(0)正常运行程序并退出程序;exit(1)非正常运行导致退出程序
|
||
}
|
||
|
||
void * myalloc ( unsigned long int size )
|
||
{
|
||
void * vd = malloc ( size ) ;
|
||
errout ( vd == NULL , "Sorry, not enough memory available --try with the \"-m\" switch" ) ;
|
||
return vd ;
|
||
}
|
||
|
||
int stringis ( char * a , char * b )
|
||
{
|
||
while ( * a && tolower ( * a ) == tolower ( * b ) ) { ++a ; ++ b ; } //tolower: conver letters from uppercase to lowercase
|
||
if ( * a || * b ) return 0 ; // or || ,return fake
|
||
return 1 ; //return true
|
||
}
|
||
|
||
void procargs ( int nargs , char ** arg ) // int nargs =2 for [oblong -f] **arg = *arg [] 字符串数组
|
||
{
|
||
int i ;
|
||
char * cp ;
|
||
if ( nargs == 1 ) { // no arguments if nargs == 1
|
||
display_help ( 0 ) ; //not show the help ouput just no argument output
|
||
exit ( 0 ) ; }
|
||
for ( i = 1 ; i < nargs ; ++ i ) {
|
||
cp = arg [ i ] ;
|
||
if ( * cp != '-' ) { // if arguments not start with -
|
||
fprintf ( stderr , "\nWrong argument: %s\n" , arg [ i ] ) ; //show which argument is wrong
|
||
exit ( 0 ) ; }
|
||
switch ( * ++ cp ) {
|
||
case 'a': saveall = 1 ; break ; //save all trees
|
||
case 'b': do_branch_swapping = 0 ; break ; //no branch swapping
|
||
case 'e': equal_taxon_sequence = 1 ; break ;
|
||
case 'f': fasta = 1 ; break ; //fasta format
|
||
case 'h': display_help ( 1 ) ; exit ( 0 ) ; //help
|
||
case 'i': //use the filename -i${filename}
|
||
case '@':
|
||
if ( * cp == '@' ) multifiles = 1 ; //there are multi files
|
||
++ cp ;
|
||
errout ( stringis ( cp , "oblong.tmp" ) , "Cannot use \"oblong.tmp\" as input file; rename it and try again" ) ;
|
||
errout ( stringis ( cp , "oblong.pts" ) , "Cannot use \"oblong.pts\" as input file; rename it and try again" ) ;
|
||
errout ( inp != NULL , "Can't open TWO input files" ) ;
|
||
errout ( ( inp = fopen ( cp , "rb" ) ) == NULL , "Can't open input file" ) ; //input file, if the input list can't be readable in the bianry mode, on windows, binary is different
|
||
break ;
|
||
case 'j': //j${NumberofJakknife}
|
||
++ cp ;
|
||
if ( isdigit ( * cp ) ) //to find if the character is the numer
|
||
jakprob = atoi ( cp ) ; //atoi: convert character to the int
|
||
dojak = changewts = 1 ;
|
||
break ;
|
||
case 'k': swapcollapse = 1 ; break ; //do TBR/SPR collapsing
|
||
case 'm': errout ( ( ( maxram = atof ( ++ cp ) ) <= 0 ) , "You must indicate RAM to use" ) ; together = 0 ; break ; //atof: transfer chracter into double
|
||
case 'N': usenexus = 1 ; break ; //output the nexus file
|
||
case 'o': //-o${outfile}
|
||
++ cp ;
|
||
errout ( stringis ( cp , "oblong.tmp" ) , "Cannot use \"oblong.tmp\" as output file; use a different name" ) ;
|
||
errout ( stringis ( cp , "oblong.pts" ) , "Cannot use \"oblong.pts\" as output file; use a different name" ) ;
|
||
errout ( outp != NULL , "Can't open TWO output files" ) ;
|
||
errout ( ( outp = fopen ( cp , "wb" ) ) == NULL , "Can't open output file" ) ;
|
||
break ;
|
||
case 'p': felipe = 1 ; break ; //file in Phylip style
|
||
case 'q': //a quick approximmation
|
||
++ cp ;
|
||
quickbremer = 1 ;
|
||
if ( * cp == '0' ) bremertype = 0 ;
|
||
if ( * cp == '1' ) bremertype = 1 ;
|
||
if ( * cp == '2' ) bremertype = 2 ;
|
||
break ;
|
||
case 'r': numreplications = atoi ( ++ cp ) ; break ; //use N starting points. -r${Number}
|
||
case 'R': ranseed = atoi ( ++ cp ) ; break ; //random seed
|
||
case 's': //spr use spr instead of tbr
|
||
if ( !strcmp ( cp , "spr" ) ) { use_spr = 1 ; break ; }
|
||
case 't': savejusttrees = 1 ; break ; //output standrad tre format
|
||
case 'T': //set start tree -T${treefile}
|
||
++ cp ;
|
||
errout ( treeinp != NULL , "Can't open TWO tree files" ) ;
|
||
errout ( ( treeinp = fopen ( cp , "rb" ) ) == NULL , "Can't open tree file" ) ;
|
||
break ;
|
||
case 'v': verbose = 1 ; break ; //report rearrangements
|
||
case 'w': random_starts = 1 ; break ; //use random trees as start tree
|
||
case 'x': //-x${Numer of ratcher}
|
||
++ cp ;
|
||
if ( isdigit ( * cp ) ) ratcycles = atoi ( cp ) ;//if the char is number, make ratecycles=
|
||
else ratcycles = 10 ;
|
||
break ;
|
||
default: fprintf ( stderr , "\nUnrecognized option: %s\n" , arg [ i ] ) ; exit ( 0 ) ; }}
|
||
errout ( ( fasta && felipe ) , "Cannot use both Phylip and Fasta formats" ) ; //fasta = 1 and felipe = 1, it can't be identified as different type
|
||
errout ( ( inp == NULL ) , "No data set specified" ) ;
|
||
errout ( ( outp == NULL ) , "No output file specified" ) ;
|
||
if ( quickbremer && dojak ) {
|
||
dojak = changewts = 0 ; //if quick approximmation and jack both are set, the jack won't be done
|
||
fprintf ( stderr , "\nNOTE: Cannot do both jackknifing and bremer at the same time!\n" ) ;
|
||
if ( bremertype == 2 )
|
||
fprintf ( stderr , " deletion prob P=%.2f will be used to rescale supports (1-P)\n" , ( double ) jakprob / 100 ) ;
|
||
else
|
||
if ( bremertype == 0 ) fprintf ( stderr , " will do bremer supports instead\n" ) ;
|
||
else fprintf ( stderr , " will do relative bremer supports instead\n" ) ;
|
||
errout ( ( !jakprob && bremertype == 2 ) , " ...but cannot use a deletion prob. P=0 to rescale!" ) ;
|
||
fprintf ( stderr , "\n" ) ; }
|
||
if ( quickbremer && usenexus ) legindic [ 0 ] = 0 ; // *** left here temp
|
||
if ( !do_branch_swapping && !ratcycles ) shownegbremers = 1 ; // *** left here temp
|
||
if ( treeinp != NULL && numreplications )
|
||
fprintf ( stderr , "\nNote: \"-T\" overrides \"-r\": number of trees in file determines starting points\n" ) ;
|
||
if ( !numreplications ) numreplications = 1 ;
|
||
if ( maxram > 0 ) {
|
||
fprintf ( stderr , "\nUsing %.2f MBytes of RAM for matrix buffers\n" , maxram ) ;
|
||
fflush ( stderr ) ; //清空缓存区
|
||
maxram *= 1024 * 1024 ; } // maxram = maxram * 1024 * 1024;
|
||
reportheader = '\r' ;
|
||
if ( !isatty( fileno( stderr ) ) ) {
|
||
errout ( verbose , "Verbose allowed only when STDERR goes to screen" ) ;
|
||
reporttoscreen = 0 ;
|
||
reportheader = '\n' ;
|
||
reportperiod = 600 ;
|
||
fprintf ( stderr , "\n\nProgram options:\n\n oblong " ) ;
|
||
for ( i = 1 ; i < nargs ; ++ i )
|
||
fprintf ( stderr , "%s " , arg [ i ] ) ;
|
||
fprintf ( stderr , "\n\n" ) ; }
|
||
return ;
|
||
}
|
||
|
||
void init_bit_counters ( void )
|
||
{
|
||
int a , b , c ; // bitct[65536]
|
||
for ( a = 0 ; a < 65536 ; ++ a ) { // a 0 -> 65535
|
||
for ( b = 1 , c = 0 ; b <= a ; b <<= 1 ) // <= 是 < 或 =;
|
||
if ( ( b & a ) ) ++ c ; // c++;
|
||
bitct [ a ] = c ; }
|
||
}
|
||
|
||
void maskinit ( void )
|
||
{
|
||
int a ;
|
||
for ( a = 0 ; a < 256 ; ++ a ) maskin [ a ] = 0 ;
|
||
maskin [ 'a' ] = maskin [ 'A' ] = maskin [ '0' ] = 1 ;
|
||
maskin [ 'g' ] = maskin [ 'G' ] = maskin [ '1' ] = 2 ;
|
||
maskin [ 'c' ] = maskin [ 'C' ] = maskin [ '2' ] = 4 ;
|
||
maskin [ 't' ] = maskin [ 'T' ] = maskin [ '3' ] = 8 ;
|
||
maskin [ 'R' ] = maskin [ 'r' ] = maskin [ 'a' ] | maskin [ 'g' ] ;
|
||
maskin [ 'Y' ] = maskin [ 'y' ] = maskin [ 't' ] | maskin [ 'c' ] ;
|
||
maskin [ 'W' ] = maskin [ 'w' ] = maskin [ 'a' ] | maskin [ 't' ] ;
|
||
maskin [ 'S' ] = maskin [ 's' ] = maskin [ 'c' ] | maskin [ 'g' ] ;
|
||
maskin [ 'M' ] = maskin [ 'm' ] = maskin [ 'a' ] | maskin [ 'c' ] ;
|
||
maskin [ 'K' ] = maskin [ 'k' ] = maskin [ 'g' ] | maskin [ 't' ] ;
|
||
maskin [ 'B' ] = maskin [ 'b' ] = maskin [ 'c' ] | maskin [ 'g' ] | maskin [ 't' ] ;
|
||
maskin [ 'D' ] = maskin [ 'd' ] = maskin [ 'a' ] | maskin [ 'g' ] | maskin [ 't' ] ;
|
||
maskin [ 'H' ] = maskin [ 'h' ] = maskin [ 'a' ] | maskin [ 'c' ] | maskin [ 't' ] ;
|
||
maskin [ 'V' ] = maskin [ 'v' ] = maskin [ 'a' ] | maskin [ 'c' ] | maskin [ 'g' ] ;
|
||
maskin [ 'N' ] = maskin [ 'n' ] = maskin [ '-' ] = maskin [ '?' ] = 1 | 2 | 4 | 8 ;
|
||
maskin [ '.' ] = 127 ;
|
||
return ;
|
||
}
|
||
|
||
/********* CHECKING AND SORTING INPUT *************************/
|
||
|
||
void prepare ( int off , unsigned char intwo )
|
||
{
|
||
int a , b , c , totbytes ;
|
||
char msk , shif ;
|
||
unsigned short int * pt ;
|
||
unsigned long int * lp , written ;
|
||
void * vd ;
|
||
int trsf ;
|
||
char mybits = bitct [ intwo ] ;
|
||
char convert[ 16 ] , goesto[ 4 ] ;
|
||
if ( mybits == 2 ) { msk = 3 ; ++ nc2 ; }
|
||
if ( mybits == 3 ) { msk = 7 ; ++ nc3 ; }
|
||
if ( mybits == 4 ) { msk = 15 ; ++ nc4 ; }
|
||
for ( a = b = 1 , c = 0 ; a < 16 ; a <<= 1 , ++ c )
|
||
if ( ( a & intwo ) ) {
|
||
goesto [ c ] = b ;
|
||
b <<= 1 ; }
|
||
else goesto [ c ] = 0 ;
|
||
convert [ 0 ] = msk ;
|
||
for ( a = 1 ; a < 16 ; ++ a ) {
|
||
convert [ a ] = 0 ;
|
||
for ( b = 1 , c = 0 ; b <= a ; b <<= 1 , ++ c )
|
||
if ( ( a & b ) ) convert [ a ] |= goesto [ c ] ;
|
||
if ( !convert [ a ] ) convert [ a ] = msk ; }
|
||
for ( a = 0 ; a < nt ; ++ a )
|
||
matbuf [ a ] [ off ] = convert [ matbuf [ a ] [ off ] ] ;
|
||
fprintf ( tmpp , "%c" , mybits ) ;
|
||
pt = ( unsigned short int * ) tmpstates ;
|
||
for ( a = ( nt + 1 ) / 2 ; a -- > 0 ; )
|
||
pt [ a ] = 0 ;
|
||
vd = ( void * ) tmpstates ;
|
||
for ( a = shif = 0 ; a < nt ; ++ a ) {
|
||
lp = ( unsigned long int * ) pt ;
|
||
trsf = matbuf [ a ] [ off ] ;
|
||
trsf <<= shif ;
|
||
* lp |= trsf ;
|
||
shif += mybits ;
|
||
if ( shif >= 16 ) {
|
||
++ pt ;
|
||
shif -= 16 ; }}
|
||
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
|
||
written = fwrite ( vd , 1 , totbytes + 1 , tmpp ) ;
|
||
errout ( written != totbytes + 1 , "Sorry, cannot write to temporary file (disk full?)" ) ;
|
||
}
|
||
|
||
int domin ( int a )
|
||
{
|
||
int best = 10 ;
|
||
int b , c , misinsome ;
|
||
for ( c = 16 ; c -- ; ) {
|
||
for ( b = misinsome = 0; b < nt && !misinsome ; ++ b )
|
||
if ( ! ( matbuf [ b ] [ a ] & c ) ) misinsome = 1 ;
|
||
if ( !misinsome )
|
||
if ( best > bitct [ c ] ) best = bitct [ c ] ; }
|
||
return best - 1 ;
|
||
}
|
||
|
||
int dotrueextra ( long int a , unsigned long int intwo )
|
||
{
|
||
int st , ret = 0 , b ;
|
||
for ( b = 0 ; b < nt ; ++ b ) {
|
||
st = matbuf [ b ] [ a ] ;
|
||
if ( st == 15 ) continue ;
|
||
if ( ( st & ~intwo ) && !( st & intwo ) ) ret ++ ; }
|
||
return ret ;
|
||
}
|
||
|
||
void process_chunk ( int start )
|
||
{
|
||
long int a , b , d ;
|
||
unsigned char c ;
|
||
unsigned char * cp ;
|
||
unsigned char refval ;
|
||
unsigned char intwo , inone ;
|
||
nstord = CHUNKSZ ;
|
||
if ( start + nstord > nc ) nstord = nc - start ;
|
||
for ( d = 0 ; d < nt ; ++ d )
|
||
for ( b = nstord ; b -- ; )
|
||
matbuf [ d ] [ b ] = 0 ;
|
||
for ( d = 0 ; d < nt ; ++ d ) {
|
||
cp = matbuf [ d ] ;
|
||
if ( isabs != NULL )
|
||
if ( isabs [ d ] ) { // This can only happen when reading multiple files and taxon is absent from this matrix...
|
||
for ( b = 0 ; b < nstord ; ++ b )
|
||
* cp ++ = 15 ;
|
||
continue ; }
|
||
#ifdef WATCOM
|
||
_fseeki64 ( inp , fpos [ d ] , SEEK_SET ) ;
|
||
#else
|
||
fsetpos ( inp , &fpos [ d ] ) ;
|
||
#endif
|
||
for ( b = 0 ; b < nstord ; ++ b ) {
|
||
fscanf ( inp , " %c" , &c ) ;
|
||
if ( c == '[' ) {
|
||
refval = 0 ;
|
||
while ( 1 ) {
|
||
fscanf ( inp , " %c" , &c ) ;
|
||
if ( c == ']' ) break ;
|
||
refval |= maskin [ c ] ; }}
|
||
else
|
||
if ( c == '.' ) {
|
||
errout ( !d , "Cannot use \".\" for the first taxon" ) ;
|
||
refval = matbuf [ d ] [ b ] ; }
|
||
else refval = maskin [ c ] ;
|
||
* cp ++ = refval ; }
|
||
#ifdef WATCOM
|
||
fpos [ d ] = _ftelli64 ( inp ) ;
|
||
#else
|
||
fgetpos ( inp , &fpos [ d ] ) ;
|
||
#endif
|
||
}
|
||
for ( a = 0 ; a < nstord ; ++ a ) {
|
||
inone = intwo = 0 ;
|
||
for ( b = 0 ; b < nt ; ++ b ) {
|
||
if ( matbuf [ b ] [ a ] == 15 ) continue ;
|
||
intwo |= inone & matbuf [ b ] [ a ] ;
|
||
inone |= matbuf [ b ] [ a ] ; }
|
||
if ( bitct [ intwo ] > 1 ) {
|
||
if ( inone != intwo )
|
||
if ( bitct [ inone ] == 1 )
|
||
lenlag += bitct [ ( inone & ~intwo ) ] ;
|
||
else lenlag += dotrueextra ( a , intwo ) ;
|
||
prepare ( a , intwo ) ; }
|
||
else {
|
||
lenlag += domin ( a ) ;
|
||
++ trashed ; }}
|
||
}
|
||
|
||
void popmat ( int part ) // populate matrix, for split optimization
|
||
{
|
||
unsigned long int a , b , c , shif , mybits , totbytes ;
|
||
unsigned long int inmsk , msk , ent , inshif , val , numelems ;
|
||
unsigned long int * lp ;
|
||
unsigned short int * pt ;
|
||
unsigned long int sz = piecesz * FLDS4 , numnc = 0 ;
|
||
void * vd ;
|
||
for ( b = 0 ; b < piecesz ; ++ b )
|
||
for ( a = 0 ; a < nt ; ++ a )
|
||
mat [ a ] [ b ] = -1 ;
|
||
vd = ( void * ) tmpstates ;
|
||
for ( a = 0 , c = part * FLDS4 ; a < sz && c < totnc ; ++ a , ++ c ) {
|
||
mybits = getc ( tmpp ) ;
|
||
if ( mybits == 2 ) msk = 3 ;
|
||
if ( mybits == 3 ) msk = 7 ;
|
||
if ( mybits == 4 ) msk = 15 ;
|
||
ent = numnc / FLDS4 ;
|
||
inshif = 4 * ( numnc % FLDS4 ) ;
|
||
inmsk = ( unsigned long int ) 15 << inshif ;
|
||
++ numnc ;
|
||
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
|
||
numelems = fread ( vd , 1 , totbytes + 1 , tmpp ) ;
|
||
errout ( numelems != totbytes + 1 , "OOOPS, unexpected error loading matrix!.\nSorry, abandoning run..." ) ;
|
||
pt = ( unsigned short int * ) tmpstates ;
|
||
for ( b = shif = 0 ; b < nt ; ++ b ) {
|
||
lp = ( unsigned long int * ) pt ;
|
||
val = ( * lp >> shif ) & msk ;
|
||
mat [ b ] [ ent ] &= ~inmsk ;
|
||
mat [ b ] [ ent ] |= val << inshif ;
|
||
shif += mybits ;
|
||
if ( shif >= 16 ) {
|
||
++ pt ;
|
||
shif -= 16 ; }}}
|
||
}
|
||
|
||
void create_parts_file ( void )
|
||
{
|
||
unsigned long int atnc ;
|
||
unsigned long int written ;
|
||
void * vd ;
|
||
unsigned long int * at ;
|
||
errout ( ( partsfile = fopen ( "oblong.pts" , "wb" ) ) == NULL , "Cannot open temporary file \"oblong.pts\"" ) ;
|
||
for ( atnc = 0 ; atnc < nc4 ; atnc += piecesz ) {
|
||
popmat ( atnc ) ;
|
||
at = mat [ 0 ] ;
|
||
vd = ( void * ) at ;
|
||
written = fwrite ( vd , 1 , sizeof ( unsigned long int ) * nt * piecesz , partsfile ) ;
|
||
errout ( written != sizeof ( unsigned long int ) * nt * piecesz , "Sorry, cannot write to temporary file (disk full?)" ) ; }
|
||
fputc ( 32 , partsfile ) ;
|
||
fclose ( partsfile ) ;
|
||
partsfile = fopen ( "oblong.pts" , "rb" ) ;
|
||
return ;
|
||
}
|
||
|
||
void tmpparse ( void )
|
||
{
|
||
int atnc2 = 0 , atnc3 = 0 , atnc4 = 0 , totbytes ;
|
||
unsigned char mybits ;
|
||
int a , b , shif , inshif ;
|
||
unsigned short int * pt ;
|
||
unsigned long int * lp , inmsk , ent ;
|
||
signed long int sizis , toalloc , subtract ;
|
||
unsigned long int j , * segposit , msk , val ;
|
||
double prop , prevprop ;
|
||
totnc = nc2 + nc3 + nc4 ;
|
||
nc2 = ( nc2 + ( FLDS2 - 1 ) ) / FLDS2 ;
|
||
nc3 = ( nc3 + ( FLDS3 - 1 ) ) / FLDS3 ;
|
||
nc4 = ( nc4 + ( FLDS4 - 1 ) ) / FLDS4 ;
|
||
nc3 += nc2 ;
|
||
nc4 += nc3 ;
|
||
nodz = 2 * nt - 1 ;
|
||
nodlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
anx = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
lef = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
rig = ( short int * ) myalloc ( nt * sizeof ( short int ) ) ;
|
||
rig -= nt ;
|
||
sis = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
rlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
tgtlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
lassis = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
nodfork = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
ladd = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
blist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
mat = ( unsigned long int ** ) myalloc ( nodz * sizeof ( long int * ) ) ;
|
||
if ( swapcollapse || quickbremer ) {
|
||
unsuprt = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
distoroot = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
colmoves = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
colroots = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
if ( quickbremer ) {
|
||
nodmatch = ( short int * ) myalloc ( nt * sizeof ( short int ) ) ;
|
||
nodmatch -= nt ;
|
||
abrem = ( long int * ) myalloc ( nt * sizeof ( long int ) ) ;
|
||
abrem -= nt ;
|
||
gbrem = ( double * ) myalloc ( nt * sizeof ( double ) ) ;
|
||
gbrem -= nt ;
|
||
tbrem = ( double * ) myalloc ( nt * sizeof ( double ) ) ;
|
||
tbrem -= nt ;
|
||
bremdif = ( BremTyp * ) myalloc ( nt * nt * sizeof ( BremTyp ) ) ; }}
|
||
b = numreplications ;
|
||
if ( ratcycles ) b = ( numreplications * ratcycles ) + 1 ;
|
||
memtrees = ( short int ** ) myalloc ( b * sizeof ( short int * ) ) ;
|
||
for ( a = 0 ; a < b ; ++ a )
|
||
memtrees [ a ] = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
|
||
toalloc = nc4 ;
|
||
downpass = downpass_all ;
|
||
bswap = bswap_all ;
|
||
wagner = wagner_all ;
|
||
if ( !together ) {
|
||
sizis = ( signed long int ) maxram ;
|
||
subtract = ( 12 * nodz * sizeof ( short int ) ) + // tree-buffers...
|
||
( nt * nt * sizeof ( long int ) ) + // buffer to store moves...
|
||
( b * sizeof ( short int ** ) ) + ( b * nodz * sizeof ( short int ) ) ; // trees
|
||
subtract += nodz * sizeof ( long int * ) ;
|
||
if ( subtract >= sizis ) {
|
||
fprintf ( stderr , "\nSorry: %.0f Bytes are insufficient to run this data set. Increase \"-m\"\n" , maxram ) ;
|
||
exit ( 0 ) ; }
|
||
toalloc = ( sizis - subtract ) / ( nodz * sizeof ( long int ) ) ;
|
||
j = ( totnc + ( FLDS4 - 1 ) ) / FLDS4 ; // needed total
|
||
numparts = 1 + ( j / toalloc ) ;
|
||
toalloc = j / numparts ;
|
||
while ( toalloc * numparts * FLDS4 < totnc )
|
||
toalloc ++ ;
|
||
if ( toalloc < 1 ) {
|
||
fprintf ( stderr , "\nSorry: %.0f Bytes are insufficient to run this data set. Increase -m\n" , maxram ) ;
|
||
exit ( 0 ) ; }
|
||
if ( toalloc * FLDS4 < totnc ) {
|
||
piecesz = toalloc ;
|
||
nc2 = nc3 = 0 ;
|
||
nc4 = totnc4 = numparts * piecesz ;
|
||
fprintf ( stderr , "%cWill process the chars in %i chunks of %li\n" , reportheader , numparts , toalloc * FLDS4 ) ; fflush ( stderr ) ;
|
||
movelist = ( unsigned long int * ) myalloc ( nt * nt * sizeof ( long int ) ) ;
|
||
downpass = downpass_split ;
|
||
wagner = wagner_split ;
|
||
bswap = bswap_split ; }
|
||
else {
|
||
fprintf ( stderr , "%cLucky you! All data can be held in RAM! No disk swaps needed...\n" , reportheader ) ; fflush ( stderr ) ;
|
||
toalloc = nc4 ;
|
||
together = 1 ; }}
|
||
if ( ratcycles || ( ( dojak || quickbremer ) && together ) )
|
||
wtchg = ( unsigned long int * ) myalloc ( nc4 * sizeof ( long int ) ) ;
|
||
else if ( ( dojak || quickbremer ) && !together )
|
||
wtchg = ( unsigned long int * ) myalloc ( piecesz * sizeof ( long int ) ) ;
|
||
tmpp = fopen ( "oblong.tmp" , "rb" ) ;
|
||
if ( !together ) {
|
||
/** Alloc matrix for terms in a contiguous memory segment, for faster loading *******/
|
||
segposit = ( unsigned long int * ) myalloc ( nt * toalloc * sizeof ( long int ) ) ;
|
||
for ( a = 0 ; a < nt ; ++ a ) {
|
||
mat [ a ] = segposit ;
|
||
segposit += toalloc ; }
|
||
for ( a = nt ; a < nodz ; ++ a )
|
||
mat [ a ] = ( unsigned long int * ) myalloc ( toalloc * sizeof ( long int ) ) ;
|
||
create_parts_file( ) ;
|
||
fclose ( tmpp ) ;
|
||
remove ( "oblong.tmp" ) ;
|
||
return ; }
|
||
/** Just alloc matrix as normally done ****/
|
||
for ( a = 0 ; a < nodz ; ++ a ) {
|
||
mat [ a ] = ( unsigned long int * ) myalloc ( nc4 * sizeof ( long int ) ) ;
|
||
if ( a < nt )
|
||
for ( b = 0 ; b < nc4 ; ++ b ) mat [ a ] [ b ] = -1 ; }
|
||
prevprop = 0 ;
|
||
for ( a = 0 ; a < totnc ; ++ a ) {
|
||
if ( reporttoscreen && ( a % 327 ) == 0 ) {
|
||
prop = ( double ) a / totnc ;
|
||
if ( prop - prevprop > 0.015 ) {
|
||
fprintf ( stderr , "\rReading matrix... %.1f%%" , 50 + ( prop * 100 / 2 ) ) ;
|
||
prevprop = prop ; }}
|
||
mybits = getc ( tmpp ) ;
|
||
if ( mybits == 2 ) {
|
||
msk = 3 ; ent = atnc2 / FLDS2 ; inshif = 2 * ( atnc2 % FLDS2 ) ;
|
||
inmsk = ( unsigned long int ) 3 << inshif ; ++ atnc2 ; }
|
||
if ( mybits == 3 ) {
|
||
msk = 7 ; ent = nc2 + ( atnc3 / FLDS3 ) ; inshif = 3 * ( atnc3 % FLDS3 ) ;
|
||
inmsk = ( unsigned long int ) 7 << inshif ; ++ atnc3 ; }
|
||
if ( mybits == 4 ) {
|
||
msk = 15 ; ent = nc3 + ( atnc4 / FLDS4 ) ; inshif = 4 * ( atnc4 % FLDS4 ) ;
|
||
inmsk = ( unsigned long int ) 15 << inshif ; ++ atnc4 ; }
|
||
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
|
||
for ( b = 0 ; b <= totbytes ; ++ b )
|
||
tmpstates [ b ] = getc ( tmpp ) ;
|
||
pt = ( unsigned short int * ) tmpstates ;
|
||
for ( b = shif = 0 ; b < nt ; ++ b ) {
|
||
lp = ( unsigned long int * ) pt ;
|
||
val = ( * lp >> shif ) & msk ;
|
||
mat [ b ] [ ent ] &= ~inmsk ;
|
||
mat [ b ] [ ent ] |= val << inshif ;
|
||
shif += mybits ;
|
||
if ( shif >= 16 ) {
|
||
++ pt ;
|
||
shif -= 16 ; }}}
|
||
fclose ( tmpp ) ;
|
||
remove ( "oblong.tmp" ) ;
|
||
}
|
||
|
||
void get_fasta_dims ( int * mync , int * mynt )
|
||
{
|
||
int i ;
|
||
char c ;
|
||
int curnt = 0 , curnc = 0 , atnc ;
|
||
char * nampt = strsp ;
|
||
while ( !feof ( inp ) && i != '>' ) i = getc ( inp ) ; //没读到 EOF 并且不是 > 继续,直到读到 >
|
||
if ( i != '>' ) { //读到最后没有 > 报错
|
||
fprintf ( stderr , "\nError reading fasta file --expected taxon name\n" ) ;
|
||
errout ( 1 , NULL ) ; }
|
||
while ( !feof ( inp ) ) { //没读到 EOF 不停
|
||
fscanf ( inp , " %s" , nampt ) ; //从 inp 读到 nampt
|
||
i = 0 ;
|
||
++ curnt ;
|
||
if ( curnt == 1) //第一次
|
||
while ( 1 ) {
|
||
fscanf ( inp , " %c" , &c ) ;
|
||
if ( c == '>' || feof ( inp ) ) break ; //读到 > 或者 EOF
|
||
++ curnc ; } // 读到 > 或者 EOF curnc ++
|
||
else { //第二次及之后
|
||
atnc = 0 ;
|
||
while ( 1 ) {
|
||
fscanf ( inp , " %c" , &c ) ;
|
||
if ( c == '>' || feof ( inp ) ) break ;
|
||
++ atnc ; }
|
||
if ( atnc != curnc ) { //到最后比较
|
||
fprintf ( stderr , "\nError reading fasta file --it seems not to be aligned\n" ) ;
|
||
errout ( 1 , NULL ) ; }}}
|
||
* mync = curnc ;
|
||
* mynt = curnt ;
|
||
rewind ( inp ) ;
|
||
}
|
||
|
||
void read_multifiles ( void )
|
||
{
|
||
char * nampt = strsp ;
|
||
unsigned char state ;
|
||
long int a , b , i , curnt = 0 , curnc = 0 , matched , prvnt ;
|
||
int atfile = 0 , numfiles = 0 ;
|
||
char ** taxnam ;
|
||
double prop , secs , prevprop ;
|
||
clock_t beg , end ; // 定义起始时间
|
||
beg = clock () ; //计算时间
|
||
errout ( ( tmpp = fopen ( "oblong.tmp" , "wb" ) ) == NULL , "Cannot open temporary file, \"oblong.tmp\"" ) ;
|
||
// Find out total NT and total NC
|
||
multinp = inp ;
|
||
maskinit ( ) ; //maskinit 函数
|
||
taxnam = ( char ** ) myalloc ( MAXMULTITAXA * sizeof ( char * ) ) ; //default MAXMULTITAXA 1000 L32
|
||
fscanf ( inp , " %s" , nampt ) ; //FILE * inp = NULL 读入文件到 nampt
|
||
while ( !feof ( multinp ) ) { //feof 如果 EOF 返回非 0,没有EOF 返回 0,
|
||
++ numfiles ; //numfiles 初始为 0
|
||
if ( ( inp = fopen ( nampt , "rb" ) ) == NULL ) { // r readbale b binary 二进制可以防止 Windows下文件问题
|
||
fprintf ( stderr , "\nCannot open file %s (from list of multiple files)\n" , nampt ) ;
|
||
exit ( 0 ) ; }
|
||
if ( reporttoscreen ) // L60: default 1 L309:0
|
||
fprintf ( stderr , "%cChecking input file %3i %-40s" , reportheader , numfiles , nampt ) ;
|
||
if ( fasta ) get_fasta_dims ( &nc , &nt ) ; //get nc and nt
|
||
else {
|
||
i = fscanf ( inp , " %i %i" , &nt , &nc ) ;
|
||
if ( i < 2 ) {
|
||
fprintf ( stderr , "\nError reading number of taxa/characters in data set %s\nMake sure it is in proper Phylip format\n" , bitct ) ;
|
||
exit ( 0 ) ; }}
|
||
if ( numfiles == 1 ) prvnt = nt ;
|
||
errout ( ( equal_taxon_sequence && nt != prvnt ) , "Under \"-e\" all data files must have the same number of taxa" ) ;
|
||
curnc += nc ;
|
||
for ( a = 0 ; a < nt ; ++ a ) {
|
||
fscanf ( inp , " %s" , nampt ) ;
|
||
if ( fasta ) ++ nampt ;
|
||
if ( equal_taxon_sequence )
|
||
if ( numfiles == 1 ) matched = -1 ;
|
||
else matched = a ;
|
||
else
|
||
for ( i = 0 , matched = -1 ; i < curnt && matched < 0 ; ++ i )
|
||
if ( !strcmp ( nampt , taxnam[ i ] ) ) matched = i ;
|
||
if ( matched < 0 ) {
|
||
errout( curnt >= MAXMULTITAXA , "Sorry, cannot deal with so many distinct taxon names (change MAXMULTITAXA and recompile)" ) ;
|
||
taxnam [ curnt ] = ( char * ) myalloc ( MAXNAMELEN * sizeof ( char ) ) ; //MAXNAMELEN = 1000 default
|
||
strcpy ( taxnam [ curnt ++ ] , nampt ) ; }
|
||
if ( fasta ) -- nampt ;
|
||
for ( b = 0 ; b < nc ; ++ b ) {
|
||
fscanf ( inp , " %c" , &state ) ;
|
||
if ( !maskin [ state ] ) {
|
||
fprintf ( stderr , "\nUnrecognized symbol: %c\n" , state ) ;
|
||
exit ( 0 ) ; }}}
|
||
fclose ( inp ) ;
|
||
fscanf ( multinp , " %s" , nampt ) ; }
|
||
// Now do some initializations and process
|
||
rewind ( multinp ) ;
|
||
#ifdef WATCOM
|
||
fpos = ( long long * ) myalloc ( curnt * sizeof ( long long ) ) ;
|
||
#else
|
||
fpos = ( fpos_t * ) myalloc ( curnt * sizeof ( fpos_t ) ) ;
|
||
#endif
|
||
matbuf = ( unsigned char ** ) myalloc ( curnt * sizeof ( char * ) ) ;
|
||
isabs = ( unsigned char * ) myalloc ( curnt * sizeof ( char ) ) ;
|
||
for ( a = 0 ; a < curnt ; ++ a )
|
||
if ( curnc < CHUNKSZ )
|
||
matbuf [ a ] = ( char * ) myalloc ( curnc * sizeof ( char ) ) ;
|
||
else matbuf [ a ] = ( char * ) myalloc ( CHUNKSZ * sizeof ( char ) ) ;
|
||
a = curnt ;
|
||
if ( ( a & 1 ) ) ++ a ;
|
||
tmpstates = ( unsigned char * ) myalloc ( a * sizeof ( char ) ) ;
|
||
fscanf ( multinp , " %s" , nampt ) ;
|
||
while ( !feof ( multinp ) ) {
|
||
if ( ( inp = fopen ( nampt , "rb" ) ) == NULL ) { // it was just opened ok, but re-check just in case...
|
||
fprintf ( stderr , "\nCannot open file %s (from list of multiple files)\n" , nampt ) ;
|
||
exit ( 0 ) ; }
|
||
if ( !fasta ) fscanf ( inp , " %i %i" , &nt , &nc ) ;
|
||
for ( a = 0 ; a < curnt ; ++ a ) isabs [ a ] = 1 ;
|
||
for ( a = 0 ; a < nt ; ++ a ) {
|
||
fscanf ( inp , " %s" , nampt ) ;
|
||
if ( fasta ) ++ nampt ;
|
||
if ( equal_taxon_sequence )
|
||
i = a ;
|
||
else
|
||
for ( i = 0 ; i < curnt ; ++ i )
|
||
if ( !strcmp ( nampt , taxnam[ i ] ) ) break ; // there has to be a match...
|
||
if ( fasta ) -- nampt ;
|
||
#ifdef WATCOM
|
||
fpos [ i ] = _ftelli64 ( inp ) ;
|
||
#else
|
||
fgetpos ( inp , &fpos [ i ] ) ;
|
||
#endif
|
||
isabs [ i ] = 0 ;
|
||
for ( b = 0 ; b < nc ; ++ b ) fscanf ( inp , " %c" , &state ) ; }
|
||
nt = curnt ;
|
||
atfile ++ ;
|
||
prevprop = 0 ;
|
||
for ( a = 0 ; a < nc ; a += CHUNKSZ ) {
|
||
if ( reporttoscreen ) {
|
||
prop = ( double ) a / nc ;
|
||
if ( ( prop - prevprop ) > 0.009 ) {
|
||
fprintf ( stderr , "%cReading matrix %i of %i... %.1f%% " , reportheader , atfile , numfiles , prop * 100 ) ;
|
||
prevprop = prop ; }}
|
||
process_chunk ( a ) ; }
|
||
fclose ( inp ) ;
|
||
fscanf ( multinp , " %s" , nampt ) ; }
|
||
free ( matbuf ) ;
|
||
fclose ( tmpp ) ;
|
||
// Assign bufers and (if appropriate), read data in, from tmp file...
|
||
nt = curnt ;
|
||
tmpparse ( ) ;
|
||
fprintf ( stderr , "%cRead all data (%i files), %i taxa, %li chars. (%li informative) \n" , reportheader , numfiles , nt , curnc , curnc - trashed ) ;
|
||
end = clock () ;
|
||
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
|
||
fprintf ( stderr , "Time to read data: %.2f secs.\n\n" , secs ) ;
|
||
// Begin writing to output file...
|
||
if ( !usenexus ) {
|
||
if ( !savejusttrees )
|
||
fprintf ( outp , "xread\n'Results from oblong'\n0 %i\n" , nt ) ; }
|
||
else fprintf ( outp , "#NEXUS\nbegin trees;\nTranslate\n" ) ;
|
||
for ( a = 0 ; a < nt ; ++ a )
|
||
if ( !usenexus ) {
|
||
if ( !savejusttrees )
|
||
fprintf ( outp , "%s " , taxnam [ a ] ) ; }
|
||
else {
|
||
fprintf ( outp , " %li %s" , a + 1 , taxnam [ a ] ) ;
|
||
if ( a < nt - 1 ) fprintf ( outp , "," ) ;
|
||
fprintf ( outp , "\n" ) ; }
|
||
if ( !usenexus ) {
|
||
if ( !savejusttrees )
|
||
fprintf ( outp , ";\ncollapse none;\n" ) ; }
|
||
else fprintf ( outp , " ;\n" ) ;
|
||
return ;
|
||
}
|
||
|
||
void read_input ( void )
|
||
{
|
||
char * nampt = strsp ;
|
||
unsigned char state ;
|
||
int a , b ;
|
||
char c ;
|
||
double prop , secs , prevprop ;
|
||
clock_t beg , end ;
|
||
strcpy ( nampt , " " ) ; // nampt = " "
|
||
if ( treeinp != NULL ) { // int is NULL (L34), but can specific the tree file (L273)
|
||
while ( strcmp ( nampt , "tread" ) && !feof ( treeinp ) ) //feof 如果 EOF 返回非 0,没有EOF 返回 0, strcmp 0 相同,非0 则执行,即 非 tread 并且 没有 EOF 且执行
|
||
fscanf ( treeinp , " %7s" , nampt ) ; // 输入长度为 7 的 string
|
||
errout ( feof ( treeinp ) , "String \"tread\" not found in tree file" ) ;
|
||
fscanf ( treeinp , " %c" , &c ) ;
|
||
errout ( c != 39 , "Number of trees should be indicated in the title of tree-file" ) ;
|
||
errout ( !fscanf ( treeinp , " %i" , &numreplications ) , "Number of trees should be indicated in the title of tree-file" ) ;
|
||
while ( getc ( treeinp ) != 39 ) ; }
|
||
if ( multifiles ) {
|
||
read_multifiles ( ) ;
|
||
return ; }
|
||
beg = clock () ;
|
||
if ( !felipe && !fasta ) {
|
||
while ( strcmp ( nampt , "xread" ) ) {
|
||
fscanf ( inp , " %7s" , nampt ) ;
|
||
if ( feof ( inp ) ) break ; }
|
||
errout ( feof ( inp ) , "String \"xread\" not found in data set\n\nMaybe it's in Phylip (\"-p\") or Fasta (\"-f\") format?" ) ;
|
||
fscanf ( inp , " %c" , &c ) ;
|
||
if ( c == 39 )
|
||
while ( getc ( inp ) != 39 && !feof ( inp ) ) ;
|
||
else ungetc ( c , inp ) ; }
|
||
if ( fasta ) get_fasta_dims ( &nc , &nt ) ;
|
||
else fscanf ( inp , " %i %i" , &nc , &nt ) ;
|
||
if ( felipe ) {
|
||
a = nc ;
|
||
nc = nt ;
|
||
nt = a ; }
|
||
errout ( !nc , "Illegal number of characters" ) ;
|
||
errout ( nt < 4 , "Illegal number of taxa" ) ;
|
||
#ifdef WATCOM
|
||
fpos = ( long long * ) myalloc ( nt * sizeof ( long long ) ) ;
|
||
#else
|
||
fpos = ( fpos_t * ) myalloc ( nt * sizeof ( fpos_t ) ) ;
|
||
#endif
|
||
matbuf = ( unsigned char ** ) myalloc ( nt * sizeof ( char * ) ) ;
|
||
a = nt ;
|
||
if ( ( a & 1 ) ) ++ a ;
|
||
tmpstates = ( unsigned char * ) myalloc ( a * sizeof ( char ) ) ;
|
||
maskinit ( ) ;
|
||
// Get file positions for first character for each taxon...
|
||
if ( !usenexus ) {
|
||
if ( !savejusttrees )
|
||
fprintf ( outp , "xread\n'Results from oblong'\n0 %i\n" , nt ) ; }
|
||
else fprintf ( outp , "#NEXUS\nbegin trees;\nTranslate\n" ) ;
|
||
for ( a = 0 ; a < nt ; ++ a ) {
|
||
fscanf ( inp , " %s" , nampt ) ;
|
||
if ( fasta ) ++ nampt ;
|
||
if ( !usenexus ) {
|
||
if ( !savejusttrees )
|
||
fprintf ( outp , "%s " , nampt ) ; }
|
||
else {
|
||
fprintf ( outp , " %i %s" , a + 1 , nampt ) ;
|
||
if ( a < nt - 1 ) fprintf ( outp , "," ) ;
|
||
fprintf ( outp , "\n" ) ; }
|
||
if ( fasta ) -- nampt ;
|
||
#ifdef WATCOM
|
||
fpos [ a ] = _ftelli64 ( inp ) ;
|
||
#else
|
||
fgetpos ( inp , &fpos [ a ] ) ;
|
||
#endif
|
||
if ( nc < CHUNKSZ )
|
||
matbuf [ a ] = ( char * ) myalloc ( nc * sizeof ( char ) ) ;
|
||
else matbuf [ a ] = ( char * ) myalloc ( CHUNKSZ * sizeof ( char ) ) ;
|
||
for ( b = 0 ; b < nc ; ++ b ) {
|
||
fscanf ( inp , " %c" , &state ) ;
|
||
if ( state == '[' ) {
|
||
while ( 1 ) {
|
||
fscanf ( inp , " %c" , &state ) ;
|
||
if ( state == ']' ) break ;
|
||
if ( !maskin [ state ] ) {
|
||
fprintf ( stderr , "\nUnrecognized symbol in matrix: %c\n" , state ) ;
|
||
exit ( 0 ) ; }}}
|
||
else
|
||
if ( !maskin [ state ] ) {
|
||
fprintf ( stderr , "\nUnrecognized symbol in matrix: %c\n" , state ) ;
|
||
exit ( 0 ) ; }}}
|
||
if ( !usenexus ) {
|
||
if ( !savejusttrees )
|
||
fprintf ( outp , ";\ncollapse none;\n" ) ; }
|
||
else fprintf ( outp , " ;\n" ) ;
|
||
// Read pieces, and output temporary file as you go...
|
||
errout ( ( tmpp = fopen ( "oblong.tmp" , "wb" ) ) == NULL , "Cannot open temporary file, \"oblong.tmp\"" ) ;
|
||
prevprop = 0 ;
|
||
for ( a = 0 ; a < nc ; a += CHUNKSZ ) {
|
||
if ( reporttoscreen ) {
|
||
prop = ( double ) a / nc ;
|
||
if ( together ) prop /= 2 ;
|
||
if ( ( prop - prevprop ) > 0.009 ) {
|
||
fprintf ( stderr , "\rReading matrix... %.1f%%" , prop * 100 ) ;
|
||
prevprop = prop ; }}
|
||
process_chunk ( a ) ; }
|
||
free ( matbuf ) ;
|
||
fclose ( tmpp ) ;
|
||
// Assign bufers and (if appropriate), read data in, from tmp file...
|
||
tmpparse ( ) ;
|
||
fprintf ( stderr , "%cReading matrix, %i taxa, %i chars. (%li informative) \n" , reportheader , nt , nc , nc - trashed ) ;
|
||
fclose ( inp ) ;
|
||
end = clock () ;
|
||
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
|
||
fprintf ( stderr , "Time to read data: %.2f secs.\n\n" , secs ) ;
|
||
return ;
|
||
}
|
||
|
||
/************ GENERAL TREE-HANDLING STUFF ************/
|
||
|
||
void reset_nodz ( short int * anpt )
|
||
{
|
||
short int * bu = rlist ;
|
||
short int * tbu = tgtlist ;
|
||
int a , b , atno , lg ;
|
||
for ( a = nodz ; a -- > nt ; ) tbu [ a ] = -1;
|
||
for ( a = nt ; a -- ; ) tbu [ a ] = a ;
|
||
atno = nt ;
|
||
lg = anpt [ nt ] ;
|
||
tbu [ nt ] = nt ;
|
||
tbu [ lg ] = lg ;
|
||
for ( a = 0 ; a < nt ; ++a) {
|
||
b = anpt [ a ] ;
|
||
while ( tbu [ b ] < 0 ) {
|
||
tbu [ b ] = ++ atno ;
|
||
b = anpt [ b ] ; }}
|
||
for ( a = lg ; a-- ; )
|
||
bu [ tbu [ a ] ] = tbu [ anpt [ a ] ] ;
|
||
for ( a = lg ; a-- ; ) anpt [ a ] = bu [ a ] ;
|
||
anpt [ nt ] = ++ atno ;
|
||
return ;
|
||
}
|
||
|
||
int equal_trees ( short int * a , short int * b )
|
||
{
|
||
int c ;
|
||
for ( c = 0 ; c < nodz ; ++ c )
|
||
if ( * a ++ != * b ++ ) return 0 ;
|
||
return 1 ;
|
||
}
|
||
|
||
void storetree ( void )
|
||
{
|
||
int b , isdup = 0 ;
|
||
for ( b = 0 ; b < nodz ; ++ b ) memtrees [ numtrees ] [ b ] = anx [ b ] ;
|
||
reset_nodz ( memtrees [ numtrees ] ) ;
|
||
if ( !dojak && !saveall )
|
||
for ( b = 0 ; b < numtrees && !isdup ; ++ b )
|
||
if ( equal_trees ( memtrees [ b ] , memtrees [ numtrees ] ) ) isdup = 1 ;
|
||
if ( !isdup )
|
||
++ numtrees ;
|
||
}
|
||
|
||
void randomlist ( int num , short int * dali , int firstis )
|
||
{ short int * bakrand ;
|
||
int a , todo , i , n ;
|
||
if ( ranseed ) srand ( ranseed ) ;
|
||
for ( a = 0 ; a < num ; ++ a ) dali [ a ] = a ;
|
||
if ( firstis == 0 ) {
|
||
todo = num - 1 ;
|
||
++ dali ; // skip 0
|
||
bakrand = dali + todo ;
|
||
for ( a = 1 ; a < num ; ++ a ) {
|
||
i = rand () % todo -- ;
|
||
n = * -- bakrand ;
|
||
* bakrand = dali [ i ] ;
|
||
dali [ i ] = n ; }}
|
||
else {
|
||
todo = num ;
|
||
bakrand = dali + todo ;
|
||
for ( a = 0 ; a < num ; ++ a ) {
|
||
i = rand () % todo -- ;
|
||
n = * -- bakrand ;
|
||
* bakrand = dali [ i ] ;
|
||
dali [ i ] = n ; }}
|
||
}
|
||
|
||
void nodzert ( int what, int where )
|
||
{
|
||
int a, b, c ;
|
||
a = anx [ where ] ;
|
||
c = sis [ where ] ;
|
||
b = anx [ what ] = anx[ where ] = anx[nt]++;
|
||
anx [ b ] = a;
|
||
sis [ what ] = where;
|
||
sis [ where ] = what;
|
||
sis [ c ] = b;
|
||
sis [ b ] = c;
|
||
lef [ a ] = c;
|
||
rig [ a ] = b;
|
||
lef [ b ] = where ;
|
||
rig [ b ] = what ;
|
||
}
|
||
|
||
|
||
int savebremervals = 0 ;
|
||
|
||
void savant ( int which )
|
||
{
|
||
int a , didsome = 0 ;
|
||
if ( savebremervals && which != nt ) {
|
||
if ( gbrem [ which ] > 0 || shownegbremers ) fprintf ( outp , "(" ) ; }
|
||
else
|
||
fprintf ( outp , "(" ) ;
|
||
for ( a = 0 ; a < anx [ nt ] ; ++ a )
|
||
if ( anx [ a ] == which ) {
|
||
if ( usenexus && didsome )
|
||
fprintf ( outp , "," ) ;
|
||
didsome = 1 ;
|
||
if ( a < nt )
|
||
if ( usenexus ) fprintf ( outp , "%i " , a + 1 ) ;
|
||
else fprintf ( outp , "%i " , a ) ;
|
||
else savant ( a ) ; }
|
||
if ( savebremervals && which != nt && anx [ which ] != nt ) {
|
||
if ( gbrem [ which ] > 0 )
|
||
if ( bremertype == 0 ) fprintf ( outp , ")%s%.0f " , legindic , gbrem [ which ] ) ;
|
||
else fprintf ( outp , ")%s%.0f " , legindic , gbrem [ which ] * 100 ) ;
|
||
else if ( shownegbremers )
|
||
if ( bremertype == 0 ) fprintf ( outp , ")%s%.0f " , legindic , gbrem [ which ] ) ;
|
||
else fprintf ( outp , ")%s(%.0f) " , legindic , gbrem [ which ] * 100 ) ; }
|
||
else
|
||
fprintf ( outp , ")" ) ;
|
||
}
|
||
|
||
void savetrees ( void )
|
||
{
|
||
short int * anxwas = anx ;
|
||
int i ;
|
||
if ( quickbremer )
|
||
fprintf ( stderr , "\nSaving supports and %i trees to output file...\n\n" , numtrees ) ;
|
||
else
|
||
fprintf ( stderr , "\nSaving %i trees to output file...\n\n" , numtrees ) ;
|
||
if ( !usenexus )
|
||
fprintf ( outp , "tread '%i Oblong trees'\n" , numtrees ) ;
|
||
for ( i = 0 ; i < numtrees ; ++ i ) {
|
||
if ( quickbremer && !i ) savebremervals = 1 ;
|
||
if ( !usenexus ) {
|
||
if ( i ) fprintf ( outp , "*\n" ) ; }
|
||
else fprintf ( outp , "tree OBLONG_%i = [&U] " , i + 1 ) ;
|
||
anx = memtrees [ i ] ;
|
||
savant ( nt ) ;
|
||
if ( usenexus )
|
||
fprintf ( outp , ";\n" ) ;
|
||
if ( quickbremer && !i ) {
|
||
savebremervals = quickbremer = 0 ;
|
||
if ( !usenexus ) fprintf ( outp , "*\n" ) ;
|
||
i = -1 ; }}
|
||
anx = anxwas ;
|
||
if ( !usenexus )
|
||
fprintf ( outp , ";\nproc/;\n" ) ;
|
||
else fprintf ( outp , "End ; \n" ) ;
|
||
}
|
||
|
||
int binoptlist ( short int * list , short int * tt )
|
||
{
|
||
short int * x , * y ;
|
||
x = y = list ;
|
||
* y ++ = nt ;
|
||
while ( x < y ) {
|
||
if ( lef [ * x ] >= nt ) * y ++ = lef [ * x ] ;
|
||
if ( rig [ * x ] >= nt ) * y ++ = rig [ * x ] ;
|
||
++ x ; }
|
||
return y - list ;
|
||
}
|
||
|
||
void initnet ( int un, int dos, int tres )
|
||
{
|
||
anx [ un ] = anx [ nt + 1] = nt;
|
||
anx [ dos ] = anx [ tres ] = nt + 1;
|
||
anx [ nt ] = nt + 2 ;
|
||
lef [ nt ] = un;
|
||
rig [ nt ] = sis [ un ] = nt + 1;
|
||
lef [ nt + 1 ] = dos;
|
||
rig [ nt + 1 ] = sis [ dos ] = tres;
|
||
sis [ nt + 1 ] = un;
|
||
sis [ tres ] = dos;
|
||
}
|
||
|
||
/************ OPTIMIZATION STUFF *********************/
|
||
|
||
void dowtfilter ( int prob , int keepprevious , int dalim )
|
||
{
|
||
int i , j ;
|
||
unsigned long int msk ;
|
||
for ( i = 0 ; i < nc2 ; ++ i ) {
|
||
if ( keepprevious )
|
||
wtchg [ i ] |= wtchg [ i ] << 1 ;
|
||
else wtchg [ i ] = LO2 ;
|
||
for ( j = 0 , msk = 1 ; j < FLDS2 ; ++ j , msk <<= 2 )
|
||
if ( ( rand () % 100 ) < prob )
|
||
wtchg [ i ] &= ~msk ; }
|
||
for ( ; i < nc3 ; ++ i ) {
|
||
if ( keepprevious )
|
||
wtchg [ i ] |= wtchg [ i ] >> 1 ;
|
||
else wtchg [ i ] = HI3 ;
|
||
for ( j = 0 , msk = 4 ; j < FLDS3 ; ++ j , msk <<= 3 )
|
||
if ( ( rand () % 100 ) < prob )
|
||
wtchg [ i ] &= ~msk ; }
|
||
for ( ; i < dalim ; ++ i ) {
|
||
if ( keepprevious )
|
||
wtchg [ i ] |= wtchg [ i ] >> 1 ;
|
||
else wtchg [ i ] = HI4 ;
|
||
for ( j = 0 , msk = 8 ; j < FLDS4 ; ++ j , msk <<= 4 )
|
||
if ( ( rand () % 100 ) < prob )
|
||
wtchg [ i ] &= ~msk ; }
|
||
}
|
||
|
||
void resetwtfilter ( void )
|
||
{
|
||
int i ;
|
||
for ( i = 0 ; i < nc2 ; ++ i )
|
||
wtchg [ i ] = ( wtchg [ i ] >> 1 ) & LO2 ;
|
||
for ( ; i < nc3 ; ++ i )
|
||
wtchg [ i ] = ( wtchg [ i ] << 1 ) & HI3 ;
|
||
for ( ; i < nc4 ; ++ i )
|
||
wtchg [ i ] = ( wtchg [ i ] << 1 ) & HI4 ;
|
||
}
|
||
|
||
void storewtfilter ( void )
|
||
{
|
||
int i ;
|
||
for ( i = 0 ; i < nc2 ; ++ i )
|
||
wtchg [ i ] |= ( wtchg [ i ] << 1 ) ;
|
||
for ( ; i < nc3 ; ++ i )
|
||
wtchg [ i ] |= ( wtchg [ i ] >> 1 ) ;
|
||
for ( ; i < nc4 ; ++ i )
|
||
wtchg [ i ] |= ( wtchg [ i ] >> 1 ) ;
|
||
}
|
||
|
||
|
||
unsigned long long downpass_all ( void )
|
||
{
|
||
int nunos , len , a , nod , c ;
|
||
unsigned long long totlen ;
|
||
unsigned long int * lp , * rp , * np , x , y , z ;
|
||
unsigned long int * wts ;
|
||
nunos = binoptlist ( nodlist , anx ) ;
|
||
totlen = lenlag ;
|
||
for ( a = nunos ; a -- ; ) {
|
||
nod = nodlist [ a ] ;
|
||
lp = mat [ lef [ nod ] ] ;
|
||
rp = mat [ rig [ nod ] ] ;
|
||
np = mat [ nod ] ;
|
||
wts = wtchg - 1 ;
|
||
len = 0 ;
|
||
for ( c = 0 ; c < nc2 ; ++ c ) {
|
||
x = * lp ++ & * rp ++ ;
|
||
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ) ;
|
||
if ( changewts ) y &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] + bitct [ 65535 & ( ( y >> 32 ) | ( y >> 47 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] ;
|
||
#endif
|
||
y |= y << 1 ;
|
||
* np ++ = x | y ; }
|
||
for ( ; c < nc3 ; ++ c ) {
|
||
x = * lp & * rp ;
|
||
y = * lp ++ | * rp ++ ;
|
||
z = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
|
||
if ( changewts ) z &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( z | ( z >> 16 ) ) ] + bitct [ 65535 & ( z >> 32 | ( z >> 49 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
|
||
#endif
|
||
z |= z - ( z >> 2 ) ;
|
||
* np ++ = x | ( z & y ) ; }
|
||
for ( ; c < nc4 ; ++ c ) {
|
||
x = * lp & * rp ;
|
||
y = * lp ++ | * rp ++ ;
|
||
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
if ( changewts ) z &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) | ( z >> 34 ) | ( z >> 51 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
|
||
#endif
|
||
z |= z - ( z >> 3 ) ;
|
||
* np ++ = x | ( z & y ) ; }
|
||
totlen += len ; }
|
||
return totlen ;
|
||
}
|
||
|
||
void loadmat ( int part )
|
||
{
|
||
void * vd ;
|
||
unsigned long int * seg ;
|
||
unsigned long int numelems ;
|
||
if ( !part ) rewind ( partsfile ) ;
|
||
seg = mat [ 0 ] ;
|
||
vd = ( void * ) seg ;
|
||
numelems = fread ( vd , 1 , sizeof ( unsigned long int ) * nt * piecesz , partsfile ) ;
|
||
errout ( numelems != sizeof ( unsigned long int ) * nt * piecesz , "OOOPS, unexpected error loading matrix!.\nSorry, abandoning run..." ) ;
|
||
if ( dojak && !ratcycles && !together ) {
|
||
if ( !part ) srand ( ranseed ) ;
|
||
dowtfilter ( jakprob , 0 , piecesz ) ; }
|
||
}
|
||
|
||
unsigned long long downpass_split ( void )
|
||
{
|
||
int nunos , len , a , nod , c ;
|
||
unsigned long long totlen ;
|
||
unsigned long int * lp , * rp , * np , x , y , z ;
|
||
unsigned long int atnc ;
|
||
unsigned long int * wts ;
|
||
nunos = binoptlist ( nodlist , anx ) ;
|
||
totlen = lenlag ;
|
||
for ( atnc = 0 ; atnc < nc4 ; atnc += piecesz ) {
|
||
loadmat ( atnc ) ;
|
||
for ( a = nunos ; a -- ; ) {
|
||
nod = nodlist [ a ] ;
|
||
lp = mat [ lef [ nod ] ] ;
|
||
rp = mat [ rig [ nod ] ] ;
|
||
np = mat [ nod ] ;
|
||
if ( ratcycles ) wts = wtchg + atnc - 1 ;
|
||
else wts = wtchg - 1 ;
|
||
len = 0 ;
|
||
for ( c = 0 ; c < piecesz ; ++ c ) {
|
||
x = * lp & * rp ;
|
||
y = * lp ++ | * rp ++ ;
|
||
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
if ( changewts ) z &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) | ( z >> 34 ) | ( z >> 51 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
|
||
#endif
|
||
z |= z - ( z >> 3 ) ;
|
||
* np ++ = x | ( z & y ) ; }
|
||
totlen += len ;
|
||
}}
|
||
return totlen ;
|
||
}
|
||
|
||
/********* SUPPORT FUNCTIONS FOR WAGNER AND BRANCH-SWAPPING ************************/
|
||
|
||
void zert ( int whatp, int wherep )
|
||
{
|
||
int a, b, c ;
|
||
if ( wherep == 0 ) {
|
||
anx [ lef [ nt ] = sis [ whatp ] = 0 ] = nt ;
|
||
anx [ rig [ nt ] = sis [ 0 ] = whatp ] = nt ;
|
||
return ; }
|
||
a = anx [ wherep ] ;
|
||
c = sis [ wherep ] ;
|
||
b = anx [ wherep ] = anx [ whatp ] ;
|
||
anx [ b ] = a;
|
||
sis [ sis [ wherep ] = rig [ b ] = whatp ] = lef [ b ] = wherep;
|
||
sis [ lef [ a ] = c ] = b ;
|
||
sis [ rig [ a ] = b ] = c ;
|
||
}
|
||
|
||
void unzert ( int whatp )
|
||
{
|
||
int a, b, c;
|
||
a = anx [ whatp ] ;
|
||
if ( a == nt ) {
|
||
lef [ anx [ 0 ] = nt ] = rig [ nt ] = sis [ 0 ] = 0 ;
|
||
anx [ whatp ] = whatp ;
|
||
return ; }
|
||
b = sis [ whatp ] ;
|
||
c = sis [ a ] ;
|
||
a = anx [ a ] ;
|
||
lef [ a ] = b ;
|
||
rig [ anx [ b ] = anx [ c ] = a ] = sis [ sis [ c ] = b ] = c ;
|
||
}
|
||
|
||
void tbreroot ( int new, int old )
|
||
{
|
||
short int * an = anx ;
|
||
int i , j , k , m , n , nx , first ;
|
||
if ( an [ new ] == old || old < nt ) return ;
|
||
j = nx = first = an [ i = new ] ;
|
||
n = sis [ new ] ;
|
||
while ( nx != old ) {
|
||
k = i ;
|
||
i = j ;
|
||
j = nx ;
|
||
m = n ;
|
||
n = sis [ j ] ;
|
||
if ( k == lef [ i ] ) { lef [ i ] = j ; sis [ sis [ rig [ i ] ] = j ] = rig [ i ] ; }
|
||
else { rig [ i ] = j ; sis [ sis [ lef [ i ] ] = j ] = lef [ i ] ; }
|
||
nx = an [ j ] ;
|
||
an [ j ] = i ; }
|
||
an [ lef [ j ] = m ] = an [ rig [ j ] = n ] = j ;
|
||
sis [ sis [ n ] = m ] = n ;
|
||
an [ lef [ old ] = new ] = an [ rig [ old ] = first ] = old ;
|
||
sis [ sis [ new ] = first ] = new ;
|
||
return ;
|
||
}
|
||
|
||
void binmksis ( short int * tt )
|
||
{
|
||
int a , b , c , lownod = tt [ nt ] ;
|
||
for ( a = nodz ; a -- ; ) nodfork [ a ] = 0 ;
|
||
for ( a = nt ; a -- ; ) {
|
||
b = tt [ c = a ] ;
|
||
while ( b != lownod ) {
|
||
if ( !nodfork [ b ] )
|
||
lef [ b ] = lassis [ b ] = c ;
|
||
else
|
||
sis [ sis [ c ] = lassis [ b ] ] = rig [ b ] = c ;
|
||
if ( ++ nodfork [ b ] > 1 ) break ;
|
||
c = b ;
|
||
b = tt [ b ] ; }}
|
||
return ;
|
||
}
|
||
|
||
int divoptlist ( short int * list , int clip , int sisis )
|
||
{
|
||
short int * x , * y ;
|
||
x = y = list ;
|
||
if ( sisis ) * y ++ = nt ;
|
||
if ( clip > nt ) * y ++ = clip ;
|
||
while ( x < y ) {
|
||
if ( lef [ * x ] >= nt ) * y ++ = lef [ * x ] ;
|
||
if ( rig [ * x ] >= nt ) * y ++ = rig [ * x ] ;
|
||
++ x ; }
|
||
return y - list ;
|
||
}
|
||
|
||
/*********** HANDLE SWAP-COLLAPSING FOR JACKKNIFING *************/
|
||
|
||
void delnodz ( void )
|
||
{ short int * tmp , *cs ;
|
||
int a , b , at , last ;
|
||
char stopped ;
|
||
short int * an = anx ;
|
||
short int * dellist = unsuprt ;
|
||
tmp = nodlist ;
|
||
cs = lef ;
|
||
dellist[ nt ] = 0 ;
|
||
at = nt + 1 ;
|
||
for ( a = nt ; a -- ; ) cs [ a ] = a ;
|
||
for ( a = nt ; a < nodz ; ++ a ) cs [ a ] = 0 ;
|
||
cs [ nt ] = nt ;
|
||
for ( a = nodz ; a -- ; ) tmp[ a ] = 0 ;
|
||
for ( a = nt ; a -- ; ) {
|
||
b = last = a ;
|
||
stopped = 0 ;
|
||
while ( b != nt ) {
|
||
b = an [ b ] ;
|
||
if ( cs [ b ] ) { tmp[ cs [ last ] ] = cs [ b ] ; stopped = 1 ; break ; }
|
||
else
|
||
if ( ! dellist [ b ] ) {
|
||
tmp [ cs [ last ] ] = at ;
|
||
cs [ last = b ] = at ++ ; }
|
||
}
|
||
if ( !stopped ) tmp [ cs [ last ] ] = nt ; }
|
||
for ( a = nodz ; a -- ; ) an [ a ] = tmp [ a ] ;
|
||
an [ nt ] = at ;
|
||
}
|
||
|
||
void filldistoroot ( void )
|
||
{
|
||
short int * x = nodlist , * y ;
|
||
binmksis ( anx ) ;
|
||
distoroot [ nt ] = 0 ;
|
||
nodlist [ 0 ] = lef [ nt ] ;
|
||
nodlist [ 1 ] = rig [ nt ] ;
|
||
x = nodlist - 1 ;
|
||
y = nodlist + 2 ;
|
||
while ( ++ x < y ) {
|
||
distoroot [ * x ] = distoroot [ anx [ * x ] ] + 1 ;
|
||
if ( * x > nt ) {
|
||
* y ++ = lef [ * x ] ;
|
||
* y ++ = rig [ * x ] ; }}
|
||
return ;
|
||
}
|
||
|
||
void collapse_with_swaps ( int clip , int sisis )
|
||
{
|
||
int a , b , c , comnod ;
|
||
int da , db , dest , rootis ;
|
||
zert ( clip , sisis ) ;
|
||
for ( c = 0 ; c < numcolmoves ; ++ c ) {
|
||
dest = colmoves [ c ] ;
|
||
if ( dest == sisis ) continue ;
|
||
da = distoroot [ a = clip ] ;
|
||
db = distoroot [ b = dest ] ;
|
||
while ( distoroot [ a ] > db ) a = anx [ a ] ;
|
||
while ( distoroot [ b ] > da ) b = anx [ b ] ;
|
||
while ( a != b ) { a = anx [ a ] ; b = anx [ b ] ; }
|
||
comnod = a ;
|
||
if ( comnod != dest )
|
||
for ( a = anx [ dest ] ; a != comnod ; a = anx [ a ] ) unsuprt [ a ] = 1 ;
|
||
for ( b = anx [ clip ] ; b != comnod ; b = anx [ b ] ) unsuprt [ b ] = 1 ; }
|
||
if ( !use_spr && clip > nt )
|
||
for ( c = 0 ; c < numcolroots ; ++ c ) {
|
||
rootis = colroots [ c ] ;
|
||
if ( anx [ rootis ] != clip ) {
|
||
a = anx [ rootis ] ;
|
||
while ( a != clip ) {
|
||
unsuprt [ a ] = 1 ;
|
||
a = anx [ a ] ; }}}
|
||
unzert ( clip ) ;
|
||
return ;
|
||
}
|
||
|
||
void collapse_the_tree ( void )
|
||
{
|
||
int i ;
|
||
if ( !swapcollapse ) return ;
|
||
for ( i = 0 ; i < nodz ; ++ i ) unsuprt [ i ] = 0 ;
|
||
filldistoroot ( ) ;
|
||
recordgoodmoves = 1 ;
|
||
bswap ( ) ;
|
||
recordgoodmoves = 0 ;
|
||
delnodz ( ) ;
|
||
}
|
||
|
||
/*************** WAGNER AND BRANCH-SWAPPING FOR SINGLE BUFFER ****************/
|
||
|
||
void fullpass_all ( int clip , int sisis )
|
||
{
|
||
int nunos , a , nod , c ;
|
||
unsigned long int * lp , * rp , * np , * ap , x , y , z , bak ;
|
||
nunos = divoptlist ( nodlist , clip , sisis ) ;
|
||
for ( a = nunos ; a -- ; ) {
|
||
nod = nodlist [ a ] ;
|
||
if ( nodtoskip == nod ) continue ;
|
||
lp = mat [ lef [ nod ] ] ;
|
||
rp = mat [ rig [ nod ] ] ;
|
||
np = mat [ nod ] ;
|
||
for ( c = 0 ; c < nc2 ; ++ c ) {
|
||
x = * lp ++ & * rp ++ ;
|
||
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ) ;
|
||
y |= y << 1 ;
|
||
* np ++ = x | y ; }
|
||
for ( ; c < nc3 ; ++ c ) {
|
||
x = * lp & * rp ;
|
||
y = * lp ++ | * rp ++ ;
|
||
z = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
|
||
z |= z - ( z >> 2 ) ;
|
||
* np ++ = x | ( z & y ) ; }
|
||
for ( ; c < nc4 ; ++ c ) {
|
||
x = * lp & * rp ;
|
||
y = * lp ++ | * rp ++ ;
|
||
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
z |= z - ( z >> 3 ) ;
|
||
* np ++ = x | ( z & y ) ; }}
|
||
for ( a = 0 ; ++ a < nunos ; ) {
|
||
nod = nodlist [ a ] ;
|
||
if ( nod == clip || anx [ nod ] == nodtoskip || nod == nodtoskip ) continue ;
|
||
lp = mat [ lef [ nod ] ] - 1 ;
|
||
rp = mat [ rig [ nod ] ] - 1 ;
|
||
np = mat [ nod ] - 1 ;
|
||
ap = mat [ anx [ nod ] ] - 1 ;
|
||
for ( c = 0 ; c < nc2 ; ++ c ) {
|
||
bak = * ++ lp | * ++ rp ;
|
||
x = * ++ ap & ~ * ++ np ;
|
||
x = ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ;
|
||
x |= ( x << 1 ) ;
|
||
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }
|
||
for ( ; c < nc3 ; ++ c ) {
|
||
x = * ++ lp & * ++ rp ;
|
||
z = * lp | * rp ;
|
||
y = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
|
||
y |= y - ( y >> 2 ) ;
|
||
bak = z | y ;
|
||
x = * ++ ap & ~ * ++ np ;
|
||
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
|
||
x |= x - ( x >> 2 ) ;
|
||
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }
|
||
for ( ; c < nc4 ; ++ c ) {
|
||
x = * ++ lp & * ++ rp ;
|
||
z = * lp | * rp ;
|
||
y = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
y |= y - ( y >> 3 ) ;
|
||
bak = z | y ;
|
||
x = * ++ ap & ~ * ++ np ;
|
||
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
|
||
x |= x - ( x >> 3 ) ;
|
||
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }}
|
||
nodtoskip = 0 ;
|
||
}
|
||
|
||
unsigned long int scorediff_all ( short int clipis , short int srcis , short int tgtis )
|
||
{
|
||
unsigned long int len = 0 ;
|
||
unsigned long int * tgt , * tgta , * src , * srca ;
|
||
unsigned long int untgt , unsrc ;
|
||
unsigned long int x , y , z ;
|
||
unsigned long int * wts ;
|
||
long int a ;
|
||
if ( clipis < nt ) srca = mat [ clipis ] - 1 ;
|
||
else srca = mat [ anx [ srcis ] ] - 1 ;
|
||
src = mat [ srcis ] - 1 ;
|
||
if ( !tgtis ) tgta = mat [ 0 ] - 1 ;
|
||
else tgta = mat [ anx [ tgtis ] ] - 1 ;
|
||
tgt = mat [ tgtis ] - 1 ;
|
||
wts = wtchg - 1 ;
|
||
for ( a = 0 ; a < nc2 ; ++ a ) {
|
||
unsrc = * ++ src | * ++ srca ;
|
||
if ( srcis < nt && clipis > nt ) {
|
||
x = * srca & ~ * src ;
|
||
x = ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ;
|
||
x |= ( x << 1 ) ;
|
||
unsrc = * srca | ( * src & x ) ; }
|
||
untgt = * ++ tgt | * ++ tgta ;
|
||
if ( tgtis && tgtis < nt ) {
|
||
x = * tgta & ~ * tgt ;
|
||
x = ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ;
|
||
x |= ( x << 1 ) ;
|
||
untgt = * tgta | ( * tgt & x ) ; }
|
||
x = unsrc & untgt ;
|
||
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ) ;
|
||
if ( changewts )
|
||
if ( recorddn ) {
|
||
++ wts ;
|
||
if ( recorddn == 1 ) * wts = y ;
|
||
x = * wts & y ;
|
||
z = x ^ y ;
|
||
#ifdef BITS64
|
||
gmix += bitct [ 65535 & ( x | ( x >> 15 ) ) ] + bitct [ 65535 & ( ( x >> 32 ) | ( x >> 47 ) ) ] ;
|
||
gup += bitct [ 65535 & ( z | ( z >> 15 ) ) ] + bitct [ 65535 & ( ( z >> 32 ) | ( z >> 47 ) ) ] ;
|
||
#else
|
||
gmix += bitct [ 65535 & ( x | ( x >> 15 ) ) ] ;
|
||
gup += bitct [ 65535 & ( z | ( z >> 15 ) ) ] ;
|
||
#endif
|
||
continue ; }
|
||
else
|
||
y &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] + bitct [ 65535 & ( ( y >> 32 ) | ( y >> 47 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] ;
|
||
#endif
|
||
if ( len > lenlimit ) return len ; }
|
||
for ( ; a < nc3 ; ++ a ) {
|
||
unsrc = * ++ src | * ++ srca ;
|
||
if ( srcis < nt && clipis > nt ) {
|
||
x = * srca & ~ * src ;
|
||
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
|
||
x |= x - ( x >> 2 ) ;
|
||
unsrc = * srca | ( * src & x ) ; }
|
||
untgt = * ++ tgt | * ++ tgta ;
|
||
if ( tgtis && tgtis < nt ) {
|
||
x = * tgta & ~ * tgt ;
|
||
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
|
||
x |= x - ( x >> 2 ) ;
|
||
untgt = * tgta | ( * tgt & x ) ; }
|
||
x = unsrc & untgt ;
|
||
z = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
|
||
if ( changewts )
|
||
if ( recorddn ) {
|
||
++ wts ;
|
||
if ( recorddn == 1 ) * wts = z ;
|
||
y = * wts & z ;
|
||
x = y ^ z ;
|
||
#ifdef BITS64
|
||
gmix += bitct [ 65535 & ( y | ( y >> 16 ) ) ] + bitct [ 65535 & ( y >> 32 | ( y >> 49 ) ) ] ;
|
||
gup += bitct [ 65535 & ( x | ( x >> 16 ) ) ] + bitct [ 65535 & ( x >> 32 | ( x >> 49 ) ) ] ;
|
||
#else
|
||
gmix += bitct [ 65535 & ( y | ( y >> 17 ) ) ] ;
|
||
gup += bitct [ 65535 & ( x | ( x >> 17 ) ) ] ;
|
||
#endif
|
||
continue ; }
|
||
else
|
||
z &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( z | ( z >> 16 ) ) ] + bitct [ 65535 & ( z >> 32 | ( z >> 49 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
|
||
#endif
|
||
if ( len > lenlimit ) return len ; }
|
||
for ( ; a < nc4 ; ++ a ) {
|
||
unsrc = * ++ src | * ++ srca ;
|
||
if ( srcis < nt && clipis > nt ) {
|
||
x = * srca & ~ * src ;
|
||
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
|
||
x |= x - ( x >> 3 ) ;
|
||
unsrc = * srca | ( * src & x ) ; }
|
||
untgt = * ++ tgt | * ++ tgta ;
|
||
if ( tgtis && tgtis < nt ) {
|
||
x = * tgta & ~ * tgt ;
|
||
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
|
||
x |= x - ( x >> 3 ) ;
|
||
untgt = * tgta | ( * tgt & x ) ; }
|
||
x = unsrc & untgt ;
|
||
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
if ( changewts )
|
||
if ( recorddn ) {
|
||
++ wts ;
|
||
if ( recorddn == 1 ) * wts = z ;
|
||
y = * wts & z ;
|
||
x = y ^ z ;
|
||
#ifdef BITS64
|
||
gmix += bitct [ 65535 & ( y | ( y >> 17 ) | ( y >> 34 ) | ( y >> 51 ) ) ] ;
|
||
gup += bitct [ 65535 & ( x | ( x >> 17 ) | ( x >> 34 ) | ( x >> 51 ) ) ] ;
|
||
#else
|
||
gmix += bitct [ 65535 & ( y | ( y >> 17 ) ) ] ;
|
||
gup += bitct [ 65535 & ( x | ( x >> 17 ) ) ] ;
|
||
#endif
|
||
continue ; }
|
||
else
|
||
z &= * ++ wts ;
|
||
#ifdef BITS64
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) | ( z >> 34 ) | ( z >> 51 ) ) ] ;
|
||
#else
|
||
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
|
||
#endif
|
||
if ( len > lenlimit ) return len ; }
|
||
return len ;
|
||
}
|
||
|
||
unsigned long long wagner_all ( void )
|
||
{
|
||
int a , added, fac, basnod , bestpos , clip , tgt ;
|
||
unsigned long int lim , diff ;
|
||
unsigned long long len ;
|
||
short int * inlist = nodfork ;
|
||
unsigned long int seedwas = ranseed ;
|
||
randomlist( nt , ladd , 0 ) ;
|
||
initnet ( ladd [ 0 ] , ladd [ 1 ] , ladd [ 2 ] ) ;
|
||
blist [ 0 ] = ladd [ 1 ] ;
|
||
blist [ 1 ] = ladd [ 2 ] ;
|
||
blist [ 2 ] = nt + 1;
|
||
added = 2;
|
||
fac = 3 ;
|
||
basnod = nt + 2;
|
||
ranseed = 0 ;
|
||
while ( ++ added < nt ) {
|
||
if ( random_starts ) {
|
||
nodzert ( ladd [ added ] , blist [ rand() % fac ] ) ;
|
||
blist [ fac ++ ] = ladd [ added ] ;
|
||
blist [ fac ++ ] = basnod ++ ;
|
||
continue ; }
|
||
randomlist ( fac , inlist , 1 ) ;
|
||
clip = ladd [ added ] ;
|
||
fullpass_all ( clip , clip ) ;
|
||
lim = lenlimit = VERYBIG ;
|
||
if ( verbose )
|
||
fprintf ( stderr , "\rRep %3i %4i taxa added" , reportreplication , added ) ;
|
||
for ( a = 0 ; a < fac ; ++ a ) {
|
||
tgt = blist [ inlist [ a ] ] ;
|
||
diff = scorediff_all ( clip , clip , tgt ) ;
|
||
if ( diff < lim ) {
|
||
lim = diff ;
|
||
lenlimit = lim - 1 ;
|
||
bestpos = tgt ; }}
|
||
nodzert ( clip , bestpos ) ;
|
||
blist [ fac ++ ] = clip ;
|
||
blist [ fac ++ ] = basnod ++ ; }
|
||
len = downpass_all () ;
|
||
ranseed = seedwas ;
|
||
return len ;
|
||
}
|
||
|
||
unsigned long long bswap_all ( void )
|
||
{
|
||
unsigned long int diff , lim , nod ;
|
||
unsigned long long len , achk ;
|
||
int clip , bestpos , begpos , sisis , nunos , bestroot , begroot , i , isdone ;
|
||
int curroot , tgt , rounds , maxrounds = 0 , subsdone = 0 ;
|
||
short int * rdo , * rdone , * tgtdo , * tgtdone ;
|
||
double prop = 0 , secs ;
|
||
static int clockinit = 0 ;
|
||
static clock_t last , now ;
|
||
BremTyp * brempt ;
|
||
anx [ nt ] = nodz ;
|
||
binmksis ( anx ) ;
|
||
if ( !rig [ nt ] ) {
|
||
rig [ nt ] = lef [ nt ] ;
|
||
lef [ nt ] = 0 ; }
|
||
nunos = binoptlist ( nodlist , anx ) ;
|
||
len = downpass_all ( ) ;
|
||
clip = 0 ;
|
||
if ( !clockinit ) {
|
||
clockinit = 1 ;
|
||
last = clock () ; }
|
||
if ( ratcycles && verbose )
|
||
prop = ( double ) ratcyclesdone / ratcycles ;
|
||
for ( rounds = 1 ; rounds < nodz ; ++ rounds ) {
|
||
if ( ++ clip >= nodz ) clip = 1 ;
|
||
if ( clip == nt ) continue ;
|
||
if ( rounds > maxrounds ) maxrounds = rounds ;
|
||
now = clock () ;
|
||
secs = ( double ) ( now - last ) / CLOCKS_PER_SEC ;
|
||
if ( secs > reportperiod && !ratcheting && !doingbremer ) {
|
||
last = clock () ;
|
||
prop = ( double ) ( maxrounds + 1 ) / nodz ;
|
||
if ( ratcycles )
|
||
prop = ( double ) ratcyclesdone / ratcycles ;
|
||
if ( recordgoodmoves )
|
||
fprintf ( stderr , "%cRep %3i collapse LEN %-10lli %12lli rearrangs" , reportheader , reportreplication , len , rearrangs ) ;
|
||
else
|
||
fprintf ( stderr , "%cRep %3i %6.2f%% LEN %-10lli %12lli rearrangs" , reportheader , reportreplication , prop * 100 , len , rearrangs ) ; }
|
||
unzert ( clip ) ;
|
||
begpos = bestpos = sisis = sis [ clip ] ;
|
||
if ( sisis ) nodtoskip = anx [ clip ] ;
|
||
fullpass_all ( clip , sisis ) ;
|
||
lenlimit = VERYBIG ;
|
||
if ( doingbremer ) {
|
||
recorddn = changewts = 1 ;
|
||
gup = gmix = 0 ; }
|
||
if ( clip < nt )
|
||
lim = scorediff_all ( clip , clip , sisis ) ;
|
||
else
|
||
if ( lef [ clip ] > nt )
|
||
lim = scorediff_all ( clip , lef [ clip ] , sisis ) ;
|
||
else lim = scorediff_all ( clip , rig [ clip ] , sisis ) ;
|
||
if ( doingbremer ) {
|
||
brempt = bremdif ;
|
||
recorddn = 2 ;
|
||
nitdn = gmix ; }
|
||
numcolmoves = numcolroots = 0 ;
|
||
if ( !doingbremer )
|
||
if ( ratcheting || recordgoodmoves )
|
||
lenlimit = lim ;
|
||
else lenlimit = lim - 1 ;
|
||
if ( lim == 0 && !( ratcheting || recordgoodmoves || doingbremer ) ) {
|
||
zert ( clip , bestpos ) ;
|
||
continue ; }
|
||
tgtdo = tgtdone = tgtlist ;
|
||
if ( !sisis ) * tgtdo ++ = 0 ;
|
||
else {
|
||
* tgtdo ++ = sis [ 0 ] ;
|
||
while ( tgtdone < tgtdo ) {
|
||
nod = * tgtdone ++ ;
|
||
if ( nod > nt ) {
|
||
* tgtdo ++ = lef [ nod ] ;
|
||
* tgtdo ++ = rig [ nod ] ; }}}
|
||
rdo = rdone = rlist ;
|
||
if ( clip < nt ) * rdo ++ = begroot = bestroot = clip ;
|
||
else {
|
||
* rdo ++ = begroot = bestroot = rig [ clip ] ;
|
||
if ( lef [ clip ] > nt ) {
|
||
* rdo ++ = lef [ lef [ clip ] ] ;
|
||
* rdo ++ = rig [ lef [ clip ] ] ; }}
|
||
while ( rdone < rdo ) {
|
||
curroot = * rdone ;
|
||
if ( curroot > nt ) {
|
||
* rdo ++ = lef [ curroot ] ;
|
||
* rdo ++ = rig [ curroot ] ; }
|
||
for ( tgtdone = tgtlist ; tgtdone < tgtdo ; ++ tgtdone ) {
|
||
tgt = * tgtdone ;
|
||
++ rearrangs ;
|
||
if ( verbose )
|
||
if ( !ratcheting && !recordgoodmoves && !doingbremer )
|
||
fprintf ( stderr , "\rRep %3i %6.2f%% LEN %-10lli %12lli rearrangs" , reportreplication , prop * 100 , len , rearrangs ) ;
|
||
gmix = gup = 0 ;
|
||
diff = scorediff_all ( clip , curroot , tgt ) ;
|
||
if ( doingbremer ) {
|
||
gdn = nitdn - gmix ;
|
||
brempt -> up = gup ;
|
||
brempt -> dn = gdn ;
|
||
brempt -> root = curroot ;
|
||
brempt -> tgt = tgt ;
|
||
++ brempt ;
|
||
continue ; }
|
||
if ( recordgoodmoves && diff == lim ) {
|
||
if ( rdone > rlist ) {
|
||
for ( i = isdone = 0 ; i < numcolroots && !isdone ; ++ i )
|
||
if ( colroots [ i ] == curroot ) isdone = 1 ;
|
||
if ( !isdone ) colroots [ numcolroots ++ ] = curroot ; }
|
||
for ( i = isdone = 0 ; i < numcolmoves && !isdone ; ++ i )
|
||
if ( colmoves [ i ] == tgt ) isdone = 1 ;
|
||
if ( !isdone ) colmoves [ numcolmoves ++ ] = tgt ; }
|
||
if ( ratcheting && diff == lim ) {
|
||
bestpos = tgt ;
|
||
bestroot = curroot ; }
|
||
if ( diff < lim ) {
|
||
len -= lim - diff ;
|
||
lim = diff ;
|
||
lenlimit = lim - 1 ;
|
||
if ( ratcheting ) ++ lenlimit ;
|
||
bestpos = tgt ;
|
||
bestroot = curroot ; }
|
||
} // targets
|
||
if ( use_spr ) break ;
|
||
++ rdone ; } // rootings
|
||
if ( doingbremer )
|
||
transfer_bremer ( clip , sisis , brempt ) ;
|
||
if ( recordgoodmoves )
|
||
collapse_with_swaps ( clip , sisis ) ;
|
||
if ( clip > nt )
|
||
if ( !use_spr && bestroot != begroot ) tbreroot ( bestroot , clip ) ;
|
||
zert ( clip , bestpos ) ;
|
||
if ( bestpos != begpos || bestroot != begroot ) {
|
||
rounds = 0 ;
|
||
if ( ratcheting && ++ subsdone == numsubs ) break ; }} // clippings
|
||
achk = len ;
|
||
len = downpass_all ( ) ;
|
||
if ( len != achk && !doingbremer ) {
|
||
fprintf ( stderr , "\nOOPS!!!\nAn internal error occured!!!\nlength is %lli, not %lli\n" , len , achk ) ;
|
||
exit ( 0 ) ; }
|
||
return len ;
|
||
}
|
||
|
||
/********** WAGNER AND BRANCH-SWAPPING WITH DISK BUFFER ****************/
|
||
|
||
void fullpass_split ( int clip , int sisis )
|
||
{
|
||
int nunos , a , nod , c ;
|
||
unsigned long int * lp , * rp , * np , * ap , x , y , z , bak ;
|
||
nunos = divoptlist ( nodlist , clip , sisis ) ;
|
||
for ( a = nunos ; a -- ; ) {
|
||
nod = nodlist [ a ] ;
|
||
if ( nodtoskip == nod ) continue ;
|
||
lp = mat [ lef [ nod ] ] ;
|
||
rp = mat [ rig [ nod ] ] ;
|
||
np = mat [ nod ] ;
|
||
for ( c = 0 ; c < piecesz ; ++ c ) {
|
||
x = * lp & * rp ;
|
||
y = * lp ++ | * rp ++ ;
|
||
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
z |= z - ( z >> 3 ) ;
|
||
* np ++ = x | ( z & y ) ; }}
|
||
for ( a = 0 ; ++ a < nunos ; ) {
|
||
nod = nodlist [ a ] ;
|
||
if ( nod == clip || anx [ nod ] == nodtoskip || nod == nodtoskip ) continue ;
|
||
lp = mat [ lef [ nod ] ] - 1 ;
|
||
rp = mat [ rig [ nod ] ] - 1 ;
|
||
np = mat [ nod ] - 1 ;
|
||
ap = mat [ anx [ nod ] ] - 1 ;
|
||
for ( c = 0 ; c < piecesz ; ++ c ) {
|
||
x = * ++ lp & * ++ rp ;
|
||
z = * lp | * rp ;
|
||
y = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
y |= y - ( y >> 3 ) ;
|
||
bak = z | y ;
|
||
x = * ++ ap & ~ * ++ np ;
|
||
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
|
||
x |= x - ( x >> 3 ) ;
|
||
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }}
|
||
nodtoskip = 0 ;
|
||
}
|
||
|
||
unsigned long int scorediff_split ( short int clipis , short int srcis , short int tgtis , unsigned long int atnc )
|
||
{
|
||
int len = 0 ;
|
||
unsigned long int * tgt , * tgta , * src , * srca ;
|
||
unsigned long int untgt , unsrc ;
|
||
unsigned long int x , z , y ;
|
||
unsigned long int * wts ;
|
||
long int a ;
|
||
if ( clipis < nt ) srca = mat [ clipis ] - 1 ;
|
||
else srca = mat [ anx [ srcis ] ] - 1 ;
|
||
src = mat [ srcis ] - 1 ;
|
||
if ( !tgtis ) tgta = mat [ 0 ] - 1 ;
|
||
else tgta = mat [ anx [ tgtis ] ] - 1 ;
|
||
tgt = mat [ tgtis ] - 1 ;
|
||
if ( ratcycles ) wts = wtchg + atnc - 1 ;
|
||
else wts = wtchg - 1 ;
|
||
for ( a = 0 ; a < piecesz ; ++ a ) {
|
||
unsrc = * ++ src | * ++ srca ;
|
||
if ( srcis < nt && clipis > nt ) {
|
||
x = * srca & ~ * src ;
|
||
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
|
||
x |= x - ( x >> 3 ) ;
|
||
unsrc = * srca | ( * src & x ) ; }
|
||
untgt = * ++ tgt | * ++ tgta ;
|
||
if ( tgtis && tgtis < nt ) {
|
||
x = * tgta & ~ * tgt ;
|
||
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
|
||
x |= x - ( x >> 3 ) ;
|
||
untgt = * tgta | ( * tgt & x ) ; }
|
||
x = unsrc & untgt ;
|
||
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
|
||
if ( changewts )
|
||
if ( recorddn ) {
|
||
++ wts ;
|
||
if ( recorddn == 1 ) * wts = z ;
|
||
x = z & * wts ;
|
||
y = x ^ z ;
|
||
#ifdef BITS64
|
||
gmix += bitct [ 65535 & ( x | ( x >> 17 ) | ( x >> 34 ) | ( x >> 51 ) ) ] ;
|
||
gup += bitct [ 65535 & ( y | |