oblong/oblong.c
2022-04-12 01:32:27 +01:00

2456 lines
98 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*************************************************************************************
* OBLONG
* a program for finding most parsimonious
* trees with minimum RAM requirements
*
* Copyright April 2013 by Pablo A. Goloboff
*
* This program is free software; you may redistribute it and/or modify its
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* If publishing results based on this program, please cite Goloboff (forthcoming)
*
***********************************************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <ctype.h>
#include <math.h>
#define MAXNAMELEN 100
#define CHUNKSZ 1000
#define MAXMULTITAXA 1000
FILE * inp = NULL , * outp = NULL , * tmpp , * partsfile , * treeinp = NULL , * multinp ;
#ifdef WATCOM
long long * fpos ;
#else
fpos_t * fpos ;
#endif
unsigned long long bestgloballen , lenlag = 0 , rearrangs = 0 ;
unsigned long int ** mat , * wtchg , * movelist , lenlimit ;
unsigned long int gup , gmix , gdn , nitdn ;
long int nc2 = 0 , nc3 = 0 , nc4 = 0 , trashed = 0 , totnc , maxnc , piecesz , totnc4 ;
double * gbrem , * tbrem ;
long int * abrem , fasta = 0 ;
int nt , nc , nodz , nstord , together = 1 , use_spr = 0 , reportreplication = 0 ,
numreplications = 0 , reportperiod = 1 , ratcycles = 0 , ratcyclesdone = 0 ,
dojak = 0 , saveall = 0 , swapcollapse = 0 , recordgoodmoves = 0 , usenexus = 0 ;
int ranseed = 1 , do_branch_swapping = 1 , random_starts = 0 , numtrees = 0 , numhits ,
ratcheting = 0 , changewts = 0 , numsubs , jakprob = 36 , numcolmoves , numcolroots ,
savejusttrees = 0 , felipe = 0 , multifiles = 0 , nummultifiles , recorddn = 0 ,
equal_taxon_sequence = 0 , numparts , quickbremer = 0 , doingbremer = 0 ,
bremertype = 2 , shownegbremers = 0 , verbose = 0 ;
short int * nodlist , * anx , * lef , * rig , * sis , * rlist ,
* tgtlist , * lassis , * nodfork , nodtoskip = 0 , * ladd , * nodmatch ,
* blist , ** memtrees , * unsuprt , * distoroot , * colmoves , * colroots ;
char strsp[ MAXNAMELEN ] , maskin[ 256 ] , bitct [ 65536 ] , reportheader ,
reporttoscreen = 1 ;
char legindic [ ] = { '=' , 0 } ;
unsigned char ** matbuf , * tmpstates , * isabs = NULL ;
double maxram = -1 ;
typedef struct { unsigned long int up , dn ; int root , tgt ; } BremTyp ;
BremTyp * bremdif ;
void transfer_bremer ( int , int , BremTyp * ) ;
unsigned long long downpass_all ( void ) ;
unsigned long long downpass_split ( void ) ;
unsigned long long ( * downpass ) ( void ) ;
unsigned long long wagner_all ( void ) ;
unsigned long long wagner_split ( void ) ;
unsigned long long ( * wagner ) ( void ) ;
unsigned long long bswap_all ( void ) ;
unsigned long long bswap_split ( void ) ;
unsigned long long ( * bswap ) ( void ) ;
#define VERYBIG 10000000000000000
#ifdef BITS64
#define FLDS2 32
#define FLDS3 21
#define FLDS4 16
#define HI4 9838263505978427528UL
#define LO4 8608480567731124087UL
#define HI3 5270498306774157604UL
#define LO3 3952873730080618203UL
#define HI2 12297829382473034410UL
#define LO2 6148914691236517205UL
#else
#define FLDS2 16
#define FLDS3 10
#define FLDS4 8
#define HI4 2290649224
#define LO4 2004318071
#define HI3 613566756
#define LO3 460175067
#define HI2 2863311530
#define LO2 1431655765
#endif
/*********** TRIVIAL BUT USEFUL FUNCTIONS ******************/
void display_help ( int kind )
{
if ( kind == 0 )
fprintf ( stdout ,
"\n"
" OBLONG (Merriam-Webster): \"a rectangle with one of its\n"
" sides much longer than the other\"\n"
"\n"
" by P.A. Goloboff (2013)\n"
"\n"
" For phylogenetic analysis of wide matrices, with low memory\n"
" requirements. Recommended for data sets with many characters\n"
" and few taxa (not advised for hundreds of taxa or more, as\n"
" it is optimized for memory, not speed)\n"
" Uses temporary disk buffers, \"oblong.tmp\" and/or \"oblong.pts\"\n"
" to avoid holding all data in RAM. As much disk space as the size\n"
" of the data set may be required.\n"
" Screen output can be redirected to a file with \"2> file\"\n"
"\n"
" Use \"oblong -h\" for help on program options\n"
"\n"
) ;
else
fprintf ( stdout ,
"\n"
" Input/Output:\n"
"\n"
" -ifilename use \"filename\" as data file.\n"
" -p data files in Phylip/RAxML instead of TNT format.\n"
" -f data files in Fasta instead of TNT format.\n"
" -@list get from file \"list\" the list of data files, must\n"
" be in either Phylip (no \"-p\" switch required), or\n"
" Fasta format (\"-f\" switch required). By default, \n"
" taxon names are matched with identical strings; a \n"
" taxon absent from a data set gets all \"?\".\n"
" -e for file lists, instead of matching taxa based on name,\n"
" assume all data files have the same taxon sequence.\n"
" -ooutfile output results to \"outfile\"; this is a TNT-ready\n"
" data set with no characters (just tree topologies).\n"
"\n"
" Searches:\n"
"\n"
" -w use random trees as starting points, not Wagner trees\n"
" -rN use N starting points.\n"
" -spr use SPR branch swapping instead of the default TBR.\n"
" -b no branch swapping.\n"
" -a save the tree for each replication (even if suboptimal)\n"
" -jN jackknife the data, with N = P(del) (no N = 36). A \n"
" single (possibly collapsed) tree saved per replicate.\n"
" Note: uninformative characters/states are never \n"
" deactivated (they don't influence supports).\n"
" -mX use no more than X MBytes of RAM. If not using \"-j\" or\n"
" \"-x\", results are identical to the default but slower;\n"
" otherwise, randomization sequences may differ.\n"
" -RN use N as random seed.\n"
" -xN use N iterations of ratchet (no N = 10). Note that for\n"
" C informative characters, \"-x\" requires C/2 additional\n"
" bytes of RAM (even under \"-m\").\n"
" -k if jackknifing, do TBR/SPR tree collapsing (recommended).\n"
"\n"
" Other:\n"
"\n"
" -N output Nexus format (i.e. tree-file is TreeView-ready)\n"
" -t save only the trees (no \"xread\").\n"
" -Ttreefile read trees from \"treefile\" and use them as starting\n"
" points (if \"-b\" and no \"-x\", it just calculates\n"
" tree-scores). Treefile must be the output of Oblong\n"
" (i.e. what is in \"outfile\"), with number of trees in\n"
" tree-title.\n"
" -q use a quick approximmation for group supports. Default is\n"
" S = 0.64 * R ^ 1/A (where R = relative bremer support,\n"
" and A = absolute bremer support); this is also indicated\n"
" with \"-q2\". You can also indicate \"-q0\", absolute bremer\n"
" support, or \"-q1\", relative. Supports are obtained via\n"
" branch-swapping, recording score difference for each move.\n"
" You can use this to measure strength of contradiction, starting\n"
" from suboptimal trees (\"-T\") and skipping swapping (\"-b\").\n"
" -v verbose mode: report rearrangements examined as they are done\n"
" (note: repeatedly writing to screen slows down program).\n"
"\n"
" You can also view this info with \"oblong -h | less\" or \"oblong -h | more\"\n"
"\n"
) ;
}
void errout ( int cond , char * msg )
{
if ( !cond ) return ;
if ( msg != NULL )
fprintf ( stderr , "\n%s\n" , msg ) ; //output to the screen, usually error
exit ( 0 ) ; //exit(0)正常运行程序并退出程序exit(1)非正常运行导致退出程序
}
void * myalloc ( unsigned long int size )
{
void * vd = malloc ( size ) ;
errout ( vd == NULL , "Sorry, not enough memory available --try with the \"-m\" switch" ) ;
return vd ;
}
int stringis ( char * a , char * b )
{
while ( * a && tolower ( * a ) == tolower ( * b ) ) { ++a ; ++ b ; } //tolower: conver letters from uppercase to lowercase
if ( * a || * b ) return 0 ; // or || ,return fake
return 1 ; //return true
}
void procargs ( int nargs , char ** arg ) // int nargs =2 for [oblong -f] **arg = *arg [] 字符串数组
{
int i ;
char * cp ;
if ( nargs == 1 ) { // no arguments if nargs == 1
display_help ( 0 ) ; //not show the help ouput just no argument output
exit ( 0 ) ; }
for ( i = 1 ; i < nargs ; ++ i ) {
cp = arg [ i ] ;
if ( * cp != '-' ) { // if arguments not start with -
fprintf ( stderr , "\nWrong argument: %s\n" , arg [ i ] ) ; //show which argument is wrong
exit ( 0 ) ; }
switch ( * ++ cp ) {
case 'a': saveall = 1 ; break ; //save all trees
case 'b': do_branch_swapping = 0 ; break ; //no branch swapping
case 'e': equal_taxon_sequence = 1 ; break ;
case 'f': fasta = 1 ; break ; //fasta format
case 'h': display_help ( 1 ) ; exit ( 0 ) ; //help
case 'i': //use the filename -i${filename}
case '@':
if ( * cp == '@' ) multifiles = 1 ; //there are multi files
++ cp ;
errout ( stringis ( cp , "oblong.tmp" ) , "Cannot use \"oblong.tmp\" as input file; rename it and try again" ) ;
errout ( stringis ( cp , "oblong.pts" ) , "Cannot use \"oblong.pts\" as input file; rename it and try again" ) ;
errout ( inp != NULL , "Can't open TWO input files" ) ;
errout ( ( inp = fopen ( cp , "rb" ) ) == NULL , "Can't open input file" ) ; //input file, if the input list can't be readable in the bianry mode, on windows, binary is different
break ;
case 'j': //j${NumberofJakknife}
++ cp ;
if ( isdigit ( * cp ) ) //to find if the character is the numer
jakprob = atoi ( cp ) ; //atoi: convert character to the int
dojak = changewts = 1 ;
break ;
case 'k': swapcollapse = 1 ; break ; //do TBR/SPR collapsing
case 'm': errout ( ( ( maxram = atof ( ++ cp ) ) <= 0 ) , "You must indicate RAM to use" ) ; together = 0 ; break ; //atof: transfer chracter into double
case 'N': usenexus = 1 ; break ; //output the nexus file
case 'o': //-o${outfile}
++ cp ;
errout ( stringis ( cp , "oblong.tmp" ) , "Cannot use \"oblong.tmp\" as output file; use a different name" ) ;
errout ( stringis ( cp , "oblong.pts" ) , "Cannot use \"oblong.pts\" as output file; use a different name" ) ;
errout ( outp != NULL , "Can't open TWO output files" ) ;
errout ( ( outp = fopen ( cp , "wb" ) ) == NULL , "Can't open output file" ) ;
break ;
case 'p': felipe = 1 ; break ; //file in Phylip style
case 'q': //a quick approximmation
++ cp ;
quickbremer = 1 ;
if ( * cp == '0' ) bremertype = 0 ;
if ( * cp == '1' ) bremertype = 1 ;
if ( * cp == '2' ) bremertype = 2 ;
break ;
case 'r': numreplications = atoi ( ++ cp ) ; break ; //use N starting points. -r${Number}
case 'R': ranseed = atoi ( ++ cp ) ; break ; //random seed
case 's': //spr use spr instead of tbr
if ( !strcmp ( cp , "spr" ) ) { use_spr = 1 ; break ; }
case 't': savejusttrees = 1 ; break ; //output standrad tre format
case 'T': //set start tree -T${treefile}
++ cp ;
errout ( treeinp != NULL , "Can't open TWO tree files" ) ;
errout ( ( treeinp = fopen ( cp , "rb" ) ) == NULL , "Can't open tree file" ) ;
break ;
case 'v': verbose = 1 ; break ; //report rearrangements
case 'w': random_starts = 1 ; break ; //use random trees as start tree
case 'x': //-x${Numer of ratcher}
++ cp ;
if ( isdigit ( * cp ) ) ratcycles = atoi ( cp ) ;//if the char is number, make ratecycles=
else ratcycles = 10 ;
break ;
default: fprintf ( stderr , "\nUnrecognized option: %s\n" , arg [ i ] ) ; exit ( 0 ) ; }}
errout ( ( fasta && felipe ) , "Cannot use both Phylip and Fasta formats" ) ; //fasta = 1 and felipe = 1, it can't be identified as different type
errout ( ( inp == NULL ) , "No data set specified" ) ;
errout ( ( outp == NULL ) , "No output file specified" ) ;
if ( quickbremer && dojak ) {
dojak = changewts = 0 ; //if quick approximmation and jack both are set, the jack won't be done
fprintf ( stderr , "\nNOTE: Cannot do both jackknifing and bremer at the same time!\n" ) ;
if ( bremertype == 2 )
fprintf ( stderr , " deletion prob P=%.2f will be used to rescale supports (1-P)\n" , ( double ) jakprob / 100 ) ;
else
if ( bremertype == 0 ) fprintf ( stderr , " will do bremer supports instead\n" ) ;
else fprintf ( stderr , " will do relative bremer supports instead\n" ) ;
errout ( ( !jakprob && bremertype == 2 ) , " ...but cannot use a deletion prob. P=0 to rescale!" ) ;
fprintf ( stderr , "\n" ) ; }
if ( quickbremer && usenexus ) legindic [ 0 ] = 0 ; // *** left here temp
if ( !do_branch_swapping && !ratcycles ) shownegbremers = 1 ; // *** left here temp
if ( treeinp != NULL && numreplications )
fprintf ( stderr , "\nNote: \"-T\" overrides \"-r\": number of trees in file determines starting points\n" ) ;
if ( !numreplications ) numreplications = 1 ;
if ( maxram > 0 ) {
fprintf ( stderr , "\nUsing %.2f MBytes of RAM for matrix buffers\n" , maxram ) ;
fflush ( stderr ) ; //清空缓存区
maxram *= 1024 * 1024 ; } // maxram = maxram * 1024 * 1024;
reportheader = '\r' ;
if ( !isatty( fileno( stderr ) ) ) {
errout ( verbose , "Verbose allowed only when STDERR goes to screen" ) ;
reporttoscreen = 0 ;
reportheader = '\n' ;
reportperiod = 600 ;
fprintf ( stderr , "\n\nProgram options:\n\n oblong " ) ;
for ( i = 1 ; i < nargs ; ++ i )
fprintf ( stderr , "%s " , arg [ i ] ) ;
fprintf ( stderr , "\n\n" ) ; }
return ;
}
void init_bit_counters ( void )
{
int a , b , c ; // bitct[65536]
for ( a = 0 ; a < 65536 ; ++ a ) { // a 0 -> 65535
for ( b = 1 , c = 0 ; b <= a ; b <<= 1 ) // <= 是 < 或 =;
if ( ( b & a ) ) ++ c ; // c++;
bitct [ a ] = c ; }
}
void maskinit ( void )
{
int a ;
for ( a = 0 ; a < 256 ; ++ a ) maskin [ a ] = 0 ;
maskin [ 'a' ] = maskin [ 'A' ] = maskin [ '0' ] = 1 ;
maskin [ 'g' ] = maskin [ 'G' ] = maskin [ '1' ] = 2 ;
maskin [ 'c' ] = maskin [ 'C' ] = maskin [ '2' ] = 4 ;
maskin [ 't' ] = maskin [ 'T' ] = maskin [ '3' ] = 8 ;
maskin [ 'R' ] = maskin [ 'r' ] = maskin [ 'a' ] | maskin [ 'g' ] ;
maskin [ 'Y' ] = maskin [ 'y' ] = maskin [ 't' ] | maskin [ 'c' ] ;
maskin [ 'W' ] = maskin [ 'w' ] = maskin [ 'a' ] | maskin [ 't' ] ;
maskin [ 'S' ] = maskin [ 's' ] = maskin [ 'c' ] | maskin [ 'g' ] ;
maskin [ 'M' ] = maskin [ 'm' ] = maskin [ 'a' ] | maskin [ 'c' ] ;
maskin [ 'K' ] = maskin [ 'k' ] = maskin [ 'g' ] | maskin [ 't' ] ;
maskin [ 'B' ] = maskin [ 'b' ] = maskin [ 'c' ] | maskin [ 'g' ] | maskin [ 't' ] ;
maskin [ 'D' ] = maskin [ 'd' ] = maskin [ 'a' ] | maskin [ 'g' ] | maskin [ 't' ] ;
maskin [ 'H' ] = maskin [ 'h' ] = maskin [ 'a' ] | maskin [ 'c' ] | maskin [ 't' ] ;
maskin [ 'V' ] = maskin [ 'v' ] = maskin [ 'a' ] | maskin [ 'c' ] | maskin [ 'g' ] ;
maskin [ 'N' ] = maskin [ 'n' ] = maskin [ '-' ] = maskin [ '?' ] = 1 | 2 | 4 | 8 ;
maskin [ '.' ] = 127 ;
return ;
}
/********* CHECKING AND SORTING INPUT *************************/
void prepare ( int off , unsigned char intwo )
{
int a , b , c , totbytes ;
char msk , shif ;
unsigned short int * pt ;
unsigned long int * lp , written ;
void * vd ;
int trsf ;
char mybits = bitct [ intwo ] ;
char convert[ 16 ] , goesto[ 4 ] ;
if ( mybits == 2 ) { msk = 3 ; ++ nc2 ; }
if ( mybits == 3 ) { msk = 7 ; ++ nc3 ; }
if ( mybits == 4 ) { msk = 15 ; ++ nc4 ; }
for ( a = b = 1 , c = 0 ; a < 16 ; a <<= 1 , ++ c )
if ( ( a & intwo ) ) {
goesto [ c ] = b ;
b <<= 1 ; }
else goesto [ c ] = 0 ;
convert [ 0 ] = msk ;
for ( a = 1 ; a < 16 ; ++ a ) {
convert [ a ] = 0 ;
for ( b = 1 , c = 0 ; b <= a ; b <<= 1 , ++ c )
if ( ( a & b ) ) convert [ a ] |= goesto [ c ] ;
if ( !convert [ a ] ) convert [ a ] = msk ; }
for ( a = 0 ; a < nt ; ++ a )
matbuf [ a ] [ off ] = convert [ matbuf [ a ] [ off ] ] ;
fprintf ( tmpp , "%c" , mybits ) ;
pt = ( unsigned short int * ) tmpstates ;
for ( a = ( nt + 1 ) / 2 ; a -- > 0 ; )
pt [ a ] = 0 ;
vd = ( void * ) tmpstates ;
for ( a = shif = 0 ; a < nt ; ++ a ) {
lp = ( unsigned long int * ) pt ;
trsf = matbuf [ a ] [ off ] ;
trsf <<= shif ;
* lp |= trsf ;
shif += mybits ;
if ( shif >= 16 ) {
++ pt ;
shif -= 16 ; }}
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
written = fwrite ( vd , 1 , totbytes + 1 , tmpp ) ;
errout ( written != totbytes + 1 , "Sorry, cannot write to temporary file (disk full?)" ) ;
}
int domin ( int a )
{
int best = 10 ;
int b , c , misinsome ;
for ( c = 16 ; c -- ; ) {
for ( b = misinsome = 0; b < nt && !misinsome ; ++ b )
if ( ! ( matbuf [ b ] [ a ] & c ) ) misinsome = 1 ;
if ( !misinsome )
if ( best > bitct [ c ] ) best = bitct [ c ] ; }
return best - 1 ;
}
int dotrueextra ( long int a , unsigned long int intwo )
{
int st , ret = 0 , b ;
for ( b = 0 ; b < nt ; ++ b ) {
st = matbuf [ b ] [ a ] ;
if ( st == 15 ) continue ;
if ( ( st & ~intwo ) && !( st & intwo ) ) ret ++ ; }
return ret ;
}
void process_chunk ( int start )
{
long int a , b , d ;
unsigned char c ;
unsigned char * cp ;
unsigned char refval ;
unsigned char intwo , inone ;
nstord = CHUNKSZ ;
if ( start + nstord > nc ) nstord = nc - start ;
for ( d = 0 ; d < nt ; ++ d )
for ( b = nstord ; b -- ; )
matbuf [ d ] [ b ] = 0 ;
for ( d = 0 ; d < nt ; ++ d ) {
cp = matbuf [ d ] ;
if ( isabs != NULL )
if ( isabs [ d ] ) { // This can only happen when reading multiple files and taxon is absent from this matrix...
for ( b = 0 ; b < nstord ; ++ b )
* cp ++ = 15 ;
continue ; }
#ifdef WATCOM
_fseeki64 ( inp , fpos [ d ] , SEEK_SET ) ;
#else
fsetpos ( inp , &fpos [ d ] ) ;
#endif
for ( b = 0 ; b < nstord ; ++ b ) {
fscanf ( inp , " %c" , &c ) ;
if ( c == '[' ) {
refval = 0 ;
while ( 1 ) {
fscanf ( inp , " %c" , &c ) ;
if ( c == ']' ) break ;
refval |= maskin [ c ] ; }}
else
if ( c == '.' ) {
errout ( !d , "Cannot use \".\" for the first taxon" ) ;
refval = matbuf [ d ] [ b ] ; }
else refval = maskin [ c ] ;
* cp ++ = refval ; }
#ifdef WATCOM
fpos [ d ] = _ftelli64 ( inp ) ;
#else
fgetpos ( inp , &fpos [ d ] ) ;
#endif
}
for ( a = 0 ; a < nstord ; ++ a ) {
inone = intwo = 0 ;
for ( b = 0 ; b < nt ; ++ b ) {
if ( matbuf [ b ] [ a ] == 15 ) continue ;
intwo |= inone & matbuf [ b ] [ a ] ;
inone |= matbuf [ b ] [ a ] ; }
if ( bitct [ intwo ] > 1 ) {
if ( inone != intwo )
if ( bitct [ inone ] == 1 )
lenlag += bitct [ ( inone & ~intwo ) ] ;
else lenlag += dotrueextra ( a , intwo ) ;
prepare ( a , intwo ) ; }
else {
lenlag += domin ( a ) ;
++ trashed ; }}
}
void popmat ( int part ) // populate matrix, for split optimization
{
unsigned long int a , b , c , shif , mybits , totbytes ;
unsigned long int inmsk , msk , ent , inshif , val , numelems ;
unsigned long int * lp ;
unsigned short int * pt ;
unsigned long int sz = piecesz * FLDS4 , numnc = 0 ;
void * vd ;
for ( b = 0 ; b < piecesz ; ++ b )
for ( a = 0 ; a < nt ; ++ a )
mat [ a ] [ b ] = -1 ;
vd = ( void * ) tmpstates ;
for ( a = 0 , c = part * FLDS4 ; a < sz && c < totnc ; ++ a , ++ c ) {
mybits = getc ( tmpp ) ;
if ( mybits == 2 ) msk = 3 ;
if ( mybits == 3 ) msk = 7 ;
if ( mybits == 4 ) msk = 15 ;
ent = numnc / FLDS4 ;
inshif = 4 * ( numnc % FLDS4 ) ;
inmsk = ( unsigned long int ) 15 << inshif ;
++ numnc ;
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
numelems = fread ( vd , 1 , totbytes + 1 , tmpp ) ;
errout ( numelems != totbytes + 1 , "OOOPS, unexpected error loading matrix!.\nSorry, abandoning run..." ) ;
pt = ( unsigned short int * ) tmpstates ;
for ( b = shif = 0 ; b < nt ; ++ b ) {
lp = ( unsigned long int * ) pt ;
val = ( * lp >> shif ) & msk ;
mat [ b ] [ ent ] &= ~inmsk ;
mat [ b ] [ ent ] |= val << inshif ;
shif += mybits ;
if ( shif >= 16 ) {
++ pt ;
shif -= 16 ; }}}
}
void create_parts_file ( void )
{
unsigned long int atnc ;
unsigned long int written ;
void * vd ;
unsigned long int * at ;
errout ( ( partsfile = fopen ( "oblong.pts" , "wb" ) ) == NULL , "Cannot open temporary file \"oblong.pts\"" ) ;
for ( atnc = 0 ; atnc < nc4 ; atnc += piecesz ) {
popmat ( atnc ) ;
at = mat [ 0 ] ;
vd = ( void * ) at ;
written = fwrite ( vd , 1 , sizeof ( unsigned long int ) * nt * piecesz , partsfile ) ;
errout ( written != sizeof ( unsigned long int ) * nt * piecesz , "Sorry, cannot write to temporary file (disk full?)" ) ; }
fputc ( 32 , partsfile ) ;
fclose ( partsfile ) ;
partsfile = fopen ( "oblong.pts" , "rb" ) ;
return ;
}
void tmpparse ( void )
{
int atnc2 = 0 , atnc3 = 0 , atnc4 = 0 , totbytes ;
unsigned char mybits ;
int a , b , shif , inshif ;
unsigned short int * pt ;
unsigned long int * lp , inmsk , ent ;
signed long int sizis , toalloc , subtract ;
unsigned long int j , * segposit , msk , val ;
double prop , prevprop ;
totnc = nc2 + nc3 + nc4 ;
nc2 = ( nc2 + ( FLDS2 - 1 ) ) / FLDS2 ;
nc3 = ( nc3 + ( FLDS3 - 1 ) ) / FLDS3 ;
nc4 = ( nc4 + ( FLDS4 - 1 ) ) / FLDS4 ;
nc3 += nc2 ;
nc4 += nc3 ;
nodz = 2 * nt - 1 ;
nodlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
anx = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
lef = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
rig = ( short int * ) myalloc ( nt * sizeof ( short int ) ) ;
rig -= nt ;
sis = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
rlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
tgtlist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
lassis = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
nodfork = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
ladd = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
blist = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
mat = ( unsigned long int ** ) myalloc ( nodz * sizeof ( long int * ) ) ;
if ( swapcollapse || quickbremer ) {
unsuprt = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
distoroot = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
colmoves = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
colroots = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
if ( quickbremer ) {
nodmatch = ( short int * ) myalloc ( nt * sizeof ( short int ) ) ;
nodmatch -= nt ;
abrem = ( long int * ) myalloc ( nt * sizeof ( long int ) ) ;
abrem -= nt ;
gbrem = ( double * ) myalloc ( nt * sizeof ( double ) ) ;
gbrem -= nt ;
tbrem = ( double * ) myalloc ( nt * sizeof ( double ) ) ;
tbrem -= nt ;
bremdif = ( BremTyp * ) myalloc ( nt * nt * sizeof ( BremTyp ) ) ; }}
b = numreplications ;
if ( ratcycles ) b = ( numreplications * ratcycles ) + 1 ;
memtrees = ( short int ** ) myalloc ( b * sizeof ( short int * ) ) ;
for ( a = 0 ; a < b ; ++ a )
memtrees [ a ] = ( short int * ) myalloc ( nodz * sizeof ( short int ) ) ;
toalloc = nc4 ;
downpass = downpass_all ;
bswap = bswap_all ;
wagner = wagner_all ;
if ( !together ) {
sizis = ( signed long int ) maxram ;
subtract = ( 12 * nodz * sizeof ( short int ) ) + // tree-buffers...
( nt * nt * sizeof ( long int ) ) + // buffer to store moves...
( b * sizeof ( short int ** ) ) + ( b * nodz * sizeof ( short int ) ) ; // trees
subtract += nodz * sizeof ( long int * ) ;
if ( subtract >= sizis ) {
fprintf ( stderr , "\nSorry: %.0f Bytes are insufficient to run this data set. Increase \"-m\"\n" , maxram ) ;
exit ( 0 ) ; }
toalloc = ( sizis - subtract ) / ( nodz * sizeof ( long int ) ) ;
j = ( totnc + ( FLDS4 - 1 ) ) / FLDS4 ; // needed total
numparts = 1 + ( j / toalloc ) ;
toalloc = j / numparts ;
while ( toalloc * numparts * FLDS4 < totnc )
toalloc ++ ;
if ( toalloc < 1 ) {
fprintf ( stderr , "\nSorry: %.0f Bytes are insufficient to run this data set. Increase -m\n" , maxram ) ;
exit ( 0 ) ; }
if ( toalloc * FLDS4 < totnc ) {
piecesz = toalloc ;
nc2 = nc3 = 0 ;
nc4 = totnc4 = numparts * piecesz ;
fprintf ( stderr , "%cWill process the chars in %i chunks of %li\n" , reportheader , numparts , toalloc * FLDS4 ) ; fflush ( stderr ) ;
movelist = ( unsigned long int * ) myalloc ( nt * nt * sizeof ( long int ) ) ;
downpass = downpass_split ;
wagner = wagner_split ;
bswap = bswap_split ; }
else {
fprintf ( stderr , "%cLucky you! All data can be held in RAM! No disk swaps needed...\n" , reportheader ) ; fflush ( stderr ) ;
toalloc = nc4 ;
together = 1 ; }}
if ( ratcycles || ( ( dojak || quickbremer ) && together ) )
wtchg = ( unsigned long int * ) myalloc ( nc4 * sizeof ( long int ) ) ;
else if ( ( dojak || quickbremer ) && !together )
wtchg = ( unsigned long int * ) myalloc ( piecesz * sizeof ( long int ) ) ;
tmpp = fopen ( "oblong.tmp" , "rb" ) ;
if ( !together ) {
/** Alloc matrix for terms in a contiguous memory segment, for faster loading *******/
segposit = ( unsigned long int * ) myalloc ( nt * toalloc * sizeof ( long int ) ) ;
for ( a = 0 ; a < nt ; ++ a ) {
mat [ a ] = segposit ;
segposit += toalloc ; }
for ( a = nt ; a < nodz ; ++ a )
mat [ a ] = ( unsigned long int * ) myalloc ( toalloc * sizeof ( long int ) ) ;
create_parts_file( ) ;
fclose ( tmpp ) ;
remove ( "oblong.tmp" ) ;
return ; }
/** Just alloc matrix as normally done ****/
for ( a = 0 ; a < nodz ; ++ a ) {
mat [ a ] = ( unsigned long int * ) myalloc ( nc4 * sizeof ( long int ) ) ;
if ( a < nt )
for ( b = 0 ; b < nc4 ; ++ b ) mat [ a ] [ b ] = -1 ; }
prevprop = 0 ;
for ( a = 0 ; a < totnc ; ++ a ) {
if ( reporttoscreen && ( a % 327 ) == 0 ) {
prop = ( double ) a / totnc ;
if ( prop - prevprop > 0.015 ) {
fprintf ( stderr , "\rReading matrix... %.1f%%" , 50 + ( prop * 100 / 2 ) ) ;
prevprop = prop ; }}
mybits = getc ( tmpp ) ;
if ( mybits == 2 ) {
msk = 3 ; ent = atnc2 / FLDS2 ; inshif = 2 * ( atnc2 % FLDS2 ) ;
inmsk = ( unsigned long int ) 3 << inshif ; ++ atnc2 ; }
if ( mybits == 3 ) {
msk = 7 ; ent = nc2 + ( atnc3 / FLDS3 ) ; inshif = 3 * ( atnc3 % FLDS3 ) ;
inmsk = ( unsigned long int ) 7 << inshif ; ++ atnc3 ; }
if ( mybits == 4 ) {
msk = 15 ; ent = nc3 + ( atnc4 / FLDS4 ) ; inshif = 4 * ( atnc4 % FLDS4 ) ;
inmsk = ( unsigned long int ) 15 << inshif ; ++ atnc4 ; }
totbytes = ( ( mybits * ( nt + 1 ) ) - 1 ) / 8 ;
for ( b = 0 ; b <= totbytes ; ++ b )
tmpstates [ b ] = getc ( tmpp ) ;
pt = ( unsigned short int * ) tmpstates ;
for ( b = shif = 0 ; b < nt ; ++ b ) {
lp = ( unsigned long int * ) pt ;
val = ( * lp >> shif ) & msk ;
mat [ b ] [ ent ] &= ~inmsk ;
mat [ b ] [ ent ] |= val << inshif ;
shif += mybits ;
if ( shif >= 16 ) {
++ pt ;
shif -= 16 ; }}}
fclose ( tmpp ) ;
remove ( "oblong.tmp" ) ;
}
void get_fasta_dims ( int * mync , int * mynt )
{
int i ;
char c ;
int curnt = 0 , curnc = 0 , atnc ;
char * nampt = strsp ;
while ( !feof ( inp ) && i != '>' ) i = getc ( inp ) ; //没读到 EOF 并且不是 > 继续,直到读到 >
if ( i != '>' ) { //读到最后没有 > 报错
fprintf ( stderr , "\nError reading fasta file --expected taxon name\n" ) ;
errout ( 1 , NULL ) ; }
while ( !feof ( inp ) ) { //没读到 EOF 不停
fscanf ( inp , " %s" , nampt ) ; //从 inp 读到 nampt
i = 0 ;
++ curnt ;
if ( curnt == 1) //第一次
while ( 1 ) {
fscanf ( inp , " %c" , &c ) ;
if ( c == '>' || feof ( inp ) ) break ; //读到 > 或者 EOF
++ curnc ; } // 读到 > 或者 EOF curnc ++
else { //第二次及之后
atnc = 0 ;
while ( 1 ) {
fscanf ( inp , " %c" , &c ) ;
if ( c == '>' || feof ( inp ) ) break ;
++ atnc ; }
if ( atnc != curnc ) { //到最后比较
fprintf ( stderr , "\nError reading fasta file --it seems not to be aligned\n" ) ;
errout ( 1 , NULL ) ; }}}
* mync = curnc ;
* mynt = curnt ;
rewind ( inp ) ;
}
void read_multifiles ( void )
{
char * nampt = strsp ;
unsigned char state ;
long int a , b , i , curnt = 0 , curnc = 0 , matched , prvnt ;
int atfile = 0 , numfiles = 0 ;
char ** taxnam ;
double prop , secs , prevprop ;
clock_t beg , end ; // 定义起始时间
beg = clock () ; //计算时间
errout ( ( tmpp = fopen ( "oblong.tmp" , "wb" ) ) == NULL , "Cannot open temporary file, \"oblong.tmp\"" ) ;
// Find out total NT and total NC
multinp = inp ;
maskinit ( ) ; //maskinit 函数
taxnam = ( char ** ) myalloc ( MAXMULTITAXA * sizeof ( char * ) ) ; //default MAXMULTITAXA 1000 L32
fscanf ( inp , " %s" , nampt ) ; //FILE * inp = NULL 读入文件到 nampt
while ( !feof ( multinp ) ) { //feof 如果 EOF 返回非 0没有EOF 返回 0
++ numfiles ; //numfiles 初始为 0
if ( ( inp = fopen ( nampt , "rb" ) ) == NULL ) { // r readbale b binary 二进制可以防止 Windows下文件问题
fprintf ( stderr , "\nCannot open file %s (from list of multiple files)\n" , nampt ) ;
exit ( 0 ) ; }
if ( reporttoscreen ) // L60: default 1 L309:0
fprintf ( stderr , "%cChecking input file %3i %-40s" , reportheader , numfiles , nampt ) ;
if ( fasta ) get_fasta_dims ( &nc , &nt ) ; //get nc and nt
else {
i = fscanf ( inp , " %i %i" , &nt , &nc ) ;
if ( i < 2 ) {
fprintf ( stderr , "\nError reading number of taxa/characters in data set %s\nMake sure it is in proper Phylip format\n" , bitct ) ;
exit ( 0 ) ; }}
if ( numfiles == 1 ) prvnt = nt ;
errout ( ( equal_taxon_sequence && nt != prvnt ) , "Under \"-e\" all data files must have the same number of taxa" ) ;
curnc += nc ;
for ( a = 0 ; a < nt ; ++ a ) {
fscanf ( inp , " %s" , nampt ) ;
if ( fasta ) ++ nampt ;
if ( equal_taxon_sequence )
if ( numfiles == 1 ) matched = -1 ;
else matched = a ;
else
for ( i = 0 , matched = -1 ; i < curnt && matched < 0 ; ++ i )
if ( !strcmp ( nampt , taxnam[ i ] ) ) matched = i ;
if ( matched < 0 ) {
errout( curnt >= MAXMULTITAXA , "Sorry, cannot deal with so many distinct taxon names (change MAXMULTITAXA and recompile)" ) ;
taxnam [ curnt ] = ( char * ) myalloc ( MAXNAMELEN * sizeof ( char ) ) ; //MAXNAMELEN = 1000 default
strcpy ( taxnam [ curnt ++ ] , nampt ) ; }
if ( fasta ) -- nampt ;
for ( b = 0 ; b < nc ; ++ b ) {
fscanf ( inp , " %c" , &state ) ;
if ( !maskin [ state ] ) {
fprintf ( stderr , "\nUnrecognized symbol: %c\n" , state ) ;
exit ( 0 ) ; }}}
fclose ( inp ) ;
fscanf ( multinp , " %s" , nampt ) ; }
// Now do some initializations and process
rewind ( multinp ) ;
#ifdef WATCOM
fpos = ( long long * ) myalloc ( curnt * sizeof ( long long ) ) ;
#else
fpos = ( fpos_t * ) myalloc ( curnt * sizeof ( fpos_t ) ) ;
#endif
matbuf = ( unsigned char ** ) myalloc ( curnt * sizeof ( char * ) ) ;
isabs = ( unsigned char * ) myalloc ( curnt * sizeof ( char ) ) ;
for ( a = 0 ; a < curnt ; ++ a )
if ( curnc < CHUNKSZ )
matbuf [ a ] = ( char * ) myalloc ( curnc * sizeof ( char ) ) ;
else matbuf [ a ] = ( char * ) myalloc ( CHUNKSZ * sizeof ( char ) ) ;
a = curnt ;
if ( ( a & 1 ) ) ++ a ;
tmpstates = ( unsigned char * ) myalloc ( a * sizeof ( char ) ) ;
fscanf ( multinp , " %s" , nampt ) ;
while ( !feof ( multinp ) ) {
if ( ( inp = fopen ( nampt , "rb" ) ) == NULL ) { // it was just opened ok, but re-check just in case...
fprintf ( stderr , "\nCannot open file %s (from list of multiple files)\n" , nampt ) ;
exit ( 0 ) ; }
if ( !fasta ) fscanf ( inp , " %i %i" , &nt , &nc ) ;
for ( a = 0 ; a < curnt ; ++ a ) isabs [ a ] = 1 ;
for ( a = 0 ; a < nt ; ++ a ) {
fscanf ( inp , " %s" , nampt ) ;
if ( fasta ) ++ nampt ;
if ( equal_taxon_sequence )
i = a ;
else
for ( i = 0 ; i < curnt ; ++ i )
if ( !strcmp ( nampt , taxnam[ i ] ) ) break ; // there has to be a match...
if ( fasta ) -- nampt ;
#ifdef WATCOM
fpos [ i ] = _ftelli64 ( inp ) ;
#else
fgetpos ( inp , &fpos [ i ] ) ;
#endif
isabs [ i ] = 0 ;
for ( b = 0 ; b < nc ; ++ b ) fscanf ( inp , " %c" , &state ) ; }
nt = curnt ;
atfile ++ ;
prevprop = 0 ;
for ( a = 0 ; a < nc ; a += CHUNKSZ ) {
if ( reporttoscreen ) {
prop = ( double ) a / nc ;
if ( ( prop - prevprop ) > 0.009 ) {
fprintf ( stderr , "%cReading matrix %i of %i... %.1f%% " , reportheader , atfile , numfiles , prop * 100 ) ;
prevprop = prop ; }}
process_chunk ( a ) ; }
fclose ( inp ) ;
fscanf ( multinp , " %s" , nampt ) ; }
free ( matbuf ) ;
fclose ( tmpp ) ;
// Assign bufers and (if appropriate), read data in, from tmp file...
nt = curnt ;
tmpparse ( ) ;
fprintf ( stderr , "%cRead all data (%i files), %i taxa, %li chars. (%li informative) \n" , reportheader , numfiles , nt , curnc , curnc - trashed ) ;
end = clock () ;
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
fprintf ( stderr , "Time to read data: %.2f secs.\n\n" , secs ) ;
// Begin writing to output file...
if ( !usenexus ) {
if ( !savejusttrees )
fprintf ( outp , "xread\n'Results from oblong'\n0 %i\n" , nt ) ; }
else fprintf ( outp , "#NEXUS\nbegin trees;\nTranslate\n" ) ;
for ( a = 0 ; a < nt ; ++ a )
if ( !usenexus ) {
if ( !savejusttrees )
fprintf ( outp , "%s " , taxnam [ a ] ) ; }
else {
fprintf ( outp , " %li %s" , a + 1 , taxnam [ a ] ) ;
if ( a < nt - 1 ) fprintf ( outp , "," ) ;
fprintf ( outp , "\n" ) ; }
if ( !usenexus ) {
if ( !savejusttrees )
fprintf ( outp , ";\ncollapse none;\n" ) ; }
else fprintf ( outp , " ;\n" ) ;
return ;
}
void read_input ( void )
{
char * nampt = strsp ;
unsigned char state ;
int a , b ;
char c ;
double prop , secs , prevprop ;
clock_t beg , end ;
strcpy ( nampt , " " ) ; // nampt = " "
if ( treeinp != NULL ) { // int is NULL (L34), but can specific the tree file (L273)
while ( strcmp ( nampt , "tread" ) && !feof ( treeinp ) ) //feof 如果 EOF 返回非 0没有EOF 返回 0 strcmp 0 相同非0 则执行,即 非 tread 并且 没有 EOF 且执行
fscanf ( treeinp , " %7s" , nampt ) ; // 输入长度为 7 的 string
errout ( feof ( treeinp ) , "String \"tread\" not found in tree file" ) ;
fscanf ( treeinp , " %c" , &c ) ;
errout ( c != 39 , "Number of trees should be indicated in the title of tree-file" ) ;
errout ( !fscanf ( treeinp , " %i" , &numreplications ) , "Number of trees should be indicated in the title of tree-file" ) ;
while ( getc ( treeinp ) != 39 ) ; }
if ( multifiles ) {
read_multifiles ( ) ;
return ; }
beg = clock () ;
if ( !felipe && !fasta ) {
while ( strcmp ( nampt , "xread" ) ) {
fscanf ( inp , " %7s" , nampt ) ;
if ( feof ( inp ) ) break ; }
errout ( feof ( inp ) , "String \"xread\" not found in data set\n\nMaybe it's in Phylip (\"-p\") or Fasta (\"-f\") format?" ) ;
fscanf ( inp , " %c" , &c ) ;
if ( c == 39 )
while ( getc ( inp ) != 39 && !feof ( inp ) ) ;
else ungetc ( c , inp ) ; }
if ( fasta ) get_fasta_dims ( &nc , &nt ) ;
else fscanf ( inp , " %i %i" , &nc , &nt ) ;
if ( felipe ) {
a = nc ;
nc = nt ;
nt = a ; }
errout ( !nc , "Illegal number of characters" ) ;
errout ( nt < 4 , "Illegal number of taxa" ) ;
#ifdef WATCOM
fpos = ( long long * ) myalloc ( nt * sizeof ( long long ) ) ;
#else
fpos = ( fpos_t * ) myalloc ( nt * sizeof ( fpos_t ) ) ;
#endif
matbuf = ( unsigned char ** ) myalloc ( nt * sizeof ( char * ) ) ;
a = nt ;
if ( ( a & 1 ) ) ++ a ;
tmpstates = ( unsigned char * ) myalloc ( a * sizeof ( char ) ) ;
maskinit ( ) ;
// Get file positions for first character for each taxon...
if ( !usenexus ) {
if ( !savejusttrees )
fprintf ( outp , "xread\n'Results from oblong'\n0 %i\n" , nt ) ; }
else fprintf ( outp , "#NEXUS\nbegin trees;\nTranslate\n" ) ;
for ( a = 0 ; a < nt ; ++ a ) {
fscanf ( inp , " %s" , nampt ) ;
if ( fasta ) ++ nampt ;
if ( !usenexus ) {
if ( !savejusttrees )
fprintf ( outp , "%s " , nampt ) ; }
else {
fprintf ( outp , " %i %s" , a + 1 , nampt ) ;
if ( a < nt - 1 ) fprintf ( outp , "," ) ;
fprintf ( outp , "\n" ) ; }
if ( fasta ) -- nampt ;
#ifdef WATCOM
fpos [ a ] = _ftelli64 ( inp ) ;
#else
fgetpos ( inp , &fpos [ a ] ) ;
#endif
if ( nc < CHUNKSZ )
matbuf [ a ] = ( char * ) myalloc ( nc * sizeof ( char ) ) ;
else matbuf [ a ] = ( char * ) myalloc ( CHUNKSZ * sizeof ( char ) ) ;
for ( b = 0 ; b < nc ; ++ b ) {
fscanf ( inp , " %c" , &state ) ;
if ( state == '[' ) {
while ( 1 ) {
fscanf ( inp , " %c" , &state ) ;
if ( state == ']' ) break ;
if ( !maskin [ state ] ) {
fprintf ( stderr , "\nUnrecognized symbol in matrix: %c\n" , state ) ;
exit ( 0 ) ; }}}
else
if ( !maskin [ state ] ) {
fprintf ( stderr , "\nUnrecognized symbol in matrix: %c\n" , state ) ;
exit ( 0 ) ; }}}
if ( !usenexus ) {
if ( !savejusttrees )
fprintf ( outp , ";\ncollapse none;\n" ) ; }
else fprintf ( outp , " ;\n" ) ;
// Read pieces, and output temporary file as you go...
errout ( ( tmpp = fopen ( "oblong.tmp" , "wb" ) ) == NULL , "Cannot open temporary file, \"oblong.tmp\"" ) ;
prevprop = 0 ;
for ( a = 0 ; a < nc ; a += CHUNKSZ ) {
if ( reporttoscreen ) {
prop = ( double ) a / nc ;
if ( together ) prop /= 2 ;
if ( ( prop - prevprop ) > 0.009 ) {
fprintf ( stderr , "\rReading matrix... %.1f%%" , prop * 100 ) ;
prevprop = prop ; }}
process_chunk ( a ) ; }
free ( matbuf ) ;
fclose ( tmpp ) ;
// Assign bufers and (if appropriate), read data in, from tmp file...
tmpparse ( ) ;
fprintf ( stderr , "%cReading matrix, %i taxa, %i chars. (%li informative) \n" , reportheader , nt , nc , nc - trashed ) ;
fclose ( inp ) ;
end = clock () ;
secs = ( double ) ( end - beg ) / CLOCKS_PER_SEC ;
fprintf ( stderr , "Time to read data: %.2f secs.\n\n" , secs ) ;
return ;
}
/************ GENERAL TREE-HANDLING STUFF ************/
void reset_nodz ( short int * anpt )
{
short int * bu = rlist ;
short int * tbu = tgtlist ;
int a , b , atno , lg ;
for ( a = nodz ; a -- > nt ; ) tbu [ a ] = -1;
for ( a = nt ; a -- ; ) tbu [ a ] = a ;
atno = nt ;
lg = anpt [ nt ] ;
tbu [ nt ] = nt ;
tbu [ lg ] = lg ;
for ( a = 0 ; a < nt ; ++a) {
b = anpt [ a ] ;
while ( tbu [ b ] < 0 ) {
tbu [ b ] = ++ atno ;
b = anpt [ b ] ; }}
for ( a = lg ; a-- ; )
bu [ tbu [ a ] ] = tbu [ anpt [ a ] ] ;
for ( a = lg ; a-- ; ) anpt [ a ] = bu [ a ] ;
anpt [ nt ] = ++ atno ;
return ;
}
int equal_trees ( short int * a , short int * b )
{
int c ;
for ( c = 0 ; c < nodz ; ++ c )
if ( * a ++ != * b ++ ) return 0 ;
return 1 ;
}
void storetree ( void )
{
int b , isdup = 0 ;
for ( b = 0 ; b < nodz ; ++ b ) memtrees [ numtrees ] [ b ] = anx [ b ] ;
reset_nodz ( memtrees [ numtrees ] ) ;
if ( !dojak && !saveall )
for ( b = 0 ; b < numtrees && !isdup ; ++ b )
if ( equal_trees ( memtrees [ b ] , memtrees [ numtrees ] ) ) isdup = 1 ;
if ( !isdup )
++ numtrees ;
}
void randomlist ( int num , short int * dali , int firstis )
{ short int * bakrand ;
int a , todo , i , n ;
if ( ranseed ) srand ( ranseed ) ;
for ( a = 0 ; a < num ; ++ a ) dali [ a ] = a ;
if ( firstis == 0 ) {
todo = num - 1 ;
++ dali ; // skip 0
bakrand = dali + todo ;
for ( a = 1 ; a < num ; ++ a ) {
i = rand () % todo -- ;
n = * -- bakrand ;
* bakrand = dali [ i ] ;
dali [ i ] = n ; }}
else {
todo = num ;
bakrand = dali + todo ;
for ( a = 0 ; a < num ; ++ a ) {
i = rand () % todo -- ;
n = * -- bakrand ;
* bakrand = dali [ i ] ;
dali [ i ] = n ; }}
}
void nodzert ( int what, int where )
{
int a, b, c ;
a = anx [ where ] ;
c = sis [ where ] ;
b = anx [ what ] = anx[ where ] = anx[nt]++;
anx [ b ] = a;
sis [ what ] = where;
sis [ where ] = what;
sis [ c ] = b;
sis [ b ] = c;
lef [ a ] = c;
rig [ a ] = b;
lef [ b ] = where ;
rig [ b ] = what ;
}
int savebremervals = 0 ;
void savant ( int which )
{
int a , didsome = 0 ;
if ( savebremervals && which != nt ) {
if ( gbrem [ which ] > 0 || shownegbremers ) fprintf ( outp , "(" ) ; }
else
fprintf ( outp , "(" ) ;
for ( a = 0 ; a < anx [ nt ] ; ++ a )
if ( anx [ a ] == which ) {
if ( usenexus && didsome )
fprintf ( outp , "," ) ;
didsome = 1 ;
if ( a < nt )
if ( usenexus ) fprintf ( outp , "%i " , a + 1 ) ;
else fprintf ( outp , "%i " , a ) ;
else savant ( a ) ; }
if ( savebremervals && which != nt && anx [ which ] != nt ) {
if ( gbrem [ which ] > 0 )
if ( bremertype == 0 ) fprintf ( outp , ")%s%.0f " , legindic , gbrem [ which ] ) ;
else fprintf ( outp , ")%s%.0f " , legindic , gbrem [ which ] * 100 ) ;
else if ( shownegbremers )
if ( bremertype == 0 ) fprintf ( outp , ")%s%.0f " , legindic , gbrem [ which ] ) ;
else fprintf ( outp , ")%s(%.0f) " , legindic , gbrem [ which ] * 100 ) ; }
else
fprintf ( outp , ")" ) ;
}
void savetrees ( void )
{
short int * anxwas = anx ;
int i ;
if ( quickbremer )
fprintf ( stderr , "\nSaving supports and %i trees to output file...\n\n" , numtrees ) ;
else
fprintf ( stderr , "\nSaving %i trees to output file...\n\n" , numtrees ) ;
if ( !usenexus )
fprintf ( outp , "tread '%i Oblong trees'\n" , numtrees ) ;
for ( i = 0 ; i < numtrees ; ++ i ) {
if ( quickbremer && !i ) savebremervals = 1 ;
if ( !usenexus ) {
if ( i ) fprintf ( outp , "*\n" ) ; }
else fprintf ( outp , "tree OBLONG_%i = [&U] " , i + 1 ) ;
anx = memtrees [ i ] ;
savant ( nt ) ;
if ( usenexus )
fprintf ( outp , ";\n" ) ;
if ( quickbremer && !i ) {
savebremervals = quickbremer = 0 ;
if ( !usenexus ) fprintf ( outp , "*\n" ) ;
i = -1 ; }}
anx = anxwas ;
if ( !usenexus )
fprintf ( outp , ";\nproc/;\n" ) ;
else fprintf ( outp , "End ; \n" ) ;
}
int binoptlist ( short int * list , short int * tt )
{
short int * x , * y ;
x = y = list ;
* y ++ = nt ;
while ( x < y ) {
if ( lef [ * x ] >= nt ) * y ++ = lef [ * x ] ;
if ( rig [ * x ] >= nt ) * y ++ = rig [ * x ] ;
++ x ; }
return y - list ;
}
void initnet ( int un, int dos, int tres )
{
anx [ un ] = anx [ nt + 1] = nt;
anx [ dos ] = anx [ tres ] = nt + 1;
anx [ nt ] = nt + 2 ;
lef [ nt ] = un;
rig [ nt ] = sis [ un ] = nt + 1;
lef [ nt + 1 ] = dos;
rig [ nt + 1 ] = sis [ dos ] = tres;
sis [ nt + 1 ] = un;
sis [ tres ] = dos;
}
/************ OPTIMIZATION STUFF *********************/
void dowtfilter ( int prob , int keepprevious , int dalim )
{
int i , j ;
unsigned long int msk ;
for ( i = 0 ; i < nc2 ; ++ i ) {
if ( keepprevious )
wtchg [ i ] |= wtchg [ i ] << 1 ;
else wtchg [ i ] = LO2 ;
for ( j = 0 , msk = 1 ; j < FLDS2 ; ++ j , msk <<= 2 )
if ( ( rand () % 100 ) < prob )
wtchg [ i ] &= ~msk ; }
for ( ; i < nc3 ; ++ i ) {
if ( keepprevious )
wtchg [ i ] |= wtchg [ i ] >> 1 ;
else wtchg [ i ] = HI3 ;
for ( j = 0 , msk = 4 ; j < FLDS3 ; ++ j , msk <<= 3 )
if ( ( rand () % 100 ) < prob )
wtchg [ i ] &= ~msk ; }
for ( ; i < dalim ; ++ i ) {
if ( keepprevious )
wtchg [ i ] |= wtchg [ i ] >> 1 ;
else wtchg [ i ] = HI4 ;
for ( j = 0 , msk = 8 ; j < FLDS4 ; ++ j , msk <<= 4 )
if ( ( rand () % 100 ) < prob )
wtchg [ i ] &= ~msk ; }
}
void resetwtfilter ( void )
{
int i ;
for ( i = 0 ; i < nc2 ; ++ i )
wtchg [ i ] = ( wtchg [ i ] >> 1 ) & LO2 ;
for ( ; i < nc3 ; ++ i )
wtchg [ i ] = ( wtchg [ i ] << 1 ) & HI3 ;
for ( ; i < nc4 ; ++ i )
wtchg [ i ] = ( wtchg [ i ] << 1 ) & HI4 ;
}
void storewtfilter ( void )
{
int i ;
for ( i = 0 ; i < nc2 ; ++ i )
wtchg [ i ] |= ( wtchg [ i ] << 1 ) ;
for ( ; i < nc3 ; ++ i )
wtchg [ i ] |= ( wtchg [ i ] >> 1 ) ;
for ( ; i < nc4 ; ++ i )
wtchg [ i ] |= ( wtchg [ i ] >> 1 ) ;
}
unsigned long long downpass_all ( void )
{
int nunos , len , a , nod , c ;
unsigned long long totlen ;
unsigned long int * lp , * rp , * np , x , y , z ;
unsigned long int * wts ;
nunos = binoptlist ( nodlist , anx ) ;
totlen = lenlag ;
for ( a = nunos ; a -- ; ) {
nod = nodlist [ a ] ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
wts = wtchg - 1 ;
len = 0 ;
for ( c = 0 ; c < nc2 ; ++ c ) {
x = * lp ++ & * rp ++ ;
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ) ;
if ( changewts ) y &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] + bitct [ 65535 & ( ( y >> 32 ) | ( y >> 47 ) ) ] ;
#else
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] ;
#endif
y |= y << 1 ;
* np ++ = x | y ; }
for ( ; c < nc3 ; ++ c ) {
x = * lp & * rp ;
y = * lp ++ | * rp ++ ;
z = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
if ( changewts ) z &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( z | ( z >> 16 ) ) ] + bitct [ 65535 & ( z >> 32 | ( z >> 49 ) ) ] ;
#else
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
#endif
z |= z - ( z >> 2 ) ;
* np ++ = x | ( z & y ) ; }
for ( ; c < nc4 ; ++ c ) {
x = * lp & * rp ;
y = * lp ++ | * rp ++ ;
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts ) z &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( z | ( z >> 17 ) | ( z >> 34 ) | ( z >> 51 ) ) ] ;
#else
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
#endif
z |= z - ( z >> 3 ) ;
* np ++ = x | ( z & y ) ; }
totlen += len ; }
return totlen ;
}
void loadmat ( int part )
{
void * vd ;
unsigned long int * seg ;
unsigned long int numelems ;
if ( !part ) rewind ( partsfile ) ;
seg = mat [ 0 ] ;
vd = ( void * ) seg ;
numelems = fread ( vd , 1 , sizeof ( unsigned long int ) * nt * piecesz , partsfile ) ;
errout ( numelems != sizeof ( unsigned long int ) * nt * piecesz , "OOOPS, unexpected error loading matrix!.\nSorry, abandoning run..." ) ;
if ( dojak && !ratcycles && !together ) {
if ( !part ) srand ( ranseed ) ;
dowtfilter ( jakprob , 0 , piecesz ) ; }
}
unsigned long long downpass_split ( void )
{
int nunos , len , a , nod , c ;
unsigned long long totlen ;
unsigned long int * lp , * rp , * np , x , y , z ;
unsigned long int atnc ;
unsigned long int * wts ;
nunos = binoptlist ( nodlist , anx ) ;
totlen = lenlag ;
for ( atnc = 0 ; atnc < nc4 ; atnc += piecesz ) {
loadmat ( atnc ) ;
for ( a = nunos ; a -- ; ) {
nod = nodlist [ a ] ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
if ( ratcycles ) wts = wtchg + atnc - 1 ;
else wts = wtchg - 1 ;
len = 0 ;
for ( c = 0 ; c < piecesz ; ++ c ) {
x = * lp & * rp ;
y = * lp ++ | * rp ++ ;
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts ) z &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( z | ( z >> 17 ) | ( z >> 34 ) | ( z >> 51 ) ) ] ;
#else
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
#endif
z |= z - ( z >> 3 ) ;
* np ++ = x | ( z & y ) ; }
totlen += len ;
}}
return totlen ;
}
/********* SUPPORT FUNCTIONS FOR WAGNER AND BRANCH-SWAPPING ************************/
void zert ( int whatp, int wherep )
{
int a, b, c ;
if ( wherep == 0 ) {
anx [ lef [ nt ] = sis [ whatp ] = 0 ] = nt ;
anx [ rig [ nt ] = sis [ 0 ] = whatp ] = nt ;
return ; }
a = anx [ wherep ] ;
c = sis [ wherep ] ;
b = anx [ wherep ] = anx [ whatp ] ;
anx [ b ] = a;
sis [ sis [ wherep ] = rig [ b ] = whatp ] = lef [ b ] = wherep;
sis [ lef [ a ] = c ] = b ;
sis [ rig [ a ] = b ] = c ;
}
void unzert ( int whatp )
{
int a, b, c;
a = anx [ whatp ] ;
if ( a == nt ) {
lef [ anx [ 0 ] = nt ] = rig [ nt ] = sis [ 0 ] = 0 ;
anx [ whatp ] = whatp ;
return ; }
b = sis [ whatp ] ;
c = sis [ a ] ;
a = anx [ a ] ;
lef [ a ] = b ;
rig [ anx [ b ] = anx [ c ] = a ] = sis [ sis [ c ] = b ] = c ;
}
void tbreroot ( int new, int old )
{
short int * an = anx ;
int i , j , k , m , n , nx , first ;
if ( an [ new ] == old || old < nt ) return ;
j = nx = first = an [ i = new ] ;
n = sis [ new ] ;
while ( nx != old ) {
k = i ;
i = j ;
j = nx ;
m = n ;
n = sis [ j ] ;
if ( k == lef [ i ] ) { lef [ i ] = j ; sis [ sis [ rig [ i ] ] = j ] = rig [ i ] ; }
else { rig [ i ] = j ; sis [ sis [ lef [ i ] ] = j ] = lef [ i ] ; }
nx = an [ j ] ;
an [ j ] = i ; }
an [ lef [ j ] = m ] = an [ rig [ j ] = n ] = j ;
sis [ sis [ n ] = m ] = n ;
an [ lef [ old ] = new ] = an [ rig [ old ] = first ] = old ;
sis [ sis [ new ] = first ] = new ;
return ;
}
void binmksis ( short int * tt )
{
int a , b , c , lownod = tt [ nt ] ;
for ( a = nodz ; a -- ; ) nodfork [ a ] = 0 ;
for ( a = nt ; a -- ; ) {
b = tt [ c = a ] ;
while ( b != lownod ) {
if ( !nodfork [ b ] )
lef [ b ] = lassis [ b ] = c ;
else
sis [ sis [ c ] = lassis [ b ] ] = rig [ b ] = c ;
if ( ++ nodfork [ b ] > 1 ) break ;
c = b ;
b = tt [ b ] ; }}
return ;
}
int divoptlist ( short int * list , int clip , int sisis )
{
short int * x , * y ;
x = y = list ;
if ( sisis ) * y ++ = nt ;
if ( clip > nt ) * y ++ = clip ;
while ( x < y ) {
if ( lef [ * x ] >= nt ) * y ++ = lef [ * x ] ;
if ( rig [ * x ] >= nt ) * y ++ = rig [ * x ] ;
++ x ; }
return y - list ;
}
/*********** HANDLE SWAP-COLLAPSING FOR JACKKNIFING *************/
void delnodz ( void )
{ short int * tmp , *cs ;
int a , b , at , last ;
char stopped ;
short int * an = anx ;
short int * dellist = unsuprt ;
tmp = nodlist ;
cs = lef ;
dellist[ nt ] = 0 ;
at = nt + 1 ;
for ( a = nt ; a -- ; ) cs [ a ] = a ;
for ( a = nt ; a < nodz ; ++ a ) cs [ a ] = 0 ;
cs [ nt ] = nt ;
for ( a = nodz ; a -- ; ) tmp[ a ] = 0 ;
for ( a = nt ; a -- ; ) {
b = last = a ;
stopped = 0 ;
while ( b != nt ) {
b = an [ b ] ;
if ( cs [ b ] ) { tmp[ cs [ last ] ] = cs [ b ] ; stopped = 1 ; break ; }
else
if ( ! dellist [ b ] ) {
tmp [ cs [ last ] ] = at ;
cs [ last = b ] = at ++ ; }
}
if ( !stopped ) tmp [ cs [ last ] ] = nt ; }
for ( a = nodz ; a -- ; ) an [ a ] = tmp [ a ] ;
an [ nt ] = at ;
}
void filldistoroot ( void )
{
short int * x = nodlist , * y ;
binmksis ( anx ) ;
distoroot [ nt ] = 0 ;
nodlist [ 0 ] = lef [ nt ] ;
nodlist [ 1 ] = rig [ nt ] ;
x = nodlist - 1 ;
y = nodlist + 2 ;
while ( ++ x < y ) {
distoroot [ * x ] = distoroot [ anx [ * x ] ] + 1 ;
if ( * x > nt ) {
* y ++ = lef [ * x ] ;
* y ++ = rig [ * x ] ; }}
return ;
}
void collapse_with_swaps ( int clip , int sisis )
{
int a , b , c , comnod ;
int da , db , dest , rootis ;
zert ( clip , sisis ) ;
for ( c = 0 ; c < numcolmoves ; ++ c ) {
dest = colmoves [ c ] ;
if ( dest == sisis ) continue ;
da = distoroot [ a = clip ] ;
db = distoroot [ b = dest ] ;
while ( distoroot [ a ] > db ) a = anx [ a ] ;
while ( distoroot [ b ] > da ) b = anx [ b ] ;
while ( a != b ) { a = anx [ a ] ; b = anx [ b ] ; }
comnod = a ;
if ( comnod != dest )
for ( a = anx [ dest ] ; a != comnod ; a = anx [ a ] ) unsuprt [ a ] = 1 ;
for ( b = anx [ clip ] ; b != comnod ; b = anx [ b ] ) unsuprt [ b ] = 1 ; }
if ( !use_spr && clip > nt )
for ( c = 0 ; c < numcolroots ; ++ c ) {
rootis = colroots [ c ] ;
if ( anx [ rootis ] != clip ) {
a = anx [ rootis ] ;
while ( a != clip ) {
unsuprt [ a ] = 1 ;
a = anx [ a ] ; }}}
unzert ( clip ) ;
return ;
}
void collapse_the_tree ( void )
{
int i ;
if ( !swapcollapse ) return ;
for ( i = 0 ; i < nodz ; ++ i ) unsuprt [ i ] = 0 ;
filldistoroot ( ) ;
recordgoodmoves = 1 ;
bswap ( ) ;
recordgoodmoves = 0 ;
delnodz ( ) ;
}
/*************** WAGNER AND BRANCH-SWAPPING FOR SINGLE BUFFER ****************/
void fullpass_all ( int clip , int sisis )
{
int nunos , a , nod , c ;
unsigned long int * lp , * rp , * np , * ap , x , y , z , bak ;
nunos = divoptlist ( nodlist , clip , sisis ) ;
for ( a = nunos ; a -- ; ) {
nod = nodlist [ a ] ;
if ( nodtoskip == nod ) continue ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
for ( c = 0 ; c < nc2 ; ++ c ) {
x = * lp ++ & * rp ++ ;
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ) ;
y |= y << 1 ;
* np ++ = x | y ; }
for ( ; c < nc3 ; ++ c ) {
x = * lp & * rp ;
y = * lp ++ | * rp ++ ;
z = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
z |= z - ( z >> 2 ) ;
* np ++ = x | ( z & y ) ; }
for ( ; c < nc4 ; ++ c ) {
x = * lp & * rp ;
y = * lp ++ | * rp ++ ;
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
z |= z - ( z >> 3 ) ;
* np ++ = x | ( z & y ) ; }}
for ( a = 0 ; ++ a < nunos ; ) {
nod = nodlist [ a ] ;
if ( nod == clip || anx [ nod ] == nodtoskip || nod == nodtoskip ) continue ;
lp = mat [ lef [ nod ] ] - 1 ;
rp = mat [ rig [ nod ] ] - 1 ;
np = mat [ nod ] - 1 ;
ap = mat [ anx [ nod ] ] - 1 ;
for ( c = 0 ; c < nc2 ; ++ c ) {
bak = * ++ lp | * ++ rp ;
x = * ++ ap & ~ * ++ np ;
x = ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ;
x |= ( x << 1 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }
for ( ; c < nc3 ; ++ c ) {
x = * ++ lp & * ++ rp ;
z = * lp | * rp ;
y = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
y |= y - ( y >> 2 ) ;
bak = z | y ;
x = * ++ ap & ~ * ++ np ;
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
x |= x - ( x >> 2 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }
for ( ; c < nc4 ; ++ c ) {
x = * ++ lp & * ++ rp ;
z = * lp | * rp ;
y = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
y |= y - ( y >> 3 ) ;
bak = z | y ;
x = * ++ ap & ~ * ++ np ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x |= x - ( x >> 3 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }}
nodtoskip = 0 ;
}
unsigned long int scorediff_all ( short int clipis , short int srcis , short int tgtis )
{
unsigned long int len = 0 ;
unsigned long int * tgt , * tgta , * src , * srca ;
unsigned long int untgt , unsrc ;
unsigned long int x , y , z ;
unsigned long int * wts ;
long int a ;
if ( clipis < nt ) srca = mat [ clipis ] - 1 ;
else srca = mat [ anx [ srcis ] ] - 1 ;
src = mat [ srcis ] - 1 ;
if ( !tgtis ) tgta = mat [ 0 ] - 1 ;
else tgta = mat [ anx [ tgtis ] ] - 1 ;
tgt = mat [ tgtis ] - 1 ;
wts = wtchg - 1 ;
for ( a = 0 ; a < nc2 ; ++ a ) {
unsrc = * ++ src | * ++ srca ;
if ( srcis < nt && clipis > nt ) {
x = * srca & ~ * src ;
x = ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ;
x |= ( x << 1 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * ++ tgt | * ++ tgta ;
if ( tgtis && tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ;
x |= ( x << 1 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
y = LO2 ^ ( ( x & LO2 ) | ( ( x & HI2 ) >> 1 ) ) ;
if ( changewts )
if ( recorddn ) {
++ wts ;
if ( recorddn == 1 ) * wts = y ;
x = * wts & y ;
z = x ^ y ;
#ifdef BITS64
gmix += bitct [ 65535 & ( x | ( x >> 15 ) ) ] + bitct [ 65535 & ( ( x >> 32 ) | ( x >> 47 ) ) ] ;
gup += bitct [ 65535 & ( z | ( z >> 15 ) ) ] + bitct [ 65535 & ( ( z >> 32 ) | ( z >> 47 ) ) ] ;
#else
gmix += bitct [ 65535 & ( x | ( x >> 15 ) ) ] ;
gup += bitct [ 65535 & ( z | ( z >> 15 ) ) ] ;
#endif
continue ; }
else
y &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] + bitct [ 65535 & ( ( y >> 32 ) | ( y >> 47 ) ) ] ;
#else
len += bitct [ 65535 & ( y | ( y >> 15 ) ) ] ;
#endif
if ( len > lenlimit ) return len ; }
for ( ; a < nc3 ; ++ a ) {
unsrc = * ++ src | * ++ srca ;
if ( srcis < nt && clipis > nt ) {
x = * srca & ~ * src ;
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
x |= x - ( x >> 2 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * ++ tgt | * ++ tgta ;
if ( tgtis && tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x | ( ( x & LO3 ) + LO3 ) ) & HI3 ;
x |= x - ( x >> 2 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
z = HI3 & ~( x | ( ( x & LO3 ) + LO3 ) ) ;
if ( changewts )
if ( recorddn ) {
++ wts ;
if ( recorddn == 1 ) * wts = z ;
y = * wts & z ;
x = y ^ z ;
#ifdef BITS64
gmix += bitct [ 65535 & ( y | ( y >> 16 ) ) ] + bitct [ 65535 & ( y >> 32 | ( y >> 49 ) ) ] ;
gup += bitct [ 65535 & ( x | ( x >> 16 ) ) ] + bitct [ 65535 & ( x >> 32 | ( x >> 49 ) ) ] ;
#else
gmix += bitct [ 65535 & ( y | ( y >> 17 ) ) ] ;
gup += bitct [ 65535 & ( x | ( x >> 17 ) ) ] ;
#endif
continue ; }
else
z &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( z | ( z >> 16 ) ) ] + bitct [ 65535 & ( z >> 32 | ( z >> 49 ) ) ] ;
#else
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
#endif
if ( len > lenlimit ) return len ; }
for ( ; a < nc4 ; ++ a ) {
unsrc = * ++ src | * ++ srca ;
if ( srcis < nt && clipis > nt ) {
x = * srca & ~ * src ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x |= x - ( x >> 3 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * ++ tgt | * ++ tgta ;
if ( tgtis && tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x |= x - ( x >> 3 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts )
if ( recorddn ) {
++ wts ;
if ( recorddn == 1 ) * wts = z ;
y = * wts & z ;
x = y ^ z ;
#ifdef BITS64
gmix += bitct [ 65535 & ( y | ( y >> 17 ) | ( y >> 34 ) | ( y >> 51 ) ) ] ;
gup += bitct [ 65535 & ( x | ( x >> 17 ) | ( x >> 34 ) | ( x >> 51 ) ) ] ;
#else
gmix += bitct [ 65535 & ( y | ( y >> 17 ) ) ] ;
gup += bitct [ 65535 & ( x | ( x >> 17 ) ) ] ;
#endif
continue ; }
else
z &= * ++ wts ;
#ifdef BITS64
len += bitct [ 65535 & ( z | ( z >> 17 ) | ( z >> 34 ) | ( z >> 51 ) ) ] ;
#else
len += bitct [ 65535 & ( z | ( z >> 17 ) ) ] ;
#endif
if ( len > lenlimit ) return len ; }
return len ;
}
unsigned long long wagner_all ( void )
{
int a , added, fac, basnod , bestpos , clip , tgt ;
unsigned long int lim , diff ;
unsigned long long len ;
short int * inlist = nodfork ;
unsigned long int seedwas = ranseed ;
randomlist( nt , ladd , 0 ) ;
initnet ( ladd [ 0 ] , ladd [ 1 ] , ladd [ 2 ] ) ;
blist [ 0 ] = ladd [ 1 ] ;
blist [ 1 ] = ladd [ 2 ] ;
blist [ 2 ] = nt + 1;
added = 2;
fac = 3 ;
basnod = nt + 2;
ranseed = 0 ;
while ( ++ added < nt ) {
if ( random_starts ) {
nodzert ( ladd [ added ] , blist [ rand() % fac ] ) ;
blist [ fac ++ ] = ladd [ added ] ;
blist [ fac ++ ] = basnod ++ ;
continue ; }
randomlist ( fac , inlist , 1 ) ;
clip = ladd [ added ] ;
fullpass_all ( clip , clip ) ;
lim = lenlimit = VERYBIG ;
if ( verbose )
fprintf ( stderr , "\rRep %3i %4i taxa added" , reportreplication , added ) ;
for ( a = 0 ; a < fac ; ++ a ) {
tgt = blist [ inlist [ a ] ] ;
diff = scorediff_all ( clip , clip , tgt ) ;
if ( diff < lim ) {
lim = diff ;
lenlimit = lim - 1 ;
bestpos = tgt ; }}
nodzert ( clip , bestpos ) ;
blist [ fac ++ ] = clip ;
blist [ fac ++ ] = basnod ++ ; }
len = downpass_all () ;
ranseed = seedwas ;
return len ;
}
unsigned long long bswap_all ( void )
{
unsigned long int diff , lim , nod ;
unsigned long long len , achk ;
int clip , bestpos , begpos , sisis , nunos , bestroot , begroot , i , isdone ;
int curroot , tgt , rounds , maxrounds = 0 , subsdone = 0 ;
short int * rdo , * rdone , * tgtdo , * tgtdone ;
double prop = 0 , secs ;
static int clockinit = 0 ;
static clock_t last , now ;
BremTyp * brempt ;
anx [ nt ] = nodz ;
binmksis ( anx ) ;
if ( !rig [ nt ] ) {
rig [ nt ] = lef [ nt ] ;
lef [ nt ] = 0 ; }
nunos = binoptlist ( nodlist , anx ) ;
len = downpass_all ( ) ;
clip = 0 ;
if ( !clockinit ) {
clockinit = 1 ;
last = clock () ; }
if ( ratcycles && verbose )
prop = ( double ) ratcyclesdone / ratcycles ;
for ( rounds = 1 ; rounds < nodz ; ++ rounds ) {
if ( ++ clip >= nodz ) clip = 1 ;
if ( clip == nt ) continue ;
if ( rounds > maxrounds ) maxrounds = rounds ;
now = clock () ;
secs = ( double ) ( now - last ) / CLOCKS_PER_SEC ;
if ( secs > reportperiod && !ratcheting && !doingbremer ) {
last = clock () ;
prop = ( double ) ( maxrounds + 1 ) / nodz ;
if ( ratcycles )
prop = ( double ) ratcyclesdone / ratcycles ;
if ( recordgoodmoves )
fprintf ( stderr , "%cRep %3i collapse LEN %-10lli %12lli rearrangs" , reportheader , reportreplication , len , rearrangs ) ;
else
fprintf ( stderr , "%cRep %3i %6.2f%% LEN %-10lli %12lli rearrangs" , reportheader , reportreplication , prop * 100 , len , rearrangs ) ; }
unzert ( clip ) ;
begpos = bestpos = sisis = sis [ clip ] ;
if ( sisis ) nodtoskip = anx [ clip ] ;
fullpass_all ( clip , sisis ) ;
lenlimit = VERYBIG ;
if ( doingbremer ) {
recorddn = changewts = 1 ;
gup = gmix = 0 ; }
if ( clip < nt )
lim = scorediff_all ( clip , clip , sisis ) ;
else
if ( lef [ clip ] > nt )
lim = scorediff_all ( clip , lef [ clip ] , sisis ) ;
else lim = scorediff_all ( clip , rig [ clip ] , sisis ) ;
if ( doingbremer ) {
brempt = bremdif ;
recorddn = 2 ;
nitdn = gmix ; }
numcolmoves = numcolroots = 0 ;
if ( !doingbremer )
if ( ratcheting || recordgoodmoves )
lenlimit = lim ;
else lenlimit = lim - 1 ;
if ( lim == 0 && !( ratcheting || recordgoodmoves || doingbremer ) ) {
zert ( clip , bestpos ) ;
continue ; }
tgtdo = tgtdone = tgtlist ;
if ( !sisis ) * tgtdo ++ = 0 ;
else {
* tgtdo ++ = sis [ 0 ] ;
while ( tgtdone < tgtdo ) {
nod = * tgtdone ++ ;
if ( nod > nt ) {
* tgtdo ++ = lef [ nod ] ;
* tgtdo ++ = rig [ nod ] ; }}}
rdo = rdone = rlist ;
if ( clip < nt ) * rdo ++ = begroot = bestroot = clip ;
else {
* rdo ++ = begroot = bestroot = rig [ clip ] ;
if ( lef [ clip ] > nt ) {
* rdo ++ = lef [ lef [ clip ] ] ;
* rdo ++ = rig [ lef [ clip ] ] ; }}
while ( rdone < rdo ) {
curroot = * rdone ;
if ( curroot > nt ) {
* rdo ++ = lef [ curroot ] ;
* rdo ++ = rig [ curroot ] ; }
for ( tgtdone = tgtlist ; tgtdone < tgtdo ; ++ tgtdone ) {
tgt = * tgtdone ;
++ rearrangs ;
if ( verbose )
if ( !ratcheting && !recordgoodmoves && !doingbremer )
fprintf ( stderr , "\rRep %3i %6.2f%% LEN %-10lli %12lli rearrangs" , reportreplication , prop * 100 , len , rearrangs ) ;
gmix = gup = 0 ;
diff = scorediff_all ( clip , curroot , tgt ) ;
if ( doingbremer ) {
gdn = nitdn - gmix ;
brempt -> up = gup ;
brempt -> dn = gdn ;
brempt -> root = curroot ;
brempt -> tgt = tgt ;
++ brempt ;
continue ; }
if ( recordgoodmoves && diff == lim ) {
if ( rdone > rlist ) {
for ( i = isdone = 0 ; i < numcolroots && !isdone ; ++ i )
if ( colroots [ i ] == curroot ) isdone = 1 ;
if ( !isdone ) colroots [ numcolroots ++ ] = curroot ; }
for ( i = isdone = 0 ; i < numcolmoves && !isdone ; ++ i )
if ( colmoves [ i ] == tgt ) isdone = 1 ;
if ( !isdone ) colmoves [ numcolmoves ++ ] = tgt ; }
if ( ratcheting && diff == lim ) {
bestpos = tgt ;
bestroot = curroot ; }
if ( diff < lim ) {
len -= lim - diff ;
lim = diff ;
lenlimit = lim - 1 ;
if ( ratcheting ) ++ lenlimit ;
bestpos = tgt ;
bestroot = curroot ; }
} // targets
if ( use_spr ) break ;
++ rdone ; } // rootings
if ( doingbremer )
transfer_bremer ( clip , sisis , brempt ) ;
if ( recordgoodmoves )
collapse_with_swaps ( clip , sisis ) ;
if ( clip > nt )
if ( !use_spr && bestroot != begroot ) tbreroot ( bestroot , clip ) ;
zert ( clip , bestpos ) ;
if ( bestpos != begpos || bestroot != begroot ) {
rounds = 0 ;
if ( ratcheting && ++ subsdone == numsubs ) break ; }} // clippings
achk = len ;
len = downpass_all ( ) ;
if ( len != achk && !doingbremer ) {
fprintf ( stderr , "\nOOPS!!!\nAn internal error occured!!!\nlength is %lli, not %lli\n" , len , achk ) ;
exit ( 0 ) ; }
return len ;
}
/********** WAGNER AND BRANCH-SWAPPING WITH DISK BUFFER ****************/
void fullpass_split ( int clip , int sisis )
{
int nunos , a , nod , c ;
unsigned long int * lp , * rp , * np , * ap , x , y , z , bak ;
nunos = divoptlist ( nodlist , clip , sisis ) ;
for ( a = nunos ; a -- ; ) {
nod = nodlist [ a ] ;
if ( nodtoskip == nod ) continue ;
lp = mat [ lef [ nod ] ] ;
rp = mat [ rig [ nod ] ] ;
np = mat [ nod ] ;
for ( c = 0 ; c < piecesz ; ++ c ) {
x = * lp & * rp ;
y = * lp ++ | * rp ++ ;
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
z |= z - ( z >> 3 ) ;
* np ++ = x | ( z & y ) ; }}
for ( a = 0 ; ++ a < nunos ; ) {
nod = nodlist [ a ] ;
if ( nod == clip || anx [ nod ] == nodtoskip || nod == nodtoskip ) continue ;
lp = mat [ lef [ nod ] ] - 1 ;
rp = mat [ rig [ nod ] ] - 1 ;
np = mat [ nod ] - 1 ;
ap = mat [ anx [ nod ] ] - 1 ;
for ( c = 0 ; c < piecesz ; ++ c ) {
x = * ++ lp & * ++ rp ;
z = * lp | * rp ;
y = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
y |= y - ( y >> 3 ) ;
bak = z | y ;
x = * ++ ap & ~ * ++ np ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x |= x - ( x >> 3 ) ;
* np = ( * ap & ~ x ) | ( x & ( * np | * ap & bak ) ) ; }}
nodtoskip = 0 ;
}
unsigned long int scorediff_split ( short int clipis , short int srcis , short int tgtis , unsigned long int atnc )
{
int len = 0 ;
unsigned long int * tgt , * tgta , * src , * srca ;
unsigned long int untgt , unsrc ;
unsigned long int x , z , y ;
unsigned long int * wts ;
long int a ;
if ( clipis < nt ) srca = mat [ clipis ] - 1 ;
else srca = mat [ anx [ srcis ] ] - 1 ;
src = mat [ srcis ] - 1 ;
if ( !tgtis ) tgta = mat [ 0 ] - 1 ;
else tgta = mat [ anx [ tgtis ] ] - 1 ;
tgt = mat [ tgtis ] - 1 ;
if ( ratcycles ) wts = wtchg + atnc - 1 ;
else wts = wtchg - 1 ;
for ( a = 0 ; a < piecesz ; ++ a ) {
unsrc = * ++ src | * ++ srca ;
if ( srcis < nt && clipis > nt ) {
x = * srca & ~ * src ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x |= x - ( x >> 3 ) ;
unsrc = * srca | ( * src & x ) ; }
untgt = * ++ tgt | * ++ tgta ;
if ( tgtis && tgtis < nt ) {
x = * tgta & ~ * tgt ;
x = ( x | ( ( x & LO4 ) + LO4 ) ) & HI4 ;
x |= x - ( x >> 3 ) ;
untgt = * tgta | ( * tgt & x ) ; }
x = unsrc & untgt ;
z = HI4 & ~( x | ( ( x & LO4 ) + LO4 ) ) ;
if ( changewts )
if ( recorddn ) {
++ wts ;
if ( recorddn == 1 ) * wts = z ;
x = z & * wts ;
y = x ^ z ;
#ifdef BITS64
gmix += bitct [ 65535 & ( x | ( x >> 17 ) | ( x >> 34 ) | ( x >> 51 ) ) ] ;
gup += bitct [ 65535 & ( y |