gde_linux/SUPPORT/lsadt.c

1336 lines
25 KiB
C
Raw Normal View History

2023-04-12 03:39:54 +08:00
/*
PROGRAM LSADT
Fortran->C conversion by Mike Maciukenas, CPGA, Microbiology at University
of Illinois.
C-----------------------------------------------------------------------
C
C LEAST SQUARES ALGORITHM FOR FITTING ADDITIVE TREES TO
C PROXIMITY DATA
C
C GEERT DE SOETE -- VERSION 1.01 - FEB. 1983
C VERSION 1.02 - JUNE 1983
C VERSION 1.03 - JULY 1983
C
C REFERENCE: DE SOETE, G. A LEAST SQUARES ALGORITHM FOR FITTING
C ADDITIVE TREES TO PROXIMITY DATA. PSYCHOMETRIKA, 1983, 48,
C 621-626.
C DE SOETE, G. ADDITIVE TREE REPRESENTATIONS OF INCOMPLETE
C DISSIMILARITY DATA. QUALITY AND QUANTITY, 1984, 18,
C 387-393.
C REMARKS
C -------
C 2) UNIFORMLY DISTRIBUTED RANDOM NUMBERS ARE GENERATED BY A
C PROCEDURE DUE TO SCHRAGE. CF.
C SCHRAGE, L. A MORE PORTABLE FORTRAN RANDOM NUMBER GENERATOR.
C ACM TRANS. ON MATH. SOFTW., 1979, 5, 132-138.
C 3) SUBROUTINES VA14AD AND VA14AC (translated into minfungra) ARE
C ADAPTED FROM THE HARWELL SUBROUTINE LIBRARY (1979 EDITION).
C 4) ALTHOUGH THIS PROGRAM HAS BEEN CAREFULLY TESTED, THE
C AUTHOR DISCLAIMS ANY RESPONSABILITY FOR POSSIBLE
C ERRORS.
C
C-----------------------------------------------------------------------
*/
#include <stdio.h>
#include <math.h>
#include <values.h>
#include <ctype.h>
#include <fcntl.h>
#include <errno.h>
#define BUFLEN 1024
#define MAXLEAVES 256
static int m, n, dissim, pr, start, save, seed, nempty;
static double ps1, ps2, f, empty, tol, c;
static char fname[1000];
static char *names[MAXLEAVES];
static double *delta[MAXLEAVES];
static double **d;
static double **g;
static double **dold;
static FILE *reportf;
static int report;
extern int errno;
double nfac;
extern double strtod();
double dabs(a)
double a;
{
return((a<0.0) ? -a : a);
}
double sqr(a)
double a;
{
return(a*a);
}
double max(a, b)
double a;
double b;
{
return((a>b)?a:b);
}
int imin(a, b)
int a;
int b;
{
return((a<b)?a:b);
}
double min(a, b)
double a;
double b;
{
return((a<b)?a:b);
}
float runif()
{
#define A 16807
#define P 2147483647
#define B15 32768
#define B16 65536
int xhi, xalo, leftlo, fhi, k;
float t1, t2;
xhi = seed/B16;
xalo = (seed - (xhi * B16)) * A;
leftlo = xalo / B16;
fhi = xhi*A+leftlo;
k = fhi/B15;
seed = (((xalo - leftlo*B16) - P) + (fhi - k*B15)*B16) + k;
if(seed<0) seed += P;
t1 = seed;
t2 = t1 * 4.656612875e-10;
return (t2); /* seed * (1/(2**31-1))*/
}
float rnorm()
{
static float t1, t2;
static float n1, n2;
static int second = 0;
if(second)
{
second = 0;
n1 = sin(t2);
n2 = t1*n1;
n1 = t1*sin(t2);
return(n2);
}
else
{
t1 = runif();
t1 = sqrt(-2.0*log(t1));
n2 = 6.283185308;
t2 = runif()*n2;
n1 = cos(t2);
n2 = t1*n1;
n1 = t1*cos(t2);
second = 1;
return(n2);
}
}
double gd(i, j)
int i, j;
{
if(j<i)
return(d[i][j]);
else if(j>i)
return(d[j][i]);
else
show_error("gd: i=j -- programmer screwed up!");
}
char *repeatch(string, ch, num)
char *string;
int ch;
int num;
{
for(string[num--] = '\0'; num >= 0; string[num--] = ch);
return(string);
}
int getachar()
/* skips comments! */
{
static int oldchar = '\0';
int ch;
int more=1;
while(more)
{
ch = getchar();
if(oldchar == '\n' && ch == '#')
{
while(ch!='\n'&&ch!=EOF)
ch=getchar();
oldchar = ch;
}
else if(oldchar == '\n' && isspace(ch))
;
else more=0;
}
oldchar = ch;
return(ch);
}
int skip_space()
{
int ch;
while(isspace(ch=getachar()));
return(ch);
}
int getaword(string, len)
/* 0 if failed, 1 if data was read, -1 if data read to end of file */
char *string;
int len;
{
int i;
int ch;
ch = skip_space();
if(ch == EOF)
return(0);
for(i=0; !isspace(ch) && ch != EOF; i++)
{
if(i<len-1)
{
string[i] = ch;
}
ch = getachar();
}
string[i<len?i:len-1] = '\0';
if(ch==EOF)
return(-1);
else
return(1);
}
int readtobar(string, len)
/* 0 if failed, 1 if data was read */
char *string;
int len;
{
int i;
int ch;
ch = skip_space();
if(ch==EOF)
return(0);
for(i=0; ch!=EOF && ch!='|'; i++)
{
if(ch=='\n'||ch=='\r'||ch=='\t')
i--;
else
{
if(i<len-1)
string[i]=ch;
}
ch=getachar();
}
len = i<len?i:len-1;
for(i=len-1;i>=0 && isspace(string[i]); i--);
string[i+1] = '\0';
if(ch==EOF)
return(-1);
else
return(1);
}
int readtobarorcolon(string, len)
/* 0 if failed, 1 if data was read */
char *string;
int len;
{
int i;
int ch;
ch = skip_space();
if(ch==EOF)
return(0);
for(i=0; ch!=EOF && ch!='|' && ch!=':'; i++)
{
if(ch=='\n'||ch=='\r'||ch=='\t')
i--;
else
{
if(i<len-1)
string[i]=ch;
}
ch=getachar();
}
len = i<len?i:len-1;
for(i=len-1;i>=0 && isspace(string[i]); i--);
string[i+1] = '\0';
if(ch==EOF)
return(-1);
else
return(1);
}
char *getmem(nelem, elsize)
unsigned nelem, elsize;
{
char *temp;
temp = (char *)calloc(nelem+1, elsize);
if(temp == NULL) show_error("Couldn't allocate memory.");
else return(temp);
}
int get_parms(argc, argv)
int argc;
char **argv;
{
int i;
int cur_arg;
/* codes for current argument:
** 0 = no current argument
** 1 = pr
** 2 = start
** 3 = seed
** 4 = ps1
** 5 = ps2
** 6 = empty
** 7 = filename */
dissim = 0;
pr = 0;
start = 2;
save = 0;
seed = 12345;
ps1 = 0.0001;
ps2 = 0.0001;
empty = 0.0;
n = 0;
cur_arg = 0;
for(i=1; i<argc; i++)
switch(cur_arg) {
case 0:
if(strcmp(argv[i],"-d")==0)
dissim = 0;
else if(strcmp(argv[i],"-s")==0)
dissim = 1;
else if(strcmp(argv[i],"-print")==0)
cur_arg = 1;
else if(strcmp(argv[i],"-init")==0)
cur_arg = 2;
else if(strcmp(argv[i],"-save")==0)
save = 1;
else if(strcmp(argv[i],"-seed")==0)
cur_arg = 3;
else if(strcmp(argv[i],"-ps1")==0)
cur_arg = 4;
else if(strcmp(argv[i],"-ps2")==0)
cur_arg = 5;
else if(strcmp(argv[i],"-empty")==0)
cur_arg = 6;
else if(strcmp(argv[i],"-f")==0)
cur_arg = 7;
else if(strcmp(argv[i],"-help")==0)
show_help();
else
{
printf("\nlsadt: bad option\n");
show_help();
}
break;
case 1:
pr = atoi(argv[i]);
cur_arg = 0;
break;
case 2:
start = atoi(argv[i]);
cur_arg = 0;
break;
case 3:
seed = atoi(argv[i]);
cur_arg = 0;
break;
case 4:
ps1 = atof(argv[i]);
cur_arg = 0;
break;
case 5:
ps2 = atof(argv[i]);
cur_arg = 0;
break;
case 6:
empty = atof(argv[i]);
cur_arg = 0;
break;
case 7:
strcpy(fname, argv[i]);
cur_arg = 0;
break;
default:
printf("\nlsadt: bad option\n");
show_help();
break;
}
/* check validity of parameters */
if(ps1<0.0) ps1 = 0.0001;
if(ps2<0.0) ps2 = 0.0001;
if(dissim<0) dissim = 0;
if(start < 1 || start > 3) start = 3;
if(save != 1) save = 0;
if(seed < 0) seed = 12345;
/*printf("dissim=%d\n", dissim);*/
/*printf("pr=%d\n", pr);*/
/*printf("start=%d\n", start);*/
/*printf("save=%d\n", save);*/
/*printf("seed=%d\n", seed);*/
/*printf("ps1=%f\n", ps1);*/
/*printf("ps2=%f\n", ps2);*/
/*printf("empty=%f\n", empty);*/
}
int get_data()
{
int i, j, more;
char buf[BUFLEN];
char *ptr;
char ch;
int result;
double temp, nfactor, datmin, datmax;
nempty = n = 0;
more = 1;
ptr = &ch;
while(more)
{
result=readtobarorcolon(buf, BUFLEN);
if(result == 0 || result == -1)
more = 0;
else
{
n++;
names[n] = getmem(BUFLEN, 1);
result=readtobar(buf, BUFLEN);
if(result != 1)
show_error("get_data: bad name syntax, or missing '|'");
strcpy(names[n], buf);
if(n>1)
delta[n]=(double *)getmem(n, sizeof(double));
else
delta[n]=NULL;
for(j=1; j<n; j++)
{
if(!getaword(buf, BUFLEN))
show_error("get_data: missing data values.\n");
delta[n][j] = strtod(buf, &ptr);
if(ptr == buf)
show_error("get_data: invalid data value.\n");
if(delta[n][j] == empty) nempty++;
}
}
}
if(n<4) show_error("Must be at least 4 nodes.");
m = n * (n - 1) / 2;
/* make all positive */
datmin = MAXDOUBLE;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] < datmin && delta[i][j] != empty)
datmin = delta[i][j];
if(datmin < 0.0)
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] != empty)
delta[i][j] -= datmin;
/* convert dissimilarities into similarities if -s option specified */
if(dissim)
{
datmax = MINDOUBLE;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] > datmax && delta[i][j] != empty)
datmax = delta[i][j];
datmax += 1.0;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] != empty)
delta[i][j] = datmax - delta[i][j];
}
/* make sum of squared deviations from delta to average(delta) = 1 */
temp = 0.0;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] != empty)
temp += delta[i][j];
temp = temp/(double)(m-nempty);
/* now temp = average of all non-empty cells */
nfactor = 0.0;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] != empty)
nfactor+=sqr(delta[i][j]-temp);
nfactor = 1.0/sqrt(nfactor);
nfac = nfactor;
/* now nfactor is the normalization factor */
if(report)
fprintf(reportf, "Normalization factor: %15.10f\n", nfactor);
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(delta[i][j] != empty)
delta[i][j] *= nfactor;
/* now all is normalized */
}
int initd()
{
int i, j, k;
double t1, t2;
int emp, nemp;
double dmax;
double efac, estd;
efac = 0.333333333333333;
if(start == 1)
{
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
d[i][j] = runif();
return;
}
if(nempty == 0 && start == 2)
{
estd = sqrt(efac/m);
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
d[i][j] = delta[i][j] + rnorm()*estd;
return;
}
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
d[i][j] = delta[i][j];
if(start==3 && nempty==0) return;
emp=nempty;
nemp=0;
while(nemp<emp)
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(d[i][j] == empty)
{
dmax = MAXDOUBLE;
for(k=1; k<=n; k++)
{
t1 = gd(i,k);
t2 = gd(j,k);
if(t1!=empty && t2!=empty)
dmax = min(dmax, max(t1,t2));
}
if(dmax!=MAXDOUBLE)
d[i][j] = dmax;
else
nemp++;
}
if(nemp!=0)
{
t1 = 0.0;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(d[i][j]!=empty)
t1+=d[i][j];
t1 /= m-nemp;
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
if(d[i][j]==empty)
d[i][j]=t1;
}
if(start!=3)
{
estd=sqrt(efac/(m-nempty));
for(i=2; i<=n; i++)
for(j=1; j<i; j++)
d[i][j]+= rnorm()*estd;
}
return;
}
static int nm0, nm1, nm2, nm3;
static double fitl, fitp, r;
fungra()
{
double fac;
int i, j, k, l;
double wijkl;
for(i=2;i<=n;i++)
for(j=1;j<i;j++)
g[i][j] = 0.0;
fitl = 0.0;
fac = 2.0;
for(i=2;i<=n;i++)
for(j=1;j<i;j++)
if(delta[i][j] != empty)
{
fitl += sqr(d[i][j] - delta[i][j]);
g[i][j] += fac*(d[i][j] - delta[i][j]);
}
fitp = 0.0;
fac = 2.0*r;
for(i=1;i<=nm3;i++)
for(j=i+1;j<=nm2;j++)
for(k=j+1;k<=nm1;k++)
for(l=k+1;l<=nm0;l++)
if((d[j][i]+d[l][k]>=d[l][i]+d[k][j])&&
(d[k][i]+d[l][j]>=d[l][i]+d[k][j]))
{
wijkl=d[j][i]+d[l][k]
-d[k][i]-d[l][j];
fitp+=sqr(wijkl);
g[j][i]+=fac*wijkl;
g[l][k]+=fac*wijkl;
g[k][i]-=fac*wijkl;
g[l][j]-=fac*wijkl;
}
else
if((d[j][i]+d[l][k]>=d[k][i]+d[l][j])&&
(d[l][i]+d[k][j]>=d[k][i]+d[l][j]))
{
wijkl=d[j][i]+d[l][k]
-d[l][i]-d[k][j];
fitp+=sqr(wijkl);
g[j][i]+=fac*wijkl;
g[l][k]+=fac*wijkl;
g[k][j]-=fac*wijkl;
g[l][i]-=fac*wijkl;
}
else
if((d[k][i]+d[l][j]>=d[j][i]+d[l][k])&&
(d[l][i]+d[k][j]>=d[j][i]+d[l][k]))
{
wijkl=d[k][i]+d[l][j]
-d[l][i]-d[k][j];
fitp+=sqr(wijkl);
g[k][i]+=fac*wijkl;
g[l][j]+=fac*wijkl;
g[l][i]-=fac*wijkl;
g[k][j]-=fac*wijkl;
}
f = fitl+r*fitp;
}
static double **dr, **dgr, **d1, **gs, **xx, **gg;
static int iterc, prc;
print_iter(maxfnc, f)
int maxfnc;
double f;
{
int i, j;
if(pr == 0)
{
iterc++;
}
else if(prc < abs(pr))
{
prc++;
iterc++;
}
else
{
printf("Iteration %6d", iterc);
printf(": function values %6d", maxfnc);
printf(" f = %24.16e\n", f);
if(pr < 0)
{
printf(" d[] looks like this:\n");
for(i=2;i<=n;i++)
{
printf(" ");
for(j=1;j<i;j++)
printf("%24.16e ", d[i][j]);
printf("\n ");
}
printf(" g[] looks like this:\n");
for(i=2;i<=n;i++)
{
printf(" ");
for(j=1;j<i;j++)
printf("%24.16e ", g[i][j]);
printf("\n ");
}
}
prc = 1;
iterc++;
}
}
minfungra(dfn, acc, maxfn)
double dfn, acc;
int maxfn;
{
double beta, c, clt, dginit, dgstep, dtest, finit;
double fmin, gamden, gamma, ggstar, gm, gmin, gnew;
double gsinit, gsumsq, xbound, xmin, xnew, xold;
int itcrs, flag, retry, maxfnk, maxfnc;
int i, j;
flag = 0;
retry = -2;
iterc = 0;
maxfnc = 1;
prc = abs(pr);
goto L190;
L10:
xnew = dabs(dfn+dfn)/gsumsq;
dgstep = -gsumsq;
itcrs = m+1;
L30:
if(maxfnk<maxfnc)
{
f = fmin;
for(i=2;i<=n;i++)
for(j=1;j<i;j++)
{
d[i][j] = xx[i][j];
g[i][j] = gg[i][j];
}
}
if(flag > 0)
{
prc = 0;
print_iter(maxfnc, f);
return;
}
if(itcrs>m)
{
for(i=2;i<=n;i++)
for(j=1;j<i;j++)
d1[i][j] = -g[i][j];
}
print_iter(maxfnc, f);
dginit = 0.0;
for(i=2;i<=n;i++)
for(j=1;j<i;j++)
{
gs[i][j] = g[i][j];
dginit += d1[i][j]*g[i][j];
}
if(dginit >= 0.0)
{
retry = -retry;
if(imin(retry, maxfnk)<2)
{
printf("minfungra: \
gradient wrong or acc too small\n");
flag = 2;
}
else
itcrs = m+1;
goto L30;
}
xmin = 0.0;
fmin = f;
finit = f;
gsinit = gsumsq;
gmin = dginit;
gm = dginit;
xbound = -1.0;
xnew = xnew * min(1.0, dgstep/dginit);
dgstep = dginit;
L170:
c = xnew-xmin;
dtest = 0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
{
d[i][j] = xx[i][j]+c*d1[i][j];
dtest += dabs(d[i][j]-xx[i][j]);
}
if(dtest<=0.0)
{
clt = .7;
goto L300;
}
if(max(xbound, xmin-c) < 0.0) {
clt = 0.1;
}
maxfnc++;
L190:
fungra();
if(maxfnc>1)
{
for(gnew = 0.0,i=1;i<=n;i++)
for(j=1;j<i;j++)
gnew += d1[i][j]*g[i][j];
}
if(maxfnc<=1 ||
(maxfnc>1 && f<fmin) ||
(maxfnc>1 && f==fmin && dabs(gnew) <= dabs(gmin)))
{
maxfnk = maxfnc;
gsumsq = 0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
gsumsq += sqr(g[i][j]);
if(gsumsq <= acc)
{
prc = 0;
print_iter(maxfnc, f);
return;
}
}
if(maxfnc==maxfn)
{
printf("minfungra: \
maxfn function values have been calculated\n");
flag = 1;
goto L30;
}
if(maxfnk < maxfnc) goto L300;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
{
xx[i][j] = d[i][j];
gg[i][j] = g[i][j];
}
if(maxfnc <= 1) goto L10;
gm = gnew;
if(gm<=dginit) goto L310;
ggstar = 0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
ggstar += gs[i][j]*g[i][j];
beta = (gsumsq-ggstar)/(gm-dginit);
L300:
if(dabs(gm)>clt*dabs(dginit) ||
(dabs(gm)<=clt*dabs(dginit) && dabs(gm*beta) >= clt*gsumsq))
{
L310:
clt += 0.3;
if(clt>0.8)
{
retry = -retry;
if(imin(retry, maxfnk)<2)
{
printf("minfungra: \
gradient wrong or acc too small\n");
flag = 2;
}
else
itcrs = m+1;
goto L30;
}
xold = xnew;
xnew = .5*(xmin+xold);
if(maxfnk >= maxfnc && gmin*gnew > 0.0)
{
xnew = 10.0*xold;
if(xbound>=0.0) {
xnew = 0.5*(xold+xbound);
}
}
c = gnew-(3.0*gnew + gmin-4.0*(f-fmin)/(xold-xmin))*
(xold-xnew)/(xold-xmin);
if(maxfnk>=maxfnc)
{
if(gmin*gnew<=0.0) {
xbound = xmin;
}
xmin = xold;
fmin = f;
gmin = gnew;
}
else
xbound = xold;
if(c*gmin < 0.0)
xnew = (xmin*c-xnew*gmin)/(c-gmin);
goto L170;
}
if(min(f, fmin)<finit ||
(min(f, fmin)>=finit && gsumsq < gsinit))
{
if(itcrs<m && dabs(ggstar) < .2*gsumsq)
{
gamma = 0.0;
c = 0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
{
gamma += gg[i][j] * dgr[i][j];
c += gg[i][j]*dr[i][j];
}
gamma /= gamden;
if(dabs(beta*gm+gamma*c)<.2*gsumsq)
{
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
d1[i][j] = -gg[i][j] +
beta*d1[i][j] +
gamma*dr[i][j];
itcrs++;
}
else
{
itcrs = 2;
gamden = gm-dginit;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
{
dr[i][j] = d1[i][j];
dgr[i][j] = gg[i][j]-gs[i][j];
d1[i][j] = -gg[i][j]+beta*d1[i][j];
}
}
}
else
{
itcrs = 2;
gamden = gm-dginit;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
{
dr[i][j] = d1[i][j];
dgr[i][j] = gg[i][j]-gs[i][j];
d1[i][j] = -gg[i][j]+beta*d1[i][j];
}
}
goto L30;
}
else
{
retry = -retry;
if(imin(retry, maxfnk)<2)
{
printf("minfungra: \
gradient wrong or acc too small\n");
flag = 2;
}
else
itcrs = m+1;
goto L30;
}
}
get_dist()
{
static double cons = 1.0;
static int maxfn = 500;
int i, j, iter;
double dif;
dr = (double **)getmem(n, sizeof(delta[1]));
dgr = (double **)getmem(n, sizeof(delta[1]));
d1 = (double **)getmem(n, sizeof(delta[1]));
gs = (double **)getmem(n, sizeof(delta[1]));
xx = (double **)getmem(n, sizeof(delta[1]));
gg = (double **)getmem(n, sizeof(delta[1]));
dr[1] = NULL;
dgr[1] = NULL;
d1[1] = NULL;
gs[1] = NULL;
xx[1] = NULL;
gg[1] = NULL;
for(i=2; i<=n; i++)
{
dr[i] = (double *)getmem(i-1, sizeof(double));
dgr[i] = (double *)getmem(i-1, sizeof(double));
d1[i] = (double *)getmem(i-1, sizeof(double));
gs[i] = (double *)getmem(i-1, sizeof(double));
xx[i] = (double *)getmem(i-1, sizeof(double));
gg[i] = (double *)getmem(i-1, sizeof(double));
}
nm0 = n;
nm1 = n-1;
nm2 = n-2;
nm3 = n-3;
if(start==3)
{
r = 0.001;
fungra();
}
else
{
r = 1.0;
fungra();
r = cons*fitl/fitp;
f = (1.0+cons)*fitl;
}
iter=1;
do
{
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
dold[i][j] = d[i][j];
minfungra(0.5*f, ps1, maxfn);
dif = 0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
dif +=sqr(d[i][j] - dold[i][j]);
dif = sqrt(dif);
if(dif>ps2)
{
iter++;
r*=10.0;
}
} while (dif>ps2);
fungra();
for(i=2; i<=n; i++)
{
free(dr[i]);
free(dgr[i]);
free(d1[i]);
free(gs[i]);
free(xx[i]);
free(gg[i]);
}
free(dr);
free(dgr);
free(d1);
free(gs);
free(xx);
free(gg);
}
double gttol()
{
double result;
int i, j, k, l;
result = 0.0;
nm0 = n;
nm1 = n-1;
nm2 = n-2;
nm3 = n-3;
for(i=1;i<=nm3;i++)
for(j=i+1;j<=nm2;j++)
for(k=j+1;k<=nm1;k++)
for(l=k+1;l<=nm0;l++)
if((d[j][i]+d[l][k]>=d[l][i]+d[k][j])&&
(d[k][i]+d[l][j]>=d[l][i]+d[k][j]))
result=max(result,
dabs(d[j][i]+d[l][k]-d[k][i]-d[l][j]));
else
if((d[j][i]+d[l][k]>=d[k][i]+d[l][j])&&
(d[l][i]+d[k][j]>=d[k][i]+d[l][j]))
result=max(result,
dabs(d[j][i]+d[l][k]-d[l][i]-d[k][j]));
else
if((d[k][i]+d[l][j]>=d[j][i]+d[l][k])&&
(d[l][i]+d[k][j]>=d[j][i]+d[l][k]))
result=max(result,
dabs(d[k][i]+d[l][j]-d[l][i]-d[k][j]));
return(result);
}
gtcord()
{
double sumx, sumy, ssqx, ssqy, scp, fn;
int i, j;
sumx = sumy = ssqx = ssqy = scp = 0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
if(delta[i][j]!=empty)
{
sumx+=delta[i][j];
sumy+=d[i][j];
ssqx+=sqr(delta[i][j]);
ssqy+=sqr(d[i][j]);
scp+=delta[i][j]*d[i][j];
}
fn=(double)(m-nempty);
if((fn*ssqy-sumx*sumy)<=0.0)
show_error("gtcord: path length distances have zero variance");
else
return((fn*scp-sumx*sumy)/
sqrt((fn*ssqx-sqr(sumx))*(fn*ssqy-sqr(sumy))));
}
goodfit()
{
double r, rsq;
r=gtcord();
rsq=r*r*100.0;
tol=gttol();
}
additive_const()
{
int i, j, k;
c=0.0;
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
for(k=1;k<=n;k++)
if(k!=i && k!=j)
c=max(c,gd( i, j)-
gd( i, k)-
gd( j, k));
if(report)
fprintf(reportf, "Additive Constant: %15.10f\n", c);
if(c!=0.0)
for(i=1;i<=n;i++)
for(j=1;j<i;j++)
d[i][j]+=c;
}
static int *mergei, *mergej;
static int ninv;
get_tree()
{
#define false 0
#define true 1
int i, j, k, l, im, jm, km, lm, count;
int maxnode, maxcnt, nnode, nact;
double arci, arcj, arcim, arcjm;
char *act;
maxnode = 2*n-2;
mergei = (int *)getmem(maxnode, sizeof(int));
mergej = (int *)getmem(maxnode, sizeof(int));
act = (char *)getmem(maxnode, sizeof(char));
for(i=1;i<=maxnode;i++)
act[i] = false;
nact=n;
ninv=0;
nnode=n;
while(nact>4)
{
maxcnt=0;
for(i=1;i<=nnode;i++) if(!act[i])
for(j=1;j<i;j++) if(!act[j])
{
count=0;
arci=0.0;
arcj=0.0;
for(k=2;k<=nnode;k++) if(!act[k] && k!=i && k!=j)
for(l=1;l<k;l++) if(!act[l] && l!= i && l!= j)
if(gd(i,j)+gd(k,l)<=gd(i,k)+gd(j,l) &&
gd(i,j)+gd(k,l)<=gd(i,l)+gd(j,k))
{
count++;
arci+=(gd(i,j)+gd(i,k)-gd(j,k))/2.0;
arcj+=(gd(i,j)+gd(j,l)-gd(i,l))/2.0;
}
if(count>maxcnt)
{
maxcnt = count;
arcim=max(0.0, arci/count);
arcjm=max(0.0, arcj/count);
im=i;
jm=j;
}
}
nnode++;
if(nnode+2>maxnode)
show_error("get_tree: number of nodes exceeds 2N-2");
ninv++;
mergei[ninv]=im;
mergej[ninv]=jm;
act[im]=true;
act[jm]=true;
d[nnode]=(double *)getmem(nnode-1, sizeof(double));
d[nnode][im]=arcim;
d[nnode][jm]=arcjm;
for(i=1;i<=nnode-1;i++)
if(!act[i])
d[nnode][i] = max(0.0, gd(im,i)-arcim);
nact--;
}
for(i=1;act[i];i++)
if(i>nnode)
show_error("get_tree: can't find last two invisible nodes");
im=i;
for(i=im+1;act[i];i++)
if(i>nnode)
show_error("get_tree: can't find last two invisible nodes");
jm=i;
for(i=jm+1;act[i];i++)
if(i>nnode)
show_error("get_tree: can't find last two invisible nodes");
km=i;
for(i=km+1;act[i];i++)
if(i>nnode)
show_error("get_tree: can't find last two invisible nodes");
lm=i;
if(gd(im,jm)+gd(km,lm)<=gd(im,km)+gd(jm,lm)+tol &&
gd(im,jm)+gd(km,lm)<=gd(im,lm)+gd(jm,km)+tol)
{
i=im;
j=jm;
k=km;
l=lm;
}
else if(gd(im,lm)+gd(jm,km)<=gd(im,km)+gd(jm,lm)+tol &&
gd(im,lm)+gd(jm,km)<=gd(im,jm)+gd(km,lm)+tol)
{
i=im;
j=lm;
k=km;
l=jm;
}
else if(gd(im,km)+gd(jm,lm)<=gd(im,jm)+gd(km,lm)+tol &&
gd(im,km)+gd(jm,lm)<=gd(im,lm)+gd(jm,km)+tol)
{
i=im;
j=km;
k=lm;
l=jm;
}
nnode++;
ninv++;
mergei[ninv]=i;
mergej[ninv]=j;
d[nnode]=(double *)getmem(nnode-1, sizeof(double));
d[nnode][i] = max(0.0, (gd(i,j)+gd(i,k)-gd(j,k))/2.0);
d[nnode][j] = max(0.0, (gd(i,j)+gd(j,l)-gd(i,l))/2.0);
nnode++;
ninv++;
mergei[ninv]=k;
mergej[ninv]=l;
d[nnode]=(double *)getmem(nnode-1, sizeof(double));
d[nnode][k] = max(0.0, (gd(k,l)+gd(i,k)-gd(i,l))/2.0);
d[nnode][l] = max(0.0, (gd(k,l)+gd(j,l)-gd(j,k))/2.0);
d[nnode][nnode-1] = max(0.0, (gd(i,k)+gd(j,l)-gd(i,j)-gd(k,l))/2.0);
}
print_node(node, dist, indent)
int node;
double dist;
int indent;
{
static char buf[BUFLEN];
if(node<=n)
printf("%s%s:%6.4f",
repeatch(buf, '\t', indent),
names[node],
dist/nfac);
else
{
printf("%s(\n",
repeatch(buf, '\t', indent));
print_node(mergei[node-n], gd(node, mergei[node-n]), indent+1);
printf(",\n");
print_node(mergej[node-n], gd(node, mergej[node-n]), indent+1);
printf("\n%s):%6.4f", repeatch(buf, '\t', indent),
dist/nfac);
}
}
show_tree()
{
int i, j, current;
int ij[2];
current=0;
for(i=1;current<2;i++)
{
for(j=1;(mergei[j]!=i && mergej[j] != i) && j<=ninv;j++);
if(j>ninv)
ij[current++]=i;
}
printf("(\n");
print_node(ij[0], gd(ij[0],ij[1])/2.0, 1);
printf(",\n");
print_node(ij[1], gd(ij[0],ij[1])/2.0, 1);
printf("\n);\n");
}
show_help()
{
printf("\nlsadt--options:\n");
printf(" -f file - write report to 'file'\n");
printf(" -d - treat data as dissimilarities (default)\n");
printf(" -s - treat data as similarities\n");
printf(" -print 0 - don't print out iteration history (default)\n");
printf(" -print n>0 - print out iteration history every n iterations\n");
printf(" -print n<0 - print out iteration history every n iterations\n");
printf(" (including current distance estimates & gradients)\n");
printf(" -init n - initial parameter estimates (default = 3)\n");
printf(" n=1 - uniformly distributed random numbers\n");
printf(" n=2 - error-perturbed data\n");
printf(" n=3 - original distance data from input matrix\n");
printf(" -save - save final parameter estimates (default is don't save\n");
printf(" -seed n - seed for random number generator (default = 12345)\n");
printf(" -ps1 n - convergence criterion for inner iterations (default = 0.0001)\n");
printf(" -ps2 n - convergence criterion for major iterations (default = 0.0001)\n");
printf(" -empty n - missing data indicator (default = 0.0)\n");
printf(" -help - show this help text\n");
exit(0);
}
show_error(message)
char *message;
{
printf("\n>>>>ERROR:\n>>>>%s\n", message);
exit(0);
}
main(argc, argv)
int argc;
char **argv;
{
int i;
strcpy(fname, "");
get_parms(argc, argv);
if(strcmp(fname, ""))
{
report=1;
reportf = fopen(fname, "w");
if(reportf==NULL)
{
perror("lsadt");
exit(0);
}
}
else
report=0;
get_data();
d = (double **)getmem(n, sizeof(delta[1]));
g = (double **)getmem(n, sizeof(delta[1]));
dold = (double **)getmem(n, sizeof(delta[1]));
d[1]=NULL;
g[1]=NULL;
dold[1]=NULL;
for(i=2; i<=n; i++)
{
d[i]=(double *)getmem(i-1, sizeof(double));
g[i]=(double *)getmem(i-1, sizeof(double));
dold[i]=(double *)getmem(i-1, sizeof(double));
}
initd();
get_dist();
goodfit();
additive_const();
get_tree();
show_tree();
if(report)
close(reportf);
}