1305 lines
28 KiB
C
1305 lines
28 KiB
C
/*
|
|
PROGRAM LSADT
|
|
Fortran->C conversion by Mike Maciukenas, CPGA, Microbiology at University
|
|
of Illinois.
|
|
C-----------------------------------------------------------------------
|
|
C
|
|
C LEAST SQUARES ALGORITHM FOR FITTING ADDITIVE TREES TO
|
|
C PROXIMITY DATA
|
|
C
|
|
C GEERT DE SOETE -- VERSION 1.01 - FEB. 1983
|
|
C VERSION 1.02 - JUNE 1983
|
|
C VERSION 1.03 - JULY 1983
|
|
C
|
|
C REFERENCE: DE SOETE, G. A LEAST SQUARES ALGORITHM FOR FITTING
|
|
C ADDITIVE TREES TO PROXIMITY DATA. PSYCHOMETRIKA, 1983, 48,
|
|
C 621-626.
|
|
C DE SOETE, G. ADDITIVE TREE REPRESENTATIONS OF INCOMPLETE
|
|
C DISSIMILARITY DATA. QUALITY AND QUANTITY, 1984, 18,
|
|
C 387-393.
|
|
C REMARKS
|
|
C -------
|
|
C 2) UNIFORMLY DISTRIBUTED RANDOM NUMBERS ARE GENERATED BY A
|
|
C PROCEDURE DUE TO SCHRAGE. CF.
|
|
C SCHRAGE, L. A MORE PORTABLE FORTRAN RANDOM NUMBER GENERATOR.
|
|
C ACM TRANS. ON MATH. SOFTW., 1979, 5, 132-138.
|
|
C 3) SUBROUTINES VA14AD AND VA14AC (translated into minfungra) ARE
|
|
C ADAPTED FROM THE HARWELL SUBROUTINE LIBRARY (1979 EDITION).
|
|
C 4) ALTHOUGH THIS PROGRAM HAS BEEN CAREFULLY TESTED, THE
|
|
C AUTHOR DISCLAIMS ANY RESPONSABILITY FOR POSSIBLE
|
|
C ERRORS.
|
|
C
|
|
C-----------------------------------------------------------------------
|
|
*/
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <values.h>
|
|
|
|
#define BUFLEN 1024
|
|
#define MAXLEAVES 256
|
|
|
|
static int m, n, dissim, pr, start, save, seed, nempty;
|
|
static double ps1, ps2, f, empty, tol, c;
|
|
static char fname[1000];
|
|
static char *names[MAXLEAVES];
|
|
static double *delta[MAXLEAVES];
|
|
static double **d;
|
|
static double **g;
|
|
static double **dold;
|
|
static FILE *reportf;
|
|
static int report;
|
|
extern int errno;
|
|
double nfac;
|
|
|
|
extern double strtod();
|
|
|
|
double dabs(a)
|
|
double a;
|
|
{
|
|
return ((a < 0.0) ? -a : a);
|
|
}
|
|
|
|
double sqr(a)
|
|
double a;
|
|
{
|
|
return (a * a);
|
|
}
|
|
|
|
double max(a, b)
|
|
double a;
|
|
double b;
|
|
{
|
|
return ((a > b) ? a : b);
|
|
}
|
|
|
|
int imin(a, b)
|
|
int a;
|
|
int b;
|
|
{
|
|
return ((a < b) ? a : b);
|
|
}
|
|
|
|
double min(a, b)
|
|
double a;
|
|
double b;
|
|
{
|
|
return ((a < b) ? a : b);
|
|
}
|
|
|
|
float runif()
|
|
{
|
|
#define A 16807
|
|
#define P 2147483647
|
|
#define B15 32768
|
|
#define B16 65536
|
|
|
|
int xhi, xalo, leftlo, fhi, k;
|
|
float t1, t2;
|
|
|
|
xhi = seed / B16;
|
|
xalo = (seed - (xhi * B16)) * A;
|
|
leftlo = xalo / B16;
|
|
fhi = xhi * A + leftlo;
|
|
k = fhi / B15;
|
|
seed = (((xalo - leftlo * B16) - P) + (fhi - k * B15) * B16) + k;
|
|
if (seed < 0) seed += P;
|
|
t1 = seed;
|
|
t2 = t1 * 4.656612875e-10;
|
|
return (t2); /* seed * (1/(2**31-1))*/
|
|
}
|
|
|
|
float rnorm()
|
|
{
|
|
static float t1, t2;
|
|
static float n1, n2;
|
|
static int second = 0;
|
|
|
|
if (second) {
|
|
second = 0;
|
|
n1 = sin(t2);
|
|
n2 = t1 * n1;
|
|
n1 = t1 * sin(t2);
|
|
return (n2);
|
|
}
|
|
else {
|
|
t1 = runif();
|
|
t1 = sqrt(-2.0 * log(t1));
|
|
n2 = 6.283185308;
|
|
t2 = runif() * n2;
|
|
n1 = cos(t2);
|
|
n2 = t1 * n1;
|
|
n1 = t1 * cos(t2);
|
|
second = 1;
|
|
return (n2);
|
|
}
|
|
}
|
|
|
|
double gd(i, j)
|
|
int i, j;
|
|
{
|
|
if (j < i)
|
|
return (d[i][j]);
|
|
else if (j > i)
|
|
return (d[j][i]);
|
|
else
|
|
show_error("gd: i=j -- programmer screwed up!");
|
|
}
|
|
|
|
char *repeatch(string, ch, num)
|
|
char *string;
|
|
int ch;
|
|
int num;
|
|
{
|
|
for (string[num--] = '\0'; num >= 0; string[num--] = ch)
|
|
;
|
|
return (string);
|
|
}
|
|
|
|
int getachar()
|
|
/* skips comments! */
|
|
{
|
|
static int oldchar = '\0';
|
|
int ch;
|
|
int more = 1;
|
|
|
|
while (more) {
|
|
ch = getchar();
|
|
if (oldchar == '\n' && ch == '#') {
|
|
while (ch != '\n' && ch != EOF) ch = getchar();
|
|
oldchar = ch;
|
|
}
|
|
else if (oldchar == '\n' && isspace(ch))
|
|
;
|
|
else
|
|
more = 0;
|
|
}
|
|
oldchar = ch;
|
|
return (ch);
|
|
}
|
|
|
|
int skip_space()
|
|
{
|
|
int ch;
|
|
|
|
while (isspace(ch = getachar()))
|
|
;
|
|
return (ch);
|
|
}
|
|
|
|
int getaword(string, len)
|
|
/* 0 if failed, 1 if data was read, -1 if data read to end of file */
|
|
char *string;
|
|
int len;
|
|
{
|
|
int i;
|
|
int ch;
|
|
ch = skip_space();
|
|
if (ch == EOF) return (0);
|
|
for (i = 0; !isspace(ch) && ch != EOF; i++) {
|
|
if (i < len - 1) {
|
|
string[i] = ch;
|
|
}
|
|
ch = getachar();
|
|
}
|
|
string[i < len ? i : len - 1] = '\0';
|
|
if (ch == EOF)
|
|
return (-1);
|
|
else
|
|
return (1);
|
|
}
|
|
|
|
int readtobar(string, len)
|
|
/* 0 if failed, 1 if data was read */
|
|
char *string;
|
|
int len;
|
|
{
|
|
int i;
|
|
int ch;
|
|
|
|
ch = skip_space();
|
|
if (ch == EOF) return (0);
|
|
for (i = 0; ch != EOF && ch != '|'; i++) {
|
|
if (ch == '\n' || ch == '\r' || ch == '\t')
|
|
i--;
|
|
else {
|
|
if (i < len - 1) string[i] = ch;
|
|
}
|
|
ch = getachar();
|
|
}
|
|
len = i < len ? i : len - 1;
|
|
for (i = len - 1; i >= 0 && isspace(string[i]); i--)
|
|
;
|
|
string[i + 1] = '\0';
|
|
if (ch == EOF)
|
|
return (-1);
|
|
else
|
|
return (1);
|
|
}
|
|
|
|
int readtobarorcolon(string, len)
|
|
/* 0 if failed, 1 if data was read */
|
|
char *string;
|
|
int len;
|
|
{
|
|
int i;
|
|
int ch;
|
|
|
|
ch = skip_space();
|
|
if (ch == EOF) return (0);
|
|
for (i = 0; ch != EOF && ch != '|' && ch != ':'; i++) {
|
|
if (ch == '\n' || ch == '\r' || ch == '\t')
|
|
i--;
|
|
else {
|
|
if (i < len - 1) string[i] = ch;
|
|
}
|
|
ch = getachar();
|
|
}
|
|
len = i < len ? i : len - 1;
|
|
for (i = len - 1; i >= 0 && isspace(string[i]); i--)
|
|
;
|
|
string[i + 1] = '\0';
|
|
if (ch == EOF)
|
|
return (-1);
|
|
else
|
|
return (1);
|
|
}
|
|
|
|
char *getmem(nelem, elsize)
|
|
unsigned nelem, elsize;
|
|
{
|
|
char *temp;
|
|
|
|
temp = (char *)calloc(nelem + 1, elsize);
|
|
if (temp == NULL)
|
|
show_error("Couldn't allocate memory.");
|
|
else
|
|
return (temp);
|
|
}
|
|
|
|
int get_parms(argc, argv)
|
|
int argc;
|
|
char **argv;
|
|
{
|
|
int i;
|
|
int cur_arg;
|
|
|
|
/* codes for current argument:
|
|
** 0 = no current argument
|
|
** 1 = pr
|
|
** 2 = start
|
|
** 3 = seed
|
|
** 4 = ps1
|
|
** 5 = ps2
|
|
** 6 = empty
|
|
** 7 = filename */
|
|
|
|
dissim = 0;
|
|
pr = 0;
|
|
start = 2;
|
|
save = 0;
|
|
seed = 12345;
|
|
ps1 = 0.0001;
|
|
ps2 = 0.0001;
|
|
empty = 0.0;
|
|
n = 0;
|
|
cur_arg = 0;
|
|
for (i = 1; i < argc; i++) switch (cur_arg) {
|
|
case 0:
|
|
if (strcmp(argv[i], "-d") == 0)
|
|
dissim = 0;
|
|
else if (strcmp(argv[i], "-s") == 0)
|
|
dissim = 1;
|
|
else if (strcmp(argv[i], "-print") == 0)
|
|
cur_arg = 1;
|
|
else if (strcmp(argv[i], "-init") == 0)
|
|
cur_arg = 2;
|
|
else if (strcmp(argv[i], "-save") == 0)
|
|
save = 1;
|
|
else if (strcmp(argv[i], "-seed") == 0)
|
|
cur_arg = 3;
|
|
else if (strcmp(argv[i], "-ps1") == 0)
|
|
cur_arg = 4;
|
|
else if (strcmp(argv[i], "-ps2") == 0)
|
|
cur_arg = 5;
|
|
else if (strcmp(argv[i], "-empty") == 0)
|
|
cur_arg = 6;
|
|
else if (strcmp(argv[i], "-f") == 0)
|
|
cur_arg = 7;
|
|
else if (strcmp(argv[i], "-help") == 0)
|
|
show_help();
|
|
else {
|
|
printf("\nlsadt: bad option\n");
|
|
show_help();
|
|
}
|
|
break;
|
|
case 1:
|
|
pr = atoi(argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
case 2:
|
|
start = atoi(argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
case 3:
|
|
seed = atoi(argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
case 4:
|
|
ps1 = atof(argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
case 5:
|
|
ps2 = atof(argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
case 6:
|
|
empty = atof(argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
case 7:
|
|
strcpy(fname, argv[i]);
|
|
cur_arg = 0;
|
|
break;
|
|
default:
|
|
printf("\nlsadt: bad option\n");
|
|
show_help();
|
|
break;
|
|
}
|
|
|
|
/* check validity of parameters */
|
|
if (ps1 < 0.0) ps1 = 0.0001;
|
|
if (ps2 < 0.0) ps2 = 0.0001;
|
|
if (dissim < 0) dissim = 0;
|
|
if (start < 1 || start > 3) start = 3;
|
|
if (save != 1) save = 0;
|
|
if (seed < 0) seed = 12345;
|
|
|
|
/*printf("dissim=%d\n", dissim);*/
|
|
/*printf("pr=%d\n", pr);*/
|
|
/*printf("start=%d\n", start);*/
|
|
/*printf("save=%d\n", save);*/
|
|
/*printf("seed=%d\n", seed);*/
|
|
/*printf("ps1=%f\n", ps1);*/
|
|
/*printf("ps2=%f\n", ps2);*/
|
|
/*printf("empty=%f\n", empty);*/
|
|
}
|
|
|
|
int get_data()
|
|
{
|
|
int i, j, more;
|
|
char buf[BUFLEN];
|
|
char *ptr;
|
|
char ch;
|
|
int result;
|
|
double temp, nfactor, datmin, datmax;
|
|
|
|
nempty = n = 0;
|
|
more = 1;
|
|
ptr = &ch;
|
|
while (more) {
|
|
result = readtobarorcolon(buf, BUFLEN);
|
|
if (result == 0 || result == -1)
|
|
more = 0;
|
|
else {
|
|
n++;
|
|
names[n] = getmem(BUFLEN, 1);
|
|
result = readtobar(buf, BUFLEN);
|
|
if (result != 1)
|
|
show_error(
|
|
"get_data: bad name syntax, or missing "
|
|
"'|'");
|
|
strcpy(names[n], buf);
|
|
if (n > 1)
|
|
delta[n] = (double *)getmem(n, sizeof(double));
|
|
else
|
|
delta[n] = NULL;
|
|
for (j = 1; j < n; j++) {
|
|
if (!getaword(buf, BUFLEN))
|
|
show_error(
|
|
"get_data: missing data values.\n");
|
|
delta[n][j] = strtod(buf, &ptr);
|
|
if (ptr == buf)
|
|
show_error(
|
|
"get_data: invalid data value.\n");
|
|
if (delta[n][j] == empty) nempty++;
|
|
}
|
|
}
|
|
}
|
|
if (n < 4) show_error("Must be at least 4 nodes.");
|
|
m = n * (n - 1) / 2;
|
|
/* make all positive */
|
|
datmin = MAXDOUBLE;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] < datmin && delta[i][j] != empty)
|
|
datmin = delta[i][j];
|
|
if (datmin < 0.0)
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty) delta[i][j] -= datmin;
|
|
/* convert dissimilarities into similarities if -s option specified */
|
|
if (dissim) {
|
|
datmax = MINDOUBLE;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] > datmax &&
|
|
delta[i][j] != empty)
|
|
datmax = delta[i][j];
|
|
datmax += 1.0;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty)
|
|
delta[i][j] = datmax - delta[i][j];
|
|
}
|
|
|
|
/* make sum of squared deviations from delta to average(delta) = 1 */
|
|
temp = 0.0;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty) temp += delta[i][j];
|
|
temp = temp / (double)(m - nempty);
|
|
/* now temp = average of all non-empty cells */
|
|
nfactor = 0.0;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty)
|
|
nfactor += sqr(delta[i][j] - temp);
|
|
nfactor = 1.0 / sqrt(nfactor);
|
|
nfac = nfactor;
|
|
/* now nfactor is the normalization factor */
|
|
if (report)
|
|
fprintf(reportf, "Normalization factor: %15.10f\n", nfactor);
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty) delta[i][j] *= nfactor;
|
|
/* now all is normalized */
|
|
}
|
|
|
|
int initd()
|
|
{
|
|
int i, j, k;
|
|
double t1, t2;
|
|
int emp, nemp;
|
|
double dmax;
|
|
double efac, estd;
|
|
|
|
efac = 0.333333333333333;
|
|
if (start == 1) {
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) d[i][j] = runif();
|
|
return;
|
|
}
|
|
if (nempty == 0 && start == 2) {
|
|
estd = sqrt(efac / m);
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
d[i][j] = delta[i][j] + rnorm() * estd;
|
|
return;
|
|
}
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) d[i][j] = delta[i][j];
|
|
if (start == 3 && nempty == 0) return;
|
|
emp = nempty;
|
|
nemp = 0;
|
|
while (nemp < emp)
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (d[i][j] == empty) {
|
|
dmax = MAXDOUBLE;
|
|
for (k = 1; k <= n; k++) {
|
|
t1 = gd(i, k);
|
|
t2 = gd(j, k);
|
|
if (t1 != empty && t2 != empty)
|
|
dmax = min(dmax,
|
|
max(t1, t2));
|
|
}
|
|
if (dmax != MAXDOUBLE)
|
|
d[i][j] = dmax;
|
|
else
|
|
nemp++;
|
|
}
|
|
if (nemp != 0) {
|
|
t1 = 0.0;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (d[i][j] != empty) t1 += d[i][j];
|
|
t1 /= m - nemp;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (d[i][j] == empty) d[i][j] = t1;
|
|
}
|
|
if (start != 3) {
|
|
estd = sqrt(efac / (m - nempty));
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) d[i][j] += rnorm() * estd;
|
|
}
|
|
return;
|
|
}
|
|
|
|
static int nm0, nm1, nm2, nm3;
|
|
static double fitl, fitp, r;
|
|
|
|
fungra()
|
|
{
|
|
double fac;
|
|
int i, j, k, l;
|
|
double wijkl;
|
|
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) g[i][j] = 0.0;
|
|
fitl = 0.0;
|
|
fac = 2.0;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty) {
|
|
fitl += sqr(d[i][j] - delta[i][j]);
|
|
g[i][j] += fac * (d[i][j] - delta[i][j]);
|
|
}
|
|
fitp = 0.0;
|
|
fac = 2.0 * r;
|
|
for (i = 1; i <= nm3; i++)
|
|
for (j = i + 1; j <= nm2; j++)
|
|
for (k = j + 1; k <= nm1; k++)
|
|
for (l = k + 1; l <= nm0; l++)
|
|
if ((d[j][i] + d[l][k] >=
|
|
d[l][i] + d[k][j]) &&
|
|
(d[k][i] + d[l][j] >=
|
|
d[l][i] + d[k][j])) {
|
|
wijkl = d[j][i] + d[l][k] -
|
|
d[k][i] - d[l][j];
|
|
fitp += sqr(wijkl);
|
|
g[j][i] += fac * wijkl;
|
|
g[l][k] += fac * wijkl;
|
|
g[k][i] -= fac * wijkl;
|
|
g[l][j] -= fac * wijkl;
|
|
}
|
|
else if ((d[j][i] + d[l][k] >=
|
|
d[k][i] + d[l][j]) &&
|
|
(d[l][i] + d[k][j] >=
|
|
d[k][i] + d[l][j])) {
|
|
wijkl = d[j][i] + d[l][k] -
|
|
d[l][i] - d[k][j];
|
|
fitp += sqr(wijkl);
|
|
g[j][i] += fac * wijkl;
|
|
g[l][k] += fac * wijkl;
|
|
g[k][j] -= fac * wijkl;
|
|
g[l][i] -= fac * wijkl;
|
|
}
|
|
else if ((d[k][i] + d[l][j] >=
|
|
d[j][i] + d[l][k]) &&
|
|
(d[l][i] + d[k][j] >=
|
|
d[j][i] + d[l][k])) {
|
|
wijkl = d[k][i] + d[l][j] -
|
|
d[l][i] - d[k][j];
|
|
fitp += sqr(wijkl);
|
|
g[k][i] += fac * wijkl;
|
|
g[l][j] += fac * wijkl;
|
|
g[l][i] -= fac * wijkl;
|
|
g[k][j] -= fac * wijkl;
|
|
}
|
|
f = fitl + r * fitp;
|
|
}
|
|
|
|
static double **dr, **dgr, **d1, **gs, **xx, **gg;
|
|
static int iterc, prc;
|
|
print_iter(maxfnc, f) int maxfnc;
|
|
double f;
|
|
{
|
|
int i, j;
|
|
|
|
if (pr == 0) {
|
|
iterc++;
|
|
}
|
|
else if (prc < abs(pr)) {
|
|
prc++;
|
|
iterc++;
|
|
}
|
|
else {
|
|
printf("Iteration %6d", iterc);
|
|
printf(": function values %6d", maxfnc);
|
|
printf(" f = %24.16e\n", f);
|
|
if (pr < 0) {
|
|
printf(" d[] looks like this:\n");
|
|
for (i = 2; i <= n; i++) {
|
|
printf(" ");
|
|
for (j = 1; j < i; j++)
|
|
printf("%24.16e ", d[i][j]);
|
|
printf("\n ");
|
|
}
|
|
printf(" g[] looks like this:\n");
|
|
for (i = 2; i <= n; i++) {
|
|
printf(" ");
|
|
for (j = 1; j < i; j++)
|
|
printf("%24.16e ", g[i][j]);
|
|
printf("\n ");
|
|
}
|
|
}
|
|
prc = 1;
|
|
iterc++;
|
|
}
|
|
}
|
|
|
|
minfungra(dfn, acc, maxfn) double dfn, acc;
|
|
int maxfn;
|
|
{
|
|
double beta, c, clt, dginit, dgstep, dtest, finit;
|
|
double fmin, gamden, gamma, ggstar, gm, gmin, gnew;
|
|
double gsinit, gsumsq, xbound, xmin, xnew, xold;
|
|
int itcrs, flag, retry, maxfnk, maxfnc;
|
|
int i, j;
|
|
|
|
flag = 0;
|
|
retry = -2;
|
|
iterc = 0;
|
|
maxfnc = 1;
|
|
prc = abs(pr);
|
|
goto L190;
|
|
|
|
L10:
|
|
xnew = dabs(dfn + dfn) / gsumsq;
|
|
dgstep = -gsumsq;
|
|
itcrs = m + 1;
|
|
L30:
|
|
if (maxfnk < maxfnc) {
|
|
f = fmin;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
d[i][j] = xx[i][j];
|
|
g[i][j] = gg[i][j];
|
|
}
|
|
}
|
|
if (flag > 0) {
|
|
prc = 0;
|
|
print_iter(maxfnc, f);
|
|
return;
|
|
}
|
|
if (itcrs > m) {
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) d1[i][j] = -g[i][j];
|
|
}
|
|
print_iter(maxfnc, f);
|
|
dginit = 0.0;
|
|
for (i = 2; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
gs[i][j] = g[i][j];
|
|
dginit += d1[i][j] * g[i][j];
|
|
}
|
|
if (dginit >= 0.0) {
|
|
retry = -retry;
|
|
if (imin(retry, maxfnk) < 2) {
|
|
printf(
|
|
"minfungra: \
|
|
gradient wrong or acc too small\n");
|
|
flag = 2;
|
|
}
|
|
else
|
|
itcrs = m + 1;
|
|
goto L30;
|
|
}
|
|
xmin = 0.0;
|
|
fmin = f;
|
|
finit = f;
|
|
gsinit = gsumsq;
|
|
gmin = dginit;
|
|
gm = dginit;
|
|
xbound = -1.0;
|
|
xnew = xnew * min(1.0, dgstep / dginit);
|
|
dgstep = dginit;
|
|
L170:
|
|
c = xnew - xmin;
|
|
dtest = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
d[i][j] = xx[i][j] + c * d1[i][j];
|
|
dtest += dabs(d[i][j] - xx[i][j]);
|
|
}
|
|
if (dtest <= 0.0) {
|
|
clt = .7;
|
|
goto L300;
|
|
}
|
|
if (max(xbound, xmin - c) < 0.0) {
|
|
clt = 0.1;
|
|
}
|
|
maxfnc++;
|
|
L190:
|
|
fungra();
|
|
|
|
if (maxfnc > 1) {
|
|
for (gnew = 0.0, i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) gnew += d1[i][j] * g[i][j];
|
|
}
|
|
if (maxfnc <= 1 || (maxfnc > 1 && f < fmin) ||
|
|
(maxfnc > 1 && f == fmin && dabs(gnew) <= dabs(gmin))) {
|
|
maxfnk = maxfnc;
|
|
gsumsq = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) gsumsq += sqr(g[i][j]);
|
|
if (gsumsq <= acc) {
|
|
prc = 0;
|
|
print_iter(maxfnc, f);
|
|
return;
|
|
}
|
|
}
|
|
if (maxfnc == maxfn) {
|
|
printf(
|
|
"minfungra: \
|
|
maxfn function values have been calculated\n");
|
|
flag = 1;
|
|
goto L30;
|
|
}
|
|
if (maxfnk < maxfnc) goto L300;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
xx[i][j] = d[i][j];
|
|
gg[i][j] = g[i][j];
|
|
}
|
|
if (maxfnc <= 1) goto L10;
|
|
gm = gnew;
|
|
if (gm <= dginit) goto L310;
|
|
ggstar = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) ggstar += gs[i][j] * g[i][j];
|
|
beta = (gsumsq - ggstar) / (gm - dginit);
|
|
L300:
|
|
if (dabs(gm) > clt * dabs(dginit) ||
|
|
(dabs(gm) <= clt * dabs(dginit) &&
|
|
dabs(gm * beta) >= clt * gsumsq)) {
|
|
L310:
|
|
clt += 0.3;
|
|
if (clt > 0.8) {
|
|
retry = -retry;
|
|
if (imin(retry, maxfnk) < 2) {
|
|
printf(
|
|
"minfungra: \
|
|
gradient wrong or acc too small\n");
|
|
flag = 2;
|
|
}
|
|
else
|
|
itcrs = m + 1;
|
|
goto L30;
|
|
}
|
|
xold = xnew;
|
|
xnew = .5 * (xmin + xold);
|
|
if (maxfnk >= maxfnc && gmin * gnew > 0.0) {
|
|
xnew = 10.0 * xold;
|
|
if (xbound >= 0.0) {
|
|
xnew = 0.5 * (xold + xbound);
|
|
}
|
|
}
|
|
c = gnew -
|
|
(3.0 * gnew + gmin - 4.0 * (f - fmin) / (xold - xmin)) *
|
|
(xold - xnew) / (xold - xmin);
|
|
if (maxfnk >= maxfnc) {
|
|
if (gmin * gnew <= 0.0) {
|
|
xbound = xmin;
|
|
}
|
|
xmin = xold;
|
|
fmin = f;
|
|
gmin = gnew;
|
|
}
|
|
else
|
|
xbound = xold;
|
|
if (c * gmin < 0.0)
|
|
xnew = (xmin * c - xnew * gmin) / (c - gmin);
|
|
goto L170;
|
|
}
|
|
if (min(f, fmin) < finit ||
|
|
(min(f, fmin) >= finit && gsumsq < gsinit)) {
|
|
if (itcrs < m && dabs(ggstar) < .2 * gsumsq) {
|
|
gamma = 0.0;
|
|
c = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
gamma += gg[i][j] * dgr[i][j];
|
|
c += gg[i][j] * dr[i][j];
|
|
}
|
|
gamma /= gamden;
|
|
if (dabs(beta * gm + gamma * c) < .2 * gsumsq) {
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
d1[i][j] = -gg[i][j] +
|
|
beta * d1[i][j] +
|
|
gamma * dr[i][j];
|
|
itcrs++;
|
|
}
|
|
else {
|
|
itcrs = 2;
|
|
gamden = gm - dginit;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
dr[i][j] = d1[i][j];
|
|
dgr[i][j] = gg[i][j] - gs[i][j];
|
|
d1[i][j] =
|
|
-gg[i][j] + beta * d1[i][j];
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
itcrs = 2;
|
|
gamden = gm - dginit;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) {
|
|
dr[i][j] = d1[i][j];
|
|
dgr[i][j] = gg[i][j] - gs[i][j];
|
|
d1[i][j] = -gg[i][j] + beta * d1[i][j];
|
|
}
|
|
}
|
|
goto L30;
|
|
}
|
|
else {
|
|
retry = -retry;
|
|
if (imin(retry, maxfnk) < 2) {
|
|
printf(
|
|
"minfungra: \
|
|
gradient wrong or acc too small\n");
|
|
flag = 2;
|
|
}
|
|
else
|
|
itcrs = m + 1;
|
|
goto L30;
|
|
}
|
|
}
|
|
|
|
get_dist()
|
|
{
|
|
static double cons = 1.0;
|
|
static int maxfn = 500;
|
|
int i, j, iter;
|
|
double dif;
|
|
|
|
dr = (double **)getmem(n, sizeof(delta[1]));
|
|
dgr = (double **)getmem(n, sizeof(delta[1]));
|
|
d1 = (double **)getmem(n, sizeof(delta[1]));
|
|
gs = (double **)getmem(n, sizeof(delta[1]));
|
|
xx = (double **)getmem(n, sizeof(delta[1]));
|
|
gg = (double **)getmem(n, sizeof(delta[1]));
|
|
dr[1] = NULL;
|
|
dgr[1] = NULL;
|
|
d1[1] = NULL;
|
|
gs[1] = NULL;
|
|
xx[1] = NULL;
|
|
gg[1] = NULL;
|
|
for (i = 2; i <= n; i++) {
|
|
dr[i] = (double *)getmem(i - 1, sizeof(double));
|
|
dgr[i] = (double *)getmem(i - 1, sizeof(double));
|
|
d1[i] = (double *)getmem(i - 1, sizeof(double));
|
|
gs[i] = (double *)getmem(i - 1, sizeof(double));
|
|
xx[i] = (double *)getmem(i - 1, sizeof(double));
|
|
gg[i] = (double *)getmem(i - 1, sizeof(double));
|
|
}
|
|
nm0 = n;
|
|
nm1 = n - 1;
|
|
nm2 = n - 2;
|
|
nm3 = n - 3;
|
|
if (start == 3) {
|
|
r = 0.001;
|
|
fungra();
|
|
}
|
|
else {
|
|
r = 1.0;
|
|
fungra();
|
|
r = cons * fitl / fitp;
|
|
f = (1.0 + cons) * fitl;
|
|
}
|
|
iter = 1;
|
|
do {
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) dold[i][j] = d[i][j];
|
|
minfungra(0.5 * f, ps1, maxfn);
|
|
dif = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
dif += sqr(d[i][j] - dold[i][j]);
|
|
dif = sqrt(dif);
|
|
if (dif > ps2) {
|
|
iter++;
|
|
r *= 10.0;
|
|
}
|
|
} while (dif > ps2);
|
|
fungra();
|
|
for (i = 2; i <= n; i++) {
|
|
free(dr[i]);
|
|
free(dgr[i]);
|
|
free(d1[i]);
|
|
free(gs[i]);
|
|
free(xx[i]);
|
|
free(gg[i]);
|
|
}
|
|
free(dr);
|
|
free(dgr);
|
|
free(d1);
|
|
free(gs);
|
|
free(xx);
|
|
free(gg);
|
|
}
|
|
|
|
double gttol()
|
|
{
|
|
double result;
|
|
int i, j, k, l;
|
|
|
|
result = 0.0;
|
|
nm0 = n;
|
|
nm1 = n - 1;
|
|
nm2 = n - 2;
|
|
nm3 = n - 3;
|
|
for (i = 1; i <= nm3; i++)
|
|
for (j = i + 1; j <= nm2; j++)
|
|
for (k = j + 1; k <= nm1; k++)
|
|
for (l = k + 1; l <= nm0; l++)
|
|
if ((d[j][i] + d[l][k] >=
|
|
d[l][i] + d[k][j]) &&
|
|
(d[k][i] + d[l][j] >=
|
|
d[l][i] + d[k][j]))
|
|
result = max(
|
|
result,
|
|
dabs(d[j][i] + d[l][k] -
|
|
d[k][i] - d[l][j]));
|
|
else if ((d[j][i] + d[l][k] >=
|
|
d[k][i] + d[l][j]) &&
|
|
(d[l][i] + d[k][j] >=
|
|
d[k][i] + d[l][j]))
|
|
result = max(
|
|
result,
|
|
dabs(d[j][i] + d[l][k] -
|
|
d[l][i] - d[k][j]));
|
|
else if ((d[k][i] + d[l][j] >=
|
|
d[j][i] + d[l][k]) &&
|
|
(d[l][i] + d[k][j] >=
|
|
d[j][i] + d[l][k]))
|
|
result = max(
|
|
result,
|
|
dabs(d[k][i] + d[l][j] -
|
|
d[l][i] - d[k][j]));
|
|
return (result);
|
|
}
|
|
|
|
gtcord()
|
|
{
|
|
double sumx, sumy, ssqx, ssqy, scp, fn;
|
|
int i, j;
|
|
|
|
sumx = sumy = ssqx = ssqy = scp = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
if (delta[i][j] != empty) {
|
|
sumx += delta[i][j];
|
|
sumy += d[i][j];
|
|
ssqx += sqr(delta[i][j]);
|
|
ssqy += sqr(d[i][j]);
|
|
scp += delta[i][j] * d[i][j];
|
|
}
|
|
fn = (double)(m - nempty);
|
|
if ((fn * ssqy - sumx * sumy) <= 0.0)
|
|
show_error("gtcord: path length distances have zero variance");
|
|
else
|
|
return (
|
|
(fn * scp - sumx * sumy) /
|
|
sqrt((fn * ssqx - sqr(sumx)) * (fn * ssqy - sqr(sumy))));
|
|
}
|
|
|
|
goodfit()
|
|
{
|
|
double r, rsq;
|
|
|
|
r = gtcord();
|
|
rsq = r * r * 100.0;
|
|
tol = gttol();
|
|
}
|
|
|
|
additive_const()
|
|
{
|
|
int i, j, k;
|
|
|
|
c = 0.0;
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++)
|
|
for (k = 1; k <= n; k++)
|
|
if (k != i && k != j)
|
|
c = max(c,
|
|
gd(i, j) - gd(i, k) - gd(j, k));
|
|
if (report) fprintf(reportf, "Additive Constant: %15.10f\n", c);
|
|
if (c != 0.0)
|
|
for (i = 1; i <= n; i++)
|
|
for (j = 1; j < i; j++) d[i][j] += c;
|
|
}
|
|
|
|
static int *mergei, *mergej;
|
|
static int ninv;
|
|
get_tree()
|
|
{
|
|
#define false 0
|
|
#define true 1
|
|
int i, j, k, l, im, jm, km, lm, count;
|
|
int maxnode, maxcnt, nnode, nact;
|
|
double arci, arcj, arcim, arcjm;
|
|
char *act;
|
|
|
|
maxnode = 2 * n - 2;
|
|
mergei = (int *)getmem(maxnode, sizeof(int));
|
|
mergej = (int *)getmem(maxnode, sizeof(int));
|
|
act = (char *)getmem(maxnode, sizeof(char));
|
|
for (i = 1; i <= maxnode; i++) act[i] = false;
|
|
nact = n;
|
|
ninv = 0;
|
|
nnode = n;
|
|
while (nact > 4) {
|
|
maxcnt = 0;
|
|
for (i = 1; i <= nnode; i++)
|
|
if (!act[i])
|
|
for (j = 1; j < i; j++)
|
|
if (!act[j]) {
|
|
count = 0;
|
|
arci = 0.0;
|
|
arcj = 0.0;
|
|
for (k = 2; k <= nnode; k++)
|
|
if (!act[k] && k != i &&
|
|
k != j)
|
|
for (l = 1;
|
|
l < k; l++)
|
|
if (!act[l] &&
|
|
l !=
|
|
i &&
|
|
l !=
|
|
j)
|
|
if (gd(i,
|
|
j) + gd(k,
|
|
l) <=
|
|
gd(i,
|
|
k) +
|
|
gd(j,
|
|
l) &&
|
|
gd(i,
|
|
j) + gd(k,
|
|
l) <=
|
|
gd(i,
|
|
l) +
|
|
gd(j,
|
|
k)) {
|
|
count++;
|
|
arci +=
|
|
(gd(i,
|
|
j) +
|
|
gd(i,
|
|
k) -
|
|
gd(j,
|
|
k)) /
|
|
2.0;
|
|
arcj +=
|
|
(gd(i,
|
|
j) +
|
|
gd(j,
|
|
l) -
|
|
gd(i,
|
|
l)) /
|
|
2.0;
|
|
}
|
|
if (count > maxcnt) {
|
|
maxcnt = count;
|
|
arcim = max(
|
|
0.0, arci / count);
|
|
arcjm = max(
|
|
0.0, arcj / count);
|
|
im = i;
|
|
jm = j;
|
|
}
|
|
}
|
|
|
|
nnode++;
|
|
if (nnode + 2 > maxnode)
|
|
show_error("get_tree: number of nodes exceeds 2N-2");
|
|
ninv++;
|
|
mergei[ninv] = im;
|
|
mergej[ninv] = jm;
|
|
act[im] = true;
|
|
act[jm] = true;
|
|
d[nnode] = (double *)getmem(nnode - 1, sizeof(double));
|
|
d[nnode][im] = arcim;
|
|
d[nnode][jm] = arcjm;
|
|
for (i = 1; i <= nnode - 1; i++)
|
|
if (!act[i]) d[nnode][i] = max(0.0, gd(im, i) - arcim);
|
|
nact--;
|
|
}
|
|
|
|
for (i = 1; act[i]; i++)
|
|
if (i > nnode)
|
|
show_error(
|
|
"get_tree: can't find last two invisible nodes");
|
|
im = i;
|
|
for (i = im + 1; act[i]; i++)
|
|
if (i > nnode)
|
|
show_error(
|
|
"get_tree: can't find last two invisible nodes");
|
|
jm = i;
|
|
for (i = jm + 1; act[i]; i++)
|
|
if (i > nnode)
|
|
show_error(
|
|
"get_tree: can't find last two invisible nodes");
|
|
km = i;
|
|
for (i = km + 1; act[i]; i++)
|
|
if (i > nnode)
|
|
show_error(
|
|
"get_tree: can't find last two invisible nodes");
|
|
lm = i;
|
|
if (gd(im, jm) + gd(km, lm) <= gd(im, km) + gd(jm, lm) + tol &&
|
|
gd(im, jm) + gd(km, lm) <= gd(im, lm) + gd(jm, km) + tol) {
|
|
i = im;
|
|
j = jm;
|
|
k = km;
|
|
l = lm;
|
|
}
|
|
else if (gd(im, lm) + gd(jm, km) <= gd(im, km) + gd(jm, lm) + tol &&
|
|
gd(im, lm) + gd(jm, km) <= gd(im, jm) + gd(km, lm) + tol) {
|
|
i = im;
|
|
j = lm;
|
|
k = km;
|
|
l = jm;
|
|
}
|
|
else if (gd(im, km) + gd(jm, lm) <= gd(im, jm) + gd(km, lm) + tol &&
|
|
gd(im, km) + gd(jm, lm) <= gd(im, lm) + gd(jm, km) + tol) {
|
|
i = im;
|
|
j = km;
|
|
k = lm;
|
|
l = jm;
|
|
}
|
|
|
|
nnode++;
|
|
ninv++;
|
|
mergei[ninv] = i;
|
|
mergej[ninv] = j;
|
|
d[nnode] = (double *)getmem(nnode - 1, sizeof(double));
|
|
d[nnode][i] = max(0.0, (gd(i, j) + gd(i, k) - gd(j, k)) / 2.0);
|
|
d[nnode][j] = max(0.0, (gd(i, j) + gd(j, l) - gd(i, l)) / 2.0);
|
|
nnode++;
|
|
ninv++;
|
|
mergei[ninv] = k;
|
|
mergej[ninv] = l;
|
|
d[nnode] = (double *)getmem(nnode - 1, sizeof(double));
|
|
d[nnode][k] = max(0.0, (gd(k, l) + gd(i, k) - gd(i, l)) / 2.0);
|
|
d[nnode][l] = max(0.0, (gd(k, l) + gd(j, l) - gd(j, k)) / 2.0);
|
|
d[nnode][nnode - 1] =
|
|
max(0.0, (gd(i, k) + gd(j, l) - gd(i, j) - gd(k, l)) / 2.0);
|
|
}
|
|
|
|
print_node(node, dist, indent) int node;
|
|
double dist;
|
|
int indent;
|
|
{
|
|
static char buf[BUFLEN];
|
|
|
|
if (node <= n)
|
|
printf("%s%s:%6.4f", repeatch(buf, '\t', indent), names[node],
|
|
dist / nfac);
|
|
else {
|
|
printf("%s(\n", repeatch(buf, '\t', indent));
|
|
print_node(mergei[node - n], gd(node, mergei[node - n]),
|
|
indent + 1);
|
|
printf(",\n");
|
|
print_node(mergej[node - n], gd(node, mergej[node - n]),
|
|
indent + 1);
|
|
printf("\n%s):%6.4f", repeatch(buf, '\t', indent), dist / nfac);
|
|
}
|
|
}
|
|
|
|
show_tree()
|
|
{
|
|
int i, j, current;
|
|
int ij[2];
|
|
|
|
current = 0;
|
|
for (i = 1; current < 2; i++) {
|
|
for (j = 1; (mergei[j] != i && mergej[j] != i) && j <= ninv;
|
|
j++)
|
|
;
|
|
if (j > ninv) ij[current++] = i;
|
|
}
|
|
printf("(\n");
|
|
print_node(ij[0], gd(ij[0], ij[1]) / 2.0, 1);
|
|
printf(",\n");
|
|
print_node(ij[1], gd(ij[0], ij[1]) / 2.0, 1);
|
|
printf("\n);\n");
|
|
}
|
|
|
|
show_help()
|
|
{
|
|
printf("\nlsadt--options:\n");
|
|
printf(" -f file - write report to 'file'\n");
|
|
printf(" -d - treat data as dissimilarities (default)\n");
|
|
printf(" -s - treat data as similarities\n");
|
|
printf(" -print 0 - don't print out iteration history (default)\n");
|
|
printf(
|
|
" -print n>0 - print out iteration history every n iterations\n");
|
|
printf(
|
|
" -print n<0 - print out iteration history every n iterations\n");
|
|
printf(
|
|
" (including current distance estimates & "
|
|
"gradients)\n");
|
|
printf(" -init n - initial parameter estimates (default = 3)\n");
|
|
printf(" n=1 - uniformly distributed random numbers\n");
|
|
printf(" n=2 - error-perturbed data\n");
|
|
printf(" n=3 - original distance data from input matrix\n");
|
|
printf(
|
|
" -save - save final parameter estimates (default is don't "
|
|
"save\n");
|
|
printf(
|
|
" -seed n - seed for random number generator (default = "
|
|
"12345)\n");
|
|
printf(
|
|
" -ps1 n - convergence criterion for inner iterations (default "
|
|
"= 0.0001)\n");
|
|
printf(
|
|
" -ps2 n - convergence criterion for major iterations (default "
|
|
"= 0.0001)\n");
|
|
printf(" -empty n - missing data indicator (default = 0.0)\n");
|
|
printf(" -help - show this help text\n");
|
|
exit(0);
|
|
}
|
|
|
|
show_error(message) char *message;
|
|
{
|
|
printf("\n>>>>ERROR:\n>>>>%s\n", message);
|
|
exit(0);
|
|
}
|
|
|
|
main(argc, argv) int argc;
|
|
char **argv;
|
|
{
|
|
int i;
|
|
|
|
strcpy(fname, "");
|
|
get_parms(argc, argv);
|
|
if (strcmp(fname, "")) {
|
|
report = 1;
|
|
reportf = fopen(fname, "w");
|
|
if (reportf == NULL) {
|
|
perror("lsadt");
|
|
exit(0);
|
|
}
|
|
}
|
|
else
|
|
report = 0;
|
|
get_data();
|
|
d = (double **)getmem(n, sizeof(delta[1]));
|
|
g = (double **)getmem(n, sizeof(delta[1]));
|
|
dold = (double **)getmem(n, sizeof(delta[1]));
|
|
d[1] = NULL;
|
|
g[1] = NULL;
|
|
dold[1] = NULL;
|
|
for (i = 2; i <= n; i++) {
|
|
d[i] = (double *)getmem(i - 1, sizeof(double));
|
|
g[i] = (double *)getmem(i - 1, sizeof(double));
|
|
dold[i] = (double *)getmem(i - 1, sizeof(double));
|
|
}
|
|
initd();
|
|
get_dist();
|
|
goodfit();
|
|
additive_const();
|
|
get_tree();
|
|
show_tree();
|
|
if (report) close(reportf);
|
|
}
|