/* PROGRAM LSADT Fortran->C conversion by Mike Maciukenas, CPGA, Microbiology at University of Illinois. C----------------------------------------------------------------------- C C LEAST SQUARES ALGORITHM FOR FITTING ADDITIVE TREES TO C PROXIMITY DATA C C GEERT DE SOETE -- VERSION 1.01 - FEB. 1983 C VERSION 1.02 - JUNE 1983 C VERSION 1.03 - JULY 1983 C C REFERENCE: DE SOETE, G. A LEAST SQUARES ALGORITHM FOR FITTING C ADDITIVE TREES TO PROXIMITY DATA. PSYCHOMETRIKA, 1983, 48, C 621-626. C DE SOETE, G. ADDITIVE TREE REPRESENTATIONS OF INCOMPLETE C DISSIMILARITY DATA. QUALITY AND QUANTITY, 1984, 18, C 387-393. C REMARKS C ------- C 2) UNIFORMLY DISTRIBUTED RANDOM NUMBERS ARE GENERATED BY A C PROCEDURE DUE TO SCHRAGE. CF. C SCHRAGE, L. A MORE PORTABLE FORTRAN RANDOM NUMBER GENERATOR. C ACM TRANS. ON MATH. SOFTW., 1979, 5, 132-138. C 3) SUBROUTINES VA14AD AND VA14AC (translated into minfungra) ARE C ADAPTED FROM THE HARWELL SUBROUTINE LIBRARY (1979 EDITION). C 4) ALTHOUGH THIS PROGRAM HAS BEEN CAREFULLY TESTED, THE C AUTHOR DISCLAIMS ANY RESPONSABILITY FOR POSSIBLE C ERRORS. C C----------------------------------------------------------------------- */ #include #include #include #include #include #include #include #include #define BUFLEN 1024 #define MAXLEAVES 256 static int m, n, dissim, pr, start, save, seed, nempty; static double ps1, ps2, f, empty, tol, c; static char fname[1000]; static char *names[MAXLEAVES]; static double *delta[MAXLEAVES]; static double **d; static double **g; static double **dold; static FILE *reportf; static int report; extern int errno; double nfac; extern double strtod(); double dabs(a) double a; { return ((a < 0.0) ? -a : a); } double sqr(a) double a; { return (a * a); } double max(a, b) double a; double b; { return ((a > b) ? a : b); } int imin(a, b) int a; int b; { return ((a < b) ? a : b); } double min(a, b) double a; double b; { return ((a < b) ? a : b); } float runif() { #define A 16807 #define P 2147483647 #define B15 32768 #define B16 65536 int xhi, xalo, leftlo, fhi, k; float t1, t2; xhi = seed / B16; xalo = (seed - (xhi * B16)) * A; leftlo = xalo / B16; fhi = xhi * A + leftlo; k = fhi / B15; seed = (((xalo - leftlo * B16) - P) + (fhi - k * B15) * B16) + k; if (seed < 0) seed += P; t1 = seed; t2 = t1 * 4.656612875e-10; return (t2); /* seed * (1/(2**31-1))*/ } float rnorm() { static float t1, t2; static float n1, n2; static int second = 0; if (second) { second = 0; n1 = sin(t2); n2 = t1 * n1; n1 = t1 * sin(t2); return (n2); } else { t1 = runif(); t1 = sqrt(-2.0 * log(t1)); n2 = 6.283185308; t2 = runif() * n2; n1 = cos(t2); n2 = t1 * n1; n1 = t1 * cos(t2); second = 1; return (n2); } } double gd(i, j) int i, j; { if (j < i) return (d[i][j]); else if (j > i) return (d[j][i]); else show_error("gd: i=j -- programmer screwed up!"); } char *repeatch(string, ch, num) char *string; int ch; int num; { for (string[num--] = '\0'; num >= 0; string[num--] = ch) ; return (string); } int getachar() /* skips comments! */ { static int oldchar = '\0'; int ch; int more = 1; while (more) { ch = getchar(); if (oldchar == '\n' && ch == '#') { while (ch != '\n' && ch != EOF) ch = getchar(); oldchar = ch; } else if (oldchar == '\n' && isspace(ch)) ; else more = 0; } oldchar = ch; return (ch); } int skip_space() { int ch; while (isspace(ch = getachar())) ; return (ch); } int getaword(string, len) /* 0 if failed, 1 if data was read, -1 if data read to end of file */ char *string; int len; { int i; int ch; ch = skip_space(); if (ch == EOF) return (0); for (i = 0; !isspace(ch) && ch != EOF; i++) { if (i < len - 1) { string[i] = ch; } ch = getachar(); } string[i < len ? i : len - 1] = '\0'; if (ch == EOF) return (-1); else return (1); } int readtobar(string, len) /* 0 if failed, 1 if data was read */ char *string; int len; { int i; int ch; ch = skip_space(); if (ch == EOF) return (0); for (i = 0; ch != EOF && ch != '|'; i++) { if (ch == '\n' || ch == '\r' || ch == '\t') i--; else { if (i < len - 1) string[i] = ch; } ch = getachar(); } len = i < len ? i : len - 1; for (i = len - 1; i >= 0 && isspace(string[i]); i--) ; string[i + 1] = '\0'; if (ch == EOF) return (-1); else return (1); } int readtobarorcolon(string, len) /* 0 if failed, 1 if data was read */ char *string; int len; { int i; int ch; ch = skip_space(); if (ch == EOF) return (0); for (i = 0; ch != EOF && ch != '|' && ch != ':'; i++) { if (ch == '\n' || ch == '\r' || ch == '\t') i--; else { if (i < len - 1) string[i] = ch; } ch = getachar(); } len = i < len ? i : len - 1; for (i = len - 1; i >= 0 && isspace(string[i]); i--) ; string[i + 1] = '\0'; if (ch == EOF) return (-1); else return (1); } char *getmem(nelem, elsize) unsigned nelem, elsize; { char *temp; temp = (char *)calloc(nelem + 1, elsize); if (temp == NULL) show_error("Couldn't allocate memory."); else return (temp); } int get_parms(argc, argv) int argc; char **argv; { int i; int cur_arg; /* codes for current argument: ** 0 = no current argument ** 1 = pr ** 2 = start ** 3 = seed ** 4 = ps1 ** 5 = ps2 ** 6 = empty ** 7 = filename */ dissim = 0; pr = 0; start = 2; save = 0; seed = 12345; ps1 = 0.0001; ps2 = 0.0001; empty = 0.0; n = 0; cur_arg = 0; for (i = 1; i < argc; i++) switch (cur_arg) { case 0: if (strcmp(argv[i], "-d") == 0) dissim = 0; else if (strcmp(argv[i], "-s") == 0) dissim = 1; else if (strcmp(argv[i], "-print") == 0) cur_arg = 1; else if (strcmp(argv[i], "-init") == 0) cur_arg = 2; else if (strcmp(argv[i], "-save") == 0) save = 1; else if (strcmp(argv[i], "-seed") == 0) cur_arg = 3; else if (strcmp(argv[i], "-ps1") == 0) cur_arg = 4; else if (strcmp(argv[i], "-ps2") == 0) cur_arg = 5; else if (strcmp(argv[i], "-empty") == 0) cur_arg = 6; else if (strcmp(argv[i], "-f") == 0) cur_arg = 7; else if (strcmp(argv[i], "-help") == 0) show_help(); else { printf("\nlsadt: bad option\n"); show_help(); } break; case 1: pr = atoi(argv[i]); cur_arg = 0; break; case 2: start = atoi(argv[i]); cur_arg = 0; break; case 3: seed = atoi(argv[i]); cur_arg = 0; break; case 4: ps1 = atof(argv[i]); cur_arg = 0; break; case 5: ps2 = atof(argv[i]); cur_arg = 0; break; case 6: empty = atof(argv[i]); cur_arg = 0; break; case 7: strcpy(fname, argv[i]); cur_arg = 0; break; default: printf("\nlsadt: bad option\n"); show_help(); break; } /* check validity of parameters */ if (ps1 < 0.0) ps1 = 0.0001; if (ps2 < 0.0) ps2 = 0.0001; if (dissim < 0) dissim = 0; if (start < 1 || start > 3) start = 3; if (save != 1) save = 0; if (seed < 0) seed = 12345; /*printf("dissim=%d\n", dissim);*/ /*printf("pr=%d\n", pr);*/ /*printf("start=%d\n", start);*/ /*printf("save=%d\n", save);*/ /*printf("seed=%d\n", seed);*/ /*printf("ps1=%f\n", ps1);*/ /*printf("ps2=%f\n", ps2);*/ /*printf("empty=%f\n", empty);*/ } int get_data() { int i, j, more; char buf[BUFLEN]; char *ptr; char ch; int result; double temp, nfactor, datmin, datmax; nempty = n = 0; more = 1; ptr = &ch; while (more) { result = readtobarorcolon(buf, BUFLEN); if (result == 0 || result == -1) more = 0; else { n++; names[n] = getmem(BUFLEN, 1); result = readtobar(buf, BUFLEN); if (result != 1) show_error( "get_data: bad name syntax, or missing " "'|'"); strcpy(names[n], buf); if (n > 1) delta[n] = (double *)getmem(n, sizeof(double)); else delta[n] = NULL; for (j = 1; j < n; j++) { if (!getaword(buf, BUFLEN)) show_error( "get_data: missing data values.\n"); delta[n][j] = strtod(buf, &ptr); if (ptr == buf) show_error( "get_data: invalid data value.\n"); if (delta[n][j] == empty) nempty++; } } } if (n < 4) show_error("Must be at least 4 nodes."); m = n * (n - 1) / 2; /* make all positive */ datmin = MAXDOUBLE; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] < datmin && delta[i][j] != empty) datmin = delta[i][j]; if (datmin < 0.0) for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) delta[i][j] -= datmin; /* convert dissimilarities into similarities if -s option specified */ if (dissim) { datmax = MINDOUBLE; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] > datmax && delta[i][j] != empty) datmax = delta[i][j]; datmax += 1.0; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) delta[i][j] = datmax - delta[i][j]; } /* make sum of squared deviations from delta to average(delta) = 1 */ temp = 0.0; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) temp += delta[i][j]; temp = temp / (double)(m - nempty); /* now temp = average of all non-empty cells */ nfactor = 0.0; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) nfactor += sqr(delta[i][j] - temp); nfactor = 1.0 / sqrt(nfactor); nfac = nfactor; /* now nfactor is the normalization factor */ if (report) fprintf(reportf, "Normalization factor: %15.10f\n", nfactor); for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) delta[i][j] *= nfactor; /* now all is normalized */ } int initd() { int i, j, k; double t1, t2; int emp, nemp; double dmax; double efac, estd; efac = 0.333333333333333; if (start == 1) { for (i = 2; i <= n; i++) for (j = 1; j < i; j++) d[i][j] = runif(); return; } if (nempty == 0 && start == 2) { estd = sqrt(efac / m); for (i = 2; i <= n; i++) for (j = 1; j < i; j++) d[i][j] = delta[i][j] + rnorm() * estd; return; } for (i = 2; i <= n; i++) for (j = 1; j < i; j++) d[i][j] = delta[i][j]; if (start == 3 && nempty == 0) return; emp = nempty; nemp = 0; while (nemp < emp) for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (d[i][j] == empty) { dmax = MAXDOUBLE; for (k = 1; k <= n; k++) { t1 = gd(i, k); t2 = gd(j, k); if (t1 != empty && t2 != empty) dmax = min(dmax, max(t1, t2)); } if (dmax != MAXDOUBLE) d[i][j] = dmax; else nemp++; } if (nemp != 0) { t1 = 0.0; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (d[i][j] != empty) t1 += d[i][j]; t1 /= m - nemp; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (d[i][j] == empty) d[i][j] = t1; } if (start != 3) { estd = sqrt(efac / (m - nempty)); for (i = 2; i <= n; i++) for (j = 1; j < i; j++) d[i][j] += rnorm() * estd; } return; } static int nm0, nm1, nm2, nm3; static double fitl, fitp, r; fungra() { double fac; int i, j, k, l; double wijkl; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) g[i][j] = 0.0; fitl = 0.0; fac = 2.0; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) { fitl += sqr(d[i][j] - delta[i][j]); g[i][j] += fac * (d[i][j] - delta[i][j]); } fitp = 0.0; fac = 2.0 * r; for (i = 1; i <= nm3; i++) for (j = i + 1; j <= nm2; j++) for (k = j + 1; k <= nm1; k++) for (l = k + 1; l <= nm0; l++) if ((d[j][i] + d[l][k] >= d[l][i] + d[k][j]) && (d[k][i] + d[l][j] >= d[l][i] + d[k][j])) { wijkl = d[j][i] + d[l][k] - d[k][i] - d[l][j]; fitp += sqr(wijkl); g[j][i] += fac * wijkl; g[l][k] += fac * wijkl; g[k][i] -= fac * wijkl; g[l][j] -= fac * wijkl; } else if ((d[j][i] + d[l][k] >= d[k][i] + d[l][j]) && (d[l][i] + d[k][j] >= d[k][i] + d[l][j])) { wijkl = d[j][i] + d[l][k] - d[l][i] - d[k][j]; fitp += sqr(wijkl); g[j][i] += fac * wijkl; g[l][k] += fac * wijkl; g[k][j] -= fac * wijkl; g[l][i] -= fac * wijkl; } else if ((d[k][i] + d[l][j] >= d[j][i] + d[l][k]) && (d[l][i] + d[k][j] >= d[j][i] + d[l][k])) { wijkl = d[k][i] + d[l][j] - d[l][i] - d[k][j]; fitp += sqr(wijkl); g[k][i] += fac * wijkl; g[l][j] += fac * wijkl; g[l][i] -= fac * wijkl; g[k][j] -= fac * wijkl; } f = fitl + r * fitp; } static double **dr, **dgr, **d1, **gs, **xx, **gg; static int iterc, prc; print_iter(maxfnc, f) int maxfnc; double f; { int i, j; if (pr == 0) { iterc++; } else if (prc < abs(pr)) { prc++; iterc++; } else { printf("Iteration %6d", iterc); printf(": function values %6d", maxfnc); printf(" f = %24.16e\n", f); if (pr < 0) { printf(" d[] looks like this:\n"); for (i = 2; i <= n; i++) { printf(" "); for (j = 1; j < i; j++) printf("%24.16e ", d[i][j]); printf("\n "); } printf(" g[] looks like this:\n"); for (i = 2; i <= n; i++) { printf(" "); for (j = 1; j < i; j++) printf("%24.16e ", g[i][j]); printf("\n "); } } prc = 1; iterc++; } } minfungra(dfn, acc, maxfn) double dfn, acc; int maxfn; { double beta, c, clt, dginit, dgstep, dtest, finit; double fmin, gamden, gamma, ggstar, gm, gmin, gnew; double gsinit, gsumsq, xbound, xmin, xnew, xold; int itcrs, flag, retry, maxfnk, maxfnc; int i, j; flag = 0; retry = -2; iterc = 0; maxfnc = 1; prc = abs(pr); goto L190; L10: xnew = dabs(dfn + dfn) / gsumsq; dgstep = -gsumsq; itcrs = m + 1; L30: if (maxfnk < maxfnc) { f = fmin; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) { d[i][j] = xx[i][j]; g[i][j] = gg[i][j]; } } if (flag > 0) { prc = 0; print_iter(maxfnc, f); return; } if (itcrs > m) { for (i = 2; i <= n; i++) for (j = 1; j < i; j++) d1[i][j] = -g[i][j]; } print_iter(maxfnc, f); dginit = 0.0; for (i = 2; i <= n; i++) for (j = 1; j < i; j++) { gs[i][j] = g[i][j]; dginit += d1[i][j] * g[i][j]; } if (dginit >= 0.0) { retry = -retry; if (imin(retry, maxfnk) < 2) { printf( "minfungra: \ gradient wrong or acc too small\n"); flag = 2; } else itcrs = m + 1; goto L30; } xmin = 0.0; fmin = f; finit = f; gsinit = gsumsq; gmin = dginit; gm = dginit; xbound = -1.0; xnew = xnew * min(1.0, dgstep / dginit); dgstep = dginit; L170: c = xnew - xmin; dtest = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) { d[i][j] = xx[i][j] + c * d1[i][j]; dtest += dabs(d[i][j] - xx[i][j]); } if (dtest <= 0.0) { clt = .7; goto L300; } if (max(xbound, xmin - c) < 0.0) { clt = 0.1; } maxfnc++; L190: fungra(); if (maxfnc > 1) { for (gnew = 0.0, i = 1; i <= n; i++) for (j = 1; j < i; j++) gnew += d1[i][j] * g[i][j]; } if (maxfnc <= 1 || (maxfnc > 1 && f < fmin) || (maxfnc > 1 && f == fmin && dabs(gnew) <= dabs(gmin))) { maxfnk = maxfnc; gsumsq = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) gsumsq += sqr(g[i][j]); if (gsumsq <= acc) { prc = 0; print_iter(maxfnc, f); return; } } if (maxfnc == maxfn) { printf( "minfungra: \ maxfn function values have been calculated\n"); flag = 1; goto L30; } if (maxfnk < maxfnc) goto L300; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) { xx[i][j] = d[i][j]; gg[i][j] = g[i][j]; } if (maxfnc <= 1) goto L10; gm = gnew; if (gm <= dginit) goto L310; ggstar = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) ggstar += gs[i][j] * g[i][j]; beta = (gsumsq - ggstar) / (gm - dginit); L300: if (dabs(gm) > clt * dabs(dginit) || (dabs(gm) <= clt * dabs(dginit) && dabs(gm * beta) >= clt * gsumsq)) { L310: clt += 0.3; if (clt > 0.8) { retry = -retry; if (imin(retry, maxfnk) < 2) { printf( "minfungra: \ gradient wrong or acc too small\n"); flag = 2; } else itcrs = m + 1; goto L30; } xold = xnew; xnew = .5 * (xmin + xold); if (maxfnk >= maxfnc && gmin * gnew > 0.0) { xnew = 10.0 * xold; if (xbound >= 0.0) { xnew = 0.5 * (xold + xbound); } } c = gnew - (3.0 * gnew + gmin - 4.0 * (f - fmin) / (xold - xmin)) * (xold - xnew) / (xold - xmin); if (maxfnk >= maxfnc) { if (gmin * gnew <= 0.0) { xbound = xmin; } xmin = xold; fmin = f; gmin = gnew; } else xbound = xold; if (c * gmin < 0.0) xnew = (xmin * c - xnew * gmin) / (c - gmin); goto L170; } if (min(f, fmin) < finit || (min(f, fmin) >= finit && gsumsq < gsinit)) { if (itcrs < m && dabs(ggstar) < .2 * gsumsq) { gamma = 0.0; c = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) { gamma += gg[i][j] * dgr[i][j]; c += gg[i][j] * dr[i][j]; } gamma /= gamden; if (dabs(beta * gm + gamma * c) < .2 * gsumsq) { for (i = 1; i <= n; i++) for (j = 1; j < i; j++) d1[i][j] = -gg[i][j] + beta * d1[i][j] + gamma * dr[i][j]; itcrs++; } else { itcrs = 2; gamden = gm - dginit; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) { dr[i][j] = d1[i][j]; dgr[i][j] = gg[i][j] - gs[i][j]; d1[i][j] = -gg[i][j] + beta * d1[i][j]; } } } else { itcrs = 2; gamden = gm - dginit; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) { dr[i][j] = d1[i][j]; dgr[i][j] = gg[i][j] - gs[i][j]; d1[i][j] = -gg[i][j] + beta * d1[i][j]; } } goto L30; } else { retry = -retry; if (imin(retry, maxfnk) < 2) { printf( "minfungra: \ gradient wrong or acc too small\n"); flag = 2; } else itcrs = m + 1; goto L30; } } get_dist() { static double cons = 1.0; static int maxfn = 500; int i, j, iter; double dif; dr = (double **)getmem(n, sizeof(delta[1])); dgr = (double **)getmem(n, sizeof(delta[1])); d1 = (double **)getmem(n, sizeof(delta[1])); gs = (double **)getmem(n, sizeof(delta[1])); xx = (double **)getmem(n, sizeof(delta[1])); gg = (double **)getmem(n, sizeof(delta[1])); dr[1] = NULL; dgr[1] = NULL; d1[1] = NULL; gs[1] = NULL; xx[1] = NULL; gg[1] = NULL; for (i = 2; i <= n; i++) { dr[i] = (double *)getmem(i - 1, sizeof(double)); dgr[i] = (double *)getmem(i - 1, sizeof(double)); d1[i] = (double *)getmem(i - 1, sizeof(double)); gs[i] = (double *)getmem(i - 1, sizeof(double)); xx[i] = (double *)getmem(i - 1, sizeof(double)); gg[i] = (double *)getmem(i - 1, sizeof(double)); } nm0 = n; nm1 = n - 1; nm2 = n - 2; nm3 = n - 3; if (start == 3) { r = 0.001; fungra(); } else { r = 1.0; fungra(); r = cons * fitl / fitp; f = (1.0 + cons) * fitl; } iter = 1; do { for (i = 1; i <= n; i++) for (j = 1; j < i; j++) dold[i][j] = d[i][j]; minfungra(0.5 * f, ps1, maxfn); dif = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) dif += sqr(d[i][j] - dold[i][j]); dif = sqrt(dif); if (dif > ps2) { iter++; r *= 10.0; } } while (dif > ps2); fungra(); for (i = 2; i <= n; i++) { free(dr[i]); free(dgr[i]); free(d1[i]); free(gs[i]); free(xx[i]); free(gg[i]); } free(dr); free(dgr); free(d1); free(gs); free(xx); free(gg); } double gttol() { double result; int i, j, k, l; result = 0.0; nm0 = n; nm1 = n - 1; nm2 = n - 2; nm3 = n - 3; for (i = 1; i <= nm3; i++) for (j = i + 1; j <= nm2; j++) for (k = j + 1; k <= nm1; k++) for (l = k + 1; l <= nm0; l++) if ((d[j][i] + d[l][k] >= d[l][i] + d[k][j]) && (d[k][i] + d[l][j] >= d[l][i] + d[k][j])) result = max( result, dabs(d[j][i] + d[l][k] - d[k][i] - d[l][j])); else if ((d[j][i] + d[l][k] >= d[k][i] + d[l][j]) && (d[l][i] + d[k][j] >= d[k][i] + d[l][j])) result = max( result, dabs(d[j][i] + d[l][k] - d[l][i] - d[k][j])); else if ((d[k][i] + d[l][j] >= d[j][i] + d[l][k]) && (d[l][i] + d[k][j] >= d[j][i] + d[l][k])) result = max( result, dabs(d[k][i] + d[l][j] - d[l][i] - d[k][j])); return (result); } gtcord() { double sumx, sumy, ssqx, ssqy, scp, fn; int i, j; sumx = sumy = ssqx = ssqy = scp = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) if (delta[i][j] != empty) { sumx += delta[i][j]; sumy += d[i][j]; ssqx += sqr(delta[i][j]); ssqy += sqr(d[i][j]); scp += delta[i][j] * d[i][j]; } fn = (double)(m - nempty); if ((fn * ssqy - sumx * sumy) <= 0.0) show_error("gtcord: path length distances have zero variance"); else return ( (fn * scp - sumx * sumy) / sqrt((fn * ssqx - sqr(sumx)) * (fn * ssqy - sqr(sumy)))); } goodfit() { double r, rsq; r = gtcord(); rsq = r * r * 100.0; tol = gttol(); } additive_const() { int i, j, k; c = 0.0; for (i = 1; i <= n; i++) for (j = 1; j < i; j++) for (k = 1; k <= n; k++) if (k != i && k != j) c = max(c, gd(i, j) - gd(i, k) - gd(j, k)); if (report) fprintf(reportf, "Additive Constant: %15.10f\n", c); if (c != 0.0) for (i = 1; i <= n; i++) for (j = 1; j < i; j++) d[i][j] += c; } static int *mergei, *mergej; static int ninv; get_tree() { #define false 0 #define true 1 int i, j, k, l, im, jm, km, lm, count; int maxnode, maxcnt, nnode, nact; double arci, arcj, arcim, arcjm; char *act; maxnode = 2 * n - 2; mergei = (int *)getmem(maxnode, sizeof(int)); mergej = (int *)getmem(maxnode, sizeof(int)); act = (char *)getmem(maxnode, sizeof(char)); for (i = 1; i <= maxnode; i++) act[i] = false; nact = n; ninv = 0; nnode = n; while (nact > 4) { maxcnt = 0; for (i = 1; i <= nnode; i++) if (!act[i]) for (j = 1; j < i; j++) if (!act[j]) { count = 0; arci = 0.0; arcj = 0.0; for (k = 2; k <= nnode; k++) if (!act[k] && k != i && k != j) for (l = 1; l < k; l++) if (!act[l] && l != i && l != j) if (gd(i, j) + gd(k, l) <= gd(i, k) + gd(j, l) && gd(i, j) + gd(k, l) <= gd(i, l) + gd(j, k)) { count++; arci += (gd(i, j) + gd(i, k) - gd(j, k)) / 2.0; arcj += (gd(i, j) + gd(j, l) - gd(i, l)) / 2.0; } if (count > maxcnt) { maxcnt = count; arcim = max( 0.0, arci / count); arcjm = max( 0.0, arcj / count); im = i; jm = j; } } nnode++; if (nnode + 2 > maxnode) show_error("get_tree: number of nodes exceeds 2N-2"); ninv++; mergei[ninv] = im; mergej[ninv] = jm; act[im] = true; act[jm] = true; d[nnode] = (double *)getmem(nnode - 1, sizeof(double)); d[nnode][im] = arcim; d[nnode][jm] = arcjm; for (i = 1; i <= nnode - 1; i++) if (!act[i]) d[nnode][i] = max(0.0, gd(im, i) - arcim); nact--; } for (i = 1; act[i]; i++) if (i > nnode) show_error( "get_tree: can't find last two invisible nodes"); im = i; for (i = im + 1; act[i]; i++) if (i > nnode) show_error( "get_tree: can't find last two invisible nodes"); jm = i; for (i = jm + 1; act[i]; i++) if (i > nnode) show_error( "get_tree: can't find last two invisible nodes"); km = i; for (i = km + 1; act[i]; i++) if (i > nnode) show_error( "get_tree: can't find last two invisible nodes"); lm = i; if (gd(im, jm) + gd(km, lm) <= gd(im, km) + gd(jm, lm) + tol && gd(im, jm) + gd(km, lm) <= gd(im, lm) + gd(jm, km) + tol) { i = im; j = jm; k = km; l = lm; } else if (gd(im, lm) + gd(jm, km) <= gd(im, km) + gd(jm, lm) + tol && gd(im, lm) + gd(jm, km) <= gd(im, jm) + gd(km, lm) + tol) { i = im; j = lm; k = km; l = jm; } else if (gd(im, km) + gd(jm, lm) <= gd(im, jm) + gd(km, lm) + tol && gd(im, km) + gd(jm, lm) <= gd(im, lm) + gd(jm, km) + tol) { i = im; j = km; k = lm; l = jm; } nnode++; ninv++; mergei[ninv] = i; mergej[ninv] = j; d[nnode] = (double *)getmem(nnode - 1, sizeof(double)); d[nnode][i] = max(0.0, (gd(i, j) + gd(i, k) - gd(j, k)) / 2.0); d[nnode][j] = max(0.0, (gd(i, j) + gd(j, l) - gd(i, l)) / 2.0); nnode++; ninv++; mergei[ninv] = k; mergej[ninv] = l; d[nnode] = (double *)getmem(nnode - 1, sizeof(double)); d[nnode][k] = max(0.0, (gd(k, l) + gd(i, k) - gd(i, l)) / 2.0); d[nnode][l] = max(0.0, (gd(k, l) + gd(j, l) - gd(j, k)) / 2.0); d[nnode][nnode - 1] = max(0.0, (gd(i, k) + gd(j, l) - gd(i, j) - gd(k, l)) / 2.0); } print_node(node, dist, indent) int node; double dist; int indent; { static char buf[BUFLEN]; if (node <= n) printf("%s%s:%6.4f", repeatch(buf, '\t', indent), names[node], dist / nfac); else { printf("%s(\n", repeatch(buf, '\t', indent)); print_node(mergei[node - n], gd(node, mergei[node - n]), indent + 1); printf(",\n"); print_node(mergej[node - n], gd(node, mergej[node - n]), indent + 1); printf("\n%s):%6.4f", repeatch(buf, '\t', indent), dist / nfac); } } show_tree() { int i, j, current; int ij[2]; current = 0; for (i = 1; current < 2; i++) { for (j = 1; (mergei[j] != i && mergej[j] != i) && j <= ninv; j++) ; if (j > ninv) ij[current++] = i; } printf("(\n"); print_node(ij[0], gd(ij[0], ij[1]) / 2.0, 1); printf(",\n"); print_node(ij[1], gd(ij[0], ij[1]) / 2.0, 1); printf("\n);\n"); } show_help() { printf("\nlsadt--options:\n"); printf(" -f file - write report to 'file'\n"); printf(" -d - treat data as dissimilarities (default)\n"); printf(" -s - treat data as similarities\n"); printf(" -print 0 - don't print out iteration history (default)\n"); printf( " -print n>0 - print out iteration history every n iterations\n"); printf( " -print n<0 - print out iteration history every n iterations\n"); printf( " (including current distance estimates & " "gradients)\n"); printf(" -init n - initial parameter estimates (default = 3)\n"); printf(" n=1 - uniformly distributed random numbers\n"); printf(" n=2 - error-perturbed data\n"); printf(" n=3 - original distance data from input matrix\n"); printf( " -save - save final parameter estimates (default is don't " "save\n"); printf( " -seed n - seed for random number generator (default = " "12345)\n"); printf( " -ps1 n - convergence criterion for inner iterations (default " "= 0.0001)\n"); printf( " -ps2 n - convergence criterion for major iterations (default " "= 0.0001)\n"); printf(" -empty n - missing data indicator (default = 0.0)\n"); printf(" -help - show this help text\n"); exit(0); } show_error(message) char *message; { printf("\n>>>>ERROR:\n>>>>%s\n", message); exit(0); } main(argc, argv) int argc; char **argv; { int i; strcpy(fname, ""); get_parms(argc, argv); if (strcmp(fname, "")) { report = 1; reportf = fopen(fname, "w"); if (reportf == NULL) { perror("lsadt"); exit(0); } } else report = 0; get_data(); d = (double **)getmem(n, sizeof(delta[1])); g = (double **)getmem(n, sizeof(delta[1])); dold = (double **)getmem(n, sizeof(delta[1])); d[1] = NULL; g[1] = NULL; dold[1] = NULL; for (i = 2; i <= n; i++) { d[i] = (double *)getmem(i - 1, sizeof(double)); g[i] = (double *)getmem(i - 1, sizeof(double)); dold[i] = (double *)getmem(i - 1, sizeof(double)); } initd(); get_dist(); goodfit(); additive_const(); get_tree(); show_tree(); if (report) close(reportf); }