# maximin projection for a rooted tree maximin <- function(A, maxcyc=20){ B <- A for(cycle in 1:maxcyc){ for(i in 1:dim(A)[1]) for(j in 1:i) A[i,j] <- A[j,i] <- max(pmin(A[i,], A[,j])) if(all(B==A)) break B <- A } A } ######################################################################################### # minimax projection for an unrooted tree minimax <- function(D){-maximin(-D)} ######################################################################################### R2U <- function(A){outer(diag(A), diag(A), "+") - 2*A} ######################################################################################### U2R <- function(D, root=0){ # if D happens to be a tree, the default puts the root at the mid-point of the longest path # Otherwise the leaf numbered root is used. Root edge length = 0 if(root < 1 || root > dim(D)[1]){ alpha <- apply(D, 1, max) return((outer(alpha, alpha, "+") - D - max(alpha))/2) } alpha <- D[,root] return((outer(alpha, alpha, "+") - D)/2) } ######################################################################################### is.tree <- function(A, rooted, tol=1.0e-6){ if(missing(rooted)) if(sum(abs(diag(A))) == 0) rooted = 0 else rooted = 1 if(!rooted) A <- U2R(A) return(max(maximin(pmax(A, 0)) - A) < tol) } ######################################################################################### logdet <- function(matrix, rank=0, eps = 1.0e-10){ eig <- eigen(matrix)$values if(rank <= 0){ tol <- eps*sum(abs(eig)) return(sum(log(abs(eig[abs(eig) > tol])))) } else return(sum(log(abs(eig[1:rank])))) } ################################################################################### Branches <- function(tree, trunk, randomized=1, rooted=1, tol=1.0e-14){ # returns a Boolean matrix together with a list of coefficients # which determine the tree (assumed to be binary) # If not binary, a randomized choice may be made if(!rooted) return(Edges(tree)) n <- dim(tree)[1] if(missing(trunk)) trunk <- as.logical(rep(1, n)) if(randomized) leaf <- trunc(sum(trunk) * runif(1) + 1) else leaf <- 1 leaf <- sort.list(as.numeric(trunk), decreasing=TRUE)[leaf] if(sum(trunk) <= 1) return(c(as.numeric(trunk), tree[leaf, leaf])) coef <- unique(sort(as.numeric(tree[trunk, trunk]))) branch1 <- tree[leaf, ] > coef[1] + tol branch1[leaf] <- TRUE branch2 <- as.logical(trunk * (!branch1)) cbind(c(as.numeric(trunk), coef[1]), Branches(tree-coef[1], trunk=branch1), Branches(tree-coef[1], trunk=branch2)) } ################################################################################### Edges <- function(urt, tol=1.0e-10){ n <- dim(urt)[1] T <- maximin(U2R(urt) + 1) H <- Branches(T, tol=tol) col = 3 while(sum(H[1:n, col] * H[1:n, 2]) != 0) col <- col+1 H[n+1, 2] = H[n+1, 2] + H[n+1,col] H <- H[, c(-1,-col)] H[1:n,] <- 2*H[1:n,] - 1 H } ################################################################################### PartitionRep <- function(tree, sph=0, randomized=1){ # returns a list of Boolean basis matrices for the tree n <- dim(tree)[1] if(sph){ diag(tree) <- max(diag(tree)) coef <- unique(sort(as.numeric(tree))) if(length(coef) > n) cat("In PartitionRep: non-spherical tree\n") if(length(coef) < n){ cat("In PartitionRep: non-binary spherical tree: n=", length(coef)) eta <- min(diff(c(0, coef))) eta <- matrix(runif(n^2)*eta/10000, n, n) tree <- maximin(tree + abs(eta-t(eta))) coef <- unique(sort(as.numeric(tree))) if(length(coef) == n) cat(" fixed\n") else cat("not fixed\n") } V <- as.list(1: length(coef)) for(i in 1:length(coef)) V[[i]] <- matrix(as.numeric(tree >= coef[i]), n,n) return(list(V=V, coef = diff(c(0, coef)))) } M <- Branches(tree, randomized=randomized) V <- as.list(1:dim(M)[2]) for(i in 1:dim(M)[2]) V[[i]] <- outer(M[1:n,i], M[1:n,i]) list(V=V, coef=M[n+1,]) } ################################################################################### TreeFit <- function(S, kernel=0, sph=0, maxcyc=40, step=1, Sigma, tol=1.0e-3){ p <- dim(S)[1] if(kernel == 1) X <- matrix(rep(1, p), p, 1) else X <- kernel if(missing(Sigma)) Sigma <- maximin(S) else Sigma <- maximin(Sigma) prep <- PartitionRep(Sigma, sph=sph) if(sph) diag(Sigma) <- max(diag(Sigma)) V <- prep$V q <- length(V) coef <- prep$coef deriv <- rep(0, q) FI <- matrix(0, q, q) dev <- rep(0, maxcyc) Q <- diag(p) W <- ginv(S) if(kernel != 0) Q <- diag(p) - X %*% solve(t(X) %*% W %*% X, t(X) %*% W) base.dev <- sum(diag(Q)) - logdet(W%*%Q) for(cycle in 1:maxcyc){ W <- ginv(Sigma) if(kernel != 0) Q <- diag(p) - X %*% solve(t(X) %*% W %*% X, t(X) %*% W) WQ <- W %*% Q dev[cycle] <- sum(WQ * S) - logdet(WQ) if(cycle > 4 && abs(dev[cycle] - dev[cycle-1]) < tol^2) break prep <- PartitionRep(Sigma, sph=sph) V <- prep$V q <- length(V) coef <- prep$coef deriv <- rep(0, length(coef)) for(i in 1:q){ WVW <- WQ %*% V[[i]] %*% WQ deriv[i] <- sum(WVW * (S - Sigma)) for(j in 1:q) FI[i,j] <- sum(WVW * V[[j]]) } if(cycle < 3) factor <- cycle/(cycle+1) else factor <- 1 nz <- (coef > 0) | (deriv > 0) #if(kernel == 1) nz[1] <- FALSE delta <- rep(0, length(coef)) delta[nz] <- gsolve(FI[nz, nz], deriv[nz]) * factor * step coef <- pmax(coef + delta, 0) Sigma <- matrix(0, p, p) for(i in 1:q) Sigma <- Sigma + V[[i]] * coef[i] if(max(abs(delta[nz])) < tol) break } return(list(Sigma=Sigma, dev=dev[cycle]-base.dev, deriv=deriv, delta=delta, cycle=cycle)) } ################################################################################### ginv <- function(A, rank=0, tol=1.0e-10){ eig <- eigen(A) if(missing(rank) | rank==0) nz <- abs(eig$values) > tol else nz <- (1:dim(A)[1]) <= rank eig$vectors[, nz] %*% diag(1/eig$values[nz]) %*% t(eig$vectors[, nz]) } gsolve <- function(A, B, rank=0){ginv(A, rank) %*% B} ################################################################################### topology <- function(tree){ n <- dim(tree)[1] top <- matrix(0, n, n) if(sum(diag(tree)) == 0){ B <- Edges(tree) for(r in 1:dim(B)[2]) top <- top + outer(B[1:n, r], B[1:n, r], "!=") return(top) } B <- Branches(tree) return((B %*% t(B))[1:n, 1:n]) } ################################################################################### TEQ <- function(tree1, tree2){ if(any(dim(tree1) != dim(tree2))) return(FALSE) all(topology(tree1) == topology(tree2)) } ################################################################################### Intersection <- function(Delta){ n <- dim(Delta)[1] V <- matrix(0, n^2, n^2) i <- as.numeric(gl(n,1,n^2)) j <- as.numeric(gl(n,n,n^2)) for(row in 1:n^2) for(col in 1:n^2) V[row, col] <- Delta[i[row], i[col]] + Delta[j[row], j[col]] - Delta[i[row], j[col]] - Delta[j[row], i[col]] -V } ################################################################################### # data from Felsenstein p. 163 species <- c("dog", "bear", "raccoon", "weasel", "seal", "sea-lion", "cat", "monkey") q <- 8 d <- matrix(c( 0, 32, 48, 51, 50, 48, 98, 148, 32, 0, 26, 34, 29, 33, 84, 136, 48, 26, 0, 42, 44, 44, 92, 152, 51, 34, 42, 0, 44, 38, 86, 142, 50, 29, 44, 44, 0, 24, 89, 142, 48, 33, 44, 38, 24, 0, 90, 142, 98, 84, 92, 86, 89, 90, 0, 148, 148, 136, 152, 142, 142, 142, 148, 0), q, q) B <- matrix(c( 1, 0, 0, 0, 0, 0, 0, 0, 25.25, 0, 1, 0, 0, 0, 0, 0, 0, 6.875, 0, 0, 1, 0, 0, 0, 0, 0, 19.125, 0, 0, 0, 1, 0, 0, 0, 0, 19.5625, 0, 0, 0, 0, 1, 0, 0, 0, 12.35, 0, 0, 0, 0, 0, 1, 0, 0, 11.65, 0, 0, 0, 0, 0, 0, 1, 0, 47.0833, 0, 0, 0, 0, 0, 0, 0, 1, 100.9166, 0, 1, 1, 0, 0, 0, 0, 0, 1.75, 1, 1, 1, 0, 0, 0, 0, 0, 3.4375, 0, 0, 0, 0, 1, 1, 0, 0, 7.8125, 1, 1, 1, 0, 1, 1, 0, 0, 1.5625, 1, 1, 1, 1, 1, 1, 0, 0, 20.4375), 2*q-3, q+1, byrow=TRUE) NJ <- matrix(0, q, q) #NJ tree for(r in 1:dim(B)[1]) NJ <- NJ + outer(B[r, 1:q], B[r, 1:q], "!=") * B[r, q+1] ########################################################################## B <- matrix(c( 1, 0, 0, 0, 0, 0, 0, 0, 22.9, 0, 1, 0, 0, 0, 0, 0, 0, 13, 0, 0, 1, 0, 0, 0, 0, 0, 13, 0, 0, 0, 1, 0, 0, 0, 0, 19.75, 0, 0, 0, 0, 1, 0, 0, 0, 12.0, 0, 0, 0, 0, 0, 1, 0, 0, 12.0, 0, 0, 0, 0, 0, 0, 1, 0, 44.9166, 0, 0, 0, 0, 0, 0, 0, 1, 99.369, 0, 1, 1, 0, 0, 0, 0, 0, 5.75, 0, 0, 0, 0, 1, 1, 0, 0, 6.75, 0, 1, 1, 0, 1, 1, 0, 0, 1.0, 0, 1, 1, 1, 1, 1, 0, 0, 3.15, 1, 1, 1, 1, 1, 1, 0, 0, 20.0166), 2*q-3, q+1, byrow=TRUE) UPGMA <- matrix(0, q, q) #UPGMA tree for(r in 1:dim(B)[1]) UPGMA <- UPGMA + outer(B[r, 1:q], B[r, 1:q], "!=") * B[r, q+1] 1, 0, 0, 0, 0, 0, 0, 0, 25.48, 0, 1, 0, 0, 0, 0, 0, 0, 6.52, 0, 0, 1, 0, 0, 0, 0, 0, 19.01, 0, 0, 0, 1, 0, 0, 0, 0, 18.87, 0, 0, 0, 0, 1, 0, 0, 0, 12.07, 0, 0, 0, 0, 0, 1, 0, 0, 11.93, 0, 0, 0, 0, 0, 0, 1, 0, 46.99, 0, 0, 0, 0, 0, 0, 0, 1, 101.01, 1, 1, 0, 0, 0, 0, 0, 0, 1.08, 1, 1, 1, 0, 0, 0, 0, 0, 4.14, 0, 0, 0, 0, 1, 1, 0, 0, 7.51, 1, 1, 1, 0, 1, 1, 0, 0, 2.38, 1, 1, 1, 1, 1, 1, 0, 0, 20.76), 2*q-3, q+1, byrow=TRUE) Delta <- matrix(0, q, q) for(r in 1:dim(B)[1]) Delta <- Delta + outer(B[r, 1:q], B[r, 1:q], "!=") * B[r, q+1] Sigma <- U2R(Delta) # max lik spherical tree B <- matrix(c( 1, 1, 1, 1, 1, 1, 1, 0, 100.51, 1, 1, 1, 1, 1, 1, 0, 0, 23.46, 1, 1, 1, 0, 1, 1, 0, 0, 0.42, 0, 0, 0, 0, 1, 1, 0, 0, 8.77, 0, 0, 0, 0, 1, 0, 0, 0, 12.10, 0, 0, 0, 0, 0, 1, 0, 0, 12.10, 1, 1, 1, 0, 0, 0, 0, 0, 1.99, 0, 1, 1, 0, 0, 0, 0, 0, 5.04, 0, 0, 1, 0, 0, 0, 0, 0, 13.83, 0, 1, 0, 0, 0, 0, 0, 0, 13.83, 1, 0, 0, 0, 0, 0, 0, 0, 18.87, 0, 0, 0, 1, 0, 0, 0, 0, 21.28, 0, 0, 0, 0, 0, 0, 1, 0, 44.74), 2*q-3, q+1, byrow=TRUE) Deltas <- matrix(0, q, q) for(r in 1:dim(B)[1]) Deltas <- Deltas + outer(B[r, 1:q], B[r, 1:q], "!=") * B[r, q+1] Sigmas <- U2R(Deltas) diag(Sigmas) <- max(diag(Sigmas)) fit0 <- TreeFit(1 + U2R(d), ker=1, sph=1, Sigma=Sigmas) round(R2U(fit0$Sigma), 2) round(Edges(R2U(fit0$Sigma)), 2) ## original max lik spherical tree #B <- matrix(c( #1, 1, 1, 1, 1, 1, 0, 0, 23.39, #1, 1, 1, 0, 1, 1, 0, 0, 0.49, #0, 1, 1, 0, 1, 1, 0, 0, 1.58, #0, 0, 0, 0, 1, 1, 0, 0, 7.54, #0, 1, 1, 0, 0, 0, 0, 0, 5.56, #1, 0, 0, 0, 0, 0, 0, 0, 20.96, #0, 1, 0, 0, 0, 0, 0, 0, 13.82, #0, 0, 1, 0, 0, 0, 0, 0, 13.82, #0, 0, 0, 1, 0, 0, 0, 0, 21.45, #0, 0, 0, 0, 1, 0, 0, 0, 11.84, #0, 0, 0, 0, 0, 1, 0, 0, 11.84, #0, 0, 0, 0, 0, 0, 1, 0, 44.84, #1, 1, 1, 1, 1, 1, 1, 0, 100.48), 2*q-3, q+1, byrow=TRUE) #Deltas <- matrix(0, q, q) #for(r in 1:dim(B)[1]) # Deltas <- Deltas + outer(B[r, 1:q], B[r, 1:q], "!=") * B[r, q+1] #Sigmas <- U2R(Deltas) #diag(Sigmas) <- max(diag(Sigmas)) # NJ <- function(D, r=0){ n <- dim(D)[1] u <- as.vector(D %*% rep(1, n) - diag(D)) / (n-2) u <- apply(D, 1, max) D1 <- D - outer(u, u, "+") diag(D1) <- 0 ij <- sort.list(as.vector(D1))[2*r+1]-1 i <- trunc(ij/n) j <- ij - n*i + 1 i <- i+1 vi <- (D[i,] + D[j,] + u[i] - u[j] )/2 vj <- vi + u[j] - u[i] D[i,] <- D[,i] <- vi D[j,] <- D[,j] <- vj D }