regress <- function(formula, Vformula, kernel, identity=TRUE, taper, start=NULL,
                    pos, print.level=0, maxcyc=25, tol=1e-4, nz.eps=1.0e-8, data){
#
# Adapted from the original regress() function by David Clifford
# PMcC July 2011
#
# Modifications:
# (1) inclusion of a kernel or null-space matrix K governing the nature
#     of the likelihood being maximized: ordinarily span(K) is a subspace of span(X).
#     Default likelihood with kernel missing: REML with K=X
#     kernel=0: ordinary maximum likelihood
#     kernel=1: one-dimensional vector space of constant functions
# (2) the order of the variance components puts Id first (the nugget)
# (3) pos=c(1,0,1,...) forces positivity for selected coefficients
#     by using N-R on the log scale (no change here)
# (4) the sigma component of the fitted object gives the fitted variance components
#     regardless of the positivity settings (the previous version returned values on the log scale)
# (5) start: optional initial variance coefficients, which may be positive or
#     negative, but must be strictly positive if the corresponding pos component
#     is TRUE (only the relative sizes of the components matter)
# (6) taper: (replaces the scalar fraction) an optional vector of
#     positive scalar factors in (0, 1] controlling the size of the N-R steps
#
# Usage:
#   row and col declared as factors;
#   K <- model.matrix(~row+col)
#   V a matrix (strictly pos def on K-contrasts)
#   fit00 <- regress(y~row+col)                 # simple linear regression
#   fit01 <- regress(y~row+col, ~V)             # using the default kernel = row+col
#   fit02 <- regress(y~row+col, ~V, kernel=K)   # identical to fit01
#   It is unusual to have a kernel bigger than the mean space, but it is not prohibited:
#   fit03 <- regress(y~1, ~V, kernel=K)         # likelihood identical to fit01, but no row/col coefficients
#   summary(fit01)
#
#   For comparisons using likelihood ratios, the two fitted models MUST have the same kernel.
#   Model 00 is a sub-model of 01, both with kernel K=row+col.
#   REML L-R statistic for a variance component (block factor or spatial correlation):
#     2*(fit01$llik - fit00$llik)
#
#   fit04 <- regress(y~row+col+treat, ~V)             # using REML as default
#   fit05 <- regress(y~row+col+treat, ~V, kernel=K)   # holding the kernel fixed for comparison
#   L-R statistic for the treatment effect allowing for spatial correlation:
#     2*(fit05$llik - fit01$llik)    # NOT 2*(fit04$llik - fit01$llik)
#   Neither 00 nor 01 is a sub-model of 04 because K = row+col+treat in 04.
#
#   K <- model.matrix(~1+x)
#   fit1 <- regress(y~1+x, ~row+col+V, kernel=K)        # also REML
#   fit2 <- regress(y~1+x+treat, ~row+col+V, kernel=K)  # not REML
#   fit3 <- regress(y~1+x+treat, ~row+col+V)            # REML (kernel = 1+x+treat)
#   fit4 <- regress(y~1+x+treat, ~row+col+V, kernel=1)  # not REML
#   Model 1 is a sub-model of 2: L-R statistic for the treatment effect is
#     2*(fit2$llik - fit1$llik)
#   Neither 1 nor 2 is a sub-model of 3 or 4.
#
## References:
## Bates, D. (2005) Fitting linear mixed models in R. R News, vol. 5.
## Clifford, D. and McCullagh, P. (2006) The regress function. R News, vol. 6, 6-10.
## McCullagh, P. and Clifford, D. (2006) Evidence for conformal invariance of crop yields.
##   Proc. Roy. Soc. Lond. A, vol. 462, 2119-2143.
## Welham, S. and Thompson, R. (1997) Likelihood ratio tests for fixed model terms...
##   JRSS B, vol. 59, 701-714.
## Wahba, G. (1990) Spline Models for Observational Data. SIAM.
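#
# A minimal usage sketch (hypothetical y, row, col, V) for the pos, start and
# taper arguments described in modifications (3), (5) and (6) above:
#   fit06 <- regress(y~row+col, ~V, pos=c(1,1),        # force both components positive
#                    start=c(1, 0.1),                  # initial variance coefficients
#                    taper=rep(0.5, 25))               # damped N-R steps, factors in (0, 1]
#   fit06$sigma                                        # estimates on the original scale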
##
    if(missing(data)) data <- environment(formula)
    mf <- model.frame(formula, data=data, na.action=na.pass)
    mf <- eval(mf, parent.frame())
    y <- model.response(mf)
    y <- na.omit(y)
    nobs <- length(y)
    X <- model.matrix(formula, data=data)
    Xcolnames <- dimnames(X)[[2]]
    if(is.null(Xcolnames)) Xcolnames <- paste("X.col", c(1:dim(as.matrix(X))[2]), sep="")
    if(length(y) != nrow(X)){
        cat("Error: dim(X) = (", dim(X), "); length(y) =", length(y), "\n")
        stop("This could be caused by missing values\n")
    }
    qr <- qr(X)
    if(qr$rank) {
        X <- matrix(X[, qr$pivot[1:qr$rank]], nobs, qr$rank)
        Xcolnames <- Xcolnames[qr$pivot[1:qr$rank]]
    } else cat("\nERROR: X has rank 0\n\n")
    #if(testmode) cat(" nobs=", nobs, " dim(X) = ", dim(X), "\n")

    # construct the list of covariance matrices V; a block factor is expanded
    # to its nobs x nobs relationship matrix
    if(!missing(Vformula)) {
        V <- model.frame(Vformula, data=data, na.action=na.pass)
        V <- eval(V, parent.frame())
        Vnames <- names(V)
        V <- as.list(V)
        k <- length(V)
        for(i in 1:k){
            if(is.factor(V[[i]])){
                if(length(V[[i]]) != nobs) stop("block factor has wrong length")
                V[[i]] <- outer(V[[i]], V[[i]], "==")
            } else {
                if(any(dim(V[[i]]) != c(nobs, nobs))) stop("V[[i]] has wrong dimension")
            }
        }
    } else {
        V <- NULL
        k <- 0
        Vnames <- NULL
    }
    if(identity || k==0){
        if(k > 0) for(i in k:1) V[[i+1]] <- V[[i]]
        V[[1]] <- diag(nobs)
        k <- k+1
        Vnames <- c("Id", Vnames)
    }
    if(missing(pos)) pos <- rep(FALSE, k)
    if((l <- length(pos)) < k) {
        pos <- c(as.logical(pos), rep(FALSE, k-l))
    } else {
        pos <- as.logical(pos[1:k])
    }
    #if(testmode) cat("Vnames =", Vnames, "pos =", pos, "\n")

    # kernel K: missing => REML with K = X; kernel=1 => constants; kernel=0 => ML
    if(missing(kernel)){
        K <- X
        colnames(K) <- Xcolnames
        reml <- TRUE
        kernel <- NULL
    } else {
        if(length(kernel)==1 && kernel > 0){
            K <- matrix(rep(1, nobs), nobs, 1)
            colnames(K) <- c("1")
        }
        if(length(kernel)==1 && kernel <= 0){
            K <- Kcolnames <- NULL
            KX <- X
            rankQK <- nobs
        }
        if(length(kernel) > 1) K <- matrix(kernel, nrow=nobs)
        reml <- FALSE
    }
    if(!is.null(K)){
        Kcolnames <- colnames(K)
        qr <- qr(K)
        rankQK <- nobs - qr$rank
        if(qr$rank == 0) {
            K <- NULL
        } else {
            K <- matrix(K[, qr$pivot[1:qr$rank]], nobs, qr$rank)
            Kcolnames <- Kcolnames[qr$pivot[1:qr$rank]]
            KX <- cbind(K, X)                                  # spanning K + X: Oct 12 2011
            qr <- qr(KX)
            #if(testmode) cat("dim(KX) = ", dim(KX), "rank = ", qr$rank, "\n")
            #if(testmode) cat("qr(KX)$pivot =", qr$pivot, "\n")
            KX <- matrix(KX[, qr$pivot[1:qr$rank]], nobs, qr$rank)   # basis of K + X
        }
    }
    #if(testmode) cat("rankQK =", rankQK, "dim(K) =", dim(K), "\n")

    # initial variance coefficients; coef holds log(sigma) for pos components
    sigma <- coef <- rep(0, k)
    if(missing(start)) {
        sigma[pos] <- tol*var(y)
        sigma[1] <- var(y)
    } else {
        l <- min(length(start), k)
        sigma[1:l] <- start[1:l]
    }
    coef[!pos] <- sigma[!pos]
    coef[pos] <- log(sigma[pos])

    if(missing(taper)){
        taper <- rep(0.9, maxcyc)
        if(missing(start) && k > 1) taper[1:2] <- c(0.5, 0.7)
    } else {
        taper <- pmin(abs(taper), 1)
        if((l <- length(taper)) < maxcyc) taper <- c(taper, rep(taper[l], maxcyc-l))
    }
    #if(testmode) cat("taper =", taper[1:9], "\n")

    T <- list(NULL)
    x <- rep(0, k)
    A <- matrix(0, k, k)
    llik <- 0.0
    delta.llik <- 1
    stats <- rep(0, 0)
    nz <- rep(TRUE, k)
    #if(testmode) cat("k=", k, " sigma=", sigma )
    #if(testmode) for(i in 1:k) cat(" dim(V[[", i, "]])=", dim(V[[i]])); cat("\n")
    # main iteration: Fisher scoring / N-R on coef, with sigma rescaled each
    # cycle so that the weighted residual sum of squares equals rankQK
    for(cycle in 1:maxcyc){
        Sigma <- matrix(0, nobs, nobs)
        for(i in 1:k) Sigma <- Sigma + V[[i]]*sigma[i]
        if(print.level) cat("sigma =", sigma, "\n")
        WK <- solve(Sigma, cbind(diag(nobs), K))
        W <- WK[, 1:nobs]
        if(is.null(K)) {
            WQK <- W
        } else {
            WK <- WK[, nobs + 1:ncol(K)]
            WQK <- W - WK %*% solve(t(K) %*% WK, t(WK))
        }
        if(reml) {
            WQX <- WQK
        } else {
            WX <- W %*% KX                                     # including the kernel (Oct 12 2011)
            WQX <- W - WX %*% solve(t(KX) %*% WX, t(WX))
        }
        rss <- as.numeric(t(y) %*% WQX %*% y)
        sigma <- sigma * rss/rankQK
        coef[!pos & nz] <- sigma[!pos & nz]
        coef[pos & nz] <- log(sigma[pos & nz])
        Sigma <- Sigma * rss/rankQK
        WQK <- WQK * rankQK/rss
        WQX <- WQX * rankQK/rss
        rss <- rankQK
        eig <- sort(eigen(WQK, symmetric=TRUE, only.values=TRUE)$values, decreasing=TRUE)[1:rankQK]
        #if(testmode) cat("rss =", rss, "range(eig)=", range(eig), "ldet=", sum(log(eig)), "\n")
        if(any(eig < 0)){
            cat("error: Sigma is not pos def on contrasts: range(eig)=", range(eig), "\n")
            WQK <- WQK + (tol - min(eig))*diag(nobs)
            eig <- eig + tol - min(eig)
        }
        ldet <- sum(log(eig))
        llik <- ldet/2 - rss/2
        if(cycle > 1) delta.llik <- llik - llik0
        llik0 <- llik
        #if(testmode) cat("cycle=", cycle, " sigma = ", sigma, "coef=", coef, " ldet=", ldet, "rss=", rss, "\n")
        if(print.level) cat("sigma =", sigma, "(scale-adjusted)\n")
        if(print.level && reml) cat("resid llik =", llik, "delta.llik =", delta.llik, "\n")
        if(print.level && !reml) cat("llik =", llik, "\n")

        # score vector x and expected-information matrix A
        for(i in 1:k) {
            T[[i]] <- WQK %*% V[[i]]
            WQXy <- WQX %*% y
            x[i] <- as.numeric(t(WQXy) %*% V[[i]] %*% WQXy - sum(diag(T[[i]])))
            for(j in 1:i) A[i,j] <- A[j,i] <- sum(T[[j]] * t(T[[i]]))
        }
        A <- A * outer(sigma*pos + (1-pos), sigma*pos + (1-pos))
        x[pos] <- sigma[pos] * x[pos]
        nz <- nz | (x > 0)
        stats <- c(stats, llik, sigma, x)
        if(qr(A)$rank < k){
            if(cycle == 1) {
                cat("Warning: non-identifiable dispersion model\n")
                cat("sigma=", sigma, "\nA=", A, "\n")
            }
        }
        # coef <- coef + taper[cycle] * solve(A, x)
        deltacoef <- solve(A[nz, nz], x[nz])
        coef[nz] <- coef[nz] + taper[cycle] * deltacoef
        sigma[!pos & nz] <- coef[!pos & nz]
        sigma[pos & nz] <- exp(coef[pos & nz])
        # positivity-constrained components that shrink below nz.eps are set to zero
        nz <- !pos | abs(sigma) > nz.eps
        sigma[!nz] <- 0.0
        if(print.level) cat("sigmanew=", sigma, "; nz=", as.numeric(nz), "\n")
        if(abs(delta.llik) < tol*10) break
        if(cycle > 10 && max(abs(deltacoef)) < tol) break
    }
    if(cycle == maxcyc && maxcyc > 1) cat("Warning: max cycles", maxcyc, "reached before convergence\n")

    stats <- matrix(stats, cycle, 2*k+1, byrow=TRUE)
    colnames(stats) <- c("llik", paste("s^2_", Vnames, sep=""), paste("der_", Vnames, sep=""))

    WX <- W %*% X
    beta.cov <- solve(t(X) %*% WX)
    beta <- beta.cov %*% t(WX) %*% y
    beta <- matrix(beta, length(beta), 1)
    row.names(beta) <- Xcolnames
    beta.se <- sqrt(abs(diag(beta.cov)))
    beta.se[diag(beta.cov) < 0] <- NA
    beta.se <- matrix(beta.se, length(beta.se), 1)
    row.names(beta.se) <- Xcolnames
    fitted.values <- X %*% beta
    A <- A / outer(sigma*pos + (1-pos), sigma*pos + (1-pos))

    result <- list(trace=stats, sigma=sigma, cycle=cycle, llik=llik,
                   fitted=fitted.values, beta=beta, beta.cov=beta.cov, beta.se=beta.se,
                   reml=reml, kernel=kernel, W=W, Kcolnames=Kcolnames,
                   sigma.cov=2*ginv(A[nz, nz]), nz=nz, Vnames=Vnames, pos=pos)
    class(result) <- "regress"
    result
}

ginv <- function (X, tol = sqrt(.Machine$double.eps)) {
    ## taken from library MASS
    if (length(dim(X)) > 2 || !(is.numeric(X) || is.complex(X)))
        stop("X must be a numeric or complex matrix")
    if (!is.matrix(X)) X <- as.matrix(X)
    Xsvd <- svd(X)
    if (is.complex(X)) Xsvd$u <- Conj(Xsvd$u)
    Positive <- Xsvd$d > max(tol * Xsvd$d[1], 0)
    if (all(Positive)) Xsvd$v %*% (1/Xsvd$d * t(Xsvd$u))
    else if (!any(Positive)) array(0, dim(X)[2:1])
    else Xsvd$v[, Positive, drop = FALSE] %*% ((1/Xsvd$d[Positive]) * t(Xsvd$u[, Positive, drop = FALSE]))
}

summary.regress <- function(object, digits=4, fixed.effects=TRUE, ...)
{ cat("Likelihood kernel: K=") if(length(object$kernel) == 1){ cat(max(sign(object$kernel), 0)) } else if(!is.null(object$Kcolnames)) cat(object$Kcolnames, sep="+") cat("\nMaximized log likelihood with kernel K is ") cat(round(object$llik,digits),"\n",sep=" ") indent.lin <- max(nchar(dimnames(object$beta)[[1]])) indent.var <- max(nchar(object$Vnames)) indent <- max(indent.lin,indent.var) extra.space <- "" space.var <- extra.space for(i in 0:(indent-indent.var)) space.var <- paste(space.var," ",sep="") space.lin <- extra.space for(i in 0:(indent-indent.lin)) space.lin <- paste(space.lin," ",sep="") coefficients <- cbind(round(object$beta, digits), round(object$beta.se, digits-1), round(object$beta/object$beta.se, 2)) dimnames(coefficients)[[2]] <- c("Estimate","Std. Error", " Ratio") if(fixed.effects) { cat("\nMean-vector regression coefficients:\n") row.names(coefficients) <- paste(space.lin,dimnames(object$beta)[[1]],sep="") print(coefficients) cat("\n") } else { cat("\nMean-vector regression coefficients: not shown\n\n") } cat("Variance coefficients: (after", object$cycle, "cycles)\n") if(sum(object$pos)==0) { var.coef <- cbind(object$sigma,sqrt(diag(as.matrix(object$sigma.cov)))) row.names(var.coef) <- paste(space.var,object$Vnames,sep="") colnames(var.coef) <- c("Estimate","Std. Error") print(round(var.coef, digits)) cat("\n") } else { var.coef <- object$sigma se <- sep <- rep(NA, length(var.coef)) se[object$nz] <- sqrt(diag(as.matrix(object$sigma.cov))) sep[object$nz] <- sqrt(diag(as.matrix(object$sigma.cov))) / abs(var.coef[object$nz]) var.coef <- cbind(var.coef, se, var.coef-2*se, var.coef+2*se) if(length(object$sigma)>1){ var.coef[object$pos,] <- cbind(var.coef[,c(1,2)], var.coef[,1]*exp(-2*sep), var.coef[,1]*exp(2*sep))[object$pos,] } else { if(object$pos) var.coef <- c(var.coef[,c(1,2)], var.coef[,1]*exp(-2*sep), var.coef[,1]*exp(2*sep)) } var.coef <- matrix(var.coef, nrow=length(object$sigma)) row.names(var.coef) <- paste(space.var,object$Vnames,sep="") colnames(var.coef) <- c("Estimate","Std. Error","C. 2.5%", "C. 97.5%") print(round(var.coef, digits)) cat("\n") } }