regress <- function(formula, Vformula, kernel, identity=TRUE, taper, start=NULL,
pos, print.level=0, maxcyc=25, tol=1e-4, nz.eps=1.0e-8, data){
#
# Adapted from the original regress() function by David Clifford
# PMcC July 2011
#
# Modifications: 
#   (1) inclusion of a kernel or null space matrix K governing the nature
#	of the likelihood being maximized: Ordinarily span(K) \subset \span(X).
#	Default likelihood with kernel missing: REML with K=X
#	kernel=0: ordinary maximum likelihood
#	kernel=1: one-dimensional vector space of constant functions
#   (2) order of variance components puts Id first (the nugget)
#   (3) pos=c(1,0,1,...) forces positivity for selected coefficients
#	by using N-R on log scale (no change here).
#   (4) regress$sigma gives fitted variance components regardless of
#	positivity settings. (previous version returned values on log scale)
#   (5) start: optional initial variance coefficients, which may be positive or
#	negative, but must be strictly positive if pos() component is true.
#	(Only the relative sizes of the components matters.)
#   (6) taper: (replaces the scalar fraction)  an optional list of
#	positive scalar factors in (0, 1] for controlling N-R steps
#
# Usage:
#   row and col declared as factors;
#   K <- model.matrix(~row+col)
#   V as a matrix (strictly pos def on K-contrasts)
#	fit00 <- regress(y~row+col)		# simple linear regn
#	fit01 <- regress(y~row+col, ~V)		# using default ker = row+col
#	fit02 <- regress(y~row+col, ~V, kernel=K)	# identical to fit01
#   It is not normal to have a kernel bigger than the mean space but it is not prohibited
#	fit03 <- regress(y~1, ~V, kernel=K)	# likelihood identical to fit01 but no row/col coefficients
#	summary(fit01)
#
#   for comparisons using likelihood ratios, the two fitted models MUST have the same kernel
#   model 00 is a sub-model of 01, both with kernel K=row+col
#   REML L-R statistic for variance component (block factor or spatial correlation)
#	2*(fit01$llik - fit00$llik)
#
#	fit04 <- regress(y~row+col+treat, ~V)		# using REML as default
#	fit05 <- regress(y~row+col+treat, ~V, kernel=K)	# holding kernel fixed for comparison
#   L-R statistic for treatment effect allowing for spatial correlation
#	2*(fit05$llik - fit01$llik)			# NOT 2*(fit04$llik - fit01$llik)
#   Neither 00 nor 01 is a sub-model of 04 because K = row+col+treat in 04
#
#	K <- model.matrix(~1+x)
#	fit1 <- regress(y~1+x, ~row+col+V, ker=K)	# also REML
#	fit2 <- regress(y~1+x+treat, ~row+col+V, ker=K)	# not REML 
#	fit3 <- regress(y~1+x+treat, ~row+col+V)	# REML (ker=1+x+treat)
#	fit4 <- regress(y~1+x+treat, ~row+col+V, ker=1)	# not REML 
#   model 1 is a sub-model of 2: L-R statistic for treatment effect
#   L-R statistic for treat 2*(fit2$llik- fit1$llik)
#   neither 1 nor 2 is a sub-model of 3 or 4
#
## References:
## D. Bates (2005) Fitting linear mixed models in R. R newsletter vol 5.
## David Clifford and Peter McCullagh (2006) The regress function.
##      R newsletter vol 6, pages 6-10.
## Peter McCullagh and David Clifford (2006) Evidence for conformal invariance of crop yields.
##      Proc. Roy. Soc. Lond. A. vol462 pages2119-2143.
## Welham, S. and Thompson, R. (1997)  Likelihood ratio tests for fixed model terms...
##       JRSSB  59, 701--714.
## Wahba, G. (1990) Spline Models for Observational Data. SIAM
##

if(missing(data)) data <- environment(formula)
mf <- model.frame(formula,data=data,na.action=na.pass)
mf <- eval(mf,parent.frame())
y <- model.response(mf)
y <- na.omit(y)
nobs <- length(y)
X <- model.matrix(formula,data=data)
Xcolnames <- dimnames(X)[[2]]
if(is.null(Xcolnames)) Xcolnames <- paste("X.col",c(1:dim(as.matrix(X))[2]),sep="")
if(length(y) != nrow(X)){
	cat("Error: dim(X) = (", dim(X), "); length(y) =", length(y), "\n")
	stop("This could be caused by missing values\n")
	}
qr <- qr(X)
if(qr$rank) {
	X <- matrix(X[, qr$pivot[1:qr$rank]],nobs,qr$rank)
	Xcolnames <- Xcolnames[qr$pivot[1:qr$rank]]
	} else cat("\nERROR: X has rank 0\n\n")
#if(testmode) cat(" nobs=", nobs, " dim(X) = ", dim(X), "\n")

if(!missing(Vformula)) {
	V <- model.frame(Vformula,data=data,na.action=na.pass)
	V <- eval(V, parent.frame())
	Vnames <- names(V)
	V <- as.list(V)
	k <- length(V)
	for(i in 1:k){
		if(is.factor(V[[i]])){
			if(length(V[[i]]) != nobs) stop("block factor has wrong length")
			V[[i]] <- outer(V[[i]], V[[i]], "==")
			} else {
		if(any(dim(V[[i]])!=c(nobs, nobs))) stop("V[[i]] has wrong dimension")
		}
		}
	} else {
	V <- NULL
	k <- 0
	Vnames=NULL
	}
if(identity || k==0){
	if(k>0) for(i in k:1) V[[i+1]] <- V[[i]]
	V[[1]] <- diag(nobs)
	k <- k+1
	Vnames <- c("Id", Vnames)
	}
if(missing(pos)) pos <- rep(FALSE, k) 
if((l <- length(pos)) < k) pos <- c(as.logical(pos), rep(FALSE, k-l)) else{
	pos <- as.logical(pos[1:k])
	}
#if(testmode) cat("Vnames =", Vnames, "pos =", pos, "\n")

if(missing(kernel)){K <- X; colnames(K) <- Xcolnames; reml <- TRUE; kernel<-NULL} else{
	if(length(kernel)==1 && kernel>0){
		K <- matrix(rep(1, nobs), nobs, 1)
		colnames(K) <- c("1")
		}
	if(length(kernel)==1 && kernel<=0) {K <- Kcolnames <- NULL; KX <- X; rankQK <- nobs}
	if(length(kernel) > 1) K <- matrix(kernel, nrow=nobs) 
	reml <- FALSE
	}
if(!is.null(K)){
	Kcolnames <- colnames(K)
	qr <- qr(K)
	rankQK <- nobs - qr$rank
	if(qr$rank == 0) K <- NULL else{
		K <- matrix(K[, qr$pivot[1:qr$rank]],nobs,qr$rank) 
		Kcolnames <- Kcolnames[qr$pivot[1:qr$rank]]
		KX <- cbind(K, X)	# Spanning K + X: Oct 12 2011
		qr <- qr(KX)
#if(testmode) cat("dim(KX) = ", dim(KX), "rank = ", qr$rank, "\n")
#if(testmode)cat("qr(KX)$pivot =", qr$pivot, "\n")
		KX <- matrix(KX[, qr$pivot[1:qr$rank]],nobs,qr$rank)	# basis of K+X	
		}
	}
#if(testmode) cat("rankQK =", rankQK, "dim(K) =", dim(K), "\n")

sigma <- coef <- rep(0, k)
if(missing(start)) {
	sigma[pos] <- tol*var(y) 
	sigma[1] <- var(y)
	} else{
	l <- min(length(start), k)
	sigma[1:l] <- start[1:l]
	}
coef[!pos] <- sigma[!pos]
coef[pos] <- log(sigma[pos])
if(missing(taper)){
	taper <- rep(0.9, maxcyc)
	if(missing(start) && k>1) taper[1:2] <- c(0.5, 0.7)
	} else{
	taper <- pmin(abs(taper), 1)
	if((l <- length(taper)) < maxcyc) taper <- c(taper, rep(taper[l], maxcyc-l))
	}
#if(testmode) cat("taper =", taper[1:9], "\n")
T <- list(NULL)
x <- rep(0, k)
A <- matrix(0, k, k)
llik <- 0.0
delta.llik <- 1
stats <- rep(0, 0)
nz <- rep(TRUE, k)
#if(testmode) cat("k=", k, " sigma=", sigma )
#if(testmode) for(i in 1:k) cat(" dim(V[[", i, "]])=", dim(V[[i]])); cat("\n")
for(cycle in 1:maxcyc){
	Sigma <- matrix(0, nobs, nobs)
	for(i in 1:k) Sigma <- Sigma + V[[i]]*sigma[i]
	if(print.level) cat("sigma =", sigma, "\n")
	
	WK <- solve(Sigma,cbind(diag(nobs), K))
	W <- WK[,1:nobs]
	if(is.null(K)) WQK <- W else{
		WK <- WK[, nobs + 1:ncol(K)]
		WQK <- W - WK %*% solve(t(K)%*%WK, t(WK))
		}
	if(reml) WQX <- WQK else{
		WX <- W %*% KX		# including the kernel (Oct 12 2011)
        	WQX <- W - WX %*% solve(t(KX)%*%WX, t(WX))
		}

	rss <- as.numeric(t(y) %*% WQX %*% y)
	sigma <- sigma * rss/rankQK
	coef[!pos & nz] <- sigma[!pos & nz]
	coef[pos & nz] <- log(sigma[pos & nz])
	Sigma <- Sigma * rss/rankQK
	WQK <- WQK * rankQK/rss
	WQX <- WQX * rankQK/rss
	rss <- rankQK
	
	eig <- sort(eigen(WQK,symmetric=TRUE,only.values=TRUE)$values, decreasing=TRUE)[1:rankQK]
#if(testmode) cat("rss =", rss, "range(eig)=", range(eig), "ldet=", sum(log(eig)), "\n")
	if(any(eig < 0)){
		cat("error: Sigma is not pos def on contrasts: range(eig)=", range(eig), "\n")
		WQK <- WQK + (tol - min(eig))*diag(nobs)
		eig <- eig + tol - min(eig)
		}
	ldet <- sum(log(eig))
	llik <- ldet/2 - rss/2
	if(cycle > 1) delta.llik <- llik - llik0
	llik0 <- llik
#if(testmode) cat("cycle=", cycle, " sigma = ", sigma, "coef=", coef, " ldet=", ldet, "rss=", rss, "\n")
	if(print.level) cat("sigma =", sigma, "(scale-adjusted)\n")
	if(print.level && reml) cat("resid llik =", llik, "delta.llik =", delta.llik, "\n")
	if(print.level && !reml) cat("llik =", llik, "\n")

	for(i in 1:k) {
        	T[[i]] <- WQK %*% V[[i]]
		WQXy <- WQX %*% y
        	x[i] <- as.numeric(t(WQXy) %*% V[[i]] %*% WQXy - sum(diag(T[[i]])))
      		for(j in 1:i)  A[i,j] <- A[j,i] <-  sum(T[[j]] * t(T[[i]]))
		}
	A <- A * outer(sigma*pos + (1-pos), sigma*pos + (1-pos))
	x[pos] <- sigma[pos] * x[pos]
	nz <- nz | (x > 0)
	stats <- c(stats, llik, sigma, x)

	if(qr(A)$rank < k){
		if(cycle==1) {
			cat("Warning: non-identifiable dispersion model\n")
			cat("sigma=", sigma, "\nA=", A, "\n")
			}
		}
#	coef <- coef + taper[cycle] * solve(A, x)
	deltacoef <- solve(A[nz, nz], x[nz])
	coef[nz] <- coef[nz] + taper[cycle] * deltacoef
	sigma[!pos & nz] <- coef[!pos & nz]
	sigma[pos & nz] <- exp(coef[pos & nz])
	nz <- !pos | abs(sigma) > nz.eps
	sigma[!nz] <- 0.0
if(print.level) cat("sigmanew=", sigma, "; nz=", as.numeric(nz), "\n")

	if(abs(delta.llik) < tol*10) break
	if(cycle > 10 & max(abs(deltacoef)) < tol) break
	}
if(cycle == maxcyc && maxcyc>1) cat("Warning: max cycles", maxcyc, "reached before convergence\n")

stats <- matrix(stats, cycle, 2*k+1, byrow=TRUE)
colnames(stats) <- c("llik",paste("s^2_", Vnames, sep=""), paste("der_", Vnames, sep="")) 
WX <- W %*% X
beta.cov <- solve(t(X) %*% WX)
beta <- beta.cov %*% t(WX) %*% y
beta <- matrix(beta,length(beta),1)
row.names(beta) <- Xcolnames
beta.se <- sqrt(abs(diag(beta.cov)))
beta.se[diag(beta.cov) < 0] <- NA
beta.se <- matrix(beta.se,length(beta.se),1)
row.names(beta.se) <- Xcolnames
fitted.values <- X%*%beta
A <- A / outer(sigma*pos + (1-pos), sigma*pos + (1-pos))

result <- list(trace=stats, sigma=sigma, cycle=cycle, llik=llik, fitted=X%*%beta,
	beta=beta, beta.cov=beta.cov, beta.se=beta.se, reml=reml, kernel=kernel, W=W,
	Kcolnames=Kcolnames, sigma.cov=2*ginv(A[nz,nz]),nz=nz, Vnames=Vnames, pos=pos)
class(result) <- "regress"
result
}

ginv <- function (X, tol = sqrt(.Machine$double.eps))
{
  ## taken from library MASS
  if (length(dim(X)) > 2 || !(is.numeric(X) || is.complex(X)))
    stop("X must be a numeric or complex matrix")
  if (!is.matrix(X))
    X <- as.matrix(X)
  Xsvd <- svd(X)
  if (is.complex(X))
    Xsvd$u <- Conj(Xsvd$u)
  Positive <- Xsvd$d > max(tol * Xsvd$d[1], 0)
  if (all(Positive))
    Xsvd$v %*% (1/Xsvd$d * t(Xsvd$u))
  else if (!any(Positive))
    array(0, dim(X)[2:1])
  else Xsvd$v[, Positive, drop = FALSE] %*% ((1/Xsvd$d[Positive]) *
                            t(Xsvd$u[, Positive, drop = FALSE]))
}


summary.regress <- function(object, digits=4, fixed.effects=TRUE,...) {
cat("Likelihood kernel: K=")
if(length(object$kernel) == 1){
	cat(max(sign(object$kernel), 0))
	} else if(!is.null(object$Kcolnames)) cat(object$Kcolnames, sep="+")
cat("\nMaximized log likelihood with kernel K is ")
cat(round(object$llik,digits),"\n",sep=" ")
    indent.lin <- max(nchar(dimnames(object$beta)[[1]]))
    indent.var <- max(nchar(object$Vnames))
    indent <- max(indent.lin,indent.var)

    extra.space <- ""
    space.var <- extra.space
    for(i in 0:(indent-indent.var)) space.var <- paste(space.var," ",sep="")
    space.lin <- extra.space
    for(i in 0:(indent-indent.lin)) space.lin <- paste(space.lin," ",sep="")
    
    coefficients <- cbind(round(object$beta, digits), round(object$beta.se, digits-1), round(object$beta/object$beta.se, 2))
    dimnames(coefficients)[[2]] <- c("Estimate","Std. Error", " Ratio")
    if(fixed.effects) {
      cat("\nMean-vector regression coefficients:\n")
      row.names(coefficients) <- paste(space.lin,dimnames(object$beta)[[1]],sep="")
      print(coefficients)
      cat("\n")
    } else {
      cat("\nMean-vector regression coefficients: not shown\n\n")
    }
    
      cat("Variance coefficients: (after", object$cycle, "cycles)\n")
    if(sum(object$pos)==0) {
      var.coef <- cbind(object$sigma,sqrt(diag(as.matrix(object$sigma.cov))))
      row.names(var.coef) <- paste(space.var,object$Vnames,sep="")
      colnames(var.coef) <- c("Estimate","Std. Error")
      print(round(var.coef, digits))
      cat("\n")
    } else {
	var.coef <- object$sigma
	se <- sep <- rep(NA, length(var.coef))
	se[object$nz] <- sqrt(diag(as.matrix(object$sigma.cov)))
	sep[object$nz] <- sqrt(diag(as.matrix(object$sigma.cov))) / abs(var.coef[object$nz])
	var.coef <- cbind(var.coef, se, var.coef-2*se, var.coef+2*se)
	if(length(object$sigma)>1){
		var.coef[object$pos,] <- cbind(var.coef[,c(1,2)], var.coef[,1]*exp(-2*sep),
		var.coef[,1]*exp(2*sep))[object$pos,] } else {
		if(object$pos) var.coef <- c(var.coef[,c(1,2)], var.coef[,1]*exp(-2*sep),
		var.coef[,1]*exp(2*sep)) }
	var.coef <- matrix(var.coef, nrow=length(object$sigma))
      row.names(var.coef) <- paste(space.var,object$Vnames,sep="")
      colnames(var.coef) <- c("Estimate","Std. Error","C. 2.5%", "C. 97.5%")
      print(round(var.coef, digits))
      cat("\n")
    }
  }