source("http://www.stat.uchicago.edu/~pmcc/courses/regress.R") liver <- read.csv("http://www.stat.uchicago.edu/~pmcc/reports/LiverData.csv") baseline <- liver$time==0 liver$treatment[baseline] <- -1 censored <- !liver$cens ## apparently liver$cens is coded in reverse revival <- (liver$survival - liver$time)[!censored] id <- liver$id[!censored]; prothrombin <- liver$prothrombin[!censored]; survival <- liver$survival[!censored]; time <- liver$time[!censored]; treat <- liver$treatment[!censored] treat <- as.factor(treat); levels(treat) <- c("null", "control", "prednizone") ndt <- -abs(outer(time, time, "-")) ndrt <- -abs(outer(revival, revival, "-")) ndrt2 <- ndrt^2 * log(-ndrt + (ndrt==0)) # quadratic spline covariance function ndrt3 <- -ndrt^3 # cubic spline covariance function Patient <- outer(id, id, "==") lambda <- 1.5; Patient.dt <- Patient * exp(ndrt/lambda); K <- model.matrix(~revival+treat+survival) fit0 <- regress(prothrombin~treat, ~Patient+Patient.dt+ndrt, kernel=K, start=c(5,6,6,1)) fit1 <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt, kernel=K, start=fit0$sigma) fit2 <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt2, kernel=K, start=fit1$sigma) fit3 <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt3, kernel=K, start=fit2$sigma) c(fit0$llik, fit1$llik, fit2$llik, fit3$llik) # all with same kernel to make likelihoods comparable # -5672.057 -5672.057 -5673.894 -5676.175 # # Brief notes on regress(), covariance kernels and generalized covariance functions # # The linear spline $-|x - x'|$ is positive definite for linear functionals $\alpha$ in # the $n-1$-dimensional subspace $1^0$ of contrasts whose coefficients add to zero. # The quadratic and cubic spline generalized covariance functions are positive definite # for contrasts in the (n-2)-dimensional subspace of linear functionals such that # $\alpha(z) = 0$ for vectors $z$ in the 2-dimensional subspace ~1+x, (or ~1+revival here). # This subspace $\K$ is called the kernel of the covariance function: # the linear functionals are in $\K^0$. # A proper (strictly positive definite) covariance function has a zero kernel. # # To compare linear with quadratic spline functions via likelihood ratios, you must compute # $P_0(E)/P_1(E)$ only for events $E\subset\R^n$ on which both $P_0$ and $P_1$ are defined. # To ensure comparability here, the declared kernel $\K$ must contain $~1+revival$ at least. # In the fits shown above, $\K$ is a 5-dimensional subspace containing also treat+survival. # That is not necessary: if you want to test whether survival is needed in the mean model, # you could take $\K=~1+revival$, but $\K=~1+revival+treat$ is better. # # The choice of $\K$ determines the nature of the likelihood maximized and reported by regress() # It also affects parameter estimates slightly. # Setting kernel=0 gives the ordinary likelihood (provided that the covariance model is proper); # ker=1 gives the likelihood based on simple contrasts. # Two models being compared need not have the same mean-value subspace $\X$, but they must # implicitly or explicitly have the same kernel. The default in regress is to use # the REML kernel, which is the subspace $\X$ of mean vectors. # Two models being compared via REML must have the same mean-value subspace. # # Two Gaussian models are equivalent if they have the same $\K$, $\K+\X$ and covariance model. # Thus, fit0 and fit1 above are effectively identical. # If $\K$ is set to $~1+revival+treat$, fit0 is a proper comparable sub-model of fit1. # The maximized log likelihood ratio is then fit1$llik-fit0$llik = 6.973, (or 13.95 on the # conventional $\chi_1^2$ scale). The fitted survival coefficient is 1.8029\pm 0.475. # With the larger kernel used above, the additive effect of survival time is automatically eliminated # from likelihood calculations, but the survival coefficient is reported as # 1.8030 \pm 0.476, so the difference in coefficients is very slight. # ################################################################################################# # Bayes estimates shown in Fig 5 #postscript(file="/Users/petermccullagh/papers/revival/fig5.eps") yrs <- seq(0, 10, 1/24); # reverse time X <- model.matrix(~yrs); Q <- diag(length(yrs)) - X %*% solve(t(X) %*% X, t(X)); V <- -abs(outer(yrs, revival, "-")) blp1 <- fit1$sigma[4] * Q %*% V %*% fit1$W %*% (prothrombin - fit1$fitted) par(mar=c(5,2,4,5)+0.1) plot(-yrs, blp1, type="l", yaxt='n', ann=FALSE) axis(4) mtext("Prothrombin index", side=4, line=2.0) title(xlab="Time before failure (years)") V2 <- V^2 * log(-V + (V==0)) blp2 <- fit2$sigma[4] * V2 %*% fit2$W %*% (prothrombin - fit2$fitted) lines(-yrs, Q%*%blp2, type="l", col="green") V3 <- abs(outer(yrs, revival, "-"))^3 blp3 <- fit3$sigma[4] * V3 %*% fit3$W %*% (prothrombin - fit3$fitted) lines(-yrs, Q%*%blp3, type="l", col="blue") dev.off() ##################################################################################################### # forward time versus reverse time versus both fit0 <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndt, start=fit0$sigma) fit1 <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt, start=fit1$sigma) fit2 <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt+ndt, start=fit1$sigma) c(fit0$llik, fit1$llik, fit2$llik, fit1$llik-fit0$llik) # REML values summary(fit2) #################################################################################################### # non-stationary covariance model day <- 1/365; offset <- 1*day; s <- log(revival + offset) nds <- -abs(outer(s, s, "-")); nds2 <- nds^2*log(-nds + (nds==0)); nds3 <- abs(outer(s, s, "-"))^3 #fit1a <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt, kernel=1, start=fit1$sigma) fit1b <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+nds, kernel=1, start=fit1b$sigma) c(offset/day, fit1a$llik, fit1b$llik, fit1b$llik-fit1a$llik) K <- model.matrix(~treat+s) #fit1a <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+ndrt, kernel=K, start=fit1$sigma) fit1b <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+nds, kernel=K, start=fit1b$sigma) fit2b <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+nds2, kernel=K, start=fit1b$sigma) fit3b <- regress(prothrombin~treat+survival, ~Patient+Patient.dt+nds3, kernel=K, start=fit1b$sigma) c(fit1a$llik, fit1b$llik, fit2b$llik, fit3b$llik) # all with same kernel to make likelihoods comparable #################################################################################################### #################################################################################################### # Bayes estimate of $\eta_0$: non-stationary version #postscript(file="/Users/petermccullagh/papers/revival/fig6.eps") yrs <- seq(0, 10, 1/24); s1 <- log(yrs + offset) X <- model.matrix(~s1); Q <- diag(length(s1)) - X %*% solve(t(X) %*% X, t(X)); V <- -abs(outer(s1, s, "-")) blp1 <- fit1b$sigma[4] * Q %*% V %*% fit1b$W %*% (prothrombin - fit1b$fitted) par(mar=c(5,2,4,5)+0.1) plot(-s1, blp1, type="l", yaxt='n', ann=FALSE) axis(4) mtext("Prothrombin index", side=4, line=2.0) title(xlab="Time before failure (years)") V2 <- V^2 * log(-V + (V==0)) blp2 <- fit2b$sigma[4] * V2 %*% fit2b$W %*% (prothrombin - fit2b$fitted) lines(-s1, Q%*%blp2, type="l", col="green") V3 <- abs(outer(s1, s, "-"))^3 blp3 <- fit3b$sigma[4] * V3 %*% fit3b$W %*% (prothrombin - fit3b$fitted) lines(-s1, Q%*%blp3, type="l", col="red") dev.off() ####################################################################################################### ####################################################################################################### # computation of Table 1 (mean prothrombin values) fs <- trunc(survival); fs <- pmin(fs,8) ft <- trunc(time); ft <- pmin(ft, 8) fst <- fs + 9*ft ymean <- round(tapply(prothrombin, fst, mean), 1) M <- N <- matrix(0, 9, 9); row <- as.numeric(gl(9, 1, 81)); col <- as.numeric(gl(9, 9, 81)) colnames(M) <- colnames(N) <- c("t0-1", "t1-2", "t2-3", "t3-4", "t4-5", "t5-6", "t6-7", "t7-8", "t8+") rownames(M) <- rownames(N) <- c("T0-1", "T1-2", "T2-3", "T3-4", "T4-5", "T5-6", "T6-7", "T7-8", "T8+") M[row >= col] <- ymean; N[row >= col] <- table(fst) M t0-1 t1-2 t2-3 t3-4 t4-5 t5-6 t6-7 t7-8 t8+ T0-1 58.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 T1-2 72.5 66.4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 T2-3 72.6 73.2 66.0 0.0 0.0 0.0 0.0 0.0 0.0 T3-4 69.8 71.2 68.5 54.2 0.0 0.0 0.0 0.0 0.0 T4-5 68.5 75.7 72.5 74.6 57.7 0.0 0.0 0.0 0.0 T5-6 70.5 77.3 73.5 57.1 64.5 60.9 0.0 0.0 0.0 T6-7 81.8 73.6 81.1 80.6 79.4 75.5 75.8 0.0 0.0 T7-8 84.4 88.8 88.1 92.1 85.2 81.2 84.3 88.1 0.0 T8+ 77.3 73.6 87.0 74.1 92.0 80.3 89.2 79.4 84.7 N t0-1 t1-2 t2-3 t3-4 t4-5 t5-6 t6-7 t7-8 t8+ T0-1 266 0 0 0 0 0 0 0 0 T1-2 130 42 0 0 0 0 0 0 0 T2-3 114 38 40 0 0 0 0 0 0 T3-4 90 38 27 33 0 0 0 0 0 T4-5 55 22 17 20 18 0 0 0 0 T5-6 58 19 18 14 19 23 0 0 0 T6-7 72 27 15 24 25 20 15 0 0 T7-8 32 9 10 11 10 8 10 8 0 T8+ 68 27 20 22 23 19 20 18 20 ###################################################################################################### ymean <- as.vector(M) rtime <- row - col w <- as.numeric(row >= col) T5 <- as.factor(row > 5) fit0 <- lm(ymean~1, weights=w); rss0 <- sum(w*fit0$resid^2) fit1 <- lm(ymean~as.factor(col), weights=w); rss1 <- sum(w*fit1$resid^2) fit1a <- lm(ymean~as.factor(col) + row, weights=w); rss1a <- sum(w*fit1a$resid^2) fit1b <- lm(ymean~as.factor(col) + row+T5, weights=w); rss1b <- sum(w*fit1b$resid^2) c(rss0, rss1, rss1a, rss1b) c(rss0-rss1, rss0-rss1a, rss0-rss1b) round(matrix(fit1a$resid*sqrt(w), 9, 9), 1) matrix(fit1a$resid*sqrt(w), 9, 9) fit1 <- lm(ymean~as.factor(rtime), weights=w); rss1 <- sum(w*fit1$resid^2) fit1a <- lm(ymean~as.factor(rtime) + row, weights=w); rss1a <- sum(w*fit1a$resid^2) fit1b <- lm(ymean~as.factor(rtime) + row+T5, weights=w); rss1b <- sum(w*fit1b$resid^2) fit1c <- lm(ymean~as.factor(rtime)*T5 + row, weights=w); rss1c <- sum(w*fit1b$resid^2) c(rss0, rss1, rss1a, rss1b, rss1c) c(rss0-rss1, rss0-rss1a, rss0-rss1b, rss0-rss1c) round(matrix(fit1a$resid*sqrt(w), 9, 9), 1) ###################################################################################################### ###################################################################################################### r1 <- tapply(revival, id, min) min2 <- function(x){diff(sort(x))[1]}; r2 <- tapply(revival, id, min2) min3 <- function(x){diff(sort(x))[2]}; r3 <- tapply(revival, id, min3) min4 <- function(x){diff(sort(x))[3]}; r4 <- tapply(revival, id, min4) summary(r1*365); summary(r2*365); summary(r3*365); summary(r4*365) #which gives (in days) # Min. 1st Qu. Median Mean 3rd Qu. Max. NAs # 0.00 3.00 21.00 76.86 88.25 762.00 # 15.00 90.25 165.5 209.8 350.0 1097.0 22 # 35.0 106.0 292.5 251.6 365.5 726.0 56 # 28.0 104.0 238.0 241.8 357.5 763.0 92 ###################################################################################################### ###################################################################################################### ###################################################################################################### # Section 6 # Survival prognosis: Effect of prothrombin on survival ndens <- function(y, Sigma, Log=TRUE){ ldens <- -length(y)*log(2*pi)-log(det(Sigma)) - t(y) %*% solve(Sigma, y) if(Log) return(ldens/2) else return(exp(ldens/2)) } fit1 <- regress(prothrombin~treat+survival+revival, ~Patient+Patient.dt+ndrt, start=fit1$sigma) fit3b <- regress(prothrombin~treat+survival+s, ~Patient+Patient.dt+nds3, start=fit3b$sigma) # REML fit #postscript(file="/Users/petermccullagh/papers/revival/fig6.eps") #postscript(file="/net/roxbee/home/pmcc/papers/revival/fig6.eps") u <- 402; aptu <- liver$time[liver$id==u]; yu <- liver$prothrombin[liver$id==u] # Xu, yu and aptu and stu for patient u; and fit for fitted model m <- length(aptu); fit <- fit1; p <- length(fit$beta) tlvl <- 2; lratio <- rep(0,0) for(stu in seq(max(aptu), 10, 0.1)){ Xu <- matrix(0, m, p); Xu[,1] <- 1; rvlu <- stu - aptu Xu[2:m, tlvl+1] <- 1; Xu[,4] <- stu; Xu[, 5] <- rvlu ucov <- -abs(outer(rvlu, revival, "-")) * fit$sigma[4] # for linear spline model Sigmau <- diag(m)*fit$sigma[1] + fit$sigma[2] - abs(outer(rvlu, rvlu, "-"))* fit$sigma[4] + exp(-abs(outer(rvlu, rvlu, "-"))/lambda) * fit$sigma[3] blpu <- Xu %*% fit$beta + ucov %*% fit$W %*% (prothrombin - fit$fitted) blvu <- Sigmau - ucov %*% fit$W %*% t(ucov) lratio <- c(lratio, ndens(yu-blpu, blvu)) } lratio <- lratio - mean(lratio) plot(seq(max(aptu), 10, 0.1), lratio, type="l", xlab="Survival time") #lines(seq(max(aptu), 10, 0.1), lratio, lty=2, col="red") tlvl <- 2; fit <- fit3b; p <- length(fit$beta) lratio <- rep(0,0) for(stu in seq(max(aptu), 10, 0.1)){ Xu <- matrix(0, m, p); Xu[,1] <- Xu[,tlvl+1] <- 1; Xu[,4] <- stu # null treatment level Xu[1, tlvl+1] <- 0 # null level at time zero rvlu <- stu - aptu; su <- log(rvlu + offset) Xu[, 5] <- su ucov <- abs(outer(su, log(revival+offset), "-"))^3 * fit$sigma[4] # for cubic spline model Sigmau <- diag(m)*fit$sigma[1] + fit$sigma[2] + abs(outer(su, su, "-"))^3* fit$sigma[4] + exp(-abs(outer(rvlu, rvlu, "-"))/lambda) * fit$sigma[3] blpu <- Xu %*% fit$beta + ucov %*% fit$W %*% (prothrombin - fit$fitted) blvu <- Sigmau - ucov %*% fit$W %*% t(ucov) lratio <- c(lratio, ndens(yu-blpu, blvu)) } lratio <- lratio - mean(lratio) lines(seq(max(aptu), 10, 0.1), lratio, col=2) dev.off()