# Consulting project 2007 Arabidopsis plants? # Plant type 0 = "cid" # Plant type 1 = "108" # height height height height height height height height height height height #plant type day 29 day 33 day 37 day 41 day 45 day 49 day 53 day 57 day 61 day 65 day 69 ####################################################################################################### source("/ga/pmcc/public_html/courses/regress.R") nplants <- 70 ntimes <- 11 # 11 measurements per plant data <- scan("/ga/pmcc/datasets/PlantGrowth.dat") data <- matrix(data, nrow=nplants, ncol=13, byrow=T) plant <- as.factor(1:nplants) ptype <- as.factor(data[,2]) # plant type (40 type 0; 30 type 1) height <- data[, 3:13] # plant height in mm date <- seq(29, 69, 4) # calendar time: plant age in days from planting of seed brearded <- 25 + apply(height == 0, 1, sum)*4 # inferred plant birth date y <- as.vector(data[, 3:13]) # plant height in mm plant <- rep(plant, ntimes) ptype <- rep(ptype, ntimes) brearded <- rep(brearded, ntimes) # plant-specific birth date date <- rep(date, rep(nplants, ntimes)) # from date of planting #postscript("/home/pmcc/datasets/PlantGrowth.eps", horizontal=FALSE) #par(mfrow=c(3,2), mar=c(2,2,0,0)+1.0) par(mfrow=c(2,2), mar=c(1,2,2,2)) plot(date[plant==1], height[plant==1], col=1, type="l", ylim=c(0, max(height))) for(i in 2:nplants) lines(date[plant==i], height[plant==i], col=as.numeric(ptype[i]) + 2) text(35,38, "Plant height") text(65, 0, "date") t <- 1:40 plot(t, t^2/(13^2 + t^2), ylim=c(0,1), type="l") text(8, 0.8, "height/max height") text(20, 1.0, "standard growth trajectory after brearding") text(25, 0.9, "t^2/(13^2+t^2)") text(35, 0, "age") age <- date - brearded # plant age: negative or zero prior to brearding w <- age > 0 # excluding age<=0 to avoid singularities y <- y[w]; age <- age[w]; ptype <- ptype[w]; plant <- plant[w] V1 <- abs(outer(age, age, "pmin")) # BM starting from zero at age=0 BV1 <- outer(plant, plant, "==") * V1 # indep BM for each plant plot(c(0,age[plant==1]), c(0, y[plant==1]), col=1, type="l", ylim=c(0, max(y)), xlim=c(0, max(age))) for(i in 2:nplants) lines(c(0,age[plant==i]), c(0,y[plant==i]), col=(i >40) + 3) text(2, 38, "height") text(35, 0, "age") tau <- 12.839; x <- age^2/(tau^2 + age^2) #postscript("/home/pmcc/datasets/PlantGrowth2.eps", horizontal=FALSE) par(mfrow=c(1,1), mar=c(2,2,2,2)) age2 <- age-2; x <- age2/(tau+age2) fit1 <- regress(y~x:ptype-1, ~V1+BV1, start=c(2, 1, 1), pos=c(1,1,1)) fv <- fit1$fitted; res <- y - fv blp <- fit1$fitted + fit1$sigma[2] * V1 %*% fit1$W %*% res plot(c(0, age2[plant==70]), c(0, blp[plant==70]), type="l", col=4) lines(c(0, age2[plant==70]), c(0, fv[plant==70]), lty=2, col=4) lines(c(0, age2[plant==2]), c(0, blp[plant==2]), type="l", col=3) lines(c(0, age2[plant==2]), c(0, fv[plant==2]), lty=2, col=3) text(25, 7, "mu(t): dashed lines") text(25, 5, "blp(t): solid lines") ym0 <- tapply(y[ptype==0], age[ptype==0], mean); ym0 <- c(0, ym0) ym1 <- tapply(y[ptype==1], age[ptype==1], mean); ym1 <- c(0, ym1) t1 <- c(0, seq(2, 40, 4)) t0 <- c(0, seq(2, 36, 4)) points(t1, ym1, cex=0.5, col="blue") points(t0, ym0, cex=0.5, col="red") #dev.off() tau <- 13; x <- age^2/(tau^2 + age^2) fit0 <- regress(y~x:ptype-1, ~V1+BV1, start=c(2,0.1,1), pos=c(1,1,1), kernel=0) # First estimate tau by ordinary maximum likelihood for(tau in seq(11,15,0.2)){ x <- age^(2)/(tau^(2) + age^(2)) fit0 <- regress(y~x:ptype-1, ~V1+BV1, start=fit0$sigma, pos=c(1,1,1), kernel=0) cat(tau, fit0$llik, "\n") } tau <- 12.782; x <- age^2/(tau^2 + age^2) deriv <- -2*tau*fit1$fitted * x / age^2 #deriv of mu wrt tau fit0 <- regress(y~x:ptype-1, ~V1+BV1, start=fit0$sigma, pos=c(1,1,1), kernel=0) fit0a <- regress(y~deriv+x:ptype-1, ~V1+BV1, start=fit0$sigma, pos=c(1,1,1), kernel=0) # Use Box-Tidwell method to adjust SEs for estimation of tau deriv <- -2*tau*fit1$fitted * x / age^2 #deriv of mu wrt tau fit1a <- regress(y~deriv+x:ptype-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1)) summary(fit1a) # implicitly taking the tangent space as kernel tau <- 12.839; x <- age^2/(tau^2 + age^2) deriv <- -2*tau*fit1$fitted * x / age^2 #deriv of mu wrt tau fit0b <- regress(y~deriv+x:ptype-1, ~V1+BV1, start=fit0$sigma, pos=c(1,1,1)) # testing whether the second variance component (V1) is zero fit1b <- regress(y~x:ptype-1, ~BV1, start=fit1$sigma[c(1,3)], maxcyc=10, pos=c(1,1,1)) 2*(fit1$llik - fit1b$llik) # 26.74711 # testing whether the two types are equivalent in terms of plateau height K <- model.matrix(~x-1) fit0 <- regress(y~x-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1), maxcyc=10) fit1 <- regress(y~x:ptype-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1), maxcyc=10, kernel=K) 2*(fit1$llik - fit0$llik) # 115.35 ############################################################################################# #Alternative version of model using FBM in place of BM fit1 <- regress(y~x:ptype-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1), maxcyc=20) fit3 <- fit1 for(p in seq(1, 0.4, -0.1)){ V2 <- outer(age^p, age^p, "+") - abs(outer(age, age, "-"))^p BV2 <- outer(plant, plant, "==") * V2 # indep FBM for each plant fit3 <- regress(y~x:ptype-1, ~V2+BV2, start=fit3$sigma, pos=c(1,1,1), maxcyc=20) cat(p, fit3$llik-fit1$llik, "\n") } p <- 0.5 V2 <- outer(age^p, age^p, "+") - abs(outer(age, age, "-"))^p BV2 <- outer(plant, plant, "==") * V2 # indep FBM for each plant fit3 <- regress(y~x:ptype-1, ~V2+BV2, start=fit3$sigma, pos=c(1,1,1), maxcyc=20) summary(fit3) 2*(fit3$llik - fit1$llik) # 18.40 ############################################################################################### # FBM plots as cts functions x <- seq(0, 10, 0.01) par(mfrow=c(3,2)) for(p in seq(0.5, 1.75, 0.25)){ V2 <- outer(x^p, x^p, "+") - abs(outer(x, x, "-"))^p L <- t(chol(1+V2)) # adding one to avoid singularity fbm <- L %*% rnorm(length(x)) plot(x, fbm - fbm[1], type="l") # subtracting y[1] to compensate } ############################################################################################### # Alternative models using additional covariance terms such as MA correlation D1 <- abs(outer(age, age, "-"))/4; D1 <- (D1==1) * outer(plant, plant, "==") SV1 <- V1 * outer(ptype, ptype, "==") # splitting V1 into two independent terms tau <- 12.839; x <- age^2/(tau^2 + age^2) fit1 <- regress(y~x:ptype-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1)) fit2 <- regress(y~x:ptype-1, ~V1+BV1+D1, start=fit1$sigma, pos=c(1,1,1)) fit3 <- regress(y~x:ptype-1, ~V1+BV1+SV1, start=fit1$sigma, pos=c(1,1,1)) c(fit1$llik, fit2$llik, fit3$llik) - fit1$llik # 0.000 0.381 1.046 tau <- 12.839; age2 <- age-2; x <- age2/(tau+age2) V1 <- outer(age2, age2, "pmin"); BV1 <- V1*outer(plant, plant, "==") for(tau in seq(13, 14, 0.1)){ x <- age2/(tau + age2) fit1 <- regress(y~x:ptype-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1), kernel=0) cat(tau, fit1$llik, "\n") } x <- age2/(tau + age2); deriv <- -fit1a$fitted/(tau + age2) fit1a <- regress(y~deriv+x:ptype-1, ~V1+BV1, start=fit1$sigma, pos=c(1,1,1))