Metals = read.table("metals.dat",header=TRUE) # Data in which "replicates" are almost certainly pseudoreplicates # The first six columns are concentrations in ppm and the last # three columns are cell mortalities in percentages. Metals Cd Cu Zn Ni Cr Pb Y1 Y2 Y3 1 0.01 0.20 0.80 5.00 14.00 16.00 19.95 17.60 18.22 2 0.05 2.00 10.00 0.10 8.00 12.00 22.09 22.85 22.62 3 0.10 10.00 0.01 12.00 2.00 8.00 31.74 32.79 32.87 4 0.20 18.00 1.00 0.80 0.40 4.00 39.37 40.65 37.87 5 0.40 0.10 12.00 18.00 0.05 1.00 31.90 31.18 33.75 6 0.80 1.00 0.05 4.00 18.00 0.40 31.14 30.66 31.18 7 1.00 8.00 2.00 0.05 12.00 0.10 39.81 39.61 40.80 8 2.00 16.00 14.00 10.00 5.00 0.01 42.48 41.86 43.79 9 4.00 0.05 0.10 0.40 1.00 18.00 24.97 24.65 25.05 10 5.00 0.80 4.00 16.00 0.20 14.00 50.29 51.22 50.54 11 8.00 5.00 16.00 2.00 0.01 10.00 60.71 60.43 59.69 12 10.00 14.00 0.20 0.01 16.00 5.00 67.01 71.99 67.12 13 12.00 0.01 5.00 8.00 10.00 2.00 32.77 30.86 33.70 14 14.00 0.40 18.00 0.20 4.00 0.80 29.94 28.68 30.66 15 16.00 4.00 0.40 14.00 0.80 0.20 67.87 69.25 67.04 16 18.00 12.00 8.00 1.00 0.10 0.05 55.56 55.28 56.52 17 20.00 20.00 20.00 20.00 20.00 20.00 79.57 79.43 78.48 # Convert data into 51 rows. Metals51 = Metals[rep(1:17,rep(3,17)),1:6] Metals51=cbind(Metals51,as.vector(t(Metals[,7:9]))) names(Metals51)[7] = "Mort" attach(Metals51) # Because have all quantitative covariates, parameterization # shouldn't matter, so looking at coefficients okay? lmcat = lm(Mort ~ Cd + Cu + Zn + Ni + Cr + Pb) print(summary(lmcat),digits=2) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 23.758 2.965 8.0 4e-10 *** Cd 1.516 0.229 6.6 4e-08 *** Cu 1.148 0.215 5.3 3e-06 *** Zn -0.300 0.229 -1.3 0.20 Ni 0.545 0.215 2.5 0.01 * Cr -0.049 0.213 -0.2 0.82 Pb 0.051 0.213 0.2 0.81 Residual standard error: 10 on 44 degrees of freedom Multiple R-Squared: 0.71, Adjusted R-squared: 0.67 F-statistic: 18 on 6 and 44 DF, p-value: 1.6e-10 # Perhaps covariates should be on log scale? lmcat = lm(Mort ~ log(Cd) + log(Cu) + log(Zn) + log(Ni) + log(Cr) + log(Pb)) print(summary(lmcat),digits=2) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 35.05 1.28 27.3 <2e-16 *** log(Cd) 5.38 0.48 11.1 2e-14 *** log(Cu) 4.97 0.47 10.5 1e-13 *** log(Zn) 0.30 0.48 0.6 0.539 log(Ni) 1.54 0.47 3.3 0.002 ** log(Cr) 0.20 0.48 0.4 0.688 log(Pb) 2.05 0.48 4.2 1e-04 *** Residual standard error: 7.2 on 44 degrees of freedom Multiple R-Squared: 0.85, Adjusted R-squared: 0.83 F-statistic: 42 on 6 and 44 DF, p-value: <2e-16 # Can we explain more of variation by including some higher order terms? # What model does following command fit? lmcat = lm(Mort ~ (log(Cd) + log(Cu) + log(Zn) + log(Ni) + log(Cr) + log(Pb))^2) print(summary(lmcat),digits=3) Coefficients: (5 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) 45.553 1.028 44.31 < 2e-16 *** log(Cd) -2.535 0.602 -4.21 0.00017 *** log(Cu) 3.536 0.411 8.61 4.7e-10 *** log(Zn) -2.226 0.353 -6.31 3.4e-07 *** log(Ni) -6.245 0.799 -7.82 4.2e-09 *** log(Cr) -1.405 0.171 -8.20 1.5e-09 *** log(Pb) 0.589 0.454 1.30 0.20262 log(Cd):log(Cu) 4.197 0.267 15.70 < 2e-16 *** log(Cd):log(Zn) 0.571 0.150 3.79 0.00058 *** log(Cd):log(Ni) 3.021 0.224 13.49 3.3e-15 *** log(Cd):log(Cr) 1.171 0.242 4.84 2.8e-05 *** log(Cd):log(Pb) 2.097 0.199 10.53 3.0e-12 *** log(Cu):log(Zn) -4.289 0.306 -14.00 1.1e-15 *** log(Cu):log(Ni) 1.976 0.262 7.55 9.1e-09 *** log(Cu):log(Cr) -2.245 0.190 -11.83 1.3e-13 *** log(Cu):log(Pb) -1.831 0.235 -7.81 4.4e-09 *** log(Zn):log(Ni) 1.844 0.345 5.35 6.1e-06 *** log(Zn):log(Cr) NA NA NA NA log(Zn):log(Pb) NA NA NA NA log(Ni):log(Cr) NA NA NA NA log(Ni):log(Pb) NA NA NA NA log(Cr):log(Pb) NA NA NA NA Residual standard error: 1.11 on 34 degrees of freedom Multiple R-Squared: 0.997, Adjusted R-squared: 0.996 F-statistic: 782 on 16 and 34 DF, p-value: <2e-16 # Changing order now matters, even though all covariates quantitative # (look at log(Pb)). WHY? lmcat = lm(Mort ~ (log(Pb) + log(Cr) + log(Ni) + log(Zn) + log(Cu) + log(Cd))^2) print(summary(lmcat),digits=2) Coefficients: (5 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) 57.496 1.561 36.8 <2e-16 *** log(Pb) -11.705 0.843 -13.9 1e-15 *** log(Cr) -1.886 0.230 -8.2 1e-09 *** log(Ni) 0.559 0.228 2.5 0.02 * log(Zn) -7.615 0.380 -20.1 <2e-16 *** log(Cu) -2.388 0.480 -5.0 2e-05 *** log(Cd) 4.917 0.260 18.9 <2e-16 *** log(Pb):log(Cr) 0.463 0.119 3.9 4e-04 *** log(Pb):log(Ni) -0.217 0.054 -4.0 3e-04 *** log(Pb):log(Zn) -1.053 0.128 -8.3 1e-09 *** log(Pb):log(Cu) 6.179 0.380 16.3 <2e-16 *** log(Pb):log(Cd) 2.054 0.289 7.1 3e-08 *** log(Cr):log(Ni) -0.784 0.098 -8.0 2e-09 *** log(Cr):log(Zn) 4.149 0.260 16.0 <2e-16 *** log(Cr):log(Cu) -0.174 0.096 -1.8 0.08 . log(Cr):log(Cd) -5.911 0.372 -15.9 <2e-16 *** log(Ni):log(Zn) 3.755 0.162 23.2 <2e-16 *** log(Ni):log(Cu) NA NA NA NA log(Ni):log(Cd) NA NA NA NA log(Zn):log(Cu) NA NA NA NA log(Zn):log(Cd) NA NA NA NA log(Cu):log(Cd) NA NA NA NA # Maybe it makes more sense to include quadratic terms in each factor # before cross-products lmcat = lm(Mort ~ log(Cd) + log(Cu) + log(Zn) + log(Ni) + log(Cr) + log(Pb) + I(log(Cd)^2) + I(log(Cu)^2) + I(log(Zn)^2) + I(log(Ni)^2) + I(log(Cr)^2) + I(log(Pb)^2)) print(summary(lmcat),digits=2) Estimate Std. Error t value Pr(>|t|) (Intercept) 30.123 1.068 28.2 <2e-16 *** log(Cd) 4.944 0.176 28.1 <2e-16 *** log(Cu) 4.920 0.188 26.1 <2e-16 *** log(Zn) -0.678 0.188 -3.6 9e-04 *** log(Ni) 2.433 0.174 14.0 <2e-16 *** log(Cr) -0.547 0.194 -2.8 0.008 ** log(Pb) 1.181 0.183 6.4 1e-07 *** I(log(Cd)^2) 0.435 0.082 5.3 5e-06 *** I(log(Cu)^2) 0.129 0.076 1.7 0.097 . I(log(Zn)^2) -0.770 0.079 -9.8 6e-12 *** I(log(Ni)^2) 0.948 0.081 11.7 3e-14 *** I(log(Cr)^2) 0.540 0.077 7.0 2e-08 *** I(log(Pb)^2) -0.150 0.080 -1.9 0.070 . Residual standard error: 2.3 on 38 degrees of freedom Multiple R-Squared: 0.99, Adjusted R-squared: 0.98 F-statistic: 2.3e+02 on 12 and 38 DF, p-value: <2e-16 # All preceding analyses silly because of pseudoreplication # Let's analyze the average mortality for each experimental condition Avemort = rowMeans(Metals[,7:9]) lmcat = lm(Avemort ~ log(Cd) + log(Cu) + log(Zn) + log(Ni) + log(Cr) + log(Pb), data = Metals) print(summary(lmcat),digits=2) Estimate Std. Error t value Pr(>|t|) (Intercept) 35.05 2.67 13.2 1e-07 *** log(Cd) 5.38 1.01 5.3 3e-04 *** log(Cu) 4.97 0.98 5.0 5e-04 *** log(Zn) 0.30 1.01 0.3 0.77 log(Ni) 1.54 0.98 1.6 0.15 log(Cr) 0.20 1.01 0.2 0.85 log(Pb) 2.05 1.01 2.0 0.07 . Residual standard error: 8.7 on 10 degrees of freedom Multiple R-Squared: 0.85, Adjusted R-squared: 0.76 F-statistic: 9.7 on 6 and 10 DF, p-value: 0.0011 # Adding quadratics in each log(conc) lmcat = lm(Avemort ~ log(Cd) + log(Cu) + log(Zn) + log(Ni) + log(Cr) + log(Pb) + I(log(Cd)^2) + I(log(Cu)^2) + I(log(Zn)^2) + I(log(Ni)^2) + I(log(Cr)^2) + I(log(Pb)^2), data = Metals) print(summary(lmcat),digits=2) Estimate Std. Error t value Pr(>|t|) (Intercept) 30.12 2.94 10.2 5e-04 *** log(Cd) 4.94 0.48 10.2 5e-04 *** log(Cu) 4.92 0.52 9.5 7e-04 *** log(Zn) -0.68 0.52 -1.3 0.261 log(Ni) 2.43 0.48 5.1 0.007 ** log(Cr) -0.55 0.54 -1.0 0.365 log(Pb) 1.18 0.51 2.3 0.080 . I(log(Cd)^2) 0.43 0.23 1.9 0.127 I(log(Cu)^2) 0.13 0.21 0.6 0.571 I(log(Zn)^2) -0.77 0.22 -3.6 0.024 * I(log(Ni)^2) 0.95 0.22 4.3 0.013 * I(log(Cr)^2) 0.54 0.21 2.5 0.064 . I(log(Pb)^2) -0.15 0.22 -0.7 0.535 Residual standard error: 3.7 on 4 degrees of freedom Multiple R-Squared: 0.99, Adjusted R-squared: 0.96 F-statistic: 30 on 12 and 4 DF, p-value: 0.0024 # F test for quadratic terms: ((10*8.7^2 - 4*3.7^2)/6)/3.7^2 [1] 8.548089 1 - pf(8.548,6,4) [1] 0.02845842 # Following fits the same model (respects uneven spacing) # but orthonormalizes the polynomials. lmcat = lm(Avemort ~ poly(log(Cd),2) + poly(log(Cu),2) + poly(log(Zn),2) + poly(log(Ni),2) + poly(log(Cr),2) + poly(log(Pb),2), data = Metals) print(summary(lmcat),digits=2) Estimate Std. Error t value Pr(>|t|) (Intercept) 42.86 0.91 47.3 1e-06 *** poly(log(Cd), 2)1 43.39 4.51 9.6 7e-04 *** poly(log(Cd), 2)2 8.55 4.45 1.9 0.13 poly(log(Cu), 2)1 45.03 4.65 9.7 6e-04 *** poly(log(Cu), 2)2 2.54 4.12 0.6 0.57 poly(log(Zn), 2)1 -1.61 4.66 -0.3 0.75 poly(log(Zn), 2)2 -15.15 4.26 -3.6 0.02 * poly(log(Ni), 2)1 16.86 4.59 3.7 0.02 * poly(log(Ni), 2)2 18.66 4.38 4.3 0.01 * poly(log(Cr), 2)1 -8.40 4.88 -1.7 0.16 poly(log(Cr), 2)2 10.63 4.17 2.5 0.06 . poly(log(Pb), 2)1 11.92 4.71 2.5 0.06 . poly(log(Pb), 2)2 -2.95 4.36 -0.7 0.54 lmcat = lm(Avemort ~ poly(log(Pb),2) + poly(log(Cu),2) + poly(log(Zn),2) + poly(log(Ni),2) + poly(log(Cr),2) + poly(log(Cd),2), data = Metals) print(summary(lmcat),digits=2) # The following ANOVA may be useful but does depend on order of factors. anova(lmcat) Analysis of Variance Table Df Sum Sq Mean Sq F value Pr(>F) poly(log(Cd), 2) 2 2469.69 1234.84 88.4759 0.0004886 *** poly(log(Cu), 2) 2 1629.70 814.85 58.3834 0.0010970 ** poly(log(Zn), 2) 2 48.90 24.45 1.7519 0.2841591 poly(log(Ni), 2) 2 593.51 296.75 21.2623 0.0073919 ** poly(log(Cr), 2) 2 250.95 125.48 8.9903 0.0331159 * poly(log(Pb), 2) 2 90.63 45.31 3.2467 0.1453094 Residuals 4 55.83 13.96 # Plotting fits. Note that all effects are 0 at concentration 1 ppm # due to choice of units and log scale for concentrations. # # Residual plot. Is this a good idea with so few df's for error? lmcat = lm(Avemort ~ log(Cd) + log(Cu) + log(Zn) + log(Ni) + log(Cr) + log(Pb) + I(log(Cd)^2) + I(log(Cu)^2) + I(log(Zn)^2) + I(log(Ni)^2) + I(log(Cr)^2) + I(log(Pb)^2), data = Metals) bb = coefficients(lmcat) xx = log(Metals[,1]) postscript("metals-fit.ps",height=6.5,width=8.5) matplot(xx,cbind(bb[2]*xx+bb[8]*xx^2,bb[3]*xx+bb[9]*xx^2,bb[4]*xx+bb[10]*xx^2, bb[5]*xx+bb[11]*xx^2,bb[6]*xx+bb[12]*xx^2,bb[7]*xx+bb[13]*xx^2),type="n", ylab="Effect on percent mortality",xlab="Concentration (ppm)",axes=FALSE, main="Fitted marginal effects for six metals") axis(2) axis(1,at=log(c(.01,0.1,1,10)),labels=c(0.01,0.1,1,10)) text(rep(xx,6),cbind(bb[2]*xx+bb[8]*xx^2,bb[3]*xx+bb[9]*xx^2,bb[4]*xx+bb[10]*xx^2, bb[5]*xx+bb[11]*xx^2,bb[6]*xx+bb[12]*xx^2,bb[7]*xx+bb[13]*xx^2), rep(names(Metals)[1:6],rep(17,6)),cex=0.7,col=rep(1:6,rep(17,6))) plot(Metals[,1],resid(lmcat),type="n",xlab="Cadmium concentration (ppm)", ylab="Residual percentage mortality",main="Residual plot Symbol is rank of Cu concentration",log="x") text(Metals[,1],resid(lmcat),rank(Metals[,2]),cex=0.75) dev.off()