# Single factor experiment and analysis rm(list = ls()) # clear the memory #help(read.table) pulp = read.table("http://www.stat.ualberta.ca/~wiens/stat568/datasets/pulp_Table_2_1.dat", header = T, quote= "") pulp y = c(as.matrix(pulp)) operator = as.factor(c(rep("A", 5), rep("B", 5),rep("C", 5),rep("D", 5))) data.pulp = data.frame(y, operator) par(mfrow = c(1,2)) colMeans(pulp) round(sqrt(apply(pulp, 2, var)),2) # Apply the 'variance' function to each column of 'pulp' stripchart(y~operator, vertical = T, ylab = "reflectance") boxplot(y ~ operator,data=data.pulp) fit = aov(y ~ operator) summary(fit) print(model.tables(fit,"means"),digits=3) #report the means (or effects) and the number of subjects/cell print(model.tables(fit,"effects"),digits=3) win.graph() par(mfrow = c(1,2)) resid = fit$resid qqnorm(resid) qqline(resid) text(0, .9*max(resid), paste("p = ", round(shapiro.test(resid)[[2]],3))) plot(fit$fitted,fit$resid, xlab = "fits", ylab = "resids") # Tukey Honestly Significant Differences TukeyHSD(fit) # What is the adj p-value? out = TukeyHSD(fit) differences = out$operator[,1] # differences between the means #Divide each absolute difference by sigmahat/n (n=5=size of each group) sigmahat = sqrt(sum(fit$resid^2)/fit$df.resid) sigmahat t = abs(differences)/(sigmahat/sqrt(5)) adj.p = ptukey(t,4,16,lower.tail=F) # compare these with the values produced by TukeyHSD(fit): cbind(adj.p,out$operator[,4]) # A simultaneous confidence interval on the true difference, with # confidence 1 - alpha, where alpha = adj.p, will have # one endpoint = 0: lower = differences -(sigmahat/sqrt(5))* qtukey(adj.p,4,16,lower.tail=F) upper = differences +(sigmahat/sqrt(5))* qtukey(adj.p,4,16,lower.tail=F) round(cbind(lower, differences, upper),4) #install and load the 'pwr' package # See http://www.statmethods.net/stats/power.html library(pwr) #help(pwr.anova.test) # assumes balance - all n_i = n pwr.anova.test(k = 4, n = NULL, f = .5, sig.level = .05, power = .8) # need n=12 for this power against this 'effect' # The effect size f has f^2 = average(tau1^2,..,tauk^2)/sigmasqd (so f = lambda/sqrt(N), where N = kn) pwr.anova.test(k = 4, n = 5, f = sqrt(12.64/20), sig.level = .05, power = NULL) # power = .76, as in text p. 68 pwr.anova.test(k = 4, n = NULL, f = .1, sig.level = .05, power = .95) ################################### # A robust test: ranks = rank(y) cbind(y, ranks) fit.rank = aov(ranks ~ operator) summary(fit.rank) kruskal.test(y, operator) ################################### # As a random effects model: MSTr = 0.44667 MSE = 0.10625 n = 5 N = 20 k = 4 sigmasqd.hat = MSE sigmasqd.tau.hat = (MSTr - MSE)/n S.ybar = sqrt(MSTr/N) sigmasqd.hat sigmasqd.tau.hat etahat = mean(y) ci.width = qt(.975, k-1)*S.ybar ci = c(etahat - ci.width, etahat + ci.width) ci