# Single factor experiment and analysis
rm(list = ls())  # clear the memory
#help(read.table)
pulp = read.table("http://www.stat.ualberta.ca/~wiens/stat568/datasets/pulp_Table_2_1.dat", header = T, quote= "")
pulp


y = c(as.matrix(pulp))
operator = as.factor(c(rep("A", 5), rep("B", 5),rep("C", 5),rep("D", 5)))
data.pulp = data.frame(y, operator)


par(mfrow = c(1,2))
colMeans(pulp)
round(sqrt(apply(pulp, 2, var)),2)  # Apply the 'variance' function to each column of 'pulp'
stripchart(y~operator, vertical = T, ylab = "reflectance")
boxplot(y ~ operator,data=data.pulp)


fit = aov(y ~ operator)
summary(fit)

print(model.tables(fit,"means"),digits=3)       #report the means (or effects) and the number of subjects/cell
print(model.tables(fit,"effects"),digits=3)
 

win.graph()
par(mfrow = c(1,2))
resid = fit$resid
qqnorm(resid)
qqline(resid)
text(0, .9*max(resid), paste("p = ", round(shapiro.test(resid)[[2]],3)))
plot(fit$fitted,fit$resid, xlab = "fits", ylab = "resids")

# Tukey Honestly Significant Differences
TukeyHSD(fit) 

# What is the adj p-value?
out = TukeyHSD(fit)
differences = out$operator[,1] # differences between the means
#Divide each absolute difference by sigmahat/n (n=5=size of each group)
sigmahat = sqrt(sum(fit$resid^2)/fit$df.resid)
sigmahat
t = abs(differences)/(sigmahat/sqrt(5))
adj.p = ptukey(t,4,16,lower.tail=F)
# compare these with the values produced by TukeyHSD(fit):
cbind(adj.p,out$operator[,4]) 
# A simultaneous confidence interval on the true difference, with
# confidence 1 - alpha, where alpha = adj.p, will have
# one endpoint = 0:
lower = differences -(sigmahat/sqrt(5))* qtukey(adj.p,4,16,lower.tail=F)
upper = differences +(sigmahat/sqrt(5))* qtukey(adj.p,4,16,lower.tail=F)
round(cbind(lower, differences, upper),4)

#install and load the 'pwr' package
# See http://www.statmethods.net/stats/power.html
library(pwr)
#help(pwr.anova.test)
# assumes balance - all n_i = n
pwr.anova.test(k = 4, n = NULL, f = .5, sig.level = .05, power = .8) # need n=12 for this power against this 'effect'
# The effect size f has f^2 = average(tau1^2,..,tauk^2)/sigmasqd (so f = lambda/sqrt(N), where N = kn)
pwr.anova.test(k = 4, n = 5, f = sqrt(12.64/20), sig.level = .05, power = NULL) # power = .76, as in text p. 68

pwr.anova.test(k = 4, n = NULL, f = .1, sig.level = .05, power = .95)

###################################
# A robust test:
ranks = rank(y)
cbind(y, ranks)
fit.rank = aov(ranks ~ operator)
summary(fit.rank)

kruskal.test(y, operator)

###################################
# As a random effects model:
MSTr = 0.44667
MSE = 0.10625  
n = 5
N = 20
k = 4
sigmasqd.hat = MSE
sigmasqd.tau.hat = (MSTr - MSE)/n
S.ybar = sqrt(MSTr/N)

sigmasqd.hat
sigmasqd.tau.hat

etahat = mean(y)
ci.width = qt(.975, k-1)*S.ybar
ci = c(etahat - ci.width, etahat + ci.width)
ci