rm(list = ls()) # clear the memory ########################### X = as.matrix(read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T5-2.DAT")) n = nrow(X) p = ncol(X) colnames(X) = c("SS&H", "Vb", "Sc") xbar = colMeans(X) S = cov(X) S ######################################## # Test marginal normality. par(mfrow = c(2,2)) # A 2 by 2 panel of plots for(i in 1:3) { y = X[,i] v=qqnorm(y, ylab = colnames(X)[i]) text(0, .9*max(v$y), paste("p = ", round(shapiro.test(y)[[2]],3))) qqline(y) } # Trivariate normality: dsqd = vector(length = n) qsqd = vector(length = n) for(i in 1:n) { dsqd[i] = t(X[i,] - xbar)%*%solve(S,(X[i,] - xbar)) qsqd[i] = qchisq((i-.5)/n, p, lower.tail = T) } dsqd = sort(dsqd) qqplot(qsqd, dsqd, main = "Chisquare Q-Q Plot", xlab = "Chisquare quantiles", ylab = "sample quantiles") abline(0,1) text(6, max(qsqd-2), paste("corr = ", round(cor(qsqd,dsqd),3))) ######################################## # Bonferroni intervals on the three marginal means; 95% alpha = .05 m = 3 alpham = alpha/m int = matrix(nrow = m, ncol = 2, data = NA) for(i in 1:m) { psihat = xbar[i] sd_psihat = sqrt(S[i,i]/n) qT = qt(alpham/2, n-1, lower.tail = 0) int[i,] = c(psihat - qT*sd_psihat, psihat + qT*sd_psihat) } int_Bon = cbind(int[,1], xbar, int[,2]) colnames(int_Bon) = c("lower", "xbar", "upper") cat("Bonferroni intervals on the three marginal means","\n") int_Bon ######################################## # Simultaneous intervals on these AND ALL OTHERS alpha = .05 m = 3 int = matrix(nrow = m, ncol = 2, data = NA) for(i in 1:m) { I = diag(m) a = I[,i] # Any other vectors a could be used without lowering the overall confidence psihat = t(a)%*%xbar sd_psihat = sqrt(t(a)%*%S%*%a/n) qF = sqrt(((n-1)*p/(n-p))*qf(alpha, p, n-p, lower.tail = 0)) int[i,] = c(psihat - qF*sd_psihat, psihat + qF*sd_psihat) } int_Scheffe = cbind(int[,1], xbar, int[,2]) colnames(int_Scheffe) = c("lower", "xbar", "upper") int_Scheffe # Values in text not very accurate - lots of rounding ######################################## # 95% confidence ellipsoid on means of VB and Sc: X23 = X[, 2:3] n = nrow(X23) p = ncol(X23) alpha = .05 xbar23 = colMeans(X23) S23 = cov(X23) U = chol(S23) # S23 = U'U c = sqrt(((n-1)*p/(n-p))*qf(alpha, p, n-p, lower.tail = 0)) phi = 2*pi*seq(from = 0, to = 1, length = 201) # 2*pi*(0,1/200, 2/200, ... 199/200, 1) z = rbind(cos(phi), sin(phi)) # 201 columns, each of norm 1 mu = xbar23 + (c/sqrt(n))*t(U)%*%z dev.new() plot(mu[1,], mu[2,], xlab = "mean Vb", ylab = "mean Sc", type = 'l') ######################################## # Simultaneous intervals on these two means, and all other comparisons of these two means only; # should agree with the extremes of the confidence ellipsoid: alpha = .05 m = 2 int = matrix(nrow = m, ncol = 2, data = NA) for(i in 1:m) { I = diag(m) a = I[,i] psihat = t(a)%*%xbar23 sd_psihat = sqrt(t(a)%*%S23%*%a/n) qF = sqrt(((n-1)*p/(n-p))*qf(alpha, p, n-p, lower.tail = 0)) int[i,] = c(psihat - qF*sd_psihat, psihat + qF*sd_psihat) } int2_Scheffe = cbind(int[,1], xbar23, int[,2]) colnames(int2_Scheffe) = c("lower", "xbar", "upper") int2_Scheffe abline(v=int[1,]) # vertical lines at the endpoints of the interval on mean(Vb) abline(h=int[2,]) # horizontal lines at the endpoints of the interval on mean(Sc) extremes_of_ellipsoid = rbind(c(min(mu[1,]), max(mu[1,])),c(min(mu[2,]), max(mu[2,]))) extremes_of_ellipsoid ######################################## # 95% prediction region for a new (VB,Sc): c = sqrt(((n-1)*p/(n-p))*qf(alpha, p, n-p, lower.tail = 0)) c = c*sqrt((n+1)/n) phi = 2*pi*seq(from = 0, to = 1, length = 201) # 2*pi*(0,1/200, 2/200, ... 199/200, 1) z = rbind(cos(phi), sin(phi)) # 201 columns, each of norm 1 mu = xbar23 + (c/sqrt(n))*t(U)%*%z dev.new() plot(mu[1,], mu[2,], xlab = "Vb", ylab = "Sc", type = 'l') ## Superimpose the previous confidence ellipsoid on the means: c = sqrt(((n-1)*p/(n-p))*qf(alpha, p, n-p, lower.tail = 0)) phi = 2*pi*seq(from = 0, to = 1, length = 201) # 2*pi*(0,1/200, 2/200, ... 199/200, 1) mu = xbar23 + (c/sqrt(n))*t(U)%*%z #dev.new() lines(mu[1,], mu[2,], type = 'l') ######################################## ## Test mu = c(500, 50, 25) using Tsqd and the large sample test p = 3 mu0 = c(500, 50, 25) Tsqd = n*t(xbar-mu0)%*%solve(S,xbar-mu0) Fcalc = (n-p)*Tsqd/(p*(n-1)) pval = pf(Fcalc, p, n-p, lower.tail = 0) # prob(F > Fcalc) Lambda = (1+Tsqd/(n-1))^(-n/2) chisqd.calc = -2*log(Lambda) pval.approx = pchisq(chisqd.calc, p, lower.tail = 0) pval pval.approx