rm(list = ls())   # clear the memory
###########################
data = read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T11-5.DAT")
class = data[,5]
X = data[,1:4]
X = as.matrix(X)
colnames(X) = c("sep.length","sep.width", "pet.length","pet.width")

#################################################
# Plot petal and sepal length; all three classes
plot(X[(class==1),1], X[(class==1),3], pch=21, col = "black",
     xlab = "sepal length", ylab = "petal length",
     xlim = c(.9*min(X[,1]), 1.1*max(X[,1])),
     ylim = c(.9*min(X[,3]), 1.1*max(X[,3])))
points(X[(class==2),1], X[(class==2),3], pch=23, col = "red")
points(X[(class==3),1], X[(class==3),3], pch=25, col = "blue")
legend("topleft", legend = c("pi1: setosa", "pi2: versicolor", "pi3: virginica"),
       col = c("black", "red", "blue"), pch = c(21, 23, 25))

#################################################
# Reduce to only two groups (2 and 3)
dev.new()
X23 = X[51:150, c(1,3)]   # Eliminate the first class from the training sample
class23 = class[51:150]
plot(X23[(class23==2),1], X23[(class23==2),2], pch=23, col = "red",
     xlab = "sepal length", ylab = "petal length",
     xlim = c(.9*min(X23[,1]), 1.1*max(X23[,1])),
     ylim = c(.9*min(X23[,2]), 1.1*max(X23[,2])))
points(X23[(class23==3),1], X23[(class23==3),2], pch=25, col = "blue")
legend("topleft", legend = c("pi2: versicolor", "pi3: virginica"),
       col = c("red", "blue"), pch = c(23, 25))

sample2 = X23[(class23==2),]
sample3 = X23[(class23==3),]
x2bar = colMeans(sample2)
x3bar = colMeans(sample3)
S2 = cov(sample2)
S3 = cov(sample3)
Spool = (S2+S3)/2   # equal sample sizes, so the pooled covariance is the simple average
beta2 = solve(Spool, x2bar)
beta3 = solve(Spool, x3bar)
alpha2 = log(1/2) - .5*sum(beta2*x2bar)
alpha3 = log(1/2) - .5*sum(beta3*x3bar)
L2 = function(x) alpha2 + sum(beta2*x)
L3 = function(x) alpha3 + sum(beta3*x)
alpha32 = alpha3 - alpha2
beta32 = beta3 - beta2
a32 = -alpha32/beta32[2]
b32 = -beta32[1]/beta32[2]
abline(a32, b32, lty = 1)   # The line L3-L2=0 as a function of x1;
                            # this is the boundary between classes 3,2;
                            # x2 above this line -> class 3
a32 + b32*5   # = 4.24; x2 = 4 is below this, so (6,4) -> class 2
title(main = "Linear (solid line) and logistic (broken line) \n discriminators between groups 2 and 3")

# Classifier:
belong = function(x) {
  vec = c(L2(x), L3(x))
  which.max(vec) + 1
}
belong(c(6,4))

#################################################
# Use the built-in R function; see that it produces identical results
# install and load the MASS library
library(MASS)
lin = lda(X23, class23, prior = c(1,1)/2)   # Use equal priors for comparison with the above

## Compare classifications of original data
pred1 = as.numeric(predict(lin, X23)$class) + 1
pred2 = vector(length=100)
for(i in 1:100) pred2[i] = belong(X23[i,])

# Quadratic discrimination
quad = qda(X23, class23, prior = c(1,1)/2)
pred3 = as.numeric(predict(quad, X23)$class) + 1
pred1-pred2
pred2-pred3

#################################################
# Logistic classification
X23 = as.matrix(X23)
class.01 = class23 - 2
fit = glm(class.01 ~ X23, family = binomial())
a = fit$coef[1]
b1 = fit$coef[2]
b2 = fit$coef[3]
A = -a/b2
B = -b1/b2
abline(A, B, lty = 2)   # add the discriminating line to the current plot
pred4 = predict(fit)    # linear predictor, evaluated on the training data
pred4 = (pred4 > 0) + 2

#################################################
confusion.matrix = function(class, pred) {
  J = length(unique(class))
  mat = matrix(nrow = J, ncol = J)
  for(j in 1:J) {
    for(k in 1:J) {
      mat[j,k] = sum((class == j)*(pred == k))
    }}
  error.rate = 1 - sum(diag(mat))/sum(mat)
  list(mat = mat, rate = error.rate)
}
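## Aside: for labels coded 1,...,J the same counts can be obtained with base
## R's table(); confusion.matrix() is used below because it also returns the
## apparent error rate (APER).
table(class23 - 1, pred1 - 1)   # should match confuse.lin$mat computed below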
confuse.lin = confusion.matrix(class23-1, pred1-1)
confuse.quad = confusion.matrix(class23-1, pred3-1)
confuse.logistic = confusion.matrix(class23-1, pred4-1)
confuse.lin
confuse.quad
confuse.logistic

#################################################
#################################################
# Use the complete data set
# Requires my own multilogistic() function when there are more than 2 classes
source("http://www.stat.ualberta.ca/~wiens/stat575/R%20scripts/multilogistic.R")

# Test on training set
# Here the priors are estimated (the default with lda and qda)
lin = lda(X, class)
pred.lin = as.numeric(predict(lin, X)$class)
confuse.lin = confusion.matrix(class, pred.lin)
cat(paste("Linear discrimination: APER =", round(confuse.lin$rate,3),
          "Confusion matrix is"), "\n")
print(confuse.lin$mat)

quad = qda(X, class)
pred.quad = as.numeric(predict(quad, X)$class)
confuse.quad = confusion.matrix(class, pred.quad)
cat(paste("Quadratic discrimination: APER =", round(confuse.quad$rate,3),
          "Confusion matrix is"), "\n")
print(confuse.quad$mat)

logitfit = multilogistic(X, class, scoring = F, newdata = NULL, tol = 1e-4)
pred.logitfit = logitfit$pred.class
confuse.logit = confusion.matrix(class, pred.logitfit)
cat(paste("Logistic discrimination: APER =", round(confuse.logit$rate,3),
          "Confusion matrix is"), "\n")
print(confuse.logit$mat)

##################################################
# Holdout procedure:
pred.holdout.lin = vector(length = nrow(X))
pred.holdout.quad = vector(length = nrow(X))
for (i in 1:nrow(X)) {
  lin2 = lda(X[-i,], class[-i])
  quad2 = qda(X[-i,], class[-i])
  pred.holdout.lin[i] = as.numeric(predict(lin2, X[i,])$class)
  pred.holdout.quad[i] = as.numeric(predict(quad2, X[i,])$class)
}
confuse.holdout.lin = confusion.matrix(class, pred.holdout.lin)
confuse.holdout.quad = confusion.matrix(class, pred.holdout.quad)
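## Aside: MASS can produce the leave-one-out ("holdout") classifications
## directly through the CV argument of lda() and qda().  The labels should
## agree with the loop above, possibly up to small differences because the
## loop re-estimates the prior proportions within each fold.
lin.cv  = lda(X, class, CV = TRUE)    # lin.cv, quad.cv are names used only for this check
quad.cv = qda(X, class, CV = TRUE)
table(pred.holdout.lin,  as.numeric(lin.cv$class))
table(pred.holdout.quad, as.numeric(quad.cv$class))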
## Holdout procedure using multilogistic(X, class, newdata, tol):
## Uncomment these next lines to do logistic holdout; takes a long time.
#pred.holdout.logit = vector(length = nrow(X))
#for (i in 1:nrow(X)) {
#  new = multilogistic(X[-i,], class[-i], scoring = F, newdata = rbind(X[i,]), tol = 1e-4)$pred.new
#  cat("i = ", i, "pred.new = ", new, "\n")
#  pred.holdout.logit[i] = new
#  }
#confuse.holdout.logit = confusion.matrix(class, pred.holdout.logit)

cat(paste("Linear discrimination: estimated error rate =", round(confuse.holdout.lin$rate,3),
          "Confusion matrix is"), "\n")
print(confuse.holdout.lin$mat)
cat(paste("Quadratic discrimination: estimated error rate =", round(confuse.holdout.quad$rate,3),
          "Confusion matrix is"), "\n")
print(confuse.holdout.quad$mat)
#cat(paste("Logistic discrimination: estimated error rate =", round(confuse.holdout.logit$rate,3),
#          "Confusion matrix is"), "\n")
#print(confuse.holdout.logit$mat)

##################################################
##################################################
rm(list = ls())   # clear the memory
data = read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T11-5.DAT")
class = data[,5]
X = data[,1:4]
X = as.matrix(X)
colnames(X) = c("sep.length","sep.width", "pet.length","pet.width")

# Fisher classification; go through the calculations step-by-step:
n = nrow(X)
J = length(unique(class))
sample1 = X[(class==1),]
sample2 = X[(class==2),]
sample3 = X[(class==3),]
x1bar = colMeans(sample1)
x2bar = colMeans(sample2)
x3bar = colMeans(sample3)
B = (J-1)*cov(rbind(x1bar, x2bar, x3bar))   # between-groups matrix of the group means
S1 = cov(sample1)
S2 = cov(sample2)
S3 = cov(sample3)
W = (n-J)*(S1+S2+S3)/3   # within-groups SSP matrix; equals 49*(S1+S2+S3) with 50 observations per group

# Compute W^(-1/2):
eig = eigen(W)
Q = eig$vectors
D = diag(eig$values)
W.neg.root = Q%*%diag(1/sqrt(eig$values))%*%t(Q)
# check: W.neg.root%*%W%*%W.neg.root

M = W.neg.root%*%B%*%W.neg.root
eig2 = eigen(M)
E = eig2$vectors
lambda = eig2$values
Lambda = diag(lambda)
# 2 nonzero eigenvalues: s = 2 = min(J-1, p) = min(2, 4).
A0 = W.neg.root%*%E[,c(1,2)]   # First two sample discriminants
lambda[1]/(lambda[1] + lambda[2])   # .99; no need to go beyond the first sample discriminant.
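## Quick check on the construction: since the columns of E are orthonormal and
## W.neg.root %*% W %*% W.neg.root = I, the discriminants in A0 should satisfy
## t(A0) %*% W %*% A0 = I and t(A0) %*% B %*% A0 = diag(lambda[1:2]),
## up to rounding error.
round(t(A0)%*%W%*%A0, 3)
round(t(A0)%*%B%*%A0, 3)
round(lambda[1:2], 3)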
dev.new()
par(mfrow=c(2, 2))

#############################################################
## First use only one sample discriminant, computed above
# Compute L1 on the training set
L1 = X%*%A0[,1]
L1.1 = x1bar%*%A0[,1]
L1.2 = x2bar%*%A0[,1]
L1.3 = x3bar%*%A0[,1]
plot(x = L1[(class==1)], y = rep(0,50), pch=21, col = "black",
     xlab = "first discriminant", yaxt = "n",
     xlim = c(1.1*min(L1), 1.1*max(L1)), ylab = "", ylim = c(-.3,.3),
     main = "First sample discriminant; \n lines at discriminant means")
legend(-.25, -.2, legend = c("pi1: setosa", "pi2: versicolor", "pi3: virginica"),
       yjust = .5, col = c("black", "red", "blue"), pch = c(21, 23, 25))
points(x = L1[(class==2)], y = rep(0,50), pch=23, col = "red")
points(x = L1[(class==3)], y = rep(0,50), pch=25, col = "blue")
lines(rbind(c(L1.1,-.1), c(L1.1, .1)), col = "black")
lines(rbind(c(L1.2,-.1), c(L1.2, .1)), col = "red")
lines(rbind(c(L1.3,-.1), c(L1.3, .1)), col = "blue")

## Use two discriminants
L2 = X%*%A0
L2.1 = x1bar%*%A0
L2.2 = x2bar%*%A0
L2.3 = x3bar%*%A0
plot(L2[(class==1),], pch=21, col = "black",
     xlim = c(1.1*min(L2[,1]), 1.1*max(L2[,1])),
     ylim = c(1.1*min(L2[,2]), 1.1*max(L2[,2])),
     ylab = "second discriminant", xlab = "first discriminant",
     main = "Both sample discriminants; \n discriminant means indicated")
points(L2[(class==2),], pch=23, col = "red")
points(L2[(class==3),], pch=25, col = "blue")
points(L2.1[1], L2.1[2], pch='X', col = "black")
points(L2.2[1], L2.2[2], pch='Y', col = "black")
points(L2.3[1], L2.3[2], pch='Z', col = "black")

#############################################################
# Repeat, using
lin1 = lda(X, class)
A = -lin1$scaling   # columns are the coefficients of the two sample discriminants

#############################################################
## First use only one sample discriminant, computed above
# Compute L1 on the training set
L1 = X%*%A[,1]
plot(x = L1[(class==1)], y = rep(0,50), pch=21, col = "black",
     xlab = "first discriminant", yaxt = "n",
     xlim = c(1.1*min(L1), 1.1*max(L1)), ylab = "", ylim = c(-.3,.3),
     main = "First sample discriminant, \n using lda output (times -1)")
legend(-.25, -.2, legend = c("pi1: setosa", "pi2: versicolor", "pi3: virginica"),
       yjust = .5, col = c("black", "red", "blue"), pch = c(21, 23, 25))
points(x = L1[(class==2)], y = rep(0,50), pch=23, col = "red")
points(x = L1[(class==3)], y = rep(0,50), pch=25, col = "blue")

## Use two discriminants
L2 = X%*%A
plot(L2[(class==1),], pch=21, col = "black",
     xlim = c(1.1*min(L2[,1]), 1.1*max(L2[,1])),
     ylim = c(1.1*min(L2[,2]), 1.1*max(L2[,2])),
     ylab = "second discriminant", xlab = "first discriminant",
     main = "Both sample discriminants, \n using lda output (times -1)")
points(L2[(class==2),], pch=23, col = "red")
points(L2[(class==3),], pch=25, col = "blue")

#############################################################
cat("Sample discriminants calculated above are", "\n")
round(A0, 2)
cat("Sample discriminants returned by lda are", "\n")
round(lin1$scaling, 2)
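## A final check: lda() normalizes its discriminants so that the within-group
## covariance of the scores is spherical, so its coefficients and those in A0
## should agree only up to a sign flip and a rescaling of each column; the
## element-wise ratios below should therefore be (nearly) constant within each
## column.
round(A[,1]/A0[,1], 3)
round(A[,2]/A0[,2], 3)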