explode = ts(scan("http://www.stat.ualberta.ca/~wiens/stat479/S&Sdatasets/eqexp.dat")) explode = ts(scan("eqexp.dat")) data.combined = ts(matrix(explode, nrow = length(explode)/17, ncol = 17), frequency = 40) colnames(data.combined) = c(paste("eq", 1:8, sep=""),paste("ex", 1:8, sep=""), "NZ") ## Seventeen columns - 8 earthquakes, 8 explosions, 1 event of unknown origin ## Each column contains the 'P' and 'S' signals ## Split each column into these two signals: data.P = ts(data.combined[1:(nrow(data.combined)/2), ], frequency = 40) data.S = ts(data.combined[(nrow(data.combined)/2+1):(nrow(data.combined)), ], frequency = 40) data.eq.P = data.P[,1:8] data.ex.P = data.P[,9:16] data.NZ.P = data.P[,17] data.eq.S = data.S[,1:8] data.ex.S = data.S[,9:16] data.NZ.S = data.S[,17] # Plot events 5, 6 and NZ: par(mfcol = c(5,2)) mat.56 = cbind(data.eq.P[,5:6], data.ex.P[,5:6], data.NZ.P, data.eq.S[,5:6], data.ex.S[,5:6], data.NZ.S) colnames(mat.56) = c("eq5", "eq6", "ex5", "ex6", "nz","eq5", "eq6", "ex5", "ex6", "nz") plot(mat.56, main="") title(main = "\n\nP (left) and S (right) components \nof earthquakes 5&6, explosions 5&6 and NZ event", outer = T) # Plot P followed by S for Earthquake 6 and Explosion 6: eq6.P = data.P[,6] eq6.S = data.S[,6] eq6.both = ts(c(eq6.P, eq6.S)) ex6.P = data.P[,14] ex6.S = data.S[,14] ex6.both = ts(c(ex6.P, ex6.S)) win.graph() par(mfrow=c(2,1)) plot(eq6.both, ylab = "EQ6") abline(v=length(eq6.P)) plot(ex6.both, ylab = "EX6") abline(v=length(ex6.P)) title(main = "\nP phase followed by S phase for EQ6 and EX6", outer = T) ## Prepare data for time domain discrimination, based on "maximum peak to peak amplitudes" m.P = vector(length = ncol(data.P)) m.S = vector(length = ncol(data.S)) for(j in 1:ncol(data.P)) { m.P[j] = log10(max(data.P[ ,j]) - min(data.P[ ,j])) m.S[j] = log10(max(data.S[ ,j]) - min(data.S[ ,j])) } eq.P = m.P[1:8] ex.P = m.P[9:16] NZ.P = m.P[17] eq.S = m.S[1:8] ex.S = m.S[9:16] NZ.S = m.S[17] ## Display the 16 bivariate observations (8 from each population) and the new observation: trainingdata = cbind(eq.P, eq.S, ex.P, ex.S) colnames(trainingdata) = c("eq.P", "eq.S", "ex.P", "ex.S") newdata = cbind(NZ.P, NZ.S) colnames(newdata) = c("NZ.P", "NZ.S") means = apply(trainingdata, 2, "mean") names(means) = colnames(trainingdata) xbar.eq = means[1:2] xbar.ex = means[3:4] ## Summaries: print(round(trainingdata, 2)) print(round(newdata, 2)) print(round(xbar.eq, 2)) print(round(xbar.ex, 2)) cov.eq = var(cbind(eq.P,eq.S)) cov.ex = var(cbind(ex.P, ex.S)) cov.pooled = (cov.ex + cov.eq)/2 print(round(cov.eq, 4)) print(round(cov.ex, 4)) print(round(cov.pooled, 4)) ## Compute the linear discriminant function: slopes.eq = solve(cov.pooled, xbar.eq) intercept.eq = -sum(slopes.eq*xbar.eq)/2 cat(slopes.eq, intercept.eq, "\n") slopes.ex = solve(cov.pooled, xbar.ex) intercept.ex = -sum(slopes.ex*xbar.ex)/2 cat(slopes.ex, intercept.ex, "\n") slopes = slopes.eq - slopes.ex intercept = intercept.eq - intercept.ex cat(slopes, intercept, "\n") cat("Discriminant function is", slopes[1], "*x_P +", slopes[2], "*x_S +", intercept,"\n") ## Classify new observation: d = sum(slopes*newdata) + intercept posterior.prob.eq = exp(d)/(1+exp(d)) cat("d(x) =", d, "; posterior P(EQ|data) =", posterior.prob.eq, "\n" ) win.graph() plot(x = trainingdata[,1], y = trainingdata[,2], xlim = c(3,6), ylim = c(3,6), xlab = "log range(P)", ylab = "log range(S)", type = "p", pch = "*") points(x = trainingdata[,3], y = trainingdata[,4], pch = "+") points(newdata, pch = "X") abline(a = -intercept/slopes[2], b = -slopes[1]/slopes[2]) title(main = "\n\nData and partition formed from linear discriminant d(x).\n * = EQ, + = EX, X = NZ", outer = T) ################# Cross-validation ################# sample = rbind(trainingdata[,1:2],trainingdata[,3:4]) rownames(sample) = c(paste("eq", 1:8, sep = ""), paste("ex", 1:8, sep = "")) colnames(sample) = c("P", "S") sample Posterior.eq = vector(length = 8) Posterior.ex = vector(length = 8) for(j in 1:17) { if(j<=8) { sample.eq = sample[-c(j, 9:16),] sample.ex = sample[9:16,] } if(j>8 & j<=16){ sample.eq = sample[1:8,] sample.ex = sample[-c(j, 1:8),] } if(j == 17) { sample.eq = sample[1:8,] sample.ex = sample[9:16,] } # Compute the discriminant function; evaluate at the hold out: xbar.eq = colMeans(sample.eq) xbar.ex = colMeans(sample.ex) cov.eq = var(sample.eq) cov.ex = var(sample.ex) cov.pooled = ((nrow(sample.ex)-1)*cov.ex + (nrow(sample.eq)-1)*cov.eq)/(nrow(sample.ex)+nrow(sample.eq)-2) slopes.eq = solve(cov.pooled, xbar.eq) intercept.eq = -sum(slopes.eq*xbar.eq)/2 slopes.ex = solve(cov.pooled, xbar.ex) intercept.ex = -sum(slopes.ex*xbar.ex)/2 slopes = slopes.eq - slopes.ex intercept = intercept.eq - intercept.ex if(j<=16) d = sum(slopes*sample[j,]) + intercept if(j==17) d = sum(slopes*newdata) + intercept posterior.eq = exp(d)/(1+exp(d)) posterior.ex = 1 - posterior.eq if(j<=8) Posterior.eq[j] = posterior.eq if(j>8 & j<=16) Posterior.ex[j-8] = posterior.ex if(j == 17) Posterior.NZ.eq = posterior.eq } Posterior = cbind(1:8, Posterior.eq, 1:8, Posterior.ex) colnames(Posterior) = c("EQ", "P(EQ|data)", "EX", "P(EX|data)") round(Posterior,3) Posterior.NZ.eq # A useful check