explode = ts(scan("http://www.stat.ualberta.ca/~wiens/stat479/S&Sdatasets/eqexp.dat")) explode = ts(scan("eqexp.dat")) data.combined = ts(matrix(explode, nrow = length(explode)/17, ncol = 17), frequency = 40) colnames(data.combined) = c(paste("eq", 1:8, sep=""),paste("ex", 1:8, sep=""), "NZ") ## Seventeen columns - 8 earthquakes, 8 explosions, 1 event of unknown origin ## Each column contains the 'P' and 'S' signals ## Split each column into these two signals: data.P = ts(data.combined[1:(nrow(data.combined)/2), ], frequency = 40) data.S = ts(data.combined[(nrow(data.combined)/2+1):(nrow(data.combined)), ], frequency = 40) library(cluster) ############ Time domain hierarchical clustering ################### # Clustering on the basis of P and S m.P = vector(length = ncol(data.P)) m.S = vector(length = ncol(data.S)) for(j in 1:ncol(data.P)) { m.P[j] = log10(max(data.P[ ,j]) - min(data.P[ ,j])) m.S[j] = log10(max(data.S[ ,j]) - min(data.S[ ,j])) } eq.P = m.P[1:8] ex.P = m.P[9:16] NZ.P = m.P[17] eq.S = m.S[1:8] ex.S = m.S[9:16] NZ.S = m.S[17] ## Display the 16 bivariate observations (8 from each population) and the new observation: trainingdata = cbind(eq.P, eq.S, ex.P, ex.S) newdata = cbind(NZ.P, NZ.S) PSdata = rbind(trainingdata[,1:2],trainingdata[,3:4], newdata) colnames(PSdata) = c("P", "S") plot(PSdata[,1], PSdata[,2], pch="", xlab="P", ylab = "S") text(PSdata[,1], PSdata[,2], labels = colnames(data.combined)) win.graph() cluster1 = agnes(PSdata, method = "average") pltree(cluster1, labels = colnames(data.combined)) ############ Frequency domain hierarchical clustering ################### ############ Set some options: ############ L = 5 scaling = T if(scaling == T) { for (j in 1:ncol(data.P)) data.P[,j] = data.P[,j]/(max(data.P[,j])-min(data.P[,j])) for (j in 1:ncol(data.S)) data.S[,j] = data.S[,j]/(max(data.S[,j])-min(data.S[,j])) } ######################################## data.eq.P = data.P[,1:8] data.ex.P = data.P[,9:16] data.NZ.P = data.P[,17] data.eq.S = data.S[,1:8] data.ex.S = data.S[,9:16] data.NZ.S = data.S[,17] # Compute all 17 spectral density matrices SPEC = array(dim = c(17, 2, 2, 512)) dimnames(SPEC) = list(colnames(data.combined), NULL, NULL, NULL) for(s in 1:17) { if(s <= 8 ) data = cbind(data.eq.P[,s], data.eq.S[,s]) if(s > 8 & s <= 16) data = cbind(data.ex.P[,s-8], data.ex.S[,s-8]) if (s == 17) data = cbind(data.NZ.P, data.NZ.S) # SPEC[s,i,j,k] is the spectrum between the i-th and j-th series in dataset 's', at frequency 2k/n': for(i in 1:ncol(data)) { for (j in i:ncol(data)) { power = spec.pgram(data[,c(i,j)], kernel("daniell",(L-1)/2), plot = F) SPEC[s,i,i, ] = power$spec[,1] SPEC[s,j,j, ] = power$spec[,2] coh.ij = power$coh phase.ij = power$phase SPEC[s,i,j, ] = sqrt(coh.ij*power$spec[,1]*power$spec[,2])*exp(1i*phase.ij) SPEC[s,j,i, ] = Conj(SPEC[s,i,j, ]) }} } # Compute all pairs of divergences J(fi, fj): J = matrix(0, nrow = 17, ncol = 17) dimnames(J) = list(colnames(data.combined), colnames(data.combined)) for (i in 1:16) { for (j in (i+1):17) { tr = vector(length = 512) for (k in 1:512) { tr[k] = sum(diag(solve(SPEC[i,,,k],SPEC[j,,,k]))) + sum(diag(solve(SPEC[j,,,k],SPEC[i,,,k]))) } J[i,j] = sum(tr) }} J = Re(J + t(J))/1024 round(J, 2) # Determine the position of the minimum divergence K = J for (i in 1:17) { for (j in 1:17) { if (j >= i) K[j,i] = 1/0}} minK = which(K == min(K)) mincol = 1 + floor(minK/nrow(K)) minrow = minK - nrow(K)*floor(minK/nrow(K)) minposition = c(minrow, mincol) cat("Closest pair are",colnames(data.combined)[minposition],"\n") # EQ 3 and EQ8 are the closest and form the first cluster # Now use the "cluster" library of contributed functions win.graph() cluster2 = agnes(J, diss = T, method = "average") # Clusters on the basis of the measures in the dissimilarity matrix J; # distances between clusters are the average distances between members # Use method = "single" for nearest neighbour distances summary(cluster2) pltree(cluster2, labels = colnames(data.combined)) ############ Frequency domain partitioned clustering ################### win.graph() cluster3 = pam(J, k = 2, diss = T) summary(cluster3) plot(cluster3) win.graph() cluster4 = pam(J, k = 3, diss = T) summary(cluster4) plot(cluster4, labels = 3) ############ Time domain partitioned clustering ################### rownames(PSdata) = colnames(data.combined) win.graph() cluster5 = pam(PSdata, k = 2, diss = F) summary(cluster5) plot(cluster5, labels = 2)