explode = ts(scan("http://www.stat.ualberta.ca/~wiens/stat479/S&Sdatasets/eqexp.dat"))
explode = ts(scan("eqexp.dat"))

data.combined = ts(matrix(explode, nrow = length(explode)/17, ncol = 17), frequency = 40)
colnames(data.combined) = c(paste("eq", 1:8, sep=""),paste("ex", 1:8, sep=""), "NZ")
	## Seventeen columns - 8 earthquakes, 8 explosions, 1 event of unknown origin
	## Each column contains the 'P' and 'S' signals
	## Split each column into these two signals:
data.P = ts(data.combined[1:(nrow(data.combined)/2), ], frequency = 40)
data.S = ts(data.combined[(nrow(data.combined)/2+1):(nrow(data.combined)), ], frequency = 40)
library(cluster)

############  Time domain hierarchical clustering  ###################
# Clustering on the basis of P and S

m.P = vector(length = ncol(data.P))
m.S = vector(length = ncol(data.S))
for(j in 1:ncol(data.P)) {
	m.P[j] = log10(max(data.P[ ,j]) - min(data.P[ ,j]))
	m.S[j] = log10(max(data.S[ ,j]) - min(data.S[ ,j]))
	}

eq.P = m.P[1:8]
ex.P = m.P[9:16]
NZ.P = m.P[17]

eq.S = m.S[1:8]
ex.S = m.S[9:16]
NZ.S = m.S[17]

## Display the 16 bivariate observations (8 from each population) and the new observation:
trainingdata = cbind(eq.P, eq.S, ex.P, ex.S)
newdata = cbind(NZ.P, NZ.S)
PSdata = rbind(trainingdata[,1:2],trainingdata[,3:4], newdata)
colnames(PSdata) = c("P", "S")

plot(PSdata[,1], PSdata[,2], pch="", xlab="P", ylab = "S")
text(PSdata[,1], PSdata[,2], labels = colnames(data.combined))

win.graph()

cluster1 = agnes(PSdata, method = "average")
pltree(cluster1, labels = colnames(data.combined))


############  Frequency domain hierarchical clustering  ###################

############  Set some options:  ############  
L = 5
scaling = T

if(scaling == T) {
for (j in 1:ncol(data.P)) data.P[,j] = data.P[,j]/(max(data.P[,j])-min(data.P[,j]))
for (j in 1:ncol(data.S)) data.S[,j] = data.S[,j]/(max(data.S[,j])-min(data.S[,j]))
}

########################################   

data.eq.P = data.P[,1:8]
data.ex.P = data.P[,9:16]
data.NZ.P = data.P[,17]
data.eq.S = data.S[,1:8]
data.ex.S = data.S[,9:16]
data.NZ.S = data.S[,17]

# Compute all 17 spectral density matrices

SPEC = array(dim = c(17, 2, 2, 512)) 
dimnames(SPEC) = list(colnames(data.combined), NULL, NULL, NULL)
for(s in 1:17) {
	if(s <= 8 ) data = cbind(data.eq.P[,s], data.eq.S[,s])
	if(s > 8 & s <= 16) data = cbind(data.ex.P[,s-8], data.ex.S[,s-8])
	if (s == 17) data = cbind(data.NZ.P, data.NZ.S)
	# SPEC[s,i,j,k] is the spectrum between the i-th and j-th series in dataset 's', at frequency 2k/n':

	for(i in 1:ncol(data)) { 
	for (j in i:ncol(data)) {
		power = spec.pgram(data[,c(i,j)], kernel("daniell",(L-1)/2), plot = F)
		SPEC[s,i,i, ] = power$spec[,1]
		SPEC[s,j,j, ] = power$spec[,2]
		coh.ij = power$coh 
		phase.ij = power$phase 
		SPEC[s,i,j, ] = sqrt(coh.ij*power$spec[,1]*power$spec[,2])*exp(1i*phase.ij)
		SPEC[s,j,i, ] = Conj(SPEC[s,i,j, ])
		}}
}


# Compute all pairs of divergences J(fi, fj):

J = matrix(0, nrow = 17, ncol = 17)
dimnames(J) = list(colnames(data.combined), colnames(data.combined))
for (i in 1:16) { for (j in (i+1):17) {
	tr = vector(length = 512)
	for (k in 1:512) {
		tr[k] = sum(diag(solve(SPEC[i,,,k],SPEC[j,,,k]))) + 
				sum(diag(solve(SPEC[j,,,k],SPEC[i,,,k])))
	}
J[i,j] = sum(tr)
}}
J = Re(J + t(J))/1024
round(J, 2)

# Determine the position of the minimum divergence
K = J
for (i in 1:17) { for (j in 1:17) { if (j >= i) K[j,i] = 1/0}}

minK = which(K == min(K))
mincol =  1 + floor(minK/nrow(K))
minrow = minK - nrow(K)*floor(minK/nrow(K))
minposition = c(minrow, mincol)  
cat("Closest pair are",colnames(data.combined)[minposition],"\n")

# EQ 3 and EQ8 are the closest and form the first cluster
# Now use the "cluster" library of contributed functions 


win.graph()
cluster2 = agnes(J, diss = T, method = "average")  
 # Clusters on the basis of the measures in the dissimilarity matrix J;  
 # distances between clusters are the average distances between members
 # Use method = "single" for nearest neighbour distances
summary(cluster2)
pltree(cluster2, labels = colnames(data.combined))

############  Frequency domain partitioned clustering  ###################

win.graph()
cluster3 = pam(J, k = 2, diss = T)  
summary(cluster3)
plot(cluster3)

win.graph()
cluster4 = pam(J, k = 3, diss = T)  
summary(cluster4)
plot(cluster4, labels = 3)

############  Time domain partitioned clustering  ###################

rownames(PSdata) = colnames(data.combined)
win.graph()
cluster5 = pam(PSdata, k = 2, diss = F)  
summary(cluster5)
plot(cluster5, labels = 2)