rm(list = ls())  # clear the memory
###########################

# This is a brief introduction to R
# Open R, and then work through these commands

#   '#'  = comment
#   '<-' or '=' = affect; defines the left hand side to be the right hand side
#   '>'  = prompt of R

# basic operations - enter these; see what you get: 

7+5

7-5

7*5

7/5

7/0

0/0

# use the function c() for creating a vector 
x = c(1,2,3) 
x  # response: [1] 1 2 3
x = 1:3 # Gives the same thing
x

x = c(1,2,3) + c(2,3,4) 
x # response: [1] 3 5 7

c(1,2,3)*c(2,3,4) # response: [1]  2  6 12

c(1,2,3)*2 # response: [1] 2 4 6

c(1,2,3)/2 # response: [1] 0.5 1.0 1.5

# create a vector, called 'height', containing 10 values
height <- c(72,70,71,66,73,66,72,72,74,75)
height # response: [1] 72 70 71 66 73 66 72 72 74 75

mean(height)              # compute sample average

median(height)            # compute sample median

mean(height, trim=0.1)    # trimmed mean.

var(height)               # variance
sum((height - mean(height))^2)/(length(height)-1)
				# should give the same thing - why?

sqrt(var(height))         # standard deviation

height[2]                 # the second element of the vector

       
# generate a vector 
rep(2,3)               # a vector of 2's of length 3: [1] 2 2 2

seq(1,10, length=21)   # a vector of length 21; equally spaced entries spanning [1,10]


##############################################
# Go to http://www.stat.ualberta.ca/~wiens/stat575/stat575.html and download the data sets which are there.
# Then to use these files without going to the web, replace lines like
# read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T1-3.dat") 
# with lines leading to where the data sets are on YOUR system. (Note that FORWARD slashes are required.)
# You can specify the directory from which R starts - right click on the R icon and go to 'Properties'.
help(read.table)

lizard = as.matrix(read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T1-3.dat"))
lizard

# You can specify the directory from which R starts - right click on the R icon and go to 'Properties'.

colnames(lizard) = c("mass", "svl", "hls")
colMeans(lizard) # Means of the three individual variables
S = cov(lizard)  # Covariance matrix S
S

apply(lizard, 2, var)  # Apply the 'variance' function to each column of 'lizard'
			     # Compare with the diagonal of S

# Plotting:

pairs(lizard)

# For a more sophisticated plot, click on Packages on the top menu of the screen, 
# choose a mirror (e.g. in BC) and install the "cars" package - it contains the scatterplotMatrix function
library(car)
dev.new()
scatterplotMatrix(lizard, diagonal = "boxplot", ellipse = TRUE)



U = chol(S)  	# Upper triangular Cholesky factor
U
S - t(U)%*%U	# Check that S=U'U
E = diag(1/sqrt(diag(S))) # 1/standard deviations
R = E%*%S%*%E  # Correlation matrix

det(S)  # determinant
eig = eigen(S)
V = eig$vectors
D = diag(eig$values)
# Check that S=VDV' with V orthogonal:
S - V%*%D%*%t(V) 
round(t(V)%*%V,12)

pc = lizard%*%V
pc    # Contains the same info as X itself, but these columns are uncorrelated:
Spc = round(cov(pc),10)
Spc

D  # The vectors in pc are uncorrelated linear combinations of those in lizard, with the same total variance
sum(diag(S))
sum(diag(D))

pc1 = -pc[,1]
var(pc1)
var(pc1)/sum(D)

v = -V[,1]
v  # coefficients of lin. comb. explaining almost all (97%) of the variance.
dev.new()
pairs(cbind(lizard, pc1))

# Normality check:
dev.new()
y=lizard[,1]
qqnorm(y)
qqline(y)

# More sophisticated plotting:
dev.new()
par(mfrow = c(1,3))  # A 1 by 3 panel of plots
for(i in 1:3) {
y = lizard[,i]
v=qqnorm(y, ylab = colnames(lizard)[i])
text(0, max(v$y-2), paste("corr = ", round(cor(v$x,v$y),3)))
qqline(y)
}

##############################################
# Using 'scripts':  What you are reading right now is an example of a 'script' - a list of commands
# to be carried out by R.  In your work you will typically have a script for each assigned question, etc.
# To open a blank script, click on File -> New script in the R menu at the top of the screen.
# To save the script for future use, click on the 'save' icon and follow the instructions.
# To run the command in the script, save it and then click on Edit -> Run all.  
# (Try it - to run THIS script, save it to a directory somewhere, then re-open it and run it.)
# Alternatively, one will often just copy and paste a few commands, from the script, 
# into the Command window ("R console") in order to try them out.