rm(list = ls()) # clear the memory
###########################
# This is a brief introduction to R
# Open R, and then work through these commands
# '#' = comment
# '<-' or '=' = affect; defines the left hand side to be the right hand side
# '>' = prompt of R
# basic operations - enter these; see what you get:
7+5
7-5
7*5
7/5
7/0
0/0
# use the function c() for creating a vector
x = c(1,2,3)
x # response: [1] 1 2 3
x = 1:3 # Gives the same thing
x
x = c(1,2,3) + c(2,3,4)
x # response: [1] 3 5 7
c(1,2,3)*c(2,3,4) # response: [1] 2 6 12
c(1,2,3)*2 # response: [1] 2 4 6
c(1,2,3)/2 # response: [1] 0.5 1.0 1.5
# create a vector, called 'height', containing 10 values
height <- c(72,70,71,66,73,66,72,72,74,75)
height # response: [1] 72 70 71 66 73 66 72 72 74 75
mean(height) # compute sample average
median(height) # compute sample median
mean(height, trim=0.1) # trimmed mean.
var(height) # variance
sum((height - mean(height))^2)/(length(height)-1)
# should give the same thing - why?
sqrt(var(height)) # standard deviation
height[2] # the second element of the vector
# generate a vector
rep(2,3) # a vector of 2's of length 3: [1] 2 2 2
seq(1,10, length=21) # a vector of length 21; equally spaced entries spanning [1,10]
##############################################
# Go to http://www.stat.ualberta.ca/~wiens/stat575/stat575.html and download the data sets which are there.
# Then to use these files without going to the web, replace lines like
# read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T1-3.dat")
# with lines leading to where the data sets are on YOUR system. (Note that FORWARD slashes are required.)
# You can specify the directory from which R starts - right click on the R icon and go to 'Properties'.
help(read.table)
lizard = as.matrix(read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T1-3.dat"))
lizard
# You can specify the directory from which R starts - right click on the R icon and go to 'Properties'.
colnames(lizard) = c("mass", "svl", "hls")
colMeans(lizard) # Means of the three individual variables
S = cov(lizard) # Covariance matrix S
S
apply(lizard, 2, var) # Apply the 'variance' function to each column of 'lizard'
# Compare with the diagonal of S
# Plotting:
pairs(lizard)
# For a more sophisticated plot, click on Packages on the top menu of the screen,
# choose a mirror (e.g. in BC) and install the "cars" package - it contains the scatterplotMatrix function
library(car)
dev.new()
scatterplotMatrix(lizard, diagonal = "boxplot", ellipse = TRUE)
U = chol(S) # Upper triangular Cholesky factor
U
S - t(U)%*%U # Check that S=U'U
E = diag(1/sqrt(diag(S))) # 1/standard deviations
R = E%*%S%*%E # Correlation matrix
det(S) # determinant
eig = eigen(S)
V = eig$vectors
D = diag(eig$values)
# Check that S=VDV' with V orthogonal:
S - V%*%D%*%t(V)
round(t(V)%*%V,12)
pc = lizard%*%V
pc # Contains the same info as X itself, but these columns are uncorrelated:
Spc = round(cov(pc),10)
Spc
D # The vectors in pc are uncorrelated linear combinations of those in lizard, with the same total variance
sum(diag(S))
sum(diag(D))
pc1 = -pc[,1]
var(pc1)
var(pc1)/sum(D)
v = -V[,1]
v # coefficients of lin. comb. explaining almost all (97%) of the variance.
dev.new()
pairs(cbind(lizard, pc1))
# Normality check:
dev.new()
y=lizard[,1]
qqnorm(y)
qqline(y)
# More sophisticated plotting:
dev.new()
par(mfrow = c(1,3)) # A 1 by 3 panel of plots
for(i in 1:3) {
y = lizard[,i]
v=qqnorm(y, ylab = colnames(lizard)[i])
text(0, max(v$y-2), paste("corr = ", round(cor(v$x,v$y),3)))
qqline(y)
}
##############################################
# Using 'scripts': What you are reading right now is an example of a 'script' - a list of commands
# to be carried out by R. In your work you will typically have a script for each assigned question, etc.
# To open a blank script, click on File -> New script in the R menu at the top of the screen.
# To save the script for future use, click on the 'save' icon and follow the instructions.
# To run the command in the script, save it and then click on Edit -> Run all.
# (Try it - to run THIS script, save it to a directory somewhere, then re-open it and run it.)
# Alternatively, one will often just copy and paste a few commands, from the script,
# into the Command window ("R console") in order to try them out.