rm(list = ls()) # clear the memory ########################### # This is a brief introduction to R # Open R, and then work through these commands # '#' = comment # '<-' or '=' = affect; defines the left hand side to be the right hand side # '>' = prompt of R # basic operations - enter these; see what you get: 7+5 7-5 7*5 7/5 7/0 0/0 # use the function c() for creating a vector x = c(1,2,3) x # response: [1] 1 2 3 x = 1:3 # Gives the same thing x x = c(1,2,3) + c(2,3,4) x # response: [1] 3 5 7 c(1,2,3)*c(2,3,4) # response: [1] 2 6 12 c(1,2,3)*2 # response: [1] 2 4 6 c(1,2,3)/2 # response: [1] 0.5 1.0 1.5 # create a vector, called 'height', containing 10 values height <- c(72,70,71,66,73,66,72,72,74,75) height # response: [1] 72 70 71 66 73 66 72 72 74 75 mean(height) # compute sample average median(height) # compute sample median mean(height, trim=0.1) # trimmed mean. var(height) # variance sum((height - mean(height))^2)/(length(height)-1) # should give the same thing - why? sqrt(var(height)) # standard deviation height[2] # the second element of the vector # generate a vector rep(2,3) # a vector of 2's of length 3: [1] 2 2 2 seq(1,10, length=21) # a vector of length 21; equally spaced entries spanning [1,10] ############################################## # Go to http://www.stat.ualberta.ca/~wiens/stat575/stat575.html and download the data sets which are there. # Then to use these files without going to the web, replace lines like # read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T1-3.dat") # with lines leading to where the data sets are on YOUR system. (Note that FORWARD slashes are required.) # You can specify the directory from which R starts - right click on the R icon and go to 'Properties'. help(read.table) lizard = as.matrix(read.table("http://www.stat.ualberta.ca/~wiens/stat575/datasets/T1-3.dat")) lizard # You can specify the directory from which R starts - right click on the R icon and go to 'Properties'. colnames(lizard) = c("mass", "svl", "hls") colMeans(lizard) # Means of the three individual variables S = cov(lizard) # Covariance matrix S S apply(lizard, 2, var) # Apply the 'variance' function to each column of 'lizard' # Compare with the diagonal of S # Plotting: pairs(lizard) # For a more sophisticated plot, click on Packages on the top menu of the screen, # choose a mirror (e.g. in BC) and install the "cars" package - it contains the scatterplotMatrix function library(car) dev.new() scatterplotMatrix(lizard, diagonal = "boxplot", ellipse = TRUE) U = chol(S) # Upper triangular Cholesky factor U S - t(U)%*%U # Check that S=U'U E = diag(1/sqrt(diag(S))) # 1/standard deviations R = E%*%S%*%E # Correlation matrix det(S) # determinant eig = eigen(S) V = eig$vectors D = diag(eig$values) # Check that S=VDV' with V orthogonal: S - V%*%D%*%t(V) round(t(V)%*%V,12) pc = lizard%*%V pc # Contains the same info as X itself, but these columns are uncorrelated: Spc = round(cov(pc),10) Spc D # The vectors in pc are uncorrelated linear combinations of those in lizard, with the same total variance sum(diag(S)) sum(diag(D)) pc1 = -pc[,1] var(pc1) var(pc1)/sum(D) v = -V[,1] v # coefficients of lin. comb. explaining almost all (97%) of the variance. dev.new() pairs(cbind(lizard, pc1)) # Normality check: dev.new() y=lizard[,1] qqnorm(y) qqline(y) # More sophisticated plotting: dev.new() par(mfrow = c(1,3)) # A 1 by 3 panel of plots for(i in 1:3) { y = lizard[,i] v=qqnorm(y, ylab = colnames(lizard)[i]) text(0, max(v$y-2), paste("corr = ", round(cor(v$x,v$y),3))) qqline(y) } ############################################## # Using 'scripts': What you are reading right now is an example of a 'script' - a list of commands # to be carried out by R. In your work you will typically have a script for each assigned question, etc. # To open a blank script, click on File -> New script in the R menu at the top of the screen. # To save the script for future use, click on the 'save' icon and follow the instructions. # To run the command in the script, save it and then click on Edit -> Run all. # (Try it - to run THIS script, save it to a directory somewhere, then re-open it and run it.) # Alternatively, one will often just copy and paste a few commands, from the script, # into the Command window ("R console") in order to try them out.