# Compute an M-estimate of regression, using Huber's psi function and IRLS. # Data: x = stack.x y = stack.loss n = length(y) p = 4 X = cbind(1,x) # Set 'c': c = 1 # Define Huber's psi function, with tuning constant 'c': psi = function(r) pmax(-c, pmin(r,c)) # Weights: w = function(r) pmin(1, c/abs(r)) # Delta: delta = 1-2*c*dnorm(c)+2*(c^2-1)*pnorm(-c) # Arrange to store the output: out = matrix(ncol = p+2) dimnames(out) = list(NULL, c("intercept", "air.flow", "water.temp", "acid.conc", "sigma", "norm")) # Start with an L1-estimate library(quantreg) init.fit = rq(y ~ x) theta = init.fit$coef r = init.fit$resid sigma = mad(r, center = 0) #Initial scale estimate std.res = r/sigma weights = w(std.res) norm = sqrt(sum((t(X)%*%psi(std.res))^2)) #Euclidean norm of t(X)%*%psi(std.res) out[1,] = round(c(theta, sigma, norm),5) while (norm > .001) { fit = lsfit(x, y, wt = weights) theta = fit$coef r = fit$resid sigma = sqrt(sum((weights*r)^2)/(delta*(n-p))) std.res = r/sigma norm = sqrt(sum((t(X)%*%psi(std.res))^2)) out = rbind(out, round(c(theta, sigma, norm),5)) weights = w(std.res) } print(out) # Compare with Least Squares: ls.print(lsfit(x,y)) # Final weights: print(weights) par(mfcol = c(2,2)) # Residual analysis: fits = X%*%theta robresids = std.res # r yrange = c(-4,4) plot(fits, robresids, xlab = "fits", ylab = "robust std.res", xlim = c(5,45), ylim = yrange) text(x = fits[c(1,3,4,17,21)], y = robresids[c(1,3,4,17,21)], labels = c(1,3,4,17,21), pos=4) covariate = x[,2] range = c(min(covariate)-1, max(covariate)+1) plot(covariate, robresids, xlab = "water.temp", ylab = "robust std.res", xlim = range, ylim = yrange) text(x = covariate[c(1,3,4,17,21)], y = robresids[c(1,3,4,17,21)], labels = c(1,3,4,17,21), pos=4) ls.fit = lsfit(x,y) fits = X%*%ls.fit$coef lsresids = ls.diag(ls.fit)$std.res # ls.fit$resid plot(fits, lsresids, xlab = "fits", ylab = "LS std.res", xlim = c(5,45), ylim = yrange) text(x = fits[c(1,3,4,17,21)], y = lsresids[c(1,3,4,17,21)], labels = c(1,3,4,17,21), pos=4) plot(covariate, lsresids, xlab = "water.temp", ylab = "LS std.res", xlim = range, ylim = yrange) text(x = covariate[c(1,3,4,17,21)], y = lsresids[c(1,3,4,17,21)], labels = c(1,3,4,17,21), pos=4) # Compute pseudovalues psiprime = function(r) (abs(r)<=c) a = mean(psiprime(std.res)) y.tilde = X%*%theta + (sigma/a)*psi(std.res) pseudofit = lsfit(x,y.tilde) ls.print(pseudofit)