#
# Code to model the frequency of America dataset
# with GLMs
#

# Read in data (whitespace-delimited text file with a header row).
amer <- read.table("./freqAmericaData.txt", header = TRUE)

#
# The following regressions are fit to the State of the
# Union data. Alternatively, you can try the same code
# applied to the inaugural address data. Just replace
# 'sotu' with 'inaug'.
#

# Fit Logistic Regression
# The response is the proportion sotu.america / sotu.wcnt, and
# `weights` supplies sotu.wcnt as the number of binomial trials
# behind each proportion.
md.sotu.logit <- glm(
  sotu.america / sotu.wcnt ~ indx,
  data = amer,
  weights = sotu.wcnt,
  family = binomial(link = logit)
)

# Confidence interval for parameters
confint(md.sotu.logit, level = 0.95)

# Fit Overdispersed Logistic Regression
# (The one I used in the article)
# Same formula and weights as above; only the family differs
# (quasibinomial estimates a dispersion parameter).
md.sotu.logit2 <- glm(
  sotu.america / sotu.wcnt ~ indx,
  data = amer,
  weights = sotu.wcnt,
  family = quasibinomial(link = logit)
)

# Confidence interval for parameters
# Must be computed on the quasibinomial fit (md.sotu.logit2), not on
# md.sotu.logit: the interval widths depend on the estimated dispersion.
confint(md.sotu.logit2, level = 0.95)

# Fit a Poisson regression to the raw counts
md.sotu.pois <- glm(
  sotu.america ~ indx,
  data = amer,
  family = poisson(link = log)
)

# Fit a Poisson regression including the word count
# as a regressor
# NOTE(review): sotu.wcnt enters as an ordinary covariate here, not as
# an offset(log(sotu.wcnt)) exposure term -- confirm this is intended.
md.sotu.pois2 <- glm(
  sotu.america ~ indx + sotu.wcnt,
  data = amer,
  family = poisson(link = log)
)

#
# Notice that the estimated parameter for the indx
# variable is 0.085 in the Logistic case and similarly
# 0.081 in the Poisson case. Hence, both methods provide
# similar estimates of the rate of increase of America
# be it log odds or log counts.
#