#
#  Code to model the frequency of America dataset
#  with GLMs
#

# Load the "frequency of America" dataset; the first line of the
# file supplies the column names.
amer <- read.table(file = "./freqAmericaData.txt", header = TRUE)

#
#  The following regressions are fit to the State of the
#  Union data.  Alternatively, you can try the same code
#  applied to the inaugural address data.  Just replace
#  'sotu' with 'inaug'.
#

# Fit Logistic Regression
# The response is the per-row proportion sotu.america / sotu.wcnt,
# regressed on indx.  Passing sotu.wcnt as the prior weights makes
# glm treat each row as a binomial with sotu.wcnt trials.  The
# argument is spelled `weights` in full rather than relying on
# partial matching of `weight`.
md.sotu.logit <- glm(
  sotu.america / sotu.wcnt ~ indx, data = amer,
  weights = sotu.wcnt, family = binomial(link = logit)
)

# 95% confidence intervals for the fitted parameters
confint(md.sotu.logit, level = 0.95)

# Fit Overdispersed Logistic Regression
# (The one I used in the article)
# Same formula and weights as the plain logistic fit, but with the
# quasibinomial family, so a dispersion parameter is estimated
# instead of being fixed at 1.  The argument is spelled `weights`
# in full rather than relying on partial matching of `weight`.
md.sotu.logit2 <- glm(
  sotu.america / sotu.wcnt ~ indx, data = amer,
  weights = sotu.wcnt, family = quasibinomial(link = logit)
)

# 95% confidence intervals for the parameters of the overdispersed
# model.  These must be computed from md.sotu.logit2; calling
# confint on md.sotu.logit would only repeat the intervals of the
# plain binomial fit.
confint(md.sotu.logit2, level = 0.95)

# Fit a Poisson regression (log link) to the raw counts
# sotu.america, with indx as the only regressor.
md.sotu.pois <- glm(
  formula = sotu.america ~ indx,
  family = poisson(link = log),
  data = amer
)

# Fit a second Poisson regression (log link) to the raw counts,
# this time adding the word count sotu.wcnt as a regressor.  Note
# that sotu.wcnt enters as an ordinary covariate, not as an offset.
md.sotu.pois2 <- glm(
  formula = sotu.america ~ indx + sotu.wcnt,
  family = poisson(link = log),
  data = amer
)

#
#  Notice that the estimated parameter for the indx
#  variable is 0.085 in the Logistic case and similarly
#  0.081 in the Poisson case.  Hence, both methods provide
#  similar estimates of the rate of increase in the use of
#  "America", be it in log odds or in log counts.
#