# 9/7/2022
#
# Solutions to practice problem at end of notes on 2-way chi-square tests
#
#   o various representations of table (joint, margins, proportion)
#   o measures of association
#   o Chi-square tests
#   o Some graphical representations
#
#
# Data: gss view on abortion x political view
#
library(xtable)    # To output tables in LaTeX, i.e., xtable(tabular.object)
library(vcd)       # assocstats() and the tile/mosaic displays
library(vcdExtra)
library(MASS)      # For loglinear model, i.e., loglm()
library(Epi)       # twoby2(): relative risk and odds ratios
library(epitools)  # oddsratio() function

# -- Create the data frame
# expand.grid() varies its first factor fastest, so the counts below are in
# the order (yes, progressive), (no, progressive), (yes, conservative),
# (no, conservative).
labels <- expand.grid(
  abortion = c("yes", "no"),
  polview = c("progressive", "conservative")
)
count <- c(337, 313, 137, 156)
# cbind() on a data frame already returns a data frame.
df <- cbind(labels, count)

################################################################################
# -- Tabular format
# --- frequencies (outer parentheses print the result of the assignment)
(df.tab <- xtabs(count ~ abortion + polview, data = df))

# --- with marginal frequencies
addmargins(df.tab)

# ---- joint proportions
prop.table(df.tab)

# --- conditional proportions
prop.table(df.tab, 1)  # rows sum to 1: distribution of polview given abortion
prop.table(df.tab, 2)  # columns sum to 1: distribution of abortion given polview

###############################################################################
# -- X^2 and G2, df, pvalue, phi (i.e., r)
# -- Table of frequencies as input
assocstats(df.tab)

# -- Model of independence using glm
# df2 is a copy taken before the residual column is added to df below.
df2 <- df
summary(glm0 <- glm(count ~ abortion + polview, data = df2, family = poisson))

# --- Adjusted standardized Pearson residuals (Haberman)
(df$rstd <- rstandard(glm0, type = "pearson"))
xtabs(rstd ~ abortion + polview, data = df)

# -- Model of independence using loglm from MASS
(glm1 <- loglm(count ~ abortion + polview, data = df))

# -- Pearson chi-square
# ---- test with continuity correction (the default for 2 x 2 tables)
chisq.test(df.tab)

# ---- Pearson chi-square without continuity correction
chisq.test(df.tab, correct = FALSE)

###############################################################################
# -- Odds ratio
# ---- oddsratio() from epitools
# Namespace-qualified because vcd also exports an oddsratio(); this keeps the
# call independent of the order in which the packages were attached.
(epitools::oddsratio(df.tab, conf.level = 0.95))

df.mat <- as.matrix(df.tab)

### Nice one ###
# -- twoby2() from Epi (rel risk, odds ratios, exact & asymptotic p-values)
Epi::twoby2(df.tab)

##############################################################################
# ------------- Some Graphics ----------------

# -- Bar chart
# With beside = TRUE the table columns (polview) form the groups on the x-axis
# and the rows (abortion) are the side-by-side bars, one colour per row.
bar.colors <- c("red", "blue")  # I defined palette
par(cex = 1.2)  # enlarges base-graphics text; remains in effect afterwards
barplot(
  df.tab,
  beside = TRUE,
  legend.text = TRUE,
  col = bar.colors,
  main = "Practice Data from lecture: support abortion",
  xlab = "Political view",
  ylab = "Frequency"
)

# -- Tile Plot
tile(df.tab)

# -- What plot gives (like a tile plot)
plot(df.tab, col = "cornflowerblue")

# -- Spine plot
# NOTE(review): six colours are generated although the table is 2 x 2 --
# confirm how many of them spineplot() actually uses.
box.colors <- rainbow(6, s = 0.6)
spineplot(df.tab, col = box.colors)

# -- Mosaic
mosaic(df.tab)