# Edps 589 # Fall 2021 # C.J.Anderson # # Step by Stepc practice of ordinal test for 2-way tables # using 2018 GSS data on quality of life & health. # In brief the steps are # 1. Create data set & check # 2. Create tab object (i.e., turn data.frame into a table) # 3. Run CMHtest using tab object and desired scores # 4. Compute pearson product moment correlation using M2 # and sample size. # # Packages that I use library(vcd) # to create data using expand.grid library(vcdExtra) # for the MMH test library(DescTools) library(logmult) # for rc(M) assocation models library(ca) # for correspondence analysis library(mass) # for loglm command library(psych) # To use "phi" command ##################################################### # Step 1: Create data set--use either method 1 or 2 # but not both ###################################################### # # Method 1: read data in from an external file # which mean you need to set a working directory # either from the toolbar from Rstudio ordinal # the command. # # Method 1 is preferable when dealing with larger # data sets. # setwd("D:\\Dropbox\\edps 589\\2 Chi-square") gss.long <- read.table("gss2018_health_life.txt",header=TRUE) gss.long # # Method 2: Use expand.grid to define variable # names and levels, create an object with the # frequencies (counts) and merge to create the # data frame that includes the frequencies # # Method 2 is fine for smaller problem and # and when you want to make sure order of # correpond to what you want. # def.var <- expand.grid(quality=c("excellent","very_good","good", "fair", "poor"), health=c("excellent","very_good","good", "fair", "poor") ) # look at what this command did" def.var count<-c(221, 120, 29, 7, 1, 160, 410, 71, 5, 1, 66, 328, 341, 40, 2, 29, 81, 172, 138, 11, 2, 11, 27, 34, 22) gss <- as.data.frame(cbind(def.var, count )) # # check data are in OK gss ################################################### # Test inpdependence as a log-linear model # Use glm which is part of base R ################################################### summary(ind.mod <- glm(count ~ quality + health, data=gss, family="poisson")) #--- I will want pearson chi-square to latter x2.pearson <- loglm(count ~ quality + health, data=gss)$pearson ################################################### ################################################### # Step 2: Create a tab object ################################################### # gss.tab <- xtabs(count ~ quality + health, data=gss) # Check your table gss.tab #################################################### # Step 3: #################################################### # Cochran-Mantel-Haenszel test of association # (vcdExtra package) # Note: Can get more than just linear association # types=c("cor","rmeans","cmeans","general") # where "general" Ho is no association # "cor" Ho is Pearson correlation is 0 # "rmeans" Ho is row means are equal # "cmean" Ho is column means are equal (cmh.integer<- CMHtest(gss.tab, strata=NULL, rscores=1:5, cscores=1:5, types=c("cor","general") )) m2 <- cmh.integer$table[1,1] ####################################################### # Step 4: ####################################################### # Compute the Pearson correlation using the fact that # M2= (n-1)r^2 or r = sqrt(M2/(n-1)) n <- sum(gss.tab) ( r <- sqrt( m2 /(n-1)) ) ######################################### # Repeat Steps 3 and 4 but using # mid-ranks which were given by me ######################################### (cmh.midranks <- CMHtest(gss.tab, strata=NULL, rscores=c(240, 954.5, 1749.5, 2181.5, 2312), cscores=c(189.5,702,1414,2018,2281.5), types=c("cor","general") )) m2 <-cmh.midranks$table[1,1] # Pearson product moment correlation: n <- sum(gss.tab) ( r <- sqrt( m2/(n-1)) ) ############################################## # Repeat Steps 3 & 4 using Scores from # correspondence analysis ############################################## (cmh.ca <- CMHtest(gss.tab, strata=NULL, rscores=c(-.9254,-.3754,.5643,1.5577,2.4021), cscores=c(-.9580,-.6407,.1597,1.0745,1.9739), types=c("cor","general") )) m2 <- cmh.ca$table[1,1] # Compute correlation n <- sum(gss.tab) ( r <- sqrt( 858.33 /(n-1)) ) ################################################## # How to get best scores: correpsondence analysis # -- just different scaling ################################################## (ca.gss <- ca(gss.tab)) names(ca.gss) ca.gss$rowcoord rscores=c(-.9254,-.3754,.5643,1.5577,2.4021) cor(ca.gss$rowcoord[,1], rscores) cscores=c(-.9580,-.6407,.1597,1.0745,1.9739) cor(ca.gss$colcoord[,1], cscores) ################################################## # Or estimated scores from statistical model ################################################## summary(rc1 <- rc(gss.tab, nd=1, se=c("jackknife"), family="poisson")) plot(rc1)