# Edps 589
# Fall 2021
# C.J.Anderson
#
# Step by Stepc practice of ordinal test for 2-way tables
#    using 2018 GSS data on quality of life & health.

# In brief the steps are
#     1. Create data set & check
#     2. Create tab object (i.e., turn data.frame into a table)
#     3. Run CMHtest using tab object and desired scores
#     4. Compute pearson product moment correlation using M2
#          and sample size.
#
#  Packages that I use

library(vcd)         # to create data using expand.grid
library(vcdExtra)    # for the MMH test
library(DescTools)
library(logmult)     # for rc(M) assocation models
library(ca)          # for correspondence analysis
library(mass)        # for loglm command
library(psych)       # To use "phi" command
#####################################################
# Step 1:  Create data set--use either method 1 or 2
#          but not both
######################################################
#
#  Method 1: read data in from an external file
#    which mean you need to set a working directory
#    either from the toolbar from Rstudio ordinal
#    the command.
#     
#	 Method 1 is preferable when dealing with larger
#    data sets.
#

setwd("D:\\Dropbox\\edps 589\\2 Chi-square")
gss.long <- read.table("gss2018_health_life.txt",header=TRUE)
gss.long

#
#  Method 2:  Use expand.grid to define variable
#     names and levels, create an object with the 
#     frequencies (counts) and merge to create the
#     data frame that includes the frequencies
#
#     Method 2 is fine for smaller problem and 
#     and when you want to make sure order of 
#     correpond to what you want.
#
def.var <- expand.grid(quality=c("excellent","very_good","good", "fair", "poor"),
		health=c("excellent","very_good","good", "fair", "poor")
			)
# look at what this command did"
def.var

count<-c(221, 120,  29,   7,  1,
         160, 410,  71,   5,  1,
		  66, 328, 341,  40,  2,
		  29,  81, 172, 138, 11,
		   2,  11,  27,  34, 22)
			
gss <- as.data.frame(cbind(def.var, count ))

#
# check data are in OK	
gss

###################################################
# Test inpdependence as a log-linear model
#   Use glm which is part of base R
###################################################

summary(ind.mod <- glm(count ~ quality + health, data=gss, family="poisson"))

#--- I will want pearson chi-square to latter
x2.pearson <- loglm(count ~ quality + health, data=gss)$pearson


###################################################
###################################################
# Step 2: Create a tab object
###################################################
#
gss.tab <- xtabs(count ~  quality + health, data=gss)

# Check your table
gss.tab

####################################################
# Step 3:
####################################################
# Cochran-Mantel-Haenszel test of association
#   (vcdExtra package)
# Note:	Can get more than just linear association 
#   types=c("cor","rmeans","cmeans","general")
#   where "general" Ho is no association
#         "cor" Ho is Pearson correlation is 0
#         "rmeans" Ho is row means are equal
#         "cmean"  Ho is column means are equal

(cmh.integer<- CMHtest(gss.tab,
                strata=NULL,
		rscores=1:5,          
		cscores=1:5,
		types=c("cor","general")
		))
m2 <- cmh.integer$table[1,1]
#######################################################
# Step 4: 		
#######################################################
#   Compute the Pearson correlation using the fact that 
#       M2= (n-1)r^2 or r = sqrt(M2/(n-1))
n <- sum(gss.tab)
( r <- sqrt( m2 /(n-1)) )


#########################################
# Repeat Steps 3 and 4 but using 
# mid-ranks which were given by me
#########################################
(cmh.midranks <- CMHtest(gss.tab,
                strata=NULL,
                rscores=c(240, 954.5, 1749.5, 2181.5, 2312),
                cscores=c(189.5,702,1414,2018,2281.5),
                types=c("cor","general")
))
m2 <-cmh.midranks$table[1,1]

# Pearson product moment correlation:
n <- sum(gss.tab)
( r <- sqrt( m2/(n-1)) )

##############################################
# Repeat Steps 3 & 4 using Scores from 
# correspondence analysis
##############################################
(cmh.ca <- CMHtest(gss.tab,
           strata=NULL,
	   rscores=c(-.9254,-.3754,.5643,1.5577,2.4021),
	   cscores=c(-.9580,-.6407,.1597,1.0745,1.9739),
	   types=c("cor","general")
		))
m2 <- cmh.ca$table[1,1]	
# Compute correlation		
n <- sum(gss.tab)
( r <- sqrt( 858.33 /(n-1)) )


##################################################
# How to get best scores:  correpsondence analysis
#  -- just different scaling
##################################################
(ca.gss <- ca(gss.tab))
names(ca.gss)

ca.gss$rowcoord
rscores=c(-.9254,-.3754,.5643,1.5577,2.4021)
cor(ca.gss$rowcoord[,1], rscores)

cscores=c(-.9580,-.6407,.1597,1.0745,1.9739)
cor(ca.gss$colcoord[,1], cscores)

##################################################
# Or estimated scores from statistical model
##################################################
summary(rc1 <- rc(gss.tab, nd=1, se=c("jackknife"), family="poisson"))
plot(rc1)