These are analyses reported in lecture notes on logistic regression.

Set up

libraries

library(vcd)
## Loading required package: grid
library(ResourceSelection)
## ResourceSelection 0.3-5   2019-07-22
library(pROC)               
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(DescTools)
library(texreg)
## Version:  1.37.5
## Date:     2020-06-17
## Author:   Philip Leifeld (University of Essex)
## 
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").

Data

In long format (data frame):

setwd("C:/Users/cja/Dropbox/edps 589/5 logistic regression")
hsb <- read.csv(file="hsb_data.csv", header=TRUE, sep=",")
names(hsb)
##  [1] "id"       "sex"      "race"     "ses"      "sctyp"    "hsp"     
##  [7] "locus"    "concpt"   "mot"      "car"      "rdg"      "wrtg"    
## [13] "math"     "sci"      "civ"      "achieve"  "sci_math" "type"    
## [19] "xmean"    "xsum"     "program"  "group"
head(hsb)
##   id sex race ses sctyp hsp locus concpt  mot car  rdg wrtg math  sci  civ
## 1  1   2    1   1     1   3  0.29   0.88 0.67  10 33.6 43.7 40.2 39.0 40.6
## 2  4   2    1   2     1   3  0.06   0.03 0.00  15 38.9 41.1 32.7 41.7 40.6
## 3 23   2    1   1     1   3 -0.22   0.32 0.33   1 35.2 38.5 39.9 34.7 40.6
## 4 26   2    1   2     1   2  1.12  -0.74 0.67  15 31.0 41.1 36.0 36.9 45.6
## 5 29   2    1   2     1   2 -0.90   0.03 0.67   4 36.3 44.3 36.1 33.6 40.6
## 6 31   1    1   2     1   2 -0.21  -1.38 0.00   9 34.2 46.3 44.5 39.0 35.6
##   achieve sci_math type xmean  xsum program group
## 1   197.1     -1.2 case 39.42 197.1       0     1
## 2   195.0      9.0 case 39.00 195.0       0     1
## 3   188.9     -5.2 case 37.78 188.9       0     1
## 4   190.6      0.9 case 38.12 190.6       1     1
## 5   190.9     -2.5 case 38.18 190.9       1     1
## 6   199.6     -5.5 case 39.92 199.6       1     1

Long table:

var.levels <- expand.grid(academic=c("no", "yes"),
                          sctyp=c("public","private"),
              ses = c("low","middle","high"))
( hsb3way <- data.frame(var.levels, count=c(91, 40,  4,  4, 138, 111,
                                            14, 36, 44, 82,   1,  35 )))                  
##    academic   sctyp    ses count
## 1        no  public    low    91
## 2       yes  public    low    40
## 3        no private    low     4
## 4       yes private    low     4
## 5        no  public middle   138
## 6       yes  public middle   111
## 7        no private middle    14
## 8       yes private middle    36
## 9        no  public   high    44
## 10      yes  public   high    82
## 11       no private   high     1
## 12      yes private   high    35

Wide table:

var.levels2 <- expand.grid(sctyp=c("public","private"),
              ses = c("low","middle","high"))
hsb3way.no <- data.frame(var.levels2, no=c(40, 4, 111,  36, 82,   35 ))                       
hsb3way.yes <- data.frame(var.levels2, yes=c(91,4,136,14,44,1))                   
hsb3way.wide <- merge(hsb3way.yes,hsb3way.no, by=c("sctyp","ses"))
hsb3way.wide$ncases <- hsb3way.wide$yes + hsb3way.wide$no
(hsb3way.wide)
##     sctyp    ses yes  no ncases
## 1 private   high   1  35     36
## 2 private    low   4   4      8
## 3 private middle  14  36     50
## 4  public   high  44  82    126
## 5  public    low  91  40    131
## 6  public middle 136 111    247

Some Logit Models

Independence

summary(  i.mod <- glm(cbind(yes,no) ~ 1 , data=hsb3way.wide, family=binomial))
## 
## Call:
## glm(formula = cbind(yes, no) ~ 1, family = binomial, data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -6.2252   0.0852  -2.9563  -3.0768   4.8559   2.0650  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.06022    0.08182  -0.736    0.462
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.81  on 5  degrees of freedom
## Residual deviance: 84.81  on 5  degrees of freedom
## AIC: 111.85
## 
## Number of Fisher Scoring iterations: 3

Homogeneous Associaiton

or all 2-way interactions

summary(  h.logit <- glm(cbind(yes,no)  ~ sctyp + ses , data=hsb3way.wide,family=binomial)  )
## 
## Call:
## glm(formula = cbind(yes, no) ~ sctyp + ses, family = binomial, 
##     data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -1.8337   0.7284   0.6186   0.5513  -0.1931  -0.2439  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    0.8587     0.1857   4.623 3.78e-06 ***
## sctypprivate  -1.3766     0.2793  -4.929 8.25e-07 ***
## sesmiddle     -0.6244     0.2209  -2.826  0.00471 ** 
## seshigh       -1.5848     0.2577  -6.149 7.81e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.8101  on 5  degrees of freedom
## Residual deviance:  4.6765  on 2  degrees of freedom
## AIC: 37.72
## 
## Number of Fisher Scoring iterations: 4
(X2 <- sum(residuals(h.logit,type="pearson")^2) )
## [1] 3.787392

Logit Models as poisson regression models

# As a poisson --- one way to get  X2 goodness-of-fit statistics
summary( h.poi <- glm(count ~ sctyp + academic + ses + sctyp*academic + sctyp*ses + ses*academic, data=hsb3way,family=poisson) )
## 
## Call:
## glm(formula = count ~ sctyp + academic + ses + sctyp * academic + 
##     sctyp * ses + ses * academic, family = poisson, data = hsb3way)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6        7        8  
## -0.1075   0.1641   0.5667  -0.4760  -0.1577   0.1777   0.5214  -0.3050  
##       9       10       11       12  
##  0.4456  -0.3155  -1.7503   0.5027  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                4.5221     0.1034  43.742  < 2e-16 ***
## sctypprivate              -3.4332     0.4064  -8.448  < 2e-16 ***
## academicyes               -0.8593     0.1858  -4.625 3.74e-06 ***
## sesmiddle                  0.4185     0.1323   3.163  0.00156 ** 
## seshigh                   -0.8059     0.1828  -4.409 1.04e-05 ***
## sctypprivate:academicyes   1.3856     0.2792   4.962 6.97e-07 ***
## sctypprivate:sesmiddle     0.9889     0.4034   2.451  0.01423 *  
## sctypprivate:seshigh       1.0754     0.4230   2.543  0.01100 *  
## academicyes:sesmiddle      0.6113     0.2207   2.769  0.00562 ** 
## academicyes:seshigh        1.5844     0.2578   6.146 7.94e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 485.7696  on 11  degrees of freedom
## Residual deviance:   4.6219  on  2  degrees of freedom
## AIC: 85.535
## 
## Number of Fisher Scoring iterations: 4
(X2 <- sum(residuals(h.poi,type="pearson")^2))
## [1] 3.748049
# This will give both G2 and X2
library(MASS)
summary( h.poi2 <- loglm(count ~ sctyp + academic + ses + sctyp*academic + sctyp*ses + ses*academic, data=hsb3way) )
## Formula:
## count ~ sctyp + academic + ses + sctyp * academic + sctyp * ses + 
##     ses * academic
## attr(,"variables")
## list(count, sctyp, academic, ses)
## attr(,"factors")
##          sctyp academic ses sctyp:academic sctyp:ses academic:ses
## count        0        0   0              0         0            0
## sctyp        1        0   0              1         1            0
## academic     0        1   0              1         0            1
## ses          0        0   1              0         1            1
## attr(,"term.labels")
## [1] "sctyp"          "academic"       "ses"            "sctyp:academic"
## [5] "sctyp:ses"      "academic:ses"  
## attr(,"order")
## [1] 1 1 1 2 2 2
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
## attr(,"predvars")
## list(count, sctyp, academic, ses)
## attr(,"dataClasses")
##     count     sctyp  academic       ses 
## "numeric"  "factor"  "factor"  "factor" 
## 
## Statistics:
##                       X^2 df  P(> X^2)
## Likelihood Ratio 4.621902  2 0.0991669
## Pearson          3.747989  2 0.1535093

Identification Constraints

Different programs may use different ID constraints. Different parameterizations on impact the value of the regression coefficients but not the fitted values, odds ration estimates, residuals, goodness-of-model fit to data, etc.

Dummy: first set to zer0

The first is set to 0,

hsb3way.wide$low   <- ifelse(hsb3way.wide$ses=="low",1,0)
hsb3way.wide$mid   <- ifelse(hsb3way.wide$ses=="middle",1,0)
hsb3way.wide$hig   <- ifelse(hsb3way.wide$ses=="high",1,0)

summary( logit.bin <- glm(cbind(yes,no) ~  mid + hig + sctyp, family = binomial, data = hsb3way.wide) )
## 
## Call:
## glm(formula = cbind(yes, no) ~ mid + hig + sctyp, family = binomial, 
##     data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -1.8337   0.7284   0.6186   0.5513  -0.1931  -0.2439  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    0.8587     0.1857   4.623 3.78e-06 ***
## mid           -0.6244     0.2209  -2.826  0.00471 ** 
## hig           -1.5848     0.2577  -6.149 7.81e-10 ***
## sctypprivate  -1.3766     0.2793  -4.929 8.25e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.8101  on 5  degrees of freedom
## Residual deviance:  4.6765  on 2  degrees of freedom
## AIC: 37.72
## 
## Number of Fisher Scoring iterations: 4

Alternate Dummy Codes

By defaults glm in R will set the 1st coefficient to 0, but SAS sets the last to 0. Below we switch to last coefficients

# -- change how Dummy codes are defined
hsb3way.wide$Dsctyp <- ifelse(hsb3way.wide$sctyp=="public",1,0)
hsb3way.wide$Dlow   <- ifelse(hsb3way.wide$ses=="low",1,0)
hsb3way.wide$Dmid   <- ifelse(hsb3way.wide$ses=="middle",1,0)

summary( logit.bin2 <- glm(cbind(yes,no) ~  Dlow + Dmid + Dsctyp, family = binomial, data = hsb3way.wide) )
## 
## Call:
## glm(formula = cbind(yes, no) ~ Dlow + Dmid + Dsctyp, family = binomial, 
##     data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -1.8337   0.7284   0.6186   0.5513  -0.1931  -0.2439  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1027     0.3059  -6.874 6.25e-12 ***
## Dlow          1.5848     0.2577   6.149 7.81e-10 ***
## Dmid          0.9605     0.2153   4.460 8.19e-06 ***
## Dsctyp        1.3766     0.2793   4.929 8.25e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.8101  on 5  degrees of freedom
## Residual deviance:  4.6765  on 2  degrees of freedom
## AIC: 37.72
## 
## Number of Fisher Scoring iterations: 4

Effects codes

hsb3way.wide$Esctyp <- ifelse(hsb3way.wide$sctyp=="public",1,-1)
hsb3way.wide$Elow   <- ifelse(hsb3way.wide$ses=="low",1,0)
hsb3way.wide$Elow   <- ifelse(hsb3way.wide$ses=="high",-1, hsb3way.wide$Elow)
hsb3way.wide$Emid   <- ifelse(hsb3way.wide$ses=="middle",1,0)
hsb3way.wide$Emid   <- ifelse(hsb3way.wide$ses=="high",-1,hsb3way.wide$Emid)

summary( logit.bin3 <- glm(cbind(yes,no) ~  Elow + Emid + Esctyp, family = binomial, data = hsb3way.wide) )
## 
## Call:
## glm(formula = cbind(yes, no) ~ Elow + Emid + Esctyp, family = binomial, 
##     data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -1.8337   0.7284   0.6186   0.5513  -0.1931  -0.2439  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.5660     0.1459  -3.880 0.000105 ***
## Elow          0.7364     0.1430   5.149 2.62e-07 ***
## Emid          0.1120     0.1173   0.955 0.339684    
## Esctyp        0.6883     0.1396   4.929 8.25e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.8101  on 5  degrees of freedom
## Residual deviance:  4.6765  on 2  degrees of freedom
## AIC: 37.72
## 
## Number of Fisher Scoring iterations: 4

Comparison

screenreg(list(logit.bin, logit.bin2, logit.bin3),
          custom.model.names=c("First=0", "Last =0", "Effect"))
## 
## ==================================================
##                 First=0     Last =0     Effect    
## --------------------------------------------------
## (Intercept)       0.86 ***   -2.10 ***   -0.57 ***
##                  (0.19)      (0.31)      (0.15)   
## mid              -0.62 **                         
##                  (0.22)                           
## hig              -1.58 ***                        
##                  (0.26)                           
## sctypprivate     -1.38 ***                        
##                  (0.28)                           
## Dlow                          1.58 ***            
##                              (0.26)               
## Dmid                          0.96 ***            
##                              (0.22)               
## Dsctyp                        1.38 ***            
##                              (0.28)               
## Elow                                      0.74 ***
##                                          (0.14)   
## Emid                                      0.11    
##                                          (0.12)   
## Esctyp                                    0.69 ***
##                                          (0.14)   
## --------------------------------------------------
## AIC              37.72       37.72       37.72    
## BIC              36.89       36.89       36.89    
## Log Likelihood  -14.86      -14.86      -14.86    
## Deviance          4.68        4.68        4.68    
## Num. obs.         6           6           6       
## ==================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

Same model, just different value of the coefficients.

Same odds ratios

Holding sctype constant: public vs private odds ratios

(odds.dummy1 <- exp(0 - logit.bin$coefficients[4]))
## sctypprivate 
##     3.961426
(odds.dummy2 <-  exp(logit.bin2$coefficients[4]- 0))
##   Dsctyp 
## 3.961426
(odds.effect3 <-  exp(logit.bin3$coefficients[4]+logit.bin3$coefficients[4]))
##   Esctyp 
## 3.961426

You can re-code school type to get exactly what are in the lecture notes

Nominal versus Ordinal Explanatory Variable?

SES is definitely ordinal, but first treat is as a factor

hsb3way.wide$xses  <- as.numeric(hsb3way.wide$ses)

# -- ses as factor
summary( logit.factor <- glm(cbind(yes,no) ~  ses + sctyp, family = binomial, data = hsb3way.wide) )
## 
## Call:
## glm(formula = cbind(yes, no) ~ ses + sctyp, family = binomial, 
##     data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -1.8337   0.7284   0.6186   0.5513  -0.1931  -0.2439  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    0.8587     0.1857   4.623 3.78e-06 ***
## sesmiddle     -0.6244     0.2209  -2.826  0.00471 ** 
## seshigh       -1.5848     0.2577  -6.149 7.81e-10 ***
## sctypprivate  -1.3766     0.2793  -4.929 8.25e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.8101  on 5  degrees of freedom
## Residual deviance:  4.6765  on 2  degrees of freedom
## AIC: 37.72
## 
## Number of Fisher Scoring iterations: 4

Note SES will be numeric (this is special case of when treated as a factor)

summary( logit.numeric <- glm(cbind(yes,no) ~  xses + sctyp, family = binomial, data = hsb3way.wide) )
## 
## Call:
## glm(formula = cbind(yes, no) ~ xses + sctyp, family = binomial, 
##     data = hsb3way.wide)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6  
## -1.9954   0.5755   0.8124   0.1059  -0.6799   0.3775  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    1.7493     0.2762   6.333 2.40e-10 ***
## xses          -0.7972     0.1289  -6.185 6.20e-10 ***
## sctypprivate  -1.3604     0.2784  -4.887 1.02e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.8101  on 5  degrees of freedom
## Residual deviance:  5.5889  on 3  degrees of freedom
## AIC: 36.633
## 
## Number of Fisher Scoring iterations: 4

Compute goodness-of-fit statistics for logit.numeric

hsb3way.wide$pi.numeric <- logit.numeric$fitted
Xsq.yes <- (hsb3way.wide$yes -     hsb3way.wide$pi.numeric*hsb3way.wide$ncases)**2   /(hsb3way.wide$pi.numeric*hsb3way.wide$ncases)
Xsq.no <-  (hsb3way.wide$no   - (1-hsb3way.wide$pi.numeric)*hsb3way.wide$ncases)**2 /((1-hsb3way.wide$pi.numeric)*hsb3way.wide$ncases)
(Xsq <- sum(Xsq.yes + Xsq.no))
## [1] 4.507544
Gsq.yes <- 2*hsb3way.wide$yes *log(hsb3way.wide$yes/(hsb3way.wide$ncases*hsb3way.wide$pi.numeric))
Gsq.no <- 2*hsb3way.wide$no * log(hsb3way.wide$no/(hsb3way.wide$ncases*(1-hsb3way.wide$pi.numeric)))
( Gsq <- sum(Gsq.yes+Gsq.no) )
## [1] 5.588936
# -- LR test of restriction on values for SES
anova(logit.numeric, logit.factor)
## Analysis of Deviance Table
## 
## Model 1: cbind(yes, no) ~ xses + sctyp
## Model 2: cbind(yes, no) ~ ses + sctyp
##   Resid. Df Resid. Dev Df Deviance
## 1         3     5.5889            
## 2         2     4.6765  1  0.91243

Can treat SES as numeric with evenly spaced scores.

Odds Ratio with ordinal/numeric SES

Just some here

logit.numeric$coefficients
##  (Intercept)         xses sctypprivate 
##    1.7492951   -0.7972102   -1.3604133
# -- Holding schtyp constant, odds of academic are 
exp(logit.numeric$coefficients[2])
##      xses 
## 0.4505843
# the odds for an increase of SES by 1 unit

# -- odds academic of low vs high ses
exp((3-1)*logit.numeric$coefficients[2])
##      xses 
## 0.2030262

Fitting Data at Individual Level

(1) Simple: program ~ math

summary(logit.mod1 <- glm(program ~ math, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math, family = binomial, data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2997  -0.9562   0.4355   0.9806   1.9107  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.58539    0.58066  -9.619   <2e-16 ***
## math         0.10933    0.01119   9.770   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 709.27  on 598  degrees of freedom
## AIC: 713.27
## 
## Number of Fisher Scoring iterations: 4
j <- order(hsb$math)                                    
plot(hsb$math[j],logit.mod1$fitted[j], 
    type='l',
    lwd=2,
    ylim=c(0,1),
    col="blue",
    main="Model 1: program ~ math")

(2) SES as nominal

hsb$ses <- as.factor(hsb$ses)
summary(logit.mod2 <- glm(program ~ math + ses, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses, family = binomial, data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2142  -0.9260   0.4107   0.9469   2.0086  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.52362    0.59008  -9.361  < 2e-16 ***
## math         0.09889    0.01147   8.620  < 2e-16 ***
## ses2         0.35070    0.23577   1.487    0.137    
## ses3         1.15033    0.27543   4.177 2.96e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 689.30  on 596  degrees of freedom
## AIC: 697.3
## 
## Number of Fisher Scoring iterations: 3
# - subset for graphing
hsb$fitted2 <- logit.mod2$fitted
low <- hsb[ which(hsb$ses=="1"),]
mid <- hsb[ which(hsb$ses=="2"),]
hi  <- hsb[ which(hsb$ses=="3"),]
j <- order(low$math)                                    
plot(low$math[j],low$fitted2[j], 
    type='l',
    lwd=2,
    ylim=c(0,1),
    xlim=c(30,80),
    col="blue",
    main="Model 2: program ~ math + ses (as factor) "
    )
j <- order(mid$math)                                    
lines(mid$math[j],mid$fitted2[j],col="green",lwd=2)
j <- order(hi$math)                                 
lines(hi$math[j],hi$fitted2[j],col="red",lwd=2)
legend("topleft",legend=c("high","middle","low"), 
       col=c("red","green","blue"), lty=1,lwd=2, cex=1)

(3) numeric SES

hsb$ses <- as.numeric(hsb$ses)
summary(logit.mod3 <- glm(program ~ math + ses, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses, family = binomial, data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1836  -0.9460   0.4366   0.9652   2.0587  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.19142    0.61298 -10.101  < 2e-16 ***
## math         0.09801    0.01140   8.595  < 2e-16 ***
## ses          0.58374    0.13762   4.242 2.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 690.76  on 597  degrees of freedom
## AIC: 696.76
## 
## Number of Fisher Scoring iterations: 3
# - subset for graphing
hsb$fitted3 <- logit.mod3$fitted
low <- hsb[ which(hsb$ses=="1"),]
mid <- hsb[ which(hsb$ses=="2"),]
hi  <- hsb[ which(hsb$ses=="3"),]
j <- order(low$math)                                    
plot(low$math[j],low$fitted3[j], 
    type='l',
    lwd=2,
    ylim=c(0,1),
    xlim=c(30,80),
    col="blue",
    main="Model 3: program ~ math + ses (numeric) "
    )
j <- order(mid$math)                                    
lines(mid$math[j],mid$fitted3[j],col="green",lwd=2)
j <- order(hi$math)                                 
lines(hi$math[j],hi$fitted3[j],col="red",lwd=2)
legend("topleft",legend=c("high","middle","low"), 
       col=c("red","green","blue"), lty=1,lwd=2, cex=1)

(4) program ~ math + ses + sctyp

hsb$sctyp <- as.factor(hsb$sctyp)
summary(logit.mod4 <- glm(program ~ math + ses + sctyp, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses + sctyp, family = binomial, 
##     data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1063  -0.8883   0.2979   0.8911   2.1101  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.24834    0.62568  -9.986  < 2e-16 ***
## math         0.09858    0.01166   8.454  < 2e-16 ***
## ses          0.49856    0.14069   3.544 0.000395 ***
## sctyp2       1.36466    0.28982   4.709 2.49e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 665.42  on 596  degrees of freedom
## AIC: 673.42
## 
## Number of Fisher Scoring iterations: 4
# - subset for graphing
hsb$fitted4 <- logit.mod4$fitted
low.public  <- hsb[ (which(hsb$ses==1 & hsb$sctyp=="1")),]
low.private <- hsb[ (which(hsb$ses==1 & hsb$sctyp=="2")),]
mid.public  <- hsb[ (which(hsb$ses==2 & hsb$sctyp=="1")),]
mid.private <- hsb[ (which(hsb$ses==2 & hsb$sctyp=="2")),]
hi.public   <- hsb[ (which(hsb$ses==3 & hsb$sctyp=="1")),]
hi.private  <- hsb[ (which(hsb$ses==3 & hsb$sctyp=="2")),]
j <- order(low.public$math)                                
plot(low.public$math[j],low.public$fitted4[j], 
    type='l',
    lwd=2,
    ylim=c(0,1),
    xlim=c(30,80),
    col="blue",
    main="Model 4: program ~ math + ses (numeric) + school type  "
    )
j <- order(low.private$math)                                    
lines(low.private$math[j],low.private$fitted4[j],col="darkblue",lwd=2)
j <- order(mid.public$math)                                 
lines(mid.public$math[j],mid.public$fitted4[j],col="green",lwd=2)
j <- order(mid.private$math)                                    
lines(mid.private$math[j],mid.private$fitted4[j],col="darkgreen",lwd=2)
j <- order(hi.public$math)                                  
lines(hi.public$math[j],hi.public$fitted4[j],col="red",lwd=2)
j <- order(hi.private$math)                                 
lines(hi.private$math[j],hi.private$fitted4[j],col="darkred",lwd=2)
legend("bottomright",legend=c("private high SES", "private middle SES","private low SES","public high SES", "public middle SES", "public low SES "), 
       col=c("darkred","darkgreen","darkblue","red","green","blue"), lty=1,lwd=2, cex=1)

(5) An interaction program ~ math + ses + sctyp + ses*sctyp

hsb$sctyp <- as.factor(hsb$sctyp)
summary(logit.mod5 <- glm(program ~ math + ses + sctyp + ses*sctyp, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses + sctyp + ses * sctyp, family = binomial, 
##     data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1263  -0.8776   0.2376   0.8949   2.0717  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -6.1130     0.6251  -9.780   <2e-16 ***
## math          0.1006     0.0118   8.526   <2e-16 ***
## ses           0.3804     0.1483   2.566   0.0103 *  
## sctyp2       -1.1328     1.1223  -1.009   0.3128    
## ses:sctyp2    1.1929     0.5293   2.254   0.0242 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 659.44  on 595  degrees of freedom
## AIC: 669.44
## 
## Number of Fisher Scoring iterations: 5
# - subset for graphing
hsb$fitted5 <- logit.mod5$fitted
low.public  <- hsb[ (which(hsb$ses==1 & hsb$sctyp=="1")),]
low.private <- hsb[ (which(hsb$ses==1 & hsb$sctyp=="2")),]
mid.public  <- hsb[ (which(hsb$ses==2 & hsb$sctyp=="1")),]
mid.private <- hsb[ (which(hsb$ses==2 & hsb$sctyp=="2")),]
hi.public   <- hsb[ (which(hsb$ses==3 & hsb$sctyp=="1")),]
hi.private  <- hsb[ (which(hsb$ses==3 & hsb$sctyp=="2")),]
j <- order(low.public$math)                                
plot(low.public$math[j],low.public$fitted5[j], 
    type='l',
    lwd=2,
    ylim=c(0,1),
    xlim=c(30,80),
    col="blue",
    main="Model 5: program ~ math + ses + schyp + ses*schtyp "
    )
j <- order(low.private$math)                                    
lines(low.private$math[j],low.private$fitted5[j],col="darkblue",lwd=2)
j <- order(mid.public$math)                                 
lines(mid.public$math[j],mid.public$fitted5[j],col="green",lwd=2)
j <- order(mid.private$math)                                    
lines(mid.private$math[j],mid.private$fitted5[j],col="darkgreen",lwd=2)
j <- order(hi.public$math)                                  
lines(hi.public$math[j],hi.public$fitted5[j],col="red",lwd=2)
j <- order(hi.private$math)                                 
lines(hi.private$math[j],hi.private$fitted5[j],col="darkred",lwd=2)

legend("bottomright",legend=c("private high SES", "private middle SES","private low SES","public high SES", "public middle SES", "public low SES "), 
       col=c("darkred","darkgreen","darkblue","red","green","blue"), lty=1,lwd=2, cex=1)

Diagonistics for model 5

Hosmer-Lemeshow

hoslem.test(logit.mod5$y, fitted(logit.mod5), g=10)
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  logit.mod5$y, fitted(logit.mod5)
## X-squared = 7.0278, df = 8, p-value = 0.5336

qq plot residuals

Using Pearson (adjusted standardized residuals

resid <- rstandard(logit.mod5, pearson=TRUE)
qqnorm(resid,
       main="QQ plot of  Pearson  Residuals",
         col="blue")

### ROC and concordance #### Concordance index

Cstat(logit.mod1,resp=hsb$program)
## [1] 0.750556
Cstat(logit.mod2,resp=hsb$program)
## [1] 0.7687856
Cstat(logit.mod3,resp=hsb$program)
## [1] 0.7679294
Cstat(logit.mod4,resp=hsb$program)
## [1] 0.7867873
Cstat(logit.mod5,resp=hsb$program)
## [1] 0.7888888

For ROC w/ multivariate logitistic regression

roc.data5 <-roc(logit.mod5$y , logit.mod5$fitted.values,ci=T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
names(roc.data5)
##  [1] "percent"            "sensitivities"      "specificities"     
##  [4] "thresholds"         "direction"          "cases"             
##  [7] "controls"           "fun.sesp"           "auc"               
## [10] "call"               "original.predictor" "original.response" 
## [13] "predictor"          "response"           "levels"            
## [16] "ci"
plot((1-roc.data5$specificities),roc.data5$sensitivities,
      type='l',
      lwd=2,
      col="blue",
     main="ROC for Models 5, 1 and chance",
     xlab="1-sensitivity (false positives)",
     ylab="Specificity (true positives)"
     )
roc.data1 <- roc(logit.mod1$y, logit.mod1$fitted.values,ci=T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
lines((1-roc.data1$specificities), roc.data1$sensitivities, col="pink", lwd=2)
text(.1,.99,"model 5: c=.79",col="blue")
text(.1,.94,"model 1: c=.75",col="pink")
text(.1,.89,"chance:  c=.50",col="black")
lines(c(0,1),c(0,1), lty=2)
legend("bottomright",
        legend=c("math + ses + schyp + ses*schtyp", "math","chance"),
        col=c("blue","pink","black"), lty=c(1,1,2), lwd=2)

More predictors and problem of multicolinearity

summary(logit.mod6  <- glm(program ~ math + ses + sctyp + ses*sctyp + ses*math + math*sctyp + math*ses*sctyp, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses + sctyp + ses * sctyp + ses * 
##     math + math * sctyp + math * ses * sctyp, family = binomial, 
##     data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1257  -0.8892   0.2191   0.8954   2.1101  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.820981   1.910324  -3.571 0.000356 ***
## math             0.114402   0.037364   3.062 0.002200 ** 
## ses              0.751389   0.899646   0.835 0.403602    
## sctyp2          -0.290924  11.257039  -0.026 0.979382    
## ses:sctyp2       0.525944   5.134676   0.102 0.918415    
## math:ses        -0.007153   0.017197  -0.416 0.677467    
## math:sctyp2     -0.017047   0.227843  -0.075 0.940360    
## math:ses:sctyp2  0.013286   0.104381   0.127 0.898716    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 659.21  on 592  degrees of freedom
## AIC: 675.21
## 
## Number of Fisher Scoring iterations: 6

center all variables

hsb$cmath <- scale(hsb$math,center=TRUE,scale=FALSE)
hsb$cses <- scale(hsb$ses,center=TRUE,scale=FALSE)
hsb$sctyp <- as.numeric(hsb$sctyp)
hsb$csctyp <- scale(hsb$sctyp,center=TRUE,scale=FALSE)

summary(logit.mod7 <- glm(program ~ cmath + cses + csctyp + cses*csctyp + cses*cmath + cmath*csctyp +cmath*cses*csctyp, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ cmath + cses + csctyp + cses * csctyp + 
##     cses * cmath + cmath * csctyp + cmath * cses * csctyp, family = binomial, 
##     data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1257  -0.8892   0.2191   0.8954   2.1101  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.090215   0.098616   0.915 0.360290    
## cmath              0.101395   0.012566   8.069 7.08e-16 ***
## cses               0.570854   0.156184   3.655 0.000257 ***
## csctyp             1.301414   0.317330   4.101 4.11e-05 ***
## cses:csctyp        1.214812   0.613601   1.980 0.047725 *  
## cmath:cses        -0.005071   0.021691  -0.234 0.815147    
## cmath:csctyp       0.010035   0.047429   0.212 0.832442    
## cmath:cses:csctyp  0.013286   0.104381   0.127 0.898716    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 659.21  on 592  degrees of freedom
## AIC: 675.21
## 
## Number of Fisher Scoring iterations: 6
summary(logit.mod8 <-  glm(program ~ math + ses + sctyp + ses*sctyp + rdg + sci + wrtg + civ, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses + sctyp + ses * sctyp + rdg + 
##     sci + wrtg + civ, family = binomial, data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2309  -0.8324   0.1797   0.8307   2.1526  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.41108    1.41007  -4.547 5.45e-06 ***
## math         0.06638    0.01624   4.088 4.35e-05 ***
## ses         -0.89589    0.60236  -1.487  0.13694    
## sctyp       -1.14500    1.13485  -1.009  0.31300    
## rdg          0.04145    0.01557   2.662  0.00778 ** 
## sci         -0.03295    0.01488  -2.215  0.02679 *  
## wrtg         0.01379    0.01425   0.968  0.33296    
## civ          0.04113    0.01318   3.122  0.00180 ** 
## ses:sctyp    1.22596    0.54124   2.265  0.02351 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 629.52  on 591  degrees of freedom
## AIC: 647.52
## 
## Number of Fisher Scoring iterations: 5

Examine correlations among achievement tests

var.names <- c("math","rdg","sci","wrtg","civ")
var.data <- hsb[var.names]
cor.mat <- cor(var.data,method="pearson")
round(cor.mat,2)
##      math  rdg  sci wrtg  civ
## math 1.00 0.68 0.65 0.63 0.53
## rdg  0.68 1.00 0.69 0.63 0.59
## sci  0.65 0.69 1.00 0.57 0.52
## wrtg 0.63 0.63 0.57 1.00 0.59
## civ  0.53 0.59 0.52 0.59 1.00
# Note eigen-vectors can be reflected (i.e., multiply by -1)
eigen(cor.mat)
## eigen() decomposition
## $values
## [1] 3.4351681 0.5285769 0.4125608 0.3297962 0.2938980
## 
## $vectors
##            [,1]       [,2]       [,3]        [,4]        [,5]
## [1,] -0.4570127 -0.2941759 -0.2688896 -0.76713446  0.20929173
## [2,] -0.4697851 -0.1837444  0.2013959  0.05109347 -0.83807465
## [3,] -0.4472481 -0.4684827  0.4051036  0.43430444  0.47724613
## [4,] -0.4444552  0.2427263 -0.7384324  0.44293682  0.04547653
## [5,] -0.4157765  0.7754395  0.4216003 -0.15517991  0.15490600
hoslem.test(logit.mod5$y, fitted(logit.mod5), g=10)
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  logit.mod5$y, fitted(logit.mod5)
## X-squared = 7.0278, df = 8, p-value = 0.5336

qq plot residuals

Using Pearson (adjusted standardized residuals

resid <- rstandard(logit.mod5, pearson=TRUE)
qqnorm(resid,
     main="QQ plot of  Pearson  Residuals",
       col="blue")
lines(resid,resid)

ROC and concordance

Concordance indices

Cstat(logit.mod1,resp=hsb$program)
## [1] 0.750556
Cstat(logit.mod2,resp=hsb$program)
## [1] 0.7687856
Cstat(logit.mod3,resp=hsb$program)
## [1] 0.7679294
Cstat(logit.mod4,resp=hsb$program)
## [1] 0.7867873
Cstat(logit.mod5,resp=hsb$program)
## [1] 0.7888888

For ROC w/ multivariate logitistic regression

roc.data5 <-roc(logit.mod5$y , logit.mod5$fitted.values,ci=T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
names(roc.data5)
##  [1] "percent"            "sensitivities"      "specificities"     
##  [4] "thresholds"         "direction"          "cases"             
##  [7] "controls"           "fun.sesp"           "auc"               
## [10] "call"               "original.predictor" "original.response" 
## [13] "predictor"          "response"           "levels"            
## [16] "ci"
plot((1-roc.data5$specificities),roc.data5$sensitivities,
      type='l',
      lwd=2,
      col="blue",
     main="ROC for Models 5, 1 and chance",
     xlab="1-sensitivity (false positives)",
     ylab="Specificity (true positives)"
     )
roc.data1 <- roc(logit.mod1$y, logit.mod1$fitted.values,ci=T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
lines((1-roc.data1$specificities), roc.data1$sensitivities, col="pink", lwd=2)
text(.1,.99,"model 5: c=.79",col="blue")
text(.1,.94,"model 1: c=.75",col="pink")
text(.1,.89,"chance:  c=.50",col="black")
lines(c(0,1),c(0,1), lty=2)
legend("bottomright",
        legend=c("math + ses + schyp + ses*schtyp", "math","chance"),
        col=c("blue","pink","black"), lty=c(1,1,2), lwd=2)

Even more More predictors and problem of multicolinearity

summary(logit.mod6  <- glm(program ~ math + ses + sctyp + ses*sctyp + ses*math + math*sctyp + math*ses*sctyp, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses + sctyp + ses * sctyp + ses * 
##     math + math * sctyp + math * ses * sctyp, family = binomial, 
##     data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1257  -0.8892   0.2191   0.8954   2.1101  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -6.53006   11.73324  -0.557    0.578
## math            0.13145    0.23686   0.555    0.579
## ses             0.22544    5.36591   0.042    0.966
## sctyp          -0.29092   11.25704  -0.026    0.979
## ses:sctyp       0.52594    5.13468   0.102    0.918
## math:ses       -0.02044    0.10855  -0.188    0.851
## math:sctyp     -0.01705    0.22784  -0.075    0.940
## math:ses:sctyp  0.01329    0.10438   0.127    0.899
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 659.21  on 592  degrees of freedom
## AIC: 675.21
## 
## Number of Fisher Scoring iterations: 6
# center all variables
hsb$cmath <- scale(hsb$math,center=TRUE,scale=FALSE)
hsb$cses <- scale(hsb$ses,center=TRUE,scale=FALSE)
hsb$sctyp <- as.numeric(hsb$sctyp)
hsb$csctyp <- scale(hsb$sctyp,center=TRUE,scale=FALSE)

summary(logit.mod7 <- glm(program ~ cmath + cses + csctyp + cses*csctyp + cses*cmath + cmath*csctyp +cmath*cses*csctyp, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ cmath + cses + csctyp + cses * csctyp + 
##     cses * cmath + cmath * csctyp + cmath * cses * csctyp, family = binomial, 
##     data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1257  -0.8892   0.2191   0.8954   2.1101  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.090215   0.098616   0.915 0.360290    
## cmath              0.101395   0.012566   8.069 7.08e-16 ***
## cses               0.570854   0.156184   3.655 0.000257 ***
## csctyp             1.301414   0.317330   4.101 4.11e-05 ***
## cses:csctyp        1.214812   0.613601   1.980 0.047725 *  
## cmath:cses        -0.005071   0.021691  -0.234 0.815147    
## cmath:csctyp       0.010035   0.047429   0.212 0.832442    
## cmath:cses:csctyp  0.013286   0.104381   0.127 0.898716    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 659.21  on 592  degrees of freedom
## AIC: 675.21
## 
## Number of Fisher Scoring iterations: 6
hsb$sctyp <- as.factor(hsb$sctyp)
summary(logit.mod8 <-  glm(program ~ math + ses + sctyp + ses*sctyp + rdg + sci + wrtg + civ, data=hsb,family = binomial)) 
## 
## Call:
## glm(formula = program ~ math + ses + sctyp + ses * sctyp + rdg + 
##     sci + wrtg + civ, family = binomial, data = hsb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2309  -0.8324   0.1797   0.8307   2.1526  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -7.55608    0.75424 -10.018  < 2e-16 ***
## math         0.06638    0.01624   4.088 4.35e-05 ***
## ses          0.33006    0.15326   2.154  0.03127 *  
## sctyp2      -1.14500    1.13485  -1.009  0.31300    
## rdg          0.04145    0.01557   2.662  0.00778 ** 
## sci         -0.03295    0.01488  -2.215  0.02679 *  
## wrtg         0.01379    0.01425   0.968  0.33296    
## civ          0.04113    0.01318   3.122  0.00180 ** 
## ses:sctyp2   1.22596    0.54124   2.265  0.02351 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 831.35  on 599  degrees of freedom
## Residual deviance: 629.52  on 591  degrees of freedom
## AIC: 647.52
## 
## Number of Fisher Scoring iterations: 5
screenreg(list(logit.mod1,logit.mod5, logit.mod8),
          custom.model.names=c("Model 1", "Model 5", "Model 8"))
## 
## =====================================================
##                 Model 1      Model 5      Model 8    
## -----------------------------------------------------
## (Intercept)       -5.59 ***    -6.11 ***    -7.56 ***
##                   (0.58)       (0.63)       (0.75)   
## math               0.11 ***     0.10 ***     0.07 ***
##                   (0.01)       (0.01)       (0.02)   
## ses                             0.38 *       0.33 *  
##                                (0.15)       (0.15)   
## sctyp2                         -1.13        -1.15    
##                                (1.12)       (1.13)   
## ses:sctyp2                      1.19 *       1.23 *  
##                                (0.53)       (0.54)   
## rdg                                          0.04 ** 
##                                             (0.02)   
## sci                                         -0.03 *  
##                                             (0.01)   
## wrtg                                         0.01    
##                                             (0.01)   
## civ                                          0.04 ** 
##                                             (0.01)   
## -----------------------------------------------------
## AIC              713.27       669.44       647.52    
## BIC              722.07       691.43       687.10    
## Log Likelihood  -354.64      -329.72      -314.76    
## Deviance         709.27       659.44       629.52    
## Num. obs.        600          600          600       
## =====================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
# Examine correlations among achievement tests
var.names <- c("math","rdg","sci","wrtg","civ")
var.data <- hsb[var.names]
cor.mat <- cor(var.data,method="pearson")
round(cor.mat,2)
##      math  rdg  sci wrtg  civ
## math 1.00 0.68 0.65 0.63 0.53
## rdg  0.68 1.00 0.69 0.63 0.59
## sci  0.65 0.69 1.00 0.57 0.52
## wrtg 0.63 0.63 0.57 1.00 0.59
## civ  0.53 0.59 0.52 0.59 1.00
# Note eigen-vectors can be reflected (i.e., multiply by -1)
eigen(cor.mat)
## eigen() decomposition
## $values
## [1] 3.4351681 0.5285769 0.4125608 0.3297962 0.2938980
## 
## $vectors
##            [,1]       [,2]       [,3]        [,4]        [,5]
## [1,] -0.4570127 -0.2941759 -0.2688896 -0.76713446  0.20929173
## [2,] -0.4697851 -0.1837444  0.2013959  0.05109347 -0.83807465
## [3,] -0.4472481 -0.4684827  0.4051036  0.43430444  0.47724613
## [4,] -0.4444552  0.2427263 -0.7384324  0.44293682  0.04547653
## [5,] -0.4157765  0.7754395  0.4216003 -0.15517991  0.15490600