Overview

This document mostly shows plots of random effects and how extract them.

Setup

Libraries

library(lme4)
## Loading required package: Matrix
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(HLMdiag)
## Warning: package 'HLMdiag' was built under R version 4.0.5
## 
## Attaching package: 'HLMdiag'
## The following object is masked from 'package:stats':
## 
##     covratio

Read in data

We will use data previously created so that we can skip creating various variables.

setwd("D:\\Dropbox\\edps587\\lectures\\7 inference2")
#setwd("C:\\Users\\cja\\Dropbox\\edps587\\lectures\\7 inference2")

hsb <- read.table("hsball.txt",header=TRUE)

# Number of schools will come in handy
N <- length(unique(hsb$id))

Model we’ll Work with

summary( model.1 <- lmer (mathach ~ cSES + (1 |id),data=hsb,REML=FALSE) )
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: mathach ~ cSES + (1 | id)
##    Data: hsb
## 
##      AIC      BIC   logLik deviance df.resid 
##  46728.4  46755.9 -23360.2  46720.4     7181 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09692 -0.73196  0.01945  0.75739  2.91423 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept)  8.612   2.935   
##  Residual             37.005   6.083   
## Number of obs: 7185, groups:  id, 160
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   12.6494     0.2437  157.7181   51.90   <2e-16 ***
## cSES           2.1912     0.1086 7023.0025   20.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## cSES 0.003

Graph of EBLUPS by school +/- 1 sd

Step 1: get setimates of random effects

Extract random effects, which yields a “mer” object

ranU <- ranef(model.1)
class(ranU)
## [1] "ranef.mer"

Step 2: change from mer to data.frame

Change class of “ranU” from mer to data.frame

df.1 <- as.data.frame(ranU)
head(df.1)
##   grpvar        term  grp    condval    condsd
## 1     id (Intercept) 1224 -2.6753498 0.8493480
## 2     id (Intercept) 1288  0.7470468 1.1238806
## 3     id (Intercept) 1296 -4.5904308 0.8411883
## 4     id (Intercept) 1308  2.9791973 1.2341147
## 5     id (Intercept) 1317  0.4962998 0.8411883
## 6     id (Intercept) 1358 -1.2514746 1.0387323

“df” is short for “data frame”

Step 3: sort runU

Sort by ranU, which is “condval” (conditional value)

df <- df.1[order(df.1$condval),]

Step 4: integers to replace school id

Add integers for school id’s, which is needed to plot them in Step 5.

df$xgrp <- seq(1:N)

Setp 5: Draw graph

plot(df$xgrp, df$condval, type="p",                      # plot points
     main="Model 1: Random intercepts +/- 1 std error",  # title
       xlab="Schools (sort by Uo)",                       # label for horizontal axis
       ylab="EBLUP of Uo",                                # label for vertical axis
       col="blue")

arrows(df$xgrp, df$condval - df$condsd,       # origin
       df$xgrp, df$condval + df$condsd,       # terminus
         length=0.00)

To draw in the sd errors, I drew arrows but set the arrowhead to size 0. This is a hack solution, but it does the trick.

Histogram of U_oj

hist(df.1$condval, 
     main="Estimated U_oj from Simple Model",
     col="gray",
     xlab="Emprical Bayes Estimates of U_oj")

QQ plot of Us with 95% error bars

Simple QQplot for assessing normality of the \(U_{0j}\)s,

qqdata <- qqnorm(df$condval)

A QQplot but with error bars. This uses the same hack to get error bars in the plot.

plot(qqdata$x,qqdata$y,
     type="p", pch=19, col="red",
     main="Normal QQ plot of Uoj with 95% Confidence Bands",
     xlab="Theoretical Value",
     ylab="Estimated Uoj (EBLUPS)",
     ylim=c(-8,8))
     
arrows(qqdata$x, qqdata$y - 1.96*df$condsd, 
       qqdata$x, qqdata$y + 1.96*df$condsd, 
         length=0.00, col="gray")

qqline(df$condval, col="steelblue")

School Specific Regressions

These fitted regressions for each school,

Step 1: Merge random effects and orginal data set

df$id <- df$grp
hsb <- merge(hsb,df,by.x="id")

Step 2: Compute population average and school specific (conditional) values

Population average values

# This fixed effects
gamma <- as.data.frame(fixef(model.1))

hsb$g00 <- gamma[1,1]
hsb$g10 <- gamma[2,1]

# Population average model
hsb$ypop.ave     <- hsb$g00 + hsb$g10 * hsb$cSES

School specific regressions

# School specific (conditionally) regressions
hsb$yschool.spec <- hsb$g00 + hsb$g10 * hsb$cSES + hsb$condval

hsb$yschool.0 <- hsb$g00 +  hsb$condval

step 3: Plot population average

plot(hsb$cSES, hsb$ypop.ave, type='l',
     ylim=c(0,25),
     xlab="Centered SES",
     ylab="Fitted Math Scores",
       main="Population Average model")

step 4

Plot separate regression line for each school. These are the conditional regressions, conditional on the \(\hat{U}_{0j}\)s.

# I create an empty plot to set up frame for graph
plot(hsb$cSES,hsb$ypop.ave,type='n',
     ylim=c(0,25),
     xlab="Centered SES",
       ylab="Fitted Math Scores",
       main="School Specific Regressions")
    hsb <- hsb[order(hsb$xgrp),]
    
# Now are in the subject specific regressions
for (i in (1:N)) {
   sub <- hsb[ which(hsb$xgrp==i),]
   lines(sub$cSES, sub$yschool.spec,type="l", col=i)
}

Note that I used the index i to draw a different color for each regression

step 4b: Plot conditional regressions for 10 schools

Looking at all 160 regression lines is hard to see what’s going on. So we can take a subset of them to plot.

# Create empty frame 
plot(hsb$cSES,hsb$yschool.spec,type='n',
     ylim=c(0,25),
     xlab="Centered SES",
       ylab="Fitted Math Scores",
       main="School Specific Regressions, just 10 of them")

# Add in the 10 subgroups
hsb <- hsb[order(hsb$xgrp),]
for (i in (50:60)) {
   sub <- hsb[ which(hsb$xgrp==i),]
   lines(sub$cSES, sub$yschool.spec, type="l", col=i)
}

It would be better to take a random sample of schools, but in this case the schools are not ordered by any of the variables in the data set.

QQPlot of population average values

qqnorm(hsb$ypop.ave, pch=1, main="QQplot of Population Averages")

qqline(hsb$ypop.ave, col = "steelblue", lwd = 2)

Histogram from HLMdiag

Get the residuals from HLMresid—this is not working even after changing depricated comdend.

res1 <- hlm_resid(model.1, level=1, type="EB", standardize=TRUE)
head(res1)

Now draw histogram and overlay a normal distribution.

h<- hist(res1,breaks=15,density=20,main="Standardize Residuals")  # draw historgram
xfit <- seq(-40, 40, length=50)                  # sets range & number of quantiles
yfit <- dnorm(xfit, mean=0, sd=sd(res1))         # should be normal
yfit <- yfit*diff(h$mids[1:2])*length(res1)      # use mid-points 
lines(xfit, yfit, col='blue', lwd=2)             # draws normal

Alternative, use what we already found,

## ----hist_normal2----------------------------------------------------
h<- hist((hsb$math-hsb$yschool.spec),
    breaks=15, col="lightblue1",
    main="Raw (conditional) Residuals: Rij")

Note that the values on x-axis are conditional residuals, conditional on \(U_{0j}\)s.

Random Intercept and Slope Models

Simple one

model.2 <- lmer (mathach ~ cSES + (1 +cSES |id),data=hsb,REML=FALSE)
summary(model.2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: mathach ~ cSES + (1 + cSES | id)
##    Data: hsb
## 
##      AIC      BIC   logLik deviance df.resid 
##  46723.0  46764.3 -23355.5  46711.0     7179 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09688 -0.73200  0.01796  0.75445  2.89906 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  id       (Intercept)  8.6213  2.9362       
##           cSES         0.6783  0.8236   0.02
##  Residual             36.7000  6.0581       
## Number of obs: 7185, groups:  id, 160
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  12.6494     0.2437 157.7206   51.90   <2e-16 ***
## cSES          2.1931     0.1278 156.1499   17.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## cSES 0.012

More Complex One

For the model that I wanted to fit, I had trouble with singularity. To deal with this I multipled cSES by 1/10 to move the variance random effect for cSES away from 0. This worked.

To interpret effects in scale of cSES divide by 10

  •     tau_cses/10 or tau_cses^2/100
  •     gamma_cses/10

Clumsy but it works

hsb$mover <- rep(0.10, nrow(hsb))    
x <- as.matrix(hsb$cSES,  ncol=1)
y <- as.matrix(hsb$mover, ncol=1)
hsb$xyses <- x*y

Using re-scaled SES, fit model 3

model.3 <- lmer(mathach ~ 1 + xyses + female + meanses + (1 + xyses + female | id),
             control = lmerControl(optimizer ="Nelder_Mead"), 
             data=hsb, REML=FALSE)
summary(model.3)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: mathach ~ 1 + xyses + female + meanses + (1 + xyses + female |  
##     id)
##    Data: hsb
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
##      AIC      BIC   logLik deviance df.resid 
##  46516.5  46592.1 -23247.2  46494.5     7174 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1768 -0.7314  0.0288  0.7560  2.9538 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  id       (Intercept)  3.1846  1.7845              
##           xyses       59.7193  7.7278   -0.06      
##           female       0.9107  0.9543   -0.60 -0.33
##  Residual             36.3569  6.0297              
## Number of obs: 7185, groups:  id, 160
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  13.2600     0.1829 141.7547  72.488  < 2e-16 ***
## xyses        21.5345     1.2545 155.8441  17.165  < 2e-16 ***
## female       -1.1791     0.1816 126.1119  -6.492 1.76e-09 ***
## meanses       5.7693     0.3414 151.0396  16.898  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) xyses  female
## xyses   -0.039              
## female  -0.635 -0.033       
## meanses -0.018  0.003  0.038

Extract random effects for this model

U <- as.data.frame(ranef(model.3))

Uoj <- U[which(U$term=="(Intercept)"),]
U1j <- U[which(U$term=="xyses"),]
U2j <- U[which(U$term=="female"),]

QQplot U’s from Complex Model

The reference lines were drawn in using a bit of trail and error

par(mfrow=c(2,2))          # a 2 x 2 matrix of plots

qqnorm(Uoj$condval,main="QQplot: Random Intercepts")
qqline(Uoj$condval,col="red")

qqnorm(U1j$condval,main="QQplot: Random cSES Effects")
qqline(U1j$condval,col="red")

qqnorm(U2j$condval,main="QQplot: Random female Effects")
qqline(U2j$condval,col="red")


x<- seq(0:100)
y<- seq(0:100)
plot(x,y, type="n", axes=FALSE, ylab="", xlab="")
text(55,100,"mathach ~ 1", cex=.9)
text(55,80," +cSES+female", cex=.9)
text(55,60,"+ meanses + (1",  cex=.9)
text(55,40,"+cSES+female|id)", cex=.9)
text(60,20,"Deviance=46494.5", cex=.9)
text(60,3,"AIC=46516.5", cex=.9)

#text(30,50,"tau_00 = 3.18")
#text(30.40,"Variance for cses...")
#text(30,30,"tau_11 = 1.49")
#text(30,20,
## ----qqplot_complex_model--------------------------------------------
par(mfrow=c(2,2))          # a 2 x 2 matrix of plots

qqnorm(Uoj$condval,main="Random Intercepts")
qqline(Uoj$condval,col="red")

qqnorm(U1j$condval,main="Random cSES Effects")
qqline(U1j$condval,col="red")

qqnorm(U2j$condval,main="Random female Effects")
qqline(U2j$condval,col="red")


x<- seq(0:100)
y<- seq(0:100)
plot(x,y, type="n", axes=FALSE, ylab="", xlab="")
text(55,100,"mathach ~ 1", cex=.95)
text(55,80," +cSES+female", cex=.95)
text(55,60,"+ meanses + (1",  cex=.95)
text(55,40,"+cSES+female|id)", cex=.95)
text(60,20,"Deviance=46494.5", cex=.95)
text(60,3,"AIC=46516.5", cex=.95)

#text(30,50,"tau_00 = 3.18")
#text(30.40,"Variance for cses...")
#text(30,30,"tau_11 = 1.49")
#text(30,20,"tau_22 = 0.91")
#text(30,10,"sigma^2=36.36")

I could not fit all text that I would have liked, but I did fit enough so that I would remember what model I’m looking at

Bi-variate Scatter Plot

# lattice plot of all random effects 
plot(ranef(model.3), main="Estimated Random Effects",col="blue")
## $id

Shrinkage

The variance of \(\hat{U}_{kj}\)s are less than \(\tau^{}\)s. The \(\hat{U}_{kj}\) are shrunken toward the over all mean; that is, \[\hat{x}_{k+} \le |\hat{U}_{jk}| \le |\hat{x}_{kj}|\].

The implication is the \[\mbox{var}(\hat{U}_{kj}) < \hat{\tau}_{k}^2\]

As an illustration of shrinkage

Compare this to \(\hat{\tau}_1^2= 8.612\)

# model.1
var(df.1$condval)
## [1] 7.85327

Compare these to \(\hat{\tau}_{k}^2= 3.1846\), \(59.719\), and 0.9107$

#   model.3
var(Uoj$condval)
## [1] 2.214774
var(U1j$condval)
## [1] 16.21794
var(U2j$condval)
## [1] 0.2462087

Plot showing the shrinkage

This requires computing the grand mean for math, the school means for math, and the school specific math scores. The latter are conditional on the random effects.

For this graphic, I will use model.1 and set cSES=0 (i.e., use yschool.0 here)

Grand mean

hsb$grand <- rep(mean(hsb$mathach), nrow(hsb))

Get mean math for each group

grp.mean <- aggregate(hsb$mathach, by=list(hsb$id), FUN=mean)
names(grp.mean) <- c("id","mean.math")
hsb <- merge(hsb, grp.mean, by="id")

For a nice graph

The data need to be ordered; this will do nicely

hsb <- hsb[order(hsb$yschool.0),]

We also need an id for this ordering,

nj <- as.data.frame(table(hsb$id))
names(nj) <- c("id","count")
nj$integer <- 1:160
j <- 1
for (i in 1:160) {
  for (k in 1:nj$count[i]) {
     hsb$id.int[j] <- i
     j <- j + 1
  }  
}   

The graph

par(mfrow=c(1,1))

plot(hsb$id.int, hsb$grand, type="l", col="steelblue",
     main="Shrinkgage: Model 1 when cses=0",
     xlab="School ID",
     ylab="Math",
     cex=1.4,
     lwd=2)

lines(hsb$id.int, hsb$yschool.0, col="red", type='l')
lines(hsb$id.int, hsb$mean.math, col="green",type="l")
legend(1, 18, legend=c("Grand Mean", "Conditional", "School Mean"),
       col=c("steelblue","red", "green"), lty=1:2, cex=1.1)