This document mostly shows plots of random effects and how to extract them.
library(lme4)
## Loading required package: Matrix
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(HLMdiag)
## Warning: package 'HLMdiag' was built under R version 4.0.5
##
## Attaching package: 'HLMdiag'
## The following object is masked from 'package:stats':
##
## covratio
We will use data previously created so that we can skip creating various variables.
setwd("D:\\Dropbox\\edps587\\lectures\\7 inference2")
#setwd("C:\\Users\\cja\\Dropbox\\edps587\\lectures\\7 inference2")
hsb <- read.table("hsball.txt",header=TRUE)
# Number of schools will come in handy
N <- length(unique(hsb$id))
summary( model.1 <- lmer (mathach ~ cSES + (1 |id),data=hsb,REML=FALSE) )
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mathach ~ cSES + (1 | id)
## Data: hsb
##
## AIC BIC logLik deviance df.resid
## 46728.4 46755.9 -23360.2 46720.4 7181
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.09692 -0.73196 0.01945 0.75739 2.91423
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 8.612 2.935
## Residual 37.005 6.083
## Number of obs: 7185, groups: id, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.6494 0.2437 157.7181 51.90 <2e-16 ***
## cSES 2.1912 0.1086 7023.0025 20.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cSES 0.003
Extract the random effects, which yields a “ranef.mer” object
ranU <- ranef(model.1)
class(ranU)
## [1] "ranef.mer"
Change the class of “ranU” from “ranef.mer” to “data.frame”
df.1 <- as.data.frame(ranU)
head(df.1)
## grpvar term grp condval condsd
## 1 id (Intercept) 1224 -2.6753498 0.8493480
## 2 id (Intercept) 1288 0.7470468 1.1238806
## 3 id (Intercept) 1296 -4.5904308 0.8411883
## 4 id (Intercept) 1308 2.9791973 1.2341147
## 5 id (Intercept) 1317 0.4962998 0.8411883
## 6 id (Intercept) 1358 -1.2514746 1.0387323
“df” is short for “data frame”
Sort by the estimated random effects, which are stored in “condval” (conditional value)
df <- df.1[order(df.1$condval),]
Add integers for the school ids, which are needed to plot them in Step 5.
df$xgrp <- 1:N
plot(df$xgrp, df$condval, type="p", # plot points
main="Model 1: Random intercepts +/- 1 std error", # title
xlab="Schools (sort by Uo)", # label for horizontal axis
ylab="EBLUP of Uo", # label for vertical axis
col="blue")
arrows(df$xgrp, df$condval - df$condsd, # origin
df$xgrp, df$condval + df$condsd, # terminus
length=0.00)
To draw in the standard errors, I drew arrows but set the arrowhead size to 0. This is a hack, but it does the trick.
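If you prefer to avoid the arrowhead hack, base R’s segments() draws the same error bars directly; this sketch adds them to the plot created above.
# Alternative to the zero-length arrows: plain line segments as error bars
segments(df$xgrp, df$condval - df$condsd,
         df$xgrp, df$condval + df$condsd)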
hist(df.1$condval,
main="Estimated U_oj from Simple Model",
col="gray",
xlab="Emprical Bayes Estimates of U_oj")
Simple QQplot for assessing normality of the \(U_{0j}\)s,
qqdata <- qqnorm(df$condval)
A QQplot but with error bars. This uses the same hack to get error bars in the plot.
plot(qqdata$x,qqdata$y,
type="p", pch=19, col="red",
main="Normal QQ plot of Uoj with 95% Confidence Bands",
xlab="Theoretical Value",
ylab="Estimated Uoj (EBLUPS)",
ylim=c(-8,8))
arrows(qqdata$x, qqdata$y - 1.96*df$condsd,
qqdata$x, qqdata$y + 1.96*df$condsd,
length=0.00, col="gray")
qqline(df$condval, col="steelblue")
Next, compute the fitted regressions for each school,
df$id <- df$grp
hsb <- merge(hsb,df,by.x="id")
# The fixed effects
gamma <- as.data.frame(fixef(model.1))
hsb$g00 <- gamma[1,1]
hsb$g10 <- gamma[2,1]
# Population average model
hsb$ypop.ave <- hsb$g00 + hsb$g10 * hsb$cSES
# School-specific (conditional) regressions
hsb$yschool.spec <- hsb$g00 + hsb$g10 * hsb$cSES + hsb$condval
hsb$yschool.0 <- hsb$g00 + hsb$condval
plot(hsb$cSES, hsb$ypop.ave, type='l',
ylim=c(0,25),
xlab="Centered SES",
ylab="Fitted Math Scores",
main="Population Average model")
Plot separate regression line for each school. These are the conditional regressions, conditional on the \(\hat{U}_{0j}\)s.
# Create an empty plot to set up the frame for the graph
plot(hsb$cSES,hsb$ypop.ave,type='n',
ylim=c(0,25),
xlab="Centered SES",
ylab="Fitted Math Scores",
main="School Specific Regressions")
hsb <- hsb[order(hsb$xgrp),]
# Now draw in the school-specific (conditional) regressions
for (i in (1:N)) {
sub <- hsb[ which(hsb$xgrp==i),]
lines(sub$cSES, sub$yschool.spec,type="l", col=i)
}
Note that I used the index i to draw a different color for each regression line.
With all 160 regression lines it is hard to see what’s going on, so we can plot just a subset of them.
# Create empty frame
plot(hsb$cSES,hsb$yschool.spec,type='n',
ylim=c(0,25),
xlab="Centered SES",
ylab="Fitted Math Scores",
main="School Specific Regressions, just 10 of them")
# Add in a subset of schools (numbers 50 through 60)
hsb <- hsb[order(hsb$xgrp),]
for (i in (50:60)) {
sub <- hsb[ which(hsb$xgrp==i),]
lines(sub$cSES, sub$yschool.spec, type="l", col=i)
}
It would be better to take a random sample of schools, but in this case the schools are not ordered by any of the variables in the data set.
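As a sketch, here is one way a random sample of 10 schools could be plotted instead (the seed is arbitrary and only there for reproducibility).
# Empty frame, as before
plot(hsb$cSES, hsb$yschool.spec, type='n',
     ylim=c(0,25),
     xlab="Centered SES",
     ylab="Fitted Math Scores",
     main="School Specific Regressions, a random 10")
# Draw the conditional regressions for a random sample of 10 schools
set.seed(587)
pick <- sample(1:N, 10)
for (i in pick) {
  sub <- hsb[ which(hsb$xgrp==i),]
  lines(sub$cSES, sub$yschool.spec, type="l", col=i)
}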
qqnorm(hsb$ypop.ave, pch=1, main="QQplot of Population Averages")
qqline(hsb$ypop.ave, col = "steelblue", lwd = 2)
Get the residuals using HLMdiag. This is not working even after changing the deprecated command (HLMresid is now hlm_resid).
res1 <- hlm_resid(model.1, level=1, type="EB", standardize=TRUE)
head(res1)
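If hlm_resid() refuses to cooperate, lme4’s own extractor gives conditional (level-1) residuals that could stand in for res1 below; setting scaled=TRUE divides them by \(\hat{\sigma}\).
# Conditional (level-1) residuals straight from lme4
res.raw <- residuals(model.1)                 # y - Xb - Zu
res.std <- residuals(model.1, scaled=TRUE)    # divided by the residual SD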
Now draw histogram and overlay a normal distribution.
h<- hist(res1,breaks=15,density=20,main="Standardize Residuals") # draw historgram
xfit <- seq(-40, 40, length=50) # sets range & number of quantiles
yfit <- dnorm(xfit, mean=0, sd=sd(res1)) # should be normal
yfit <- yfit*diff(h$mids[1:2])*length(res1) # use mid-points
lines(xfit, yfit, col='blue', lwd=2) # draws normal
Alternatively, use what we already found,
h<- hist((hsb$math-hsb$yschool.spec),
breaks=15, col="lightblue1",
main="Raw (conditional) Residuals: Rij")
Note that the values on the x-axis are conditional residuals, conditional on the \(U_{0j}\)s.
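In symbols, matching the yschool.spec computed above, \[ \hat{R}_{ij} = \mathit{mathach}_{ij} - \left( \hat{\gamma}_{00} + \hat{\gamma}_{10}\, \mathit{cSES}_{ij} + \hat{U}_{0j} \right). \]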
model.2 <- lmer (mathach ~ cSES + (1 +cSES |id),data=hsb,REML=FALSE)
summary(model.2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mathach ~ cSES + (1 + cSES | id)
## Data: hsb
##
## AIC BIC logLik deviance df.resid
## 46723.0 46764.3 -23355.5 46711.0 7179
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.09688 -0.73200 0.01796 0.75445 2.89906
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 8.6213 2.9362
## cSES 0.6783 0.8236 0.02
## Residual 36.7000 6.0581
## Number of obs: 7185, groups: id, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.6494 0.2437 157.7206 51.90 <2e-16 ***
## cSES 2.1931 0.1278 156.1499 17.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cSES 0.012
For the model that I wanted to fit, I had trouble with singularity. To deal with this, I multiplied cSES by 1/10 to move the variance of the random effect for cSES away from 0. This worked.
To interpret the effects on the original cSES scale, divide: \(\hat{\tau}_{cSES}/10\) (or \(\hat{\tau}^2_{cSES}/100\)) and \(\hat{\gamma}_{cSES}/10\).
Clumsy, but it works.
hsb$mover <- rep(0.10, nrow(hsb))
x <- as.matrix(hsb$cSES, ncol=1)
y <- as.matrix(hsb$mover, ncol=1)
hsb$xyses <- x*y
Using re-scaled SES, fit model 3
model.3 <- lmer(mathach ~ 1 + xyses + female + meanses + (1 + xyses + female | id),
control = lmerControl(optimizer ="Nelder_Mead"),
data=hsb, REML=FALSE)
summary(model.3)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mathach ~ 1 + xyses + female + meanses + (1 + xyses + female |
## id)
## Data: hsb
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 46516.5 46592.1 -23247.2 46494.5 7174
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1768 -0.7314 0.0288 0.7560 2.9538
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 3.1846 1.7845
## xyses 59.7193 7.7278 -0.06
## female 0.9107 0.9543 -0.60 -0.33
## Residual 36.3569 6.0297
## Number of obs: 7185, groups: id, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.2600 0.1829 141.7547 72.488 < 2e-16 ***
## xyses 21.5345 1.2545 155.8441 17.165 < 2e-16 ***
## female -1.1791 0.1816 126.1119 -6.492 1.76e-09 ***
## meanses 5.7693 0.3414 151.0396 16.898 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) xyses female
## xyses -0.039
## female -0.635 -0.033
## meanses -0.018 0.003 0.038
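Two quick follow-ups on the rescaling, both using values already shown above: isSingular() from lme4 checks whether the fit is on the boundary, and simple division puts the xyses estimates back on the cSES scale.
# Check that the rescaled model is not a singular (boundary) fit
isSingular(model.3)   # FALSE means the fit is away from the boundary
# Back-convert the xyses estimates to the original cSES scale
21.5345/10            # fixed effect, about 2.15
7.7278/10             # random-slope SD, about 0.77
59.7193/100           # random-slope variance, about 0.60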
Extract random effects for this model
U <- as.data.frame(ranef(model.3))
Uoj <- U[which(U$term=="(Intercept)"),]
U1j <- U[which(U$term=="xyses"),]
U2j <- U[which(U$term=="female"),]
The reference lines were drawn in using a bit of trial and error.
par(mfrow=c(2,2)) # a 2 x 2 matrix of plots
qqnorm(Uoj$condval,main="QQplot: Random Intercepts")
qqline(Uoj$condval,col="red")
qqnorm(U1j$condval,main="QQplot: Random cSES Effects")
qqline(U1j$condval,col="red")
qqnorm(U2j$condval,main="QQplot: Random female Effects")
qqline(U2j$condval,col="red")
x<- seq(0:100)
y<- seq(0:100)
plot(x,y, type="n", axes=FALSE, ylab="", xlab="")
text(55,100,"mathach ~ 1", cex=.9)
text(55,80," +cSES+female", cex=.9)
text(55,60,"+ meanses + (1", cex=.9)
text(55,40,"+cSES+female|id)", cex=.9)
text(60,20,"Deviance=46494.5", cex=.9)
text(60,3,"AIC=46516.5", cex=.9)
#text(30,50,"tau_00 = 3.18")
#text(30,40,"Variance for cses...")
#text(30,30,"tau_11 = 1.49")
#text(30,20,"tau_22 = 0.91")
#text(30,10,"sigma^2=36.36")
I could not fit all the text that I would have liked, but I fit enough that I can remember which model I’m looking at.
# lattice plot of all random effects
plot(ranef(model.3), main="Estimated Random Effects",col="blue")
## $id
The variances of the \(\hat{U}_{kj}\)s are less than the corresponding \(\hat{\tau}_{k}^2\)s. The \(\hat{U}_{kj}\) are shrunken toward the overall mean; that is, the EB (conditional) estimates lie between the grand mean and the school-specific estimates.
The implication is that \[\mbox{var}(\hat{U}_{kj}) < \hat{\tau}_{k}^2.\]
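For intuition, in a model with only a random intercept the EB estimate is the OLS deviation times a shrinkage factor (model.1 also includes cSES, so this holds only approximately here): \[ \hat{U}_{0j} \approx \hat{\lambda}_j \left( \bar{Y}_{\cdot j} - \hat{\gamma}_{00} \right), \qquad \hat{\lambda}_j = \frac{\hat{\tau}_0^2}{\hat{\tau}_0^2 + \hat{\sigma}^2/n_j} < 1. \]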
Compare this to \(\hat{\tau}_{0}^2 = 8.612\) from model.1,
# model.1
var(df.1$condval)
## [1] 7.85327
Compare these to \(\hat{\tau}_{0}^2 = 3.1846\), \(\hat{\tau}_{1}^2 = 59.719\), and \(\hat{\tau}_{2}^2 = 0.9107\),
# model.3
var(Uoj$condval)
## [1] 2.214774
var(U1j$condval)
## [1] 16.21794
var(U2j$condval)
## [1] 0.2462087
This requires computing the grand mean for math, the school means for math, and the school specific math scores. The latter are conditional on the random effects.
For this graphic, I will use model.1 and set cSES=0 (i.e., use yschool.0 here)
hsb$grand <- rep(mean(hsb$mathach), nrow(hsb))
grp.mean <- aggregate(hsb$mathach, by=list(hsb$id), FUN=mean)
names(grp.mean) <- c("id","mean.math")
hsb <- merge(hsb, grp.mean, by="id")
The data need to be ordered; this will do nicely
hsb <- hsb[order(hsb$yschool.0),]
We also need an id for this ordering,
nj <- as.data.frame(table(hsb$id))
names(nj) <- c("id","count")
nj$integer <- 1:160
j <- 1
for (i in 1:160) {
for (k in 1:nj$count[i]) {
hsb$id.int[j] <- i
j <- j + 1
}
}
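A more compact way to get the same integer index, assuming hsb is already sorted by yschool.0 so schools appear in contiguous blocks:
# Same result without the double loop: each row gets its school's rank
# in the current (sorted) ordering
hsb$id.int <- match(hsb$id, unique(hsb$id))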
par(mfrow=c(1,1))
plot(hsb$id.int, hsb$grand, type="l", col="steelblue",
main="Shrinkgage: Model 1 when cses=0",
xlab="School ID",
ylab="Math",
cex=1.4,
lwd=2)
lines(hsb$id.int, hsb$yschool.0, col="red", type='l')
lines(hsb$id.int, hsb$mean.math, col="green",type="l")
legend(1, 18, legend=c("Grand Mean", "Conditional", "School Mean"),
       col=c("steelblue","red", "green"), lty=1, cex=1.1)