Introduction

The R covered here reproduced mini-study on impact of distribution of \(U_j\)s on results. This corresponds to the R script title which is given below.

The function that simulates empty/null HLM where distribution of \(U_{oj}\) is a mixture of two normal distributions. There are a number of things that you can change easliy.

After the function is are some examples of it's use.

As written is simulates 4 cases by varying \(\sigma^2\): 1, 9, 25 and 100

You can change values of \(\sigma^2\).

Set Up

Load lmer & lmerTest

library(lme4)
## Loading required package: Matrix
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step

Set working directory

setwd("D:\\Documents\\Dropbox\\edps587\\lectures\\7 inference2")

Read in data

hsb <- read.table("hsball.txt",header=TRUE)

A quick look at the data

head(hsb)
##     id minority female    ses mathach size sector pracad disclim himinty
## 1 1224        0      1 -1.528   5.876  842      0   0.35   1.597       0
## 2 1224        0      1 -0.588  19.708  842      0   0.35   1.597       0
## 3 1224        0      0 -0.528  20.349  842      0   0.35   1.597       0
## 4 1224        0      0 -0.668   8.781  842      0   0.35   1.597       0
## 5 1224        0      0 -0.158  17.898  842      0   0.35   1.597       0
## 6 1224        0      0  0.022   4.583  842      0   0.35   1.597       0
##   meanses  cSES
## 1  -0.428 -1.10
## 2  -0.428 -0.16
## 3  -0.428 -0.10
## 4  -0.428 -0.24
## 5  -0.428  0.27
## 6  -0.428  0.45

Function to simulate very simple HLM

The function is named 'simmix' and takes as input the following:

You can change the input to see how even a little violation messes up parameter estimates of \(\tau_{00}\).

The definition of the function

simmix <- function(N, nj, s, tau, inmu) {
   k <- 1                                # index for micro
   Uoj <- matrix( ,nrow=N,ncol=1)        # initalize matrix for U_0j
   yij <- matrix( ,nrow=N*nj,ncol=1)     # initalize matrix for y_ij
   cut  <- runif(N,min=0,max=1)          # draw uniform cut-points
   for(j in 1:N){
     mu <- inmu                          # read in value of mu
     if (cut[j]<0.5) {                   # use cut-point
        mu <-  -inmu                     # if <0.5, then mu=-inmu
     }                                     # i.e., draw of other distribution
     Uoj[j,1] <- rnorm(1,mean=mu,sd=tau) # draw random U_oj
     id <- j                             # index for macro
     for (i in 1:nj){ 
       yij[k,1]  <- Uoj[j,1] + rnorm(1,mean=0,sd=s) # compute y_ij
       k <- k+1                          # up-date index for mirco
     }
   }
   id <- as.data.frame(rep(1:N,each=nj))   # finishing up

   simdata <- cbind(k,id,as.data.frame(yij))
   names(simdata) <- c("index", "id", "yij")
   simdata <<- simdata

   Uoj <- as.data.frame(Uoj)
   names(Uoj) <- c("Uoj")
   Uoj <<- Uoj
}

Examples

All of these examples set \(N=1000\) groups, \(n_j=5\), \(\tau_{00}=1\) and \(\mu= \pm 2\).

First level of \(\tau_{00}\)

N <- 1000
nj <- 5
t <- 1
mu <- 2

What is varied in this set is the value of the within standard deviation

s <- 1

You can change any of the above, but you might also want to change the titles in the graphs below to correspond to your input values.

Simulate data, graph distribution of \(Y_{ij}\) and \(U_{0j}\)and fit HLM:

simmix(N=N ,nj=nj, s=s, tau=t, inmu=mu)
simdata1 <-simdata
hist(simdata1$yij,breaks=15,xlab=expression(Y_ij), main=expression(paste("model.1: ", sigma^2, "=1,",  tau, "=1, ICC=0.500") ) ) 

Uoj1 <-Uoj 
hist(Uoj1$Uoj,breaks=15,xlab=expression(U_0j), main=expression(paste("model.1: ",sigma^2,"=1,",  tau, "=1, ICC=0.500") )  )

model.1 <- lmer(yij ~ 1 +(1 | id), simdata1,REML=FALSE)
summary(model.1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: yij ~ 1 + (1 | id)
##    Data: simdata1
## 
##      AIC      BIC   logLik deviance df.resid 
##  17402.6  17422.1  -8698.3  17396.6     4997 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05897 -0.61522 -0.00584  0.61741  2.96176 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 4.8796   2.2090  
##  Residual             0.9932   0.9966  
## Number of obs: 5000, groups:  id, 1000
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept) 3.244e-02  7.126e-02 1.000e+03   0.455    0.649

Change \(\sigma^2=1\) to \(\sigma^2=9\),

simmix(N=1000, nj=5, s=3, tau=t, inmu=2)
simdata2 <- simdata
hist(simdata2$yij,breaks=15,xlab=expression(y_ij),main=expression(paste("model.2: ",sigma^2,"=9 ,",  tau, "=1,   ICC=0.100") ))

Uoj2 <-Uoj
hist(Uoj2$Uoj,breaks=15,xlab=expression(U_0j),main=expression(paste("model.2: ",sigma^2,"=9 ",  tau, "=1,   ICC=0.100") ))

model.2 <- lmer(yij ~ 1 +(1 | id), simdata2,REML=FALSE)
summary(model.2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: yij ~ 1 + (1 | id)
##    Data: simdata2
## 
##      AIC      BIC   logLik deviance df.resid 
##  26558.5  26578.0 -13276.2  26552.5     4997 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5882 -0.6215 -0.0081  0.6094  3.4747 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 5.288    2.300   
##  Residual             9.013    3.002   
## Number of obs: 5000, groups:  id, 1000
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)   -0.07787    0.08421  999.99996  -0.925    0.355

Changing \(\sigma^2=25\),

simmix(N=1000, nj=5, s=5, tau=t, inmu=2)
simdata3 <- simdata
hist(simdata3$yij,breaks=15,xlab=expression(y_ij),main=expression(paste("model.3: ",sigma^2,"=25,",  tau,"=1, ICC=0.0385") ) )

Uoj3 <-Uoj
hist(Uoj3$Uoj,breaks=15,xlab=expression(U_0j),main=expression(paste("model.3: ",sigma^2,"=25,",  tau,"=1, ICC=0.0385") ) )

model.3 <- lmer(yij ~ 1 +(1 | id), simdata3,REML=FALSE)
summary(model.3)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: yij ~ 1 + (1 | id)
##    Data: simdata3
## 
##      AIC      BIC   logLik deviance df.resid 
##  31056.9  31076.4 -15525.4  31050.9     4997 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4511 -0.6498 -0.0003  0.6386  3.5152 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept)  5.075   2.253   
##  Residual             25.372   5.037   
## Number of obs: 5000, groups:  id, 1000
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept) 3.621e-02  1.008e-01 1.000e+03   0.359    0.719

...and now \(\sigma^2=100\),

simmix(N=1000,nj=5,s=10,tau=t,inmu=2)
simdata4 <- simdata
hist(simdata4$yij,breaks=15,xlab=expression(y_ij),main=expression(paste("model.4: ",sigma^2,"=100,", tau,"=1, ICC=0.0099") ) )

Uoj4 <-Uoj
hist(Uoj4$Uoj,breaks=15,xlab=expression(U_0j),main=expression(paste("model.4: ",sigma^2,"=100,", tau,"=1, ICC=0.0099") ) )

model.4 <- lmer(yij ~ 1 +(1 | id), simdata4,REML=FALSE)
summary(model.4)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: yij ~ 1 + (1 | id)
##    Data: simdata4
## 
##      AIC      BIC   logLik deviance df.resid 
##  37360.0  37379.5 -18677.0  37354.0     4997 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4436 -0.6585  0.0004  0.6626  3.4348 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept)  3.438   1.854   
##  Residual             99.591   9.980   
## Number of obs: 5000, groups:  id, 1000
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)    0.1401     0.1528 1000.0000   0.916     0.36

Let look at histograms of all historgrams of estimated \(\hat{U}_{ij}\) in a single figure. Note that to get the \(\hat{U}_{ij}\)s we need to first get them by using coef(model.x), which produces a "mer" object of random effects

par(mfrow=c(2,2))

random1.uoj <- coef(model.1)$id[,"(Intercept)"]
hist(random1.uoj, main=expression(paste("model.1: ", sigma^2, "=1 ",  tau, "=1,   ICC=0.500") ) )

random2.uoj <- coef(model.2)$id[,"(Intercept)"]
hist(random2.uoj,main=expression(paste("model.2: ",sigma^2,"=9,",  tau, "=1,   ICC=0.100") ))

random3.uoj <- coef(model.3)$id[,"(Intercept)" ]
hist(random3.uoj,main=expression(paste("model.3: ",sigma^2,"=25,",  tau,"=1, ICC=0.0385") ))

random4.uoj<- coef(model.4)$id[,"(Intercept)" ]
hist(random4.uoj,main=expression(paste("model.4: ",sigma^2,"=100,", tau,"=1, ICC=0.0099") ))

Another way to look at these distribution and how close they are to normal is to use qqplots.

par(mfrow=c(2,2))

plot(ranef(model.1),main=expression(paste("model.1: ",sigma^2,"=1,",  tau, "=1, ICC=0.500") ) )
## $id

plot(ranef(model.2),main=expression(paste("model.2: ",sigma^2,"=9,",  tau, "=1, ICC=0.100") ) )
## $id

plot(ranef(model.3),main=expression(paste("model.3: ",sigma^2,"=25,",  tau,"=1, ICC=0.0385") ) )
## $id

plot(ranef(model.4),main=expression(paste("model.4: ",sigma^2,"=100,", tau,"=1, ICC=0.0099") ) )
## $id