Introduction

Many item response models can be fit as multilevel logistic regression models. In this document, the Rasch model is fit to five of the 11 vocabulary items from the 2004 General Social Survey (GSS). The sample size is large (N = 1155), which makes the long-format data file very long, and the model takes a while to run in JAGS. You could take a sub-sample of the data if you want to run JAGS without tying up your computer for a long time.

I will fit the model using maximum likelihood (MLE) as well as Bayesian estimation. For the Bayesian fits I use rjags, runjags, and jagsUI, the latter two with parallel processing. The times they take vary dramatically.

Set Up

The libraries

library(lme4)
library(rjags)
library(runjags)
library(jagsUI)
library(coda)
library(nlme)

And now for the data, which are in a long format.

setwd("D:/Dropbox/edps 590BAY/Lectures/8 Multilevel models")
   
vocab <- read.table("vocab_data_long.txt",header=TRUE)

# Keep the id, the response, and the five item indicator variables
vo5 <- as.data.frame(cbind(vocab$id,vocab$y,vocab$A,vocab$B,vocab$C,vocab$D,vocab$E))
names(vo5) <- c("id","y","A","B","C","D","E")

# Each respondent has 10 rows in the long file; keep only the first 5,
# which are the rows for items A-E
N <- length(unique(vo5$id))
indicator <- c(rep(1,5), rep(0,5))
vo5$row.to.use <- rep(indicator, N)
vo5 <- subset(vo5, row.to.use==1)

head(vo5,n=15)
##    id y A B C D E row.to.use
## 1   1 1 1 0 0 0 0          1
## 2   1 1 0 1 0 0 0          1
## 3   1 1 0 0 1 0 0          1
## 4   1 1 0 0 0 1 0          1
## 5   1 1 0 0 0 0 1          1
## 11  2 1 1 0 0 0 0          1
## 12  2 1 0 1 0 0 0          1
## 13  2 1 0 0 1 0 0          1
## 14  2 1 0 0 0 1 0          1
## 15  2 1 0 0 0 0 1          1
## 21  3 1 1 0 0 0 0          1
## 22  3 1 0 1 0 0 0          1
## 23  3 0 0 0 1 0 0          1
## 24  3 1 0 0 0 1 0          1
## 25  3 1 0 0 0 0 1          1
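
As a quick sanity check, every id should now contribute exactly five rows:

# Tabulate the number of rows per id; all counts should be 5
table(table(vo5$id))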

MLE of Rasch as Multilevel Logistic Regression

Using glmer with the Laplace approximation (the default):

rasch1 <- glmer(y ~ -1 + A + B + C + D + E + (1 | id),data=vo5, family=binomial)
summary(rasch1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: y ~ -1 + A + B + C + D + E + (1 | id)
##    Data: vo5
## 
##      AIC      BIC   logLik deviance df.resid 
##   4081.3   4121.3  -2034.7   4069.3     5769 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.2228  0.0701  0.1541  0.2888  6.2255 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 2.395    1.548   
## Number of obs: 5775, groups:  id, 1155
## 
## Fixed effects:
##   Estimate Std. Error z value Pr(>|z|)    
## A   2.2535     0.1168   19.29   <2e-16 ***
## B   3.6689     0.1669   21.98   <2e-16 ***
## C  -1.6021     0.1020  -15.70   <2e-16 ***
## D   3.8825     0.1775   21.88   <2e-16 ***
## E   2.4120     0.1210   19.93   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##   A      B      C      D     
## B  0.370                     
## C -0.057 -0.115              
## D  0.358  0.343 -0.118       
## E  0.410  0.372 -0.067  0.360
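
A note on interpretation: with eta[i] = theta[id[i]] + (item effect), the fixed effects are item "easiness" parameters, so larger values mean easier items. The Rasch difficulties on the usual scale are their negatives:

# Rasch difficulties (the theta at which P(correct) = .5) are the
# negatives of the estimated easiness parameters
-fixef(rasch1)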

Or using adaptive Gauss-Hermite quadrature with 10 quadrature points:

rasch2 <- glmer(y ~ -1 + A + B + C + D + E + (1 | id),data=vo5, family=binomial,  nAGQ=10) 
summary(rasch2)
## Generalized linear mixed model fit by maximum likelihood (Adaptive
##   Gauss-Hermite Quadrature, nAGQ = 10) [glmerMod]
##  Family: binomial  ( logit )
## Formula: y ~ -1 + A + B + C + D + E + (1 | id)
##    Data: vo5
## 
##      AIC      BIC   logLik deviance df.resid 
##   4092.5   4132.5  -2040.3   4080.5     5769 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.1120  0.0730  0.1567  0.2968  5.8319 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 2.306    1.518   
## Number of obs: 5775, groups:  id, 1155
## 
## Fixed effects:
##   Estimate Std. Error z value Pr(>|z|)    
## A  2.20170    0.11521   19.11   <2e-16 ***
## B  3.64010    0.16642   21.87   <2e-16 ***
## C -1.52370    0.09883  -15.42   <2e-16 ***
## D  3.85657    0.17702   21.79   <2e-16 ***
## E  2.36273    0.11962   19.75   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##   A      B      C      D     
## B  0.349                     
## C -0.016 -0.082              
## D  0.339  0.331 -0.086       
## E  0.384  0.353 -0.027  0.342
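
The Laplace and adaptive quadrature estimates are close; a side-by-side comparison makes this easy to see:

# Compare the fixed-effect estimates from the two algorithms
cbind(Laplace=fixef(rasch1), AGQ10=fixef(rasch2))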

Bayesian Multilevel Logistic Regression

We need a data list,

dataList <- list(
            id=vo5$id,
            y=vo5$y,
            A=vo5$A,
            B=vo5$B,
            C=vo5$C,
            D=vo5$D,
            E=vo5$E,
            n=length(vo5$y),
            Nid=length(unique(vo5$id))
            )

And the model, which will be used throughout this document:

logreg1 <-"model {
    for (i in 1:n) {
        y[i] ~ dbern(p[i])
        p[i] <- 1/(1 + exp(-eta[i]))   
        eta[i] <- theta[id[i]] + ba*A[i] + bb*B[i] + bc*C[i] + bd*D[i] + be*E[i]
    }
    for (j in 1:Nid) {
        theta[j] ~ dnorm(0,ptau)
    } 
        ptau ~ dgamma(0.01,0.01)
        tau <- 1/sqrt(ptau)
        
        ba ~ dnorm(0,1/1000)
        bb ~ dnorm(0,1/1000)
        bc ~ dnorm(0,1/1000)
        bd ~ dnorm(0,1/1000)
        be ~ dnorm(0,1/1000)
        }"
writeLines(logreg1,con="logreg1.txt")       
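
A note on the priors: dnorm() in JAGS is parameterized by the precision, so dnorm(0, 1/1000) is a normal with variance 1000, and ptau is the precision of the thetas (tau is their standard deviation). To see what the dgamma(0.01, 0.01) prior on ptau implies for tau, here is a quick simulation sketch:

# Simulate the prior induced on tau = 1/sqrt(ptau) by ptau ~ dgamma(0.01, 0.01)
set.seed(333)
prior.ptau <- rgamma(50000, shape=0.01, rate=0.01)
prior.tau <- 1/sqrt(prior.ptau)
quantile(prior.tau, c(.025, .50, .975))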

These starting values will be used for all of the Bayesian runs.

start1<- list("ba"=2,"bb"=3.0,"bc"=-1.5, "bd"=3.0, "be"=2.0,
         .RNG.name="base::Wichmann-Hill", .RNG.seed=333) 
start2<- list("ba"=rnorm(1,0,4),"bb"=rnorm(1,0,4),"bc"=rnorm(1,0,4), "bd"=rnorm(1,0,4), "be"=rnorm(1,0,4),
        .RNG.name="base::Marsaglia-Multicarry", .RNG.seed=24)
start3<- list("ba"=rnorm(1,1,4),"bb"=rnorm(1,1,4),"bc"=rnorm(1,1,4), "bd"=rnorm(1,0,4), "be"=rnorm(1,0,4),
            .RNG.name="base::Mersenne-Twister", .RNG.seed=2000)
start4<- list("ba"=rnorm(1,-1,4),"bb"=rnorm(1,-1,4),"bc"=rnorm(1,1,4), "bd"=rnorm(1,1,4), "be"=rnorm(1,1,4),
        .RNG.name="base::Super-Duper", .RNG.seed=67)

start <- list(start1,start2,start3,start4)

rjags

How long does this take when using rjags:

start.time <- Sys.time()
logreg1.rjags <- jags.model(file="logreg1.txt",
                           data=dataList,
                           inits=start,
                           n.chains=4,
                           n.adapt=500)

Samples.rjags <- coda.samples(logreg1.rjags,
                              variable.names=c("ba","bb", "bc", "bd", "be","ptau"),
                              n.iter=10000
                              )
end.time <- Sys.time()
end.time - start.time

Time difference of 1.236624 hours on my home desktop.
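
Since coda.samples() returns a coda mcmc.list, the diagnostics illustrated for jagsUI below apply directly to Samples.rjags; for example:

# Samples.rjags is an mcmc.list, so coda functions apply directly
gelman.diag(Samples.rjags)
summary(Samples.rjags)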

runjags

Sampling using the parallel processing option in runjags:

start.time <- Sys.time()
logreg1.runjags <- run.jags(model=logreg1,      
                   method="parallel",  
                   monitor=c("ba","bb", "bc", "bd", "be","ptau"),
                   data=dataList,
                   sample=10000,                  
                   n.chains=4,
                   adapt=500,
                   inits=start)
end.time <- Sys.time()
end.time - start.time
 
#plot(logreg1.runjags)
#add.summary(logreg1.runjags)

Time difference of 41.04933 mins on my home computer.
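
If you want to use the coda diagnostics on the runjags results, the fitted object can be converted to an mcmc.list (a sketch, assuming the run above has completed):

# Convert the runjags object to a coda mcmc.list
samples.runjags <- as.mcmc.list(logreg1.runjags)
gelman.diag(samples.runjags)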

jagsUI

Let's see if this is faster: jagsUI with the parallel option.

start.time <- Sys.time()
logreg1.jagsui <- jags(model.file="logreg1.txt",
                       data=dataList,
                       inits=start,
                       parameters.to.save=c("ba","bb", "bc", "bd", "be","ptau","theta"),
                       parallel=TRUE,
                       n.iter=10000,
                       n.burnin=500,
                       n.chains=4)
## 
## Processing function input....... 
## 
## Done. 
##  
## Beginning parallel processing using 4 cores. Console output will be suppressed.
## 
## Parallel processing completed.
## 
## Calculating statistics....... 
## 
## Done.
end.time <- Sys.time()
end.time - start.time
## Time difference of 19.42533 mins

Note the time: jagsUI with parallel processing was the fastest of the three (about 19 minutes, versus 41 minutes for runjags and 1.24 hours for rjags).

names(logreg1.jagsui)
##  [1] "sims.list"   "mean"        "sd"          "q2.5"        "q25"        
##  [6] "q50"         "q75"         "q97.5"       "overlap0"    "f"          
## [11] "Rhat"        "n.eff"       "pD"          "DIC"         "summary"    
## [16] "samples"     "modfile"     "model"       "parameters"  "mcmc.info"  
## [21] "run.date"    "parallel"    "bugs.format" "calc.DIC"

Some diagnostics and a look at the results:

densplot(logreg1.jagsui$samples[,1:6])

traceplot(logreg1.jagsui$samples[,1:6])

gelman.diag(logreg1.jagsui$samples[,1:6])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## ba         1.00       1.00
## bb         1.00       1.00
## bc         1.00       1.00
## bd         1.00       1.00
## be         1.00       1.01
## ptau       1.01       1.02
## 
## Multivariate psrf
## 
## 1
gelman.plot(logreg1.jagsui$samples[,1:6])

geweke.diag(logreg1.jagsui$samples[,1:6], frac1=.10, frac2=.50)
## [[1]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##      ba      bb      bc      bd      be    ptau 
## -0.7401 -1.4453  0.8356 -1.4486 -0.8427  0.8532 
## 
## 
## [[2]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##      ba      bb      bc      bd      be    ptau 
## -0.1282 -0.2653  0.2363  0.3234  0.1619  0.2563 
## 
## 
## [[3]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##      ba      bb      bc      bd      be    ptau 
## -0.7563 -0.3158  0.5881 -0.6347 -0.1961  0.7404 
## 
## 
## [[4]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##       ba       bb       bc       bd       be     ptau 
##  0.14020  0.60218  0.39572  0.05174  0.26806 -0.20128
geweke.plot(logreg1.jagsui$samples[,1:6], frac1=.10, frac2=.50)

autocorr.plot(logreg1.jagsui$samples[,1:6])

HPDinterval(logreg1.jagsui$samples[,1:6])
## [[1]]
##          lower      upper
## ba    1.982599  2.4371467
## bb    3.331069  3.9929713
## bc   -1.714603 -1.3267142
## bd    3.514825  4.2058788
## be    2.143011  2.6082227
## ptau  0.345739  0.5460484
## attr(,"Probability")
## [1] 0.95
## 
## [[2]]
##           lower      upper
## ba    1.9831209  2.4425735
## bb    3.3090116  3.9559166
## bc   -1.7065796 -1.3216395
## bd    3.5197016  4.2126832
## be    2.1239931  2.5886886
## ptau  0.3451329  0.5543917
## attr(,"Probability")
## [1] 0.95
## 
## [[3]]
##           lower      upper
## ba    1.9743593  2.4369120
## bb    3.3321285  3.9801633
## bc   -1.7366980 -1.3396034
## bd    3.5209769  4.2258321
## be    2.1289127  2.6026968
## ptau  0.3329045  0.5343095
## attr(,"Probability")
## [1] 0.95
## 
## [[4]]
##          lower      upper
## ba    1.985213  2.4279747
## bb    3.340465  3.9878304
## bc   -1.712420 -1.3287899
## bd    3.535642  4.2108181
## be    2.133687  2.6010685
## ptau  0.338771  0.5359314
## attr(,"Probability")
## [1] 0.95
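
HPDinterval() returns one interval per chain. If you want a single set of intervals based on all four chains combined, pool the draws first (a minimal sketch using the objects created above):

# Pool the four chains into one matrix, then compute HPD intervals once
pooled <- as.mcmc(do.call(rbind, logreg1.jagsui$samples)[, 1:6])
HPDinterval(pooled)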
(results <- logreg1.jagsui$summary[1:10,])
##                mean         sd       2.5%        25%        50%        75%
## ba        2.2043112 0.11610494  1.9798658  2.1255198  2.2028800  2.2816640
## bb        3.6481513 0.16675415  3.3321410  3.5345136  3.6451076  3.7574873
## bc       -1.5253342 0.09944009 -1.7249497 -1.5917248 -1.5239611 -1.4576758
## bd        3.8636370 0.17720012  3.5258895  3.7423414  3.8601314  3.9800770
## be        2.3657936 0.11955792  2.1364537  2.2849753  2.3636275  2.4442204
## ptau      0.4376149 0.05262294  0.3448268  0.4003559  0.4338834  0.4713256
## theta[1]  1.4379482 1.17754790 -0.8037551  0.6402975  1.4135100  2.2022491
## theta[2]  1.4273310 1.19262704 -0.8595527  0.6125480  1.3971239  2.2150123
## theta[3]  0.1319675 1.10582945 -1.9521247 -0.6317047  0.1087527  0.8720199
## theta[4]  0.1314300 1.10067381 -1.9457904 -0.6246794  0.1106263  0.8611415
##               97.5%      Rhat n.eff overlap0         f
## ba        2.4362898 1.0009266  2711        0 1.0000000
## bb        3.9861102 1.0010656  2273        0 1.0000000
## bc       -1.3334704 1.0010378  2557        0 1.0000000
## bd        4.2208673 1.0013915  1866        0 1.0000000
## be        2.6064717 1.0018692  1309        0 1.0000000
## ptau      0.5498018 1.0050583   524        0 1.0000000
## theta[1]  3.8369850 1.0000116 38000        1 0.8906316
## theta[2]  3.8327655 1.0001349 15573        1 0.8862368
## theta[3]  2.3396563 0.9999843 38000        1 0.5385526
## theta[4]  2.3478239 1.0002273 15761        1 0.5404737

Item Characteristic Curves

theta <- seq(from=-5, to=5, by=.1)

# Posterior means of the item parameters
bA <- results[1,1]
bB <- results[2,1]
bC <- results[3,1]
bD <- results[4,1]
bE <- results[5,1]

# P(correct) for each item over the grid of theta values
icc <- matrix(NA, nrow=length(theta), ncol=6)
for (i in 1:length(theta)) {
  icc[i,1] <- theta[i]
  icc[i,2] <- 1/(1 + exp(-(theta[i] + bA)))
  icc[i,3] <- 1/(1 + exp(-(theta[i] + bB)))
  icc[i,4] <- 1/(1 + exp(-(theta[i] + bC)))
  icc[i,5] <- 1/(1 + exp(-(theta[i] + bD)))
  icc[i,6] <- 1/(1 + exp(-(theta[i] + bE)))
}

par(mfrow=c(1,1))
plot(theta, icc[,2], type="l", col="blue", lwd=2, cex=2,
     xlab="Theta",
     ylab="P(response=correct)",
     main="5 items from GSS Vocabulary")
lines(theta,icc[,3], col="red", lwd=2)
lines(theta,icc[,4], col="purple", lwd=2)
lines(theta,icc[,5], col="black", lwd=2)
lines(theta,icc[,6], col="cyan", lwd=2)
legend(4.0, 0.4, legend=c("A","B","C","D","E"), cex=1.5,
       col=c("blue","red","purple","black","cyan"),
       lty=c(1,1,1,1,1))

Do Items B and D have the same ICC?

samples <- logreg1.jagsui$samples
samps <- rbind(samples[[1]], samples[[2]], samples[[3]], samples[[4]])
bA.post <- samps[,1]
bB.post <- samps[,2]
bC.post <- samps[,3]
bD.post <- samps[,4]
bE.post <- samps[,5]

The test can be based on the item difficulty parameters alone because all of the items have the same slope (i.e., 1):

diffBD <- bB.post - bD.post
hist(diffBD, main="Difference between Item B and D difficulties")

quantile(diffBD, c(.025, .975))
##       2.5%      97.5% 
## -0.6089143  0.1755641
mean(bD.post>bB.post)
## [1] 0.8606053

Do Items A and E have the same ICC?

diff.AE <- bA.post - bE.post
hist(diff.AE, main="Difference between Item A and E difficulties", xlab="bA-bE")

quantile(diff.AE, c(.025, .975))
##        2.5%       97.5% 
## -0.41892156  0.09297176
mean(bE.post > bA.post)   # posterior probability that item E's parameter exceeds item A's

Do items B & D differ from A & E?

contrast <- (bB.post + bD.post)/2 - (bA.post + bE.post)/2
hist(contrast, main="Contrast: mean(B & D) - mean(A & E)")

quantile(contrast, c(.025, .975))
##     2.5%    97.5% 
## 1.232108 1.714595
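
The same draws give the posterior probability that the contrast is positive:

# Posterior probability that the average of (B, D) exceeds the average of (A, E)
mean(contrast > 0)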

Posterior Distributions of Theta

We need to get the theta samples (one set for each of the 1155 respondents).

theta.post <- samps[, 7:1161]  # nsamples x nrespondents

# Some of the respondents' posterior distributions (these can also be
# obtained from plot(logreg1.jagsui))
par(mfrow=c(2,2))
hist(theta.post[,1])
hist(theta.post[,2])
hist(theta.post[,500])
hist(theta.post[,1155])

# Posterior mean (EAP estimate) of theta for each respondent
theta.respond <- colMeans(theta.post)

# Mean of theta across respondents for each posterior draw
theta.sample <- rowMeans(theta.post)
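
The EAP estimates can themselves be plotted; for example, a histogram of the 1155 posterior means (a minimal sketch using the objects above):

# Distribution of the EAP (posterior mean) estimates across respondents
par(mfrow=c(1,1))
hist(theta.respond, breaks=30, main="EAP estimates of theta", xlab="theta")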