Many item response models can be fit as multilevel logistic regression models. In this document, the Rasch model will be fit to 2004 GSS vocabulary items (only five out of 11 of them). The sample size is large (~1155), which makes the long file very long. It takes time to run the model in Jags.You could just take a sub-sample of data if want to run jags and not tie up your computer for a long time.
I will fit model using MLE as well as Bayesian. For Bayesian I fit the model just rjags, runjags and jagsUI, the latter 2 using parallel processing. The time they take varies dramatically.
The libraries
library(lme4)
library(rjags)
library(runjags)
library(jagsUI)
library(coda)
library(nlme)
And now for the data, which are in a long format.
setwd("D:/Dropbox/edps 590BAY/Lectures/8 Multilevel models")
vocab <- read.table("vocab_data_long.txt",header=TRUE)
vo5 <- as.data.frame(cbind(vocab$id,vocab$y,vocab$A,vocab$B,vocab$C,vocab$D,vocab$E))
names(vo5) <- c("id","y","A","B","C","D","E")
N <- length(unique(vo5$id))
indicator <- c(rep(1,5), rep(0,5))
vo5$row.to.use <- rep(indicator, N)
fixed <- subset(vo5,vo5$row.to.use==1)
vo5 <- fixed
head(vo5,n=15)
## id y A B C D E row.to.use
## 1 1 1 1 0 0 0 0 1
## 2 1 1 0 1 0 0 0 1
## 3 1 1 0 0 1 0 0 1
## 4 1 1 0 0 0 1 0 1
## 5 1 1 0 0 0 0 1 1
## 11 2 1 1 0 0 0 0 1
## 12 2 1 0 1 0 0 0 1
## 13 2 1 0 0 1 0 0 1
## 14 2 1 0 0 0 1 0 1
## 15 2 1 0 0 0 0 1 1
## 21 3 1 1 0 0 0 0 1
## 22 3 1 0 1 0 0 0 1
## 23 3 0 0 0 1 0 0 1
## 24 3 1 0 0 0 1 0 1
## 25 3 1 0 0 0 0 1 1
Using glmer with laplace algorithm,
rasch1 <- glmer(y ~ -1 + A + B + C + D + E + (1 | id),data=vo5, family=binomial)
summary(rasch1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: y ~ -1 + A + B + C + D + E + (1 | id)
## Data: vo5
##
## AIC BIC logLik deviance df.resid
## 4081.3 4121.3 -2034.7 4069.3 5769
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.2228 0.0701 0.1541 0.2888 6.2255
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 2.395 1.548
## Number of obs: 5775, groups: id, 1155
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## A 2.2535 0.1168 19.29 <2e-16 ***
## B 3.6689 0.1669 21.98 <2e-16 ***
## C -1.6021 0.1020 -15.70 <2e-16 ***
## D 3.8825 0.1775 21.88 <2e-16 ***
## E 2.4120 0.1210 19.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## A B C D
## B 0.370
## C -0.057 -0.115
## D 0.358 0.343 -0.118
## E 0.410 0.372 -0.067 0.360
or using adaptive quadrature
rasch2 <- glmer(y ~ -1 + A + B + C + D + E + (1 | id),data=vo5, family=binomial, nAGQ=10)
summary(rasch2)
## Generalized linear mixed model fit by maximum likelihood (Adaptive
## Gauss-Hermite Quadrature, nAGQ = 10) [glmerMod]
## Family: binomial ( logit )
## Formula: y ~ -1 + A + B + C + D + E + (1 | id)
## Data: vo5
##
## AIC BIC logLik deviance df.resid
## 4092.5 4132.5 -2040.3 4080.5 5769
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.1120 0.0730 0.1567 0.2968 5.8319
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 2.306 1.518
## Number of obs: 5775, groups: id, 1155
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## A 2.20170 0.11521 19.11 <2e-16 ***
## B 3.64010 0.16642 21.87 <2e-16 ***
## C -1.52370 0.09883 -15.42 <2e-16 ***
## D 3.85657 0.17702 21.79 <2e-16 ***
## E 2.36273 0.11962 19.75 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## A B C D
## B 0.349
## C -0.016 -0.082
## D 0.339 0.331 -0.086
## E 0.384 0.353 -0.027 0.342
We need a data list,
dataList <- list(
id=vo5$id,
y=vo5$y,
A=vo5$A,
B=vo5$B,
C=vo5$C,
D=vo5$D,
E=vo5$E,
n=length(vo5$y),
Nid=length(unique(vo5$id))
)
And the model, which will be used throughout this document
logreg1 <-"model {
for (i in 1:n) {
y[i] ~ dbern(p[i])
p[i] <- 1/(1 + exp(-eta[i]))
eta[i] <- theta[id[i]] + ba*A[i] + bb*B[i] + bc*C[i] + bd*D[i] + be*E[i]
}
for (j in 1:Nid) {
theta[j] ~ dnorm(0,ptau)
}
ptau ~ dgamma(0.01,0.01)
tau <- 1/sqrt(ptau)
ba ~ dnorm(0,1/1000)
bb ~ dnorm(0,1/1000)
bc ~ dnorm(0,1/1000)
bd ~ dnorm(0,1/1000)
be ~ dnorm(0,1/1000)
}"
writeLines(logreg1,con="logreg1.txt")
These starting values will be used for all.
start1<- list("ba"=2,"bb"=3.0,"bc"=-1.5, "bd"=3.0, "be"=2.0,
.RNG.name="base::Wichmann-Hill", .RNG.seed=333)
start2<- list("ba"=rnorm(1,0,4),"bb"=rnorm(1,0,4),"bc"=rnorm(1,0,4), "bd"=rnorm(1,0,4), "be"=rnorm(1,0,4),
.RNG.name="base::Marsaglia-Multicarry", .RNG.seed=24)
start3<- list("ba"=rnorm(1,1,4),"bb"=rnorm(1,1,4),"bc"=rnorm(1,1,4), "bd"=rnorm(1,0,4), "be"=rnorm(1,0,4),
.RNG.name="base::Mersenne-Twister", .RNG.seed=2000)
start4<- list("ba"=rnorm(1,-1,4),"bb"=rnorm(1,-1,4),"bc"=rnorm(1,1,4), "bd"=rnorm(1,1,4), "be"=rnorm(1,1,4),
.RNG.name="base::Super-Duper", .RNG.seed=67)
start <- list(start1,start2,start3,start4)
How long does this take when using rjags:
start_time <- Sys.time()
logreg1.rjags <- jags.model(file="logreg1.txt",
data=dataList,
inits=start,
n.chains=4,
n.adapt=500)
Samples.rjags <- coda.samples(logreg1.rjags,
data=dataList,
variable.names=c("ba","bb", "bc", "bd", "be","ptau"),
n.iter=10000
)
end.time <- Sys.time()
end.time - start.time
Time difference of 1.236624 hour on home desktop
Sampling using use parallel processing option in runjags.
start.time <- Sys.time()
logreg1.runjags <- run.jags(model=logreg1,
method="parallel",
monitor=c("ba","bb", "bc", "bd", "be","ptau"),
data=dataList,
sample=10000,
n.chains=4,
adapt=500,
inits=start)
end.time <- Sys.time()
end.time - start.time
#plot(logreg1.runjags)
#add.summary(logreg1.runjags)
Time difference of 41.04933 mins on home computer
Let’s see if this is faster…jagsUI with parallel
start.time <- Sys.time()
logreg1.jagsui <- jags(model.file="logreg1.txt",
data=dataList,
inits=start,
parameters.to.save=c("ba","bb", "bc", "bd", "be","ptau","theta"),
parallel=TRUE,
n.iter=10000,
n.burnin=500,
n.chains=4)
##
## Processing function input.......
##
## Done.
##
## Beginning parallel processing using 4 cores. Console output will be suppressed.
##
## Parallel processing completed.
##
## Calculating statistics.......
##
## Done.
end.time <- Sys.time()
end.time - start.time
## Time difference of 19.42533 mins
Note the time.
names(logreg1.jagsui)
## [1] "sims.list" "mean" "sd" "q2.5" "q25"
## [6] "q50" "q75" "q97.5" "overlap0" "f"
## [11] "Rhat" "n.eff" "pD" "DIC" "summary"
## [16] "samples" "modfile" "model" "parameters" "mcmc.info"
## [21] "run.date" "parallel" "bugs.format" "calc.DIC"
Some diagnostics and look at the results:
densplot(logreg1.jagsui$samples[,1:6])
traceplot(logreg1.jagsui$samples[,1:6])
gelman.diag(logreg1.jagsui$samples[,1:6])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## ba 1.00 1.00
## bb 1.00 1.00
## bc 1.00 1.00
## bd 1.00 1.00
## be 1.00 1.01
## ptau 1.01 1.02
##
## Multivariate psrf
##
## 1
gelman.plot(logreg1.jagsui$samples[,1:6])
geweke.diag(logreg1.jagsui$samples[,1:6], frac1=.10, frac2=.50)
## [[1]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## ba bb bc bd be ptau
## -0.7401 -1.4453 0.8356 -1.4486 -0.8427 0.8532
##
##
## [[2]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## ba bb bc bd be ptau
## -0.1282 -0.2653 0.2363 0.3234 0.1619 0.2563
##
##
## [[3]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## ba bb bc bd be ptau
## -0.7563 -0.3158 0.5881 -0.6347 -0.1961 0.7404
##
##
## [[4]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## ba bb bc bd be ptau
## 0.14020 0.60218 0.39572 0.05174 0.26806 -0.20128
geweke.plot(logreg1.jagsui$samples[,1:6], frac1=.10, frac2=.50)
autocorr.plot(logreg1.jagsui$samples[,1:6])
HPDinterval(logreg1.jagsui$samples[,1:6])
## [[1]]
## lower upper
## ba 1.982599 2.4371467
## bb 3.331069 3.9929713
## bc -1.714603 -1.3267142
## bd 3.514825 4.2058788
## be 2.143011 2.6082227
## ptau 0.345739 0.5460484
## attr(,"Probability")
## [1] 0.95
##
## [[2]]
## lower upper
## ba 1.9831209 2.4425735
## bb 3.3090116 3.9559166
## bc -1.7065796 -1.3216395
## bd 3.5197016 4.2126832
## be 2.1239931 2.5886886
## ptau 0.3451329 0.5543917
## attr(,"Probability")
## [1] 0.95
##
## [[3]]
## lower upper
## ba 1.9743593 2.4369120
## bb 3.3321285 3.9801633
## bc -1.7366980 -1.3396034
## bd 3.5209769 4.2258321
## be 2.1289127 2.6026968
## ptau 0.3329045 0.5343095
## attr(,"Probability")
## [1] 0.95
##
## [[4]]
## lower upper
## ba 1.985213 2.4279747
## bb 3.340465 3.9878304
## bc -1.712420 -1.3287899
## bd 3.535642 4.2108181
## be 2.133687 2.6010685
## ptau 0.338771 0.5359314
## attr(,"Probability")
## [1] 0.95
(results <- logreg1.jagsui$summary[1:10,])
## mean sd 2.5% 25% 50% 75%
## ba 2.2043112 0.11610494 1.9798658 2.1255198 2.2028800 2.2816640
## bb 3.6481513 0.16675415 3.3321410 3.5345136 3.6451076 3.7574873
## bc -1.5253342 0.09944009 -1.7249497 -1.5917248 -1.5239611 -1.4576758
## bd 3.8636370 0.17720012 3.5258895 3.7423414 3.8601314 3.9800770
## be 2.3657936 0.11955792 2.1364537 2.2849753 2.3636275 2.4442204
## ptau 0.4376149 0.05262294 0.3448268 0.4003559 0.4338834 0.4713256
## theta[1] 1.4379482 1.17754790 -0.8037551 0.6402975 1.4135100 2.2022491
## theta[2] 1.4273310 1.19262704 -0.8595527 0.6125480 1.3971239 2.2150123
## theta[3] 0.1319675 1.10582945 -1.9521247 -0.6317047 0.1087527 0.8720199
## theta[4] 0.1314300 1.10067381 -1.9457904 -0.6246794 0.1106263 0.8611415
## 97.5% Rhat n.eff overlap0 f
## ba 2.4362898 1.0009266 2711 0 1.0000000
## bb 3.9861102 1.0010656 2273 0 1.0000000
## bc -1.3334704 1.0010378 2557 0 1.0000000
## bd 4.2208673 1.0013915 1866 0 1.0000000
## be 2.6064717 1.0018692 1309 0 1.0000000
## ptau 0.5498018 1.0050583 524 0 1.0000000
## theta[1] 3.8369850 1.0000116 38000 1 0.8906316
## theta[2] 3.8327655 1.0001349 15573 1 0.8862368
## theta[3] 2.3396563 0.9999843 38000 1 0.5385526
## theta[4] 2.3478239 1.0002273 15761 1 0.5404737
theta <- seq(from= -5, to=5, by=.1)
bA <- results[1,1]
bB <- results[2,1]
bC <- results[3,1]
bD <- results[4,1]
bE <- results[5,1]
icc <- matrix(NA, nrow=length(theta), ncol=6)
for (i in 1:length(theta)) {
icc[i,1] <- theta[i]
icc[i,2] <- 1/(1 + exp(-(theta[i] + bA)))
icc[i,3] <- 1/(1 + exp(-(theta[i] + bB)))
icc[i,4] <- 1/(1 + exp(-(theta[i] + bC)))
icc[i,5] <- 1/(1 + exp(-(theta[i] + bD)))
icc[i,6] <- 1/(1 + exp(-(theta[i] + bE)))
}
par(mfrow=c(1,1))
plot(theta, icc[,2], type="l", col="blue", lwd=2, cex=2,
xlab="Theta",
ylab="P(response=correct)",
main="5 items from GSS Vocabulary")
lines(theta,icc[,3], col="red", lwd=2)
lines(theta,icc[,4], col="purple", lwd=2)
lines(theta,icc[,5], col="black", lwd=2)
lines(theta,icc[,6], col="cyan", lwd=2)
legend(4.0 ,0.4, legend=c("A","B","C","D","E"), cex=1.5,
col=c("blue","red","purple","black","cyan"),
lty=c(1,1,1,1,1))
samples <-logreg1.jagsui$samples
samps <- rbind(samples[[1]],samples[[2]], samples[[3]], samples[[4]])
bA.post <-samps[,1]
bB.post <-samps[,2]
bC.post <-samps[,3]
bD.post <-samps[,4]
bE.post <-samps[,5]
The test can be based only on item difficulty parameter because the items all have the same slope (i.e., 1)
diffBD <- bB.post - bD.post
hist(diffBD, main="Difference between Item B and D difficulties")
quantile(diffBD, c(.025, .975))
## 2.5% 97.5%
## -0.6089143 0.1755641
mean(bD.post>bB.post)
## [1] 0.8606053
diff.AE <- bA.post - bE.post
hist(diff.AE, main="Difference between Item A and E difficulties", xlab="bA-bE")
quantile(diff.AE, c(.025, .975))
## 2.5% 97.5%
## -0.41892156 0.09297176
mean(bE>bA)
## [1] 1
contrast <- (bB.post + bD.post)/2 - (bA.post + bE.post)/2
hist(contrast, main="Contrast: mean(B & D) - mean(A & E)")
quantile(contrast, c(.025, .975))
## 2.5% 97.5%
## 1.232108 1.714595
We need to get the theta samples (one for each of the respondents 1155 respondents).
theta.post <- samps[, 7:1161] # nsamples x nrespondents
# Some of the respondents distributions (can get these from plot(logreg.jagsui))
par(mfrow=c(2,2))
hist(theta.post[,1])
hist(theta.post[,2])
hist(theta.post[, 500])
hist(theta.post[,1155])
# Estimates for each individual
theta.respond <- colMeans(theta.post)
# Estimate for each sample
theta.sample <- rowMeans(theta.post)