This document illustrates multiple regression in JAGS using one school from the NELS data. This is an extension of linear regression with one predictor to multiple predictors.

Set up

The packages that will be used

library(ellipse)
library(ggplot2)
library(coda)
library(runjags)
library(rjags)
library(jagsUI)
library(BayesFactor)

set.seed(19843)

Set my working directory and read in and check the data

setwd("C:/Users/cja/Dropbox/edps 590BAY/Lectures/7 Multiple Linear Regression")
nels <- read.table("nels_school_62821.txt",header=TRUE)
head(nels)
##   schid studid sex race homework schtyp  ses paredu math clastruc size urban
## 1 62821      0   1    4        4      4 1.63      6   62        3    3     1
## 2 62821      1   2    4        5      4 0.48      4   68        3    3     1
## 3 62821      3   1    4        5      4 0.69      5   56        3    3     1
## 4 62821      4   2    4        5      4 0.60      4   68        3    3     1
## 5 62821      6   2    4        2      4 0.77      4   61        3    3     1
## 6 62821      7   2    4        2      4 1.19      5   61        3    3     1
##   geograph perminor rati0
## 1        2        3    10
## 2        2        3    10
## 3        2        3    10
## 4        2        3    10
## 5        2        3    10
## 6        2        3    10

Exploratory and Descriptive Statistics

We should always do some exploratory data analysis (e.g., basic descriptive statistics). I put them in a little table for future reference

# Descriptive statistics
ybar<- mean(nels$math)
std <- sd(nels$math)
s2  <- var(nels$math)
n   <- length(nels$math)
descriptive <- c(n,ybar,std,s2)
names(descriptive) <- c("N","Ybar","std","s^2")
descriptive
##         N      Ybar       std       s^2 
## 67.000000 62.820896  5.675373 32.209860
summary(nels$math)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   43.00   60.00   63.00   62.82   67.50   71.00

A histogram of math scores

hist(nels$math, main="NELS Math Scores", xlab="Math Scores", breaks=15)

Since we’re adding more predictor variables we should also compute correlations. The following gets the correlation matrix

# Correlations between potential predictors ---> see www.cookbook-r.com
xs <- data.frame(nels$math,nels$homework,nels$paredu,nels$ses)
ctab <- cor(xs)
round(ctab,2)     # round correlations to 2 decimals for easier viewing
##               nels.math nels.homework nels.paredu nels.ses
## nels.math          1.00          0.33       -0.26    -0.10
## nels.homework      0.33          1.00        0.00     0.04
## nels.paredu       -0.26          0.00        1.00     0.79
## nels.ses          -0.10          0.04        0.79     1.00

Or if you prefer a figure of a correlation matrix

# Figure of correlations --- grey scale
plotcorr(ctab,mar=c(0.1,0.1,0.1,0.1))

I like color, so

# Figures of correlations in color 
colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab")
plotcorr(ctab, col=rgb(colorfun((ctab+1)/2), maxColorValue=255),
         mar = c(0.1, 0.1, 0.1, 0.1))

Another view of bi-variate relationships

pairs(xs, cex=1.5)       

The following takes a closer look at potential predictor variables, starting with how to treat “race”.

# What about race?     
(r.tab <- table(nels$race))
## 
##  1  2  3  4 
##  5  1  1 60
tot <- sum(r.tab)
w <- r.tab[4]

Since 60 out of 67 students are white, I decided to re-code race as a dichotomous variable.

# Dichotomize race into white/not-white
nels$white <- ifelse(nels$race==4, 1, 0)
nels$white <- as.factor(nels$white)

We’ll treat gender as a categorical/factor variable

nels$gender <- as.factor(nels$sex)

Ordinary Least Squares Regression

Just for comparison, I fit a couple of OLS regression models that turn out to be the more interesting ones: one with all predictors and one with a subset of them.

# All predictors
nels$gender <- as.factor(nels$sex)
summary(lm(math~gender + ses + paredu + homework + white, data=nels))
## 
## Call:
## lm(formula = math ~ gender + ses + paredu + homework + white, 
##     data = nels)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.8831  -2.4426   0.3711   3.4205   8.9577 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  70.3048     5.0930  13.804  < 2e-16 ***
## gender2       1.5486     1.3083   1.184  0.24115    
## ses           3.6973     2.5384   1.457  0.15037    
## paredu       -3.0370     1.2020  -2.527  0.01413 *  
## homework      1.0629     0.3746   2.838  0.00616 ** 
## white1       -0.7322     2.2943  -0.319  0.75072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.227 on 61 degrees of freedom
## Multiple R-squared:  0.2161, Adjusted R-squared:  0.1518 
## F-statistic: 3.363 on 5 and 61 DF,  p-value: 0.009523

Ones I think will be important

summary(lm(math~ paredu + homework, data=nels))
## 
## Call:
## lm(formula = math ~ paredu + homework, data = nels)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.6051  -1.8546   0.5524   3.8143   9.2089 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  66.8935     3.6633  18.260  < 2e-16 ***
## paredu       -1.5636     0.6899  -2.266  0.02682 *  
## homework      1.0930     0.3735   2.926  0.00475 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.23 on 64 degrees of freedom
## Multiple R-squared:  0.1766, Adjusted R-squared:  0.1508 
## F-statistic: 6.862 on 2 and 64 DF,  p-value: 0.001995

Bayesian Model 1: all predictors

The data

The data list with all the predictors

dataList <- list(
         y      = nels$math,
         pared  = nels$paredu,
         hmwk   = nels$homework,
         ses    = nels$ses,
         gender = nels$gender,
         white  = nels$white,
         N      = length(nels$math),
         sdY    = sd(nels$math)
         )

The Model

The model with all predictors is basically the same as simple linear regression, but there are just more variables. Rather than just use b1, b2, etc, I added more descriptive names to the coefficients.

model1 = "model { for (i in 1:N){
          y[i] ~ dnorm(mu[i] , precision)
          mu[i] <- b0  + bpared*pared[i] + bhmwk*hmwk[i] + bses*ses[i]+ bsex*gender[i]
                   + bwhite*white[i]        
          }
          b0 ~ dnorm(0 , 1/(100*sdY^2) )   
          bpared ~ dnorm(0 , 1/(100*sdY^2) )   
          bhmwk ~ dnorm(0 , 1/(100*sdY^2) )   
          bses ~ dnorm(0 , 1/(100*sdY^2) )   
          bsex ~ dnorm(0 , 1/(100*sdY^2) )   
          bwhite ~ dnorm(0 , 1/(100*sdY^2) )   
          sigma ~ dunif( 1E-3, 1E+30 )
          precision <- 1/sigma^2           
          } "

writeLines(model1, con="model1.txt")

Initial Values (Starting values)

We need some starting values, one set per chain that I plan to run. I used the sample mean and standard deviation for one set, and random draws from normal or uniform distributions for the rest.

initsList = list(
list("b0"=mean(nels$math), "bpared"=0, "bhmwk"=0, "bses"=0, "bsex"=0, "bwhite"=0, "sigma"=sd(nels$math)),
list("b0"=rnorm(1,50,5),"bpared"=rnorm(1,-2,1),"bhmwk"=rnorm(1,2,1),"bses"=rnorm(1,0,1), "bsex"=rnorm(1,1,1),  "bwhite"=rnorm(1,0,1),"sigma"=sd(nels$math)), 
list("b0"=rnorm(1,50,5), "bpared"=rnorm(1,1,1),"bhmwk"=rnorm(1,1,1), "bses"=rnorm(1,1,1),   "bsex"=rnorm(1,1,1), "bwhite"=rnorm(1,1,1), "sigma"=runif(1,0,10)), 
list("b0"=rnorm(1,50,6), "bpared"=rnorm(1,0,1),"bhmwk"=rnorm(1,0,1), "bses"=rnorm(1,0,1), "bsex"=rnorm(1,0,1),"bwhite"=rnorm(1,0,1),"sigma"=runif(1,0,10)) 
 )

Get some samples

Run simulations of drawing samples from posterior using runjags

mod1.runjags <- run.jags(model=model1, 
    monitor=c("b0","bpared","bhmwk","bses","bsex","bwhite","sigma","dic"),
    data=dataList, 
        n.chains=4,
        inits=initsList)
## module dic loaded
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 10000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 7 variables....
## Finished running the simulation

There were no error messages, but we should still check convergence.

Convergence checks

plot(mod1.runjags)

Without doing anything else there are problems:

  1. Chains for all parameters except sigma are not mixing well
  2. High auto-correlations for b0, bpared, bses, bwhite
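
These impressions can be checked numerically. A minimal sketch using coda (loaded above); as.mcmc.list() converts the runjags object so the usual coda diagnostics can be applied:

mod1.mcmc <- as.mcmc.list(mod1.runjags)   # convert the 4 chains to a coda mcmc.list
gelman.diag(mod1.mcmc)                    # psrf; values near 1 suggest convergence
effectiveSize(mod1.mcmc)                  # effective sample size per parameter
autocorr.diag(mod1.mcmc)                  # auto-correlations at several lags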

Before trying to get better convergence, we will look at the results, which contain further diagnostic information.

print(mod1.runjags)
## 
## JAGS model summary statistics from 40000 samples (chains = 4; adapt+burnin = 5000):
##                                                                               
##        Lower95   Median  Upper95     Mean      SD Mode     MCerr MC%ofSD SSeff
## b0      54.616   67.942    81.87   68.114  7.1335   --   0.52968     7.4   181
## bpared  -5.236  -2.8444 -0.41521  -2.8536  1.2561   --  0.084199     6.7   223
## bhmwk   0.3354   1.0802    1.848   1.0816 0.38505   -- 0.0064281     1.7  3588
## bses   -1.6286   3.3634   8.4533   3.3879  2.5976   --   0.11243     4.3   534
## bsex   -1.0743   1.6074   4.2138    1.597  1.3381   --  0.030102     2.2  1976
## bwhite -5.0253 -0.35544     4.26 -0.38709  2.3953   --   0.11833     4.9   410
## sigma   4.4213   5.3045   6.3426   5.3424 0.49772   -- 0.0045245     0.9 12101
##                      
##          AC.10   psrf
## b0     0.91305  1.008
## bpared 0.89455 1.0071
## bhmwk  0.18888 1.0009
## bses   0.69914 1.0054
## bsex   0.39517 1.0023
## bwhite 0.81539 1.0089
## sigma  0.04252 1.0001
## 
## Model fit assessment:
## DIC = 420.3812
## [PED not available from the stored object]
## Estimated effective number of parameters:  pD = 7.29695
## 
## Total time taken: 5.5 seconds

Adding Iterations

We can draw more samples; however, rather than re-running what we already have, you can use the following. Note that we don’t need a burn-in because we have already done this. Since I bumped up the number of iterations to 200,000, it will take some time (about 30 seconds on my desktop).

mod1.extend <- extend.jags(mod1.runjags,  
                           burnin=0, 
                                       sample=200000)
plot(mod1.extend)
print(mod1.extend)      

Mixing is better, but we still have large auto-correlations.

Thinning

When auto-correlations are large, thinning will often help. Only every 10th sample will be kept. This will increase computation time.

mod1.thin10 <- run.jags(model=model1, 
            monitor=c("b0","bpared","bhmwk","bses","bsex","bwhite","sigma","dic"),
            data=dataList, 
                        n.chains=4, 
                        inits=initsList,
                        sample=100000,
                        thin=10)

plot(mod1.thin10)

gelman.plot(mod1.thin10)

print(mod1.thin10)


Much better.

Auto-run

When you don't want to think about what you're doing and want to let the software make more decisions, you can use autorun.jags

mod1b.runjags <- autorun.jags(model=model1, 
                 monitor=c("b0","bpared","bhmwk","bses","bsex","bwhite","sigma","dic"),
                 data=dataList, 
                               n.chains=4, 
                               inits=initsList,
                               thin=10)
## 
## Auto-run JAGS
## 
## Running a pilot chain...
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 100000 iterations...
## Simulation complete
## Finished running the simulation
## 
## Calculating the Gelman-Rubin statistic for 7 variables....
## The Gelman-Rubin statistic is below 1.05 for all parameters
## 
## Calculating the necessary sample length based on the Raftery and
## Lewis's diagnostic...
## Indicated sample length achieved
## Auto-run JAGS complete
plot(mod1b.runjags)
## Generating plots...

print(mod1b.runjags)
## 
## JAGS model summary statistics from 40000 samples (thin = 10; chains = 4; adapt+burnin = 5000):
##                                                                               
##        Lower95   Median  Upper95     Mean      SD Mode     MCerr MC%ofSD SSeff
## b0       55.21   68.636   81.703   68.572  6.7536   --   0.14351     2.1  2215
## bpared -5.3484  -2.9235 -0.54903  -2.9193  1.2204   --  0.022883     1.9  2844
## bhmwk  0.33087    1.072   1.8331   1.0752 0.38328   -- 0.0023544     0.6 26503
## bses   -1.5544   3.4787   8.6287   3.4907  2.5833   --  0.043872     1.7  3467
## bsex    -1.104   1.5529   4.1637   1.5612  1.3468   -- 0.0092109     0.7 21379
## bwhite -5.0871 -0.49092   4.0475 -0.47795  2.3238   --  0.038452     1.7  3652
## sigma   4.4206    5.299   6.3357   5.3368 0.49652   -- 0.0027603     0.6 32356
##                         
##            AC.100   psrf
## b0        0.33043 1.0007
## bpared    0.26322 1.0009
## bhmwk   0.0019581 1.0002
## bses      0.19621 1.0006
## bsex    -0.017841      1
## bwhite    0.17869 1.0002
## sigma  -0.0017561      1
## 
## Model fit assessment:
## DIC = 420.2259
## [PED not available from the stored object]
## Estimated effective number of parameters:  pD = 7.25751
## 
## Total time taken: 23.6 seconds

Model 2: Refinement

I will drop the non-significant predictors, which means changing the data list, the model, the starting values, and the call to run.jags.

dataList <- list(
         y     = nels$math,
         pared = nels$paredu,
         hmwk  = nels$homework,
         N     = length(nels$math),
         sdY   = sd(nels$math)
         )
model2 = "model {  for (i in 1:N){     
                       y[i] ~ dnorm(mu[i] , precision)
                       mu[i] <- b0  + bpared*pared[i] + bhmwk*hmwk[i]     
                              }
                     b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                     bpared ~ dnorm(0 , 1/(100*sdY^2) )   
                     bhmwk ~ dnorm(0 , 1/(100*sdY^2) )    
                     sigma ~ dunif( 1E-3, 1E+30 )
                     precision <- 1/sigma^2           
              } "
writeLines(model2, con="model2.txt")

initsList = list(list("b0"=mean(nels$math), "bpared"=0, "bhmwk"=0,            "sigma"=sd(nels$math)), 
                 list("b0"=rnorm(1,50,5),   "bpared"=rnorm(1,-2,1), "bhmwk"=rnorm(1,2,1), "sigma"=sd(nels$math)), 
                 list("b0"=rnorm(1,50,5),   "bpared"=rnorm(1,1,1), "bhmwk"=rnorm(1,1,1),  "sigma"=runif(1,0,10)), 
                 list("b0"=rnorm(1,50,6),   "bpared"=rnorm(1,0,1), "bhmwk"=rnorm(1,0,1),  "sigma"=runif(1,0,10)) 
                 )

mod2.runjags <- run.jags(model=model2, 
                monitor=c("b0","bpared","bhmwk","sigma","dic"),
                data=dataList, 
                            n.chains=4, 
                            inits=initsList)
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 10000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 4 variables....
## Finished running the simulation
plot(mod2.runjags)
## Generating plots...

print(mod2.runjags)
## 
## JAGS model summary statistics from 40000 samples (chains = 4; adapt+burnin = 5000):
##                                                                             
##        Lower95  Median  Upper95    Mean      SD Mode     MCerr MC%ofSD SSeff
## b0      59.484  66.685   73.753  66.684  3.6529   --   0.13766     3.8   704
## bpared -2.8482 -1.5355 -0.17882 -1.5298 0.68701   --  0.025103     3.7   749
## bhmwk  0.36139  1.1052   1.8513  1.1039 0.38023   -- 0.0054682     1.4  4835
## sigma   4.4648  5.2948   6.3181   5.333 0.48022   -- 0.0034044     0.7 19897
##                       
##           AC.10   psrf
## b0      0.70072 1.0031
## bpared  0.68834 1.0028
## bhmwk  0.078037 1.0005
## sigma  0.005675      1
## 
## Model fit assessment:
## DIC = 416.9521
## [PED not available from the stored object]
## Estimated effective number of parameters:  pD = 4.08572
## 
## Total time taken: 4.4 seconds

This model looks pretty good. Also, according to the DIC statistics (smaller is better), the simpler model is preferred.
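
As a quick sketch of that comparison, using the DIC values reported in the two print() summaries above:

# DIC values copied from the print() output above (smaller is better)
dic.mod1 <- 420.38    # Model 1: all predictors
dic.mod2 <- 416.95    # Model 2: paredu + homework only
dic.mod1 - dic.mod2   # a positive difference favors the simpler Model 2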

But it could use some thinning

mod2.runjags.thin5 <- run.jags(model=model2, 
                monitor=c("b0","bpared","bhmwk","sigma","dic"),
                data=dataList, 
                            n.chains=4, 
                            inits=initsList,
                            thin=5)
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 50000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 4 variables....
## Finished running the simulation
plot(mod2.runjags.thin5)
## Generating plots...

print(mod2.runjags.thin5)
## 
## JAGS model summary statistics from 40000 samples (thin = 5; chains = 4; adapt+burnin = 5000):
##                                                                            
##        Lower95 Median  Upper95    Mean      SD Mode     MCerr MC%ofSD SSeff
## b0      59.226 66.549   74.022  66.586  3.7555   --  0.063559     1.7  3491
## bpared -2.9273 -1.507 -0.15376 -1.5079 0.70781   --  0.011904     1.7  3535
## bhmwk  0.33824 1.1002   1.8449  1.1004   0.383   -- 0.0025947     0.7 21789
## sigma   4.4186 5.3001   6.2862  5.3349 0.48323   -- 0.0025093     0.5 37085
##                         
##             AC.50   psrf
## b0         0.1787 1.0005
## bpared    0.17577 1.0003
## bhmwk  -0.0072221 1.0003
## sigma   0.0048231 1.0002
## 
## Model fit assessment:
## DIC = 417.1137
## [PED not available from the stored object]
## Estimated effective number of parameters:  pD = 4.14044
## 
## Total time taken: 9.8 seconds

Model 3: Interactions

Since adding interactions often leads to multicollinearity, I centered the predictors before adding the interaction.

nels$chomework <- nels$homework -mean(nels$homework)
nels$cparedu  <-  nels$paredu  -mean(nels$paredu )
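
A quick way to see what centering buys us is to compare the correlation between a predictor and the product (interaction) term before and after centering; a small sketch (the exact values depend on the data):

# Correlation between parents' education and the interaction term
cor(nels$paredu  * nels$homework,  nels$paredu)    # un-centered predictors
cor(nels$cparedu * nels$chomework, nels$cparedu)   # centered predictors (typically much smaller)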

Below is the rest of the code for this model.

dataList <- list(
         y      = nels$math,
         cpared = nels$cparedu,
         chmwk  = nels$chomework,
         N      = length(nels$math),
         sdY    = sd(nels$math)
         )
model3 = "model {  for (i in 1:N){
                       y[i] ~ dnorm(mu[i] , precision)
                       mu[i] <- b0  + bpared*cpared[i] + bhmwk*chmwk[i]  
                                + bintact*cpared[i]*chmwk[i]    
                     }
                     b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                     bpared ~ dnorm(0 , 1/(100*sdY^2) )   
                     bhmwk ~ dnorm(0 , 1/(100*sdY^2) )    
                     bintact ~ dnorm(0 , 1/(100*sdY^2) )    
                     sigma ~ dunif( 1E-3, 1E+30 )
                     precision <- 1/sigma^2           
              } "
writeLines(model3, con="model3.txt")
initsList = list(list("b0"=mean(nels$math), "bpared"=0, "bhmwk"=0, "bintact"=0, "sigma"=sd(nels$math)), 
                 list("b0"=rnorm(1,50,5),   "bpared"=rnorm(1,-2,1), "bhmwk"=rnorm(1,2,1), "bintact"=.3, "sigma"=sd(nels$math)), 
                 list("b0"=rnorm(1,50,5),   "bpared"=rnorm(1,1,1), "bhmwk"=rnorm(1,1,1),  "bintact"=.5,  "sigma"=runif(1,0,10)), 
                 list("b0"=rnorm(1,50,6),   "bpared"=rnorm(1,0,1), "bhmwk"=rnorm(1,0,1),  "bintact"=-.3,  "sigma"=runif(1,0,10)) 
                 )
mod3.runjags <- run.jags(model=model3,
                monitor=c("b0","bpared","bhmwk","bintact","sigma","dic"),
                data=dataList, 
                            n.chains=4, 
                            thin=5, 
                            inits=initsList)
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 50000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 5 variables....
## Finished running the simulation

A quick convergence check looks good:

plot(mod3.runjags)
print(mod3.runjags)

Since the 95% interval for the interaction includes 0, we will delete it. This takes us back to Model 2; however, in what follows I will go back to un-centered variables.
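
As a quick check on that conclusion, we can compute the posterior probability that the interaction coefficient is positive from the sampled values (a sketch; bintact was monitored above):

# Combine the chains and pull out the sampled interaction coefficients
bint <- as.matrix(as.mcmc.list(mod3.runjags))[, "bintact"]
mean(bint > 0)     # posterior probability that the interaction is positive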

Model 4: Final model with various derived statistics

I will add some values that I want computed and some derived variables. For some of the analyses done here, I find it more convenient to use rjags than runjags, so I make the switch here

Since I’m switching back to UNCENTERED predictors, I re-define the data list:

dataList <- list(
         y     = nels$math,
         pared = nels$paredu,
         hmwk  = nels$homework,
         N     = length(nels$math),
         sdY   = sd(nels$math)
         )

nels_final = "model {  for (i in 1:N){
            y[i] ~ dnorm(mu[i] , precision)
            mu[i] <- b0  + bpared*pared[i] + bhmwk*hmwk[i]    

            res[i] <- y[i] - mu[i]               # Data residuals
            emp.new[i] ~ dnorm(mu[i],precision)  # New Predicted y 
            res.new[i] <- emp.new[i] - mu[i]     # New Predicted residuals
            }
            b0 ~ dnorm(0 , 1/(100*sdY^2) )   
            bpared ~ dnorm(0 , 1/(100*sdY^2) )   
            bhmwk ~ dnorm(0 , 1/(100*sdY^2) )    
            sigma ~ dunif( 1E-3, 1E+30 )
            precision <- 1/sigma^2 
# Derived quantities            
            fit <- sum(res[])                    # sum of data residuals for this iteration
            fit.new <- sum(res.new[])            # sum predicted residuals 
            fit.mean <- mean(emp.new[])          # mean of posterior predictions
              } "
writeLines(nels_final, con="nels_final.txt")

I added res[ ] as the raw residuals, emp.new[ ] as the predicted y, and res.new[ ] as the residuals of these new predictions. The predicted y use the same values of pared[ ] and hmwk[ ] as the observed data.

The initial or starting values are similar to those used above, but here I give a single set that is used for all chains. This brings us to the simulation itself.

b0Init = mean(nels$math)
bparedInit = 0
sigmaInit = sd(nels$math)
initsList = list(b0=b0Init, bpared=bparedInit, sigma=sigmaInit )

nels_final <- jags.model(file="nels_final.txt", 
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=1000)           
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 67
##    Unobserved stochastic nodes: 71
##    Total graph size: 453
## 
## Initializing model
update (nels_final, n.iter=1000)             

# get samples from all chains: 10000 iterations, thinned by 5 (2000 kept per chain)
Samples <- coda.samples(nels_final, 
             variable.names=c("b0","bpared","bhmwk", "sigma", "fit", "fit.new",
                          "fit.mean", "emp.new"), 
             thin=5,
                   n.iter=10000)

A long summary table…

summary(Samples)
## 
## Iterations = 2005:12000
## Thinning interval = 5 
## Number of chains = 4 
## Sample size per chain = 2000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean      SD Naive SE Time-series SE
## b0          66.49750  3.6827 0.041173       0.139571
## bhmwk        1.10770  0.3776 0.004221       0.005675
## bpared      -1.49420  0.6932 0.007750       0.025215
## emp.new[1]  61.92388  5.4583 0.061026       0.062949
## emp.new[2]  66.11967  5.4639 0.061088       0.061060
## emp.new[3]  64.54135  5.4967 0.061455       0.060254
## emp.new[4]  66.14776  5.4571 0.061012       0.062405
## emp.new[5]  62.79306  5.4515 0.060949       0.063467
## emp.new[6]  61.30821  5.4324 0.060736       0.060604
## emp.new[7]  62.36250  5.4093 0.060478       0.060486
## emp.new[8]  58.66693  5.5559 0.062117       0.064473
## emp.new[9]  59.72026  5.4580 0.061023       0.061628
## emp.new[10] 60.49381  5.5910 0.062510       0.063942
## emp.new[11] 66.02119  5.5707 0.062282       0.063666
## emp.new[12] 58.56380  5.5149 0.061658       0.062205
## emp.new[13] 61.30308  5.4590 0.061033       0.061298
## emp.new[14] 66.02456  5.4581 0.061024       0.064652
## emp.new[15] 65.68315  5.5174 0.061687       0.061697
## emp.new[16] 61.55768  5.5250 0.061771       0.063105
## emp.new[17] 61.17417  5.4070 0.060452       0.057476
## emp.new[18] 61.62857  5.5488 0.062037       0.067921
## emp.new[19] 62.70420  5.4399 0.060819       0.068910
## emp.new[20] 63.03393  5.5132 0.061640       0.063727
## emp.new[21] 63.43973  5.4535 0.060972       0.060554
## emp.new[22] 63.47964  5.4183 0.060579       0.057697
## emp.new[23] 60.18914  5.4923 0.061406       0.062135
## emp.new[24] 64.85283  5.4429 0.060853       0.061568
## emp.new[25] 58.68533  5.5579 0.062139       0.063367
## emp.new[26] 62.05714  5.4902 0.061383       0.064070
## emp.new[27] 61.71570  5.5305 0.061833       0.067010
## emp.new[28] 66.09023  5.4330 0.060742       0.060749
## emp.new[29] 64.61274  5.4130 0.060519       0.060678
## emp.new[30] 58.58430  5.6063 0.062680       0.063349
## emp.new[31] 63.08384  5.5163 0.061674       0.063373
## emp.new[32] 63.48198  5.4353 0.060769       0.060119
## emp.new[33] 59.05091  5.5438 0.061981       0.061666
## emp.new[34] 59.83191  5.4464 0.060893       0.061730
## emp.new[35] 66.09367  5.3872 0.060231       0.060785
## emp.new[36] 63.42571  5.3429 0.059735       0.059740
## emp.new[37] 61.20671  5.3892 0.060253       0.060261
## emp.new[38] 63.05330  5.4362 0.060779       0.063013
## emp.new[39] 58.57584  5.5952 0.062557       0.063120
## emp.new[40] 62.31636  5.4214 0.060613       0.060285
## emp.new[41] 62.03083  5.5308 0.061836       0.061847
## emp.new[42] 62.37218  5.4104 0.060490       0.060483
## emp.new[43] 64.81593  5.4448 0.060874       0.060779
## emp.new[44] 69.73373  5.7736 0.064550       0.070897
## emp.new[45] 64.56394  5.4053 0.060433       0.060434
## emp.new[46] 63.12365  5.6284 0.062927       0.077000
## emp.new[47] 63.09093  5.6472 0.063137       0.073453
## emp.new[48] 62.40110  5.4297 0.060706       0.060447
## emp.new[49] 64.85136  5.4491 0.060923       0.060932
## emp.new[50] 62.34146  5.4342 0.060756       0.060762
## emp.new[51] 63.88778  5.4461 0.060889       0.062577
## emp.new[52] 63.26789  5.5547 0.062103       0.073622
## emp.new[53] 63.45712  5.3767 0.060114       0.061839
## emp.new[54] 63.03875  5.4714 0.061172       0.064802
## emp.new[55] 62.38551  5.3885 0.060246       0.059350
## emp.new[56] 58.62468  5.4661 0.061113       0.063493
## emp.new[57] 63.11593  5.4934 0.061418       0.064614
## emp.new[58] 61.91269  5.5074 0.061574       0.064803
## emp.new[59] 64.19030  5.5564 0.062122       0.069889
## emp.new[60] 60.90330  5.4870 0.061347       0.063040
## emp.new[61] 64.51353  5.3779 0.060126       0.060684
## emp.new[62] 63.01218  5.4667 0.061120       0.062668
## emp.new[63] 65.96812  5.4270 0.060676       0.065297
## emp.new[64] 67.54321  5.6409 0.063067       0.069959
## emp.new[65] 59.72728  5.3761 0.060107       0.061287
## emp.new[66] 64.50857  5.4493 0.060925       0.062771
## emp.new[67] 63.52057  5.3150 0.059424       0.059704
## fit          0.45647 43.9119 0.490950       0.502944
## fit.mean    62.81345  0.9246 0.010338       0.010043
## fit.new     -0.04226 43.6506 0.488029       0.494434
## sigma        5.33523  0.4892 0.005469       0.005627
## 
## 2. Quantiles for each variable:
## 
##                 2.5%      25%      50%    75%   97.5%
## b0           59.2816  63.9595 66.48057 68.976 73.7045
## bhmwk         0.3505   0.8563  1.11064  1.360  1.8454
## bpared       -2.8640  -1.9576 -1.48922 -1.031 -0.1714
## emp.new[1]   51.3284  58.2829 61.88842 65.623 72.7983
## emp.new[2]   55.2376  62.4454 66.10685 69.793 76.7005
## emp.new[3]   53.8679  60.8888 64.54554 68.150 75.3875
## emp.new[4]   55.5242  62.4545 66.18051 69.807 76.8751
## emp.new[5]   52.1168  59.2204 62.71808 66.340 73.5861
## emp.new[6]   50.4855  57.6569 61.26766 64.935 72.2343
## emp.new[7]   51.8267  58.8013 62.29371 65.936 73.1408
## emp.new[8]   47.8221  54.9644 58.64448 62.440 69.5310
## emp.new[9]   48.7003  56.1222 59.76034 63.249 70.3682
## emp.new[10]  49.4524  56.7600 60.52680 64.222 71.2994
## emp.new[11]  55.1862  62.2511 66.00610 69.762 76.9223
## emp.new[12]  47.4110  54.8927 58.56491 62.280 69.1503
## emp.new[13]  50.4878  57.6903 61.28069 64.961 72.1929
## emp.new[14]  55.4134  62.3852 65.93625 69.655 76.7823
## emp.new[15]  54.7446  61.9991 65.68322 69.308 76.6447
## emp.new[16]  50.8607  57.8712 61.50029 65.210 72.4866
## emp.new[17]  50.6884  57.5328 61.18649 64.715 71.9590
## emp.new[18]  50.8375  57.8942 61.61943 65.381 72.4939
## emp.new[19]  52.0081  59.0514 62.68089 66.343 73.6176
## emp.new[20]  52.3820  59.4016 62.99120 66.643 74.0831
## emp.new[21]  52.7306  59.7798 63.46176 67.025 74.1222
## emp.new[22]  52.8106  59.9089 63.43999 67.097 74.2095
## emp.new[23]  49.3452  56.4310 60.22491 63.942 70.9809
## emp.new[24]  54.0396  61.3213 64.79297 68.482 75.6964
## emp.new[25]  47.5807  55.0021 58.74031 62.452 69.4789
## emp.new[26]  51.1636  58.4627 62.07976 65.686 72.6894
## emp.new[27]  50.9348  57.9654 61.72664 65.508 72.5275
## emp.new[28]  55.5672  62.4122 66.01478 69.754 76.8441
## emp.new[29]  54.0399  60.9949 64.62850 68.274 75.2177
## emp.new[30]  47.8993  54.8568 58.58767 62.228 69.8558
## emp.new[31]  52.3661  59.3944 63.03900 66.736 73.9092
## emp.new[32]  52.6858  59.8345 63.54496 67.126 73.8809
## emp.new[33]  48.1444  55.3294 59.03553 62.781 70.0219
## emp.new[34]  49.2559  56.2135 59.81038 63.452 70.5498
## emp.new[35]  55.5649  62.4375 66.10004 69.730 76.4957
## emp.new[36]  52.8998  59.8862 63.40725 66.931 73.8787
## emp.new[37]  50.5945  57.6487 61.19946 64.782 71.9109
## emp.new[38]  52.4871  59.4136 63.01600 66.640 73.8884
## emp.new[39]  47.6389  54.8974 58.59182 62.288 69.5995
## emp.new[40]  51.7354  58.6488 62.35841 65.903 73.3252
## emp.new[41]  51.0475  58.3090 62.04286 65.770 72.8319
## emp.new[42]  51.9062  58.6231 62.39329 66.010 73.1336
## emp.new[43]  54.1355  61.0587 64.87453 68.455 75.6856
## emp.new[44]  58.3369  65.9660 69.72869 73.617 81.0389
## emp.new[45]  53.7943  60.9327 64.56680 68.096 75.3052
## emp.new[46]  51.8898  59.4934 63.15261 66.839 74.2487
## emp.new[47]  51.8769  59.4204 63.03619 66.851 74.0766
## emp.new[48]  51.7895  58.7708 62.38174 66.069 73.2484
## emp.new[49]  54.1500  61.1645 64.78711 68.495 75.5657
## emp.new[50]  51.7203  58.6721 62.35418 65.993 73.0444
## emp.new[51]  53.1472  60.2527 63.86419 67.430 74.7422
## emp.new[52]  52.4220  59.5575 63.25625 66.970 74.3286
## emp.new[53]  52.9708  59.8598 63.41377 67.054 73.8923
## emp.new[54]  52.1568  59.4047 63.03981 66.580 73.7548
## emp.new[55]  51.6268  58.8515 62.42228 65.916 72.8812
## emp.new[56]  47.9325  54.9485 58.58765 62.276 69.4550
## emp.new[57]  52.3430  59.4090 63.07352 66.710 74.1591
## emp.new[58]  51.3469  58.1973 61.85897 65.533 72.7567
## emp.new[59]  53.1265  60.5175 64.13644 67.932 75.0796
## emp.new[60]  49.9518  57.2599 60.99603 64.622 71.5114
## emp.new[61]  53.7256  60.9163 64.60060 68.116 74.9174
## emp.new[62]  52.4360  59.3809 62.95474 66.654 73.7962
## emp.new[63]  55.1658  62.3880 66.01192 69.694 76.5614
## emp.new[64]  56.5678  63.7071 67.48893 71.305 78.5679
## emp.new[65]  49.3266  56.1931 59.67262 63.313 70.4570
## emp.new[66]  53.7960  60.8895 64.52616 68.126 75.2862
## emp.new[67]  53.1118  59.9911 63.48887 67.124 74.0451
## fit         -87.6905 -28.8280  0.06544 29.648 84.6324
## fit.mean     60.9658  62.1994 62.81730 63.428 64.6215
## fit.new     -85.7105 -28.9857  0.01065 28.857 87.4800
## sigma         4.4891   4.9909  5.29425  5.646  6.4081

When you get plots and print Samples, you will get something for every value in the variable.names list. Note that this includes 67 values of emp.new. This is one reason that I don’t include all these “extras” when working on model refinement.

To be able to use the samples, I need to do some data manipulation. In “Samples”, the data are in separate chains. The object “Samples” is an mcmc object, which is difficult for me to manipulate, so I changed the object type.

ffit <- as.array(Samples)                          # change from mcmc to array object                                 
b <- rbind(ffit[,,1],ffit[,,2],ffit[, , 3], ffit[, , 4])  # combine the chains

The following takes Samples and breaks out specific estimates into different objects. To know what the “numbers” for the indices are, I had to use what I know about the sample size of the nels data and the number of other parameters (looking at the samples helps).

b0 <- b[,1]
bhmwk <- b[,2]       # note: columns are in alphabetical order (b0, bhmwk, bpared, ...)
bpared <- b[,3]
pred <- b[,4:70]     # emp.new[1] through emp.new[67]
fit <- b[,71]
fit.mean <- b[,72]
fit.new <- b[,73]
sigma <- b[,74]
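
An alternative that avoids counting indices by hand is coda's as.matrix() method, which stacks the chains and keeps the variable names as column names; a sketch:

bmat   <- as.matrix(Samples)                        # chains stacked, columns named
b0     <- bmat[, "b0"]
bpared <- bmat[, "bpared"]
bhmwk  <- bmat[, "bhmwk"]
sigma  <- bmat[, "sigma"]
pred   <- bmat[, grep("^emp.new", colnames(bmat))]  # the 67 emp.new[i] columns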

A little checking to see if things make sense. I expect the following two values to be very similar, because one is the mean of the data and the other is the mean of the posterior distribution of the fitted values.

mean(nels$math)
## [1] 62.8209
mean(pred)
## [1] 62.81345

I wanted to look at the data and the posterior predictive distribution in different ways. In the first case, I collapsed over replications (iterations) so that I have one fitted value for each student. This is like finding means for pred and nels$math, but doesn’t collapse as much.

fitted <- apply(pred,2,mean)

And some graphics

par(mfrow=c(2,2))
hist(nels$math,breaks=10,main="Distribution of Data")
hist(fitted,breaks=10,main="Posterior Predictions (8000 iterations)")
R2 <- round(cor(nels$math, fitted)**2, digits=4)
plot(nels$math, fitted, 
     main=paste("Fitted by Observed Math  R2=",R2),
     xlab="Observed Math Scores",
     ylab="Mean over Samples of Fitted Math")

plot(nels$homework, nels$math,main="Data Points")

plot(nels$homework, fitted, ylim=c(58,70))
# Get the means to add to plot
schMathHmwk <- aggregate(nels$math, list(nels$homework), mean)
lines(schMathHmwk$Group.1,schMathHmwk$x,type='p',pch=19,col="blue")

I now collapse over students (which makes more sense for the posterior predictive checks), and get a single value for each replication.

# For each replication (row), compute the statistic across students
ymeans <- apply(pred,1,"mean")
ymin  <- apply(pred,1,"min")
ymax  <- apply(pred,1,"max")
ysd   <- apply(pred,1,"sd")

And the Bayesian p-values, which give another way to look at how well the model is doing for different aspects of the data.

pmean <- mean(mean(nels$math) < ymeans)
pmin  <- mean(min(nels$math) < ymin)
pmax  <- mean(max(nels$math) < ymax)
psd   <- mean(sd(nels$math) < ysd)

Understanding what these are is a bit easier by looking at them relative to data.

par(mfrow=c(2,2))
 
hist(ymin,breaks=15,main=paste("Simulated within Jags: Minimums", "\nBayesian P-value =", round(pmin, 2)))
lines(c(min(nels$math),min(nels$math)),c(0,2000),col="blue",lwd=2)
hist(ymax,breaks=15,main=paste("Simulated within Jags: Maximums", "\nBayesian P-value =", round(pmax, 2)))
lines(c(max(nels$math),max(nels$math)),c(0,2000),col="blue",lwd=2)

hist(ymeans,breaks=15,main=paste("Simulated within Jags:  Means", "\nBayesian P-value =", round(pmean, 2)))
lines(c(mean(nels$math),mean(nels$math)),c(0,2000),col="blue",lwd=2)

hist(ysd,breaks=15,main=paste("Simulated within Jags:  SDs", "\nBayesian P-value =", round(psd, 2)))
lines(c(sd(nels$math),sd(nels$math)),c(0,2000),col="blue",lwd=2)

Robust Regression: Student’s t-distribution

We swap out “dnorm” for “dt”, add a prior for the degrees of freedom (nu), and add nu to the list of parameters to monitor. Only the parts of the code that change are given below. The model,

dataList <- list(
         y     = nels$math,
         pared = nels$paredu,
         hmwk  = nels$homework,
         N     = length(nels$math),
         sdY   = sd(nels$math)
         )
ymean <- mean(nels$math) 
sdy <- sd(nels$math)

initsList = list(list("b0"=ymean, "bpared"=0, "bhmwk"=0, "sigma"=sdy), 
list("b0"=rnorm(1,50,5), "bpared"=rnorm(1,-2,1), "bhmwk"=rnorm(1,2,1), "sigma"=sdy), 
 list("b0"=rnorm(1,50,5),   "bpared"=rnorm(1,1,1), "bhmwk"=rnorm(1,1,1), "sigma"=runif(1,0,10)),
list("b0"=rnorm(1,50,6),   "bpared"=rnorm(1,0,1), "b"=rnorm(1,0,1),  "sigma"=runif(1,0,10)) )
tmr = "model { for (i in 1:N){
# Likelihood          
              y[i] ~ dt(mu[i] , precision, nu)
               mu[i] <- b0  + bpared*pared[i] + bhmwk*hmwk[i]   
             }
            b0 ~ dnorm(0 , 1/(100*sdY^2) )   
            bpared ~ dnorm(0 , 1/(100*sdY^2) )   
            bhmwk ~ dnorm(0 , 1/(100*sdY^2) )    
            sigma ~ dunif( 1E-3, 1E+30 )
            precision <- 1/sigma^2  
            nuMinusOne  ~ dexp(1/29)  
            nu <- nuMinusOne+1                       
           }"

writeLines(tmr, con="tmr.txt")
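
The prior for the degrees of freedom is a shifted exponential, nu = 1 + Exponential(rate = 1/29), which has prior mean 1 + 29 = 30 but still puts a fair amount of mass on small, heavy-tailed values. A quick sketch of this prior:

# Plot the implied prior density for nu (shifted exponential)
curve(dexp(x - 1, rate = 1/29), from = 1, to = 100,
      xlab = "nu", ylab = "prior density")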

and the call for samples,

tmr.runjags <- run.jags(model=tmr, 
               monitor=c("b0","bpared","bhmwk","sigma","nu","dic"),
               data=dataList, 
               n.chains=4, 
               thin=5, 
               inits=initsList)
## Compiling rjags model...
## Warning in rjags::jags.model(model, data = dataenv, inits = inits, n.chains =
## length(runjags.object$end.state), : Unused initial value for "b" in chain 4
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 50000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 5 variables....
## Finished running the simulation
plot(tmr.runjags)
## Generating plots...

gelman.diag(tmr.runjags)
## Potential scale reduction factors:
## 
##        Point est. Upper C.I.
## b0              1       1.00
## bpared          1       1.01
## bhmwk           1       1.00
## sigma           1       1.00
## nu              1       1.00
## 
## Multivariate psrf
## 
## 1
gelman.plot(tmr.runjags)

cumuplot(tmr.runjags,probs=c(.25,.50,.75),lwd=c(1,2),lty=c(2,1),col=c("blue","red"))
## Warning in as.mcmc.runjags(x): Combining the 4 mcmc chains together

print(tmr.runjags)
## 
## JAGS model summary statistics from 40000 samples (thin = 5; chains = 4; adapt+burnin = 5000):
##                                                                             
##        Lower95  Median  Upper95    Mean      SD Mode     MCerr MC%ofSD SSeff
## b0      59.806  66.948   73.703   66.92  3.5452   --   0.07547     2.1  2207
## bpared -2.8669 -1.5299 -0.20261 -1.5304 0.67811   --  0.014231     2.1  2270
## bhmwk  0.38694  1.1135   1.8453  1.1145 0.36836   -- 0.0028999     0.8 16135
## sigma   3.6299  4.8425   6.0682  4.8333 0.61075   -- 0.0036504     0.6 27993
## nu      1.7769  16.587   77.459  25.436  25.553   --   0.17407     0.7 21551
##                        
##            AC.50   psrf
## b0       0.33775  1.001
## bpared   0.33422 1.0011
## bhmwk  0.0094361 1.0001
## sigma  0.0054262 1.0002
## nu     -0.003377 1.0001
## 
## Model fit assessment:
## DIC = 416.425
## [PED not available from the stored object]
## Estimated effective number of parameters:  pD = 4.81995
## 
## Total time taken: 4.9 minutes

Bayes factor

If you feel compelled to compute these, it’s pretty easy if you use the BayesFactor package. This package does more than just linear regression. See http://bayesfactorpcl.r-forge.r-project.org/ for more information about the package.

You basically ask for a regression using the regressionBF command with (in this case) all our possible variables. The following code gives Bayes factors for all possible models, where the comparison model (the default) is the model with only an intercept.

library(BayesFactor)
nels$xwhite <- as.numeric(nels$white)
(bf <- regressionBF(math ~ paredu + homework + ses + xwhite + sex, data=nels))
## Bayes factor analysis
## --------------
## [1] paredu                                  : 1.736485   ±0%
## [2] homework                                : 6.983524   ±0%
## [3] ses                                     : 0.3338801  ±0%
## [4] xwhite                                  : 0.2545051  ±0%
## [5] sex                                     : 0.3031462  ±0%
## [6] paredu + homework                       : 17.83269   ±0%
## [7] paredu + ses                            : 1.247574   ±0%
## [8] paredu + xwhite                         : 0.5765412  ±0%
## [9] paredu + sex                            : 0.741968   ±0%
## [10] homework + ses                         : 3.116336   ±0%
## [11] homework + xwhite                      : 2.160394   ±0%
## [12] homework + sex                         : 2.687732   ±0%
## [13] ses + xwhite                           : 0.1284928  ±0.01%
## [14] ses + sex                              : 0.1397701  ±0.01%
## [15] xwhite + sex                           : 0.110552   ±0.01%
## [16] paredu + homework + ses                : 11.84489   ±0%
## [17] paredu + homework + xwhite             : 6.19577    ±0%
## [18] paredu + homework + sex                : 8.491123   ±0%
## [19] paredu + ses + xwhite                  : 0.4952247  ±0%
## [20] paredu + ses + sex                     : 0.7544532  ±0%
## [21] paredu + xwhite + sex                  : 0.294487   ±0%
## [22] homework + ses + xwhite                : 1.276575   ±0%
## [23] homework + ses + sex                   : 1.388366   ±0%
## [24] homework + xwhite + sex                : 1.006036   ±0%
## [25] ses + xwhite + sex                     : 0.06318426 ±0.01%
## [26] paredu + homework + ses + xwhite       : 4.475333   ±0%
## [27] paredu + homework + ses + sex          : 7.718168   ±0%
## [28] paredu + homework + xwhite + sex       : 3.32971    ±0%
## [29] paredu + ses + xwhite + sex            : 0.3445106  ±0%
## [30] homework + ses + xwhite + sex          : 0.6339349  ±0%
## [31] paredu + homework + ses + xwhite + sex : 3.246735   ±0%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS

If you have lots and lots of possible models, you may want just the top ones (again compared against the model with only an intercept); then add the line

#The best 5
head(bf,n=5)
## Bayes factor analysis
## --------------
## [1] paredu + homework             : 17.83269 ±0%
## [2] paredu + homework + ses       : 11.84489 ±0%
## [3] paredu + homework + sex       : 8.491123 ±0%
## [4] paredu + homework + ses + sex : 7.718168 ±0%
## [5] homework                      : 6.983524 ±0%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS

or compare the best with the top competitors and put the best model in the denominator.

# Compare the best with top 5
(top_models <- head(bf)/max(bf) )
## Bayes factor analysis
## --------------
## [1] paredu + homework             : 1         ±0%
## [2] paredu + homework + ses       : 0.6642233 ±0%
## [3] paredu + homework + sex       : 0.4761549 ±0%
## [4] paredu + homework + ses + sex : 0.4328101 ±0%
## [5] homework                      : 0.3916136 ±0%
## [6] paredu + homework + xwhite    : 0.3474389 ±0%
## 
## Against denominator:
##   math ~ paredu + homework 
## ---
## Bayes factor type: BFlinearModel, JZS
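
If it helps interpretation, a Bayes factor can be converted to a posterior model probability. Assuming equal prior odds for the two models being compared, a quick sketch:

# Posterior probability of the best model versus the intercept-only model,
# assuming equal prior odds (posterior odds = Bayes factor x prior odds)
bf.best <- 17.83            # paredu + homework vs. intercept only (from above)
bf.best / (1 + bf.best)     # roughly 0.95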

Speeding up convergence

As models become more complex and data sets become larger, the time it takes for an algorithm to converge can be an issue. One possible step to reduce computational time is parallel processing.

Parallel Processing with Gibbs

Times vary from run to run. Expect to see bigger differences with:

  • More iterations
  • More parameters (more complex models)
  • More chains (the number of chains should be <= the number of cores; I didn't check, but I think this should be so; see the core-count sketch below)
  • Larger data sets
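
To see how many cores are available for running chains in parallel, a quick sketch using the parallel package that ships with R:

library(parallel)
detectCores()     # number of logical cores on this machine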

Set up

dataList <- list(y=nels$math,
                 x=nels$homework,
                 x2=nels$paredu,
                 N=length(nels$math),
                 sdY = sd(nels$math)
                 )
nelsLR1 = "model {                                    
                 for (i in 1:N){
                  y[i] ~ dnorm(mu[i] , precision)
                  mu[i] <- b0 + b1*x[i] +b2*x2[i]        
                 }
                 b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                 b1 ~ dnorm(0 , 1/(100*sdY^2) )   
                 b2 ~ dnorm(0 , 1/(100*sdY^2) )   
                 precision ~ dgamma(.01, .01)
               } 
"
writeLines(nelsLR1, con="nelsLR1.txt")

rjags

initsList <- list("b0"=mean(nels$math), "b1"=sd(nels$math), "b2"=sd(nels$paredu), "precision"=1/sd(nels$math)**2) 

start_time <- Sys.time()
jagsNelsLR1 <- jags.model(file="nelsLR1.txt", 
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=4000)   
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 67
##    Unobserved stochastic nodes: 4
##    Total graph size: 249
## 
## Initializing model
Samples1 <- coda.samples(jagsNelsLR1, variable.names=c("b0", "b1", "b2", "precision"), n.iter=10000)
end_time <- Sys.time()
end_time - start_time
## Time difference of 0.2376001 secs

runjags

initsList = list(list("b0"=mean(nels$math), "b1"=sd(nels$math), "b2"=sd(nels$paredu), "precision"=1/sd(nels$math)**2), 
                 list("b0"=rnorm(1,-2,4), "b1"=rnorm(1,0,4), "b2"=rnorm(1,0,4), "precision"=1/2),
                 list("b0"=rnorm(1,0,1),  "b1"=rnorm(1,6,4),"b2"=rnorm(1,0,4), "precision"=1/5),
                 list("b0"=rnorm(1,2,2),  "b1"=rnorm(1,-2,4), "b2"=rnorm(1,2,4) ,"precision"=1/1515 )   
                 )
start_time <- Sys.time()
out.runjags <- run.jags(model=nelsLR1, 
              monitor=c("b0","b1","precision","dic"),
              data=dataList, 
                        n.chains=4, 
                          inits=initsList)
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Note: the model did not require adaptation
## Burning in the model for 4000 iterations...
## Running the model for 10000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 3 variables....
## Finished running the simulation
end_time <- Sys.time()
end_time - start_time
## Time difference of 4.029239 secs

jagsUI – serial

start_time <- Sys.time()                 
out.jagsUI <- jags(model.file="nelsLR1.txt",
              data=dataList,
                    inits=initsList, 
              parameters.to.save=c("b0", "b1", "precision"), 
              n.iter=10000,
              n.burnin=4000,
              n.chains=4,
              verbose=FALSE)
end_time <- Sys.time()
end_time - start_time
## Time difference of 0.780725 secs

parallel via jagsUI

start_time <- Sys.time()                 
parallel.jagsUI <- jags(model.file="nelsLR1.txt",
              data=dataList,
                    inits=initsList, 
              parameters.to.save=c("b0", "b1", "precision"), 
              n.iter=10000,
              n.burnin=4000,
              n.chains=4,
                parallel=TRUE,
         )
## 
## Processing function input....... 
## 
## Done. 
##  
## Beginning parallel processing using 4 cores. Console output will be suppressed.
## 
## Parallel processing completed.
## 
## Calculating statistics....... 
## 
## Done.
end_time <- Sys.time()
end_time - start_time
## Time difference of 1.126086 secs

Output from jagsUI

whiskerplot(parallel.jagsUI, parameters=c("b0", "b1", "precision"),
            quantiles=c(0.025,0.975), zeroline=TRUE)

To use coda plotting functions, pass in the samples component (here, parallel.jagsUI$samples). For example:

gelman.diag(parallel.jagsUI$samples)
## Potential scale reduction factors:
## 
##           Point est. Upper C.I.
## b0                 1          1
## b1                 1          1
## precision          1          1
## deviance           1          1
## 
## Multivariate psrf
## 
## 1
gelman.plot(parallel.jagsUI$samples)

geweke.diag(parallel.jagsUI$samples, frac1=.10, frac2=.50)
## [[1]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##        b0        b1 precision  deviance 
##    0.7223   -0.9711   -0.6222   -0.1078 
## 
## 
## [[2]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##        b0        b1 precision  deviance 
##  -0.17285   0.80700   0.09737   0.10310 
## 
## 
## [[3]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##        b0        b1 precision  deviance 
##  -0.88975  -0.24626  -1.13175  -0.07542 
## 
## 
## [[4]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##        b0        b1 precision  deviance 
##   -0.2039   -1.0467   -1.1420    0.5857
HPDinterval(parallel.jagsUI$samples)
## [[1]]
##                  lower        upper
## b0         59.45565174  73.69246050
## b1          0.33349739   1.82383930
## precision   0.02415297   0.04934476
## deviance  408.82828128 418.32989459
## attr(,"Probability")
## [1] 0.95
## 
## [[2]]
##                  lower        upper
## b0         59.02142255  73.64977475
## b1          0.37791396   1.88535199
## precision   0.02432796   0.04893697
## deviance  408.83838638 418.68602439
## attr(,"Probability")
## [1] 0.95
## 
## [[3]]
##                  lower        upper
## b0         58.78927878  73.79093899
## b1          0.35589567   1.84473257
## precision   0.02465636   0.04958267
## deviance  408.90426892 418.63359969
## attr(,"Probability")
## [1] 0.95
## 
## [[4]]
##                  lower        upper
## b0         59.22713425  73.99270161
## b1          0.33941995   1.84281849
## precision   0.02423601   0.04907277
## deviance  408.88364212 418.57991066
## attr(,"Probability")
## [1] 0.95
autocorr.plot(parallel.jagsUI$samples)