#.libPaths("C:/Program Files/R/R-4.1.1/library")
To make this run but will drop when put into master set of examples.
library(rstan)
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
library(loo)
## This is loo version 2.4.1
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## - Windows 10 users: loo may be very slow if 'mc.cores' is set in your .Rprofile file (see https://github.com/stan-dev/loo/issues/94).
##
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
##
## loo
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.16.1). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:rstan':
##
## loo
## The following object is masked from 'package:stats':
##
## ar
library(coda)
##
## Attaching package: 'coda'
## The following object is masked from 'package:rstan':
##
## traceplot
setwd("D:/Dropbox/edps 590BAY/Lectures/10 Ham_Stan_brms")
nels <- read.table("school23_data.txt",header=TRUE)
names(nels)
## [1] "school" "student" "sex" "race" "homew" "schtype"
## [7] "ses" "pared" "math" "classtr" "schsize" "urban"
## [13] "geo" "minority" "ratio"
# sort by schools (may speed things up for large data sets):
nels <- nels[order(nels$school,nels$student) , ]
options(mc.cores=parallel::detectCores())
rstan_options(auto_write = TRUE)
Need information on number schools & create school.id
school <- unique(nels$school)
N <- length(school)
school.int <- as.data.frame(cbind(school,seq(1:N)))
names(school.int) <- c("school", "school.id")
nels <- merge(nels,school.int,by="school")
# total number of students
n <- length(nels$math)
hlm1 <- brm(math ~ 1 + (1 | school.id),
data=nels,
warmup=2000,
iter=4000,
chains=4,
cores=4,
seed=11121
)
## Compiling Stan program...
## Start sampling
Corresponding stan data and code
dataList <- standata(hlm1)
hlm1.stan <- stancode(hlm1)
This provide trace plots and densities,
plot(hlm1)
If you want the diagnostics from coda (ones we used with Gibbs sampling), you need to extract the samples from brm as an mcmc object. Note that as.mcmc is deprecated, but I haven’t found the function that can be used instead.
hlm1.mcmc <- as.mcmc(hlm1)
## Warning: as.mcmc.brmsfit is deprecated and will eventually be removed.
traceplot(hlm1.mcmc)
densplot(hlm1.mcmc)
gelman.diag(hlm1.mcmc)
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## b_Intercept 1 1.01
## sd_school.id__Intercept 1 1.01
## sigma 1 1.00
## r_school.id[1,Intercept] 1 1.01
## r_school.id[2,Intercept] 1 1.00
## r_school.id[3,Intercept] 1 1.00
## r_school.id[4,Intercept] 1 1.00
## r_school.id[5,Intercept] 1 1.00
## r_school.id[6,Intercept] 1 1.00
## r_school.id[7,Intercept] 1 1.00
## r_school.id[8,Intercept] 1 1.00
## r_school.id[9,Intercept] 1 1.00
## r_school.id[10,Intercept] 1 1.00
## r_school.id[11,Intercept] 1 1.00
## r_school.id[12,Intercept] 1 1.00
## r_school.id[13,Intercept] 1 1.01
## r_school.id[14,Intercept] 1 1.00
## r_school.id[15,Intercept] 1 1.00
## r_school.id[16,Intercept] 1 1.00
## r_school.id[17,Intercept] 1 1.00
## r_school.id[18,Intercept] 1 1.01
## r_school.id[19,Intercept] 1 1.00
## r_school.id[20,Intercept] 1 1.00
## r_school.id[21,Intercept] 1 1.00
## r_school.id[22,Intercept] 1 1.00
## r_school.id[23,Intercept] 1 1.00
## lp__ 1 1.00
##
## Multivariate psrf
##
## 1
gelman.plot(hlm1.mcmc)
autocorr.plot(hlm1.mcmc)
geweke.diag(hlm1.mcmc)
## [[1]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## b_Intercept sd_school.id__Intercept sigma
## 0.10854 -0.05424 1.04189
## r_school.id[1,Intercept] r_school.id[2,Intercept] r_school.id[3,Intercept]
## -0.27439 -0.16852 1.18729
## r_school.id[4,Intercept] r_school.id[5,Intercept] r_school.id[6,Intercept]
## 0.68336 -1.06509 0.30441
## r_school.id[7,Intercept] r_school.id[8,Intercept] r_school.id[9,Intercept]
## -0.10863 0.02425 -0.92654
## r_school.id[10,Intercept] r_school.id[11,Intercept] r_school.id[12,Intercept]
## 0.43256 0.70673 -0.49497
## r_school.id[13,Intercept] r_school.id[14,Intercept] r_school.id[15,Intercept]
## -0.23715 -1.19675 -0.59860
## r_school.id[16,Intercept] r_school.id[17,Intercept] r_school.id[18,Intercept]
## -0.81785 1.69402 -0.10737
## r_school.id[19,Intercept] r_school.id[20,Intercept] r_school.id[21,Intercept]
## -0.04788 -0.42454 -0.34229
## r_school.id[22,Intercept] r_school.id[23,Intercept] lp__
## -0.22318 0.30522 1.00040
##
##
## [[2]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## b_Intercept sd_school.id__Intercept sigma
## -1.0902 -1.3315 -0.7543
## r_school.id[1,Intercept] r_school.id[2,Intercept] r_school.id[3,Intercept]
## 0.8718 -0.7431 1.7296
## r_school.id[4,Intercept] r_school.id[5,Intercept] r_school.id[6,Intercept]
## 1.1987 0.6886 -0.4700
## r_school.id[7,Intercept] r_school.id[8,Intercept] r_school.id[9,Intercept]
## 1.1250 1.0406 1.4821
## r_school.id[10,Intercept] r_school.id[11,Intercept] r_school.id[12,Intercept]
## 1.6837 1.5428 0.6244
## r_school.id[13,Intercept] r_school.id[14,Intercept] r_school.id[15,Intercept]
## 1.7808 1.2711 1.4200
## r_school.id[16,Intercept] r_school.id[17,Intercept] r_school.id[18,Intercept]
## 0.3766 1.5801 1.6146
## r_school.id[19,Intercept] r_school.id[20,Intercept] r_school.id[21,Intercept]
## 1.2005 0.9612 0.8120
## r_school.id[22,Intercept] r_school.id[23,Intercept] lp__
## 1.6317 1.3010 -0.6127
##
##
## [[3]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## b_Intercept sd_school.id__Intercept sigma
## 1.4669 -0.6414 -3.2995
## r_school.id[1,Intercept] r_school.id[2,Intercept] r_school.id[3,Intercept]
## -0.7993 -0.5237 -1.4105
## r_school.id[4,Intercept] r_school.id[5,Intercept] r_school.id[6,Intercept]
## -1.0622 -1.0344 -1.2764
## r_school.id[7,Intercept] r_school.id[8,Intercept] r_school.id[9,Intercept]
## -0.2900 -0.6783 -1.4330
## r_school.id[10,Intercept] r_school.id[11,Intercept] r_school.id[12,Intercept]
## -0.9485 -0.7540 -1.4864
## r_school.id[13,Intercept] r_school.id[14,Intercept] r_school.id[15,Intercept]
## -1.1929 0.2303 -0.8430
## r_school.id[16,Intercept] r_school.id[17,Intercept] r_school.id[18,Intercept]
## -1.1772 -0.8660 -0.9336
## r_school.id[19,Intercept] r_school.id[20,Intercept] r_school.id[21,Intercept]
## -0.6953 -0.9655 -0.8315
## r_school.id[22,Intercept] r_school.id[23,Intercept] lp__
## -0.3003 -0.7386 0.2180
##
##
## [[4]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## b_Intercept sd_school.id__Intercept sigma
## 0.58021 -0.56919 -0.33500
## r_school.id[1,Intercept] r_school.id[2,Intercept] r_school.id[3,Intercept]
## -0.57214 -0.40371 -0.88240
## r_school.id[4,Intercept] r_school.id[5,Intercept] r_school.id[6,Intercept]
## 0.56548 -0.44539 -0.17710
## r_school.id[7,Intercept] r_school.id[8,Intercept] r_school.id[9,Intercept]
## -0.80998 -0.03684 0.30853
## r_school.id[10,Intercept] r_school.id[11,Intercept] r_school.id[12,Intercept]
## -0.11912 -0.15905 -0.22563
## r_school.id[13,Intercept] r_school.id[14,Intercept] r_school.id[15,Intercept]
## 0.06420 -0.37986 -0.55414
## r_school.id[16,Intercept] r_school.id[17,Intercept] r_school.id[18,Intercept]
## -0.21614 -0.19762 -0.66882
## r_school.id[19,Intercept] r_school.id[20,Intercept] r_school.id[21,Intercept]
## -0.66693 -0.25080 -0.20310
## r_school.id[22,Intercept] r_school.id[23,Intercept] lp__
## -0.73856 -0.38457 -0.40697
effectiveSize(hlm1.mcmc)
## b_Intercept sd_school.id__Intercept sigma
## 1719.398 2157.094 14075.964
## r_school.id[1,Intercept] r_school.id[2,Intercept] r_school.id[3,Intercept]
## 3199.767 7765.179 9031.354
## r_school.id[4,Intercept] r_school.id[5,Intercept] r_school.id[6,Intercept]
## 4287.016 4553.169 5889.439
## r_school.id[7,Intercept] r_school.id[8,Intercept] r_school.id[9,Intercept]
## 4267.457 5182.562 4650.450
## r_school.id[10,Intercept] r_school.id[11,Intercept] r_school.id[12,Intercept]
## 4491.417 4358.061 4976.168
## r_school.id[13,Intercept] r_school.id[14,Intercept] r_school.id[15,Intercept]
## 4700.320 5547.942 4365.282
## r_school.id[16,Intercept] r_school.id[17,Intercept] r_school.id[18,Intercept]
## 4461.206 5198.408 2919.780
## r_school.id[19,Intercept] r_school.id[20,Intercept] r_school.id[21,Intercept]
## 4600.983 5149.036 3835.855
## r_school.id[22,Intercept] r_school.id[23,Intercept] lp__
## 4460.820 6603.583 1702.233
And if all looks good
summary(hlm1)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: math ~ 1 + (1 | school.id)
## Data: nels (Number of observations: 519)
## Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~school.id (Number of levels: 23)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 5.40 0.99 3.80 7.69 1.00 2057 2991
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 50.77 1.19 48.41 53.16 1.00 1732 2638
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 9.03 0.29 8.48 9.61 1.00 13779 5974
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
hlm2 <- brm(math ~ 1 + homew + ses + (1 + homew| school.id),
data=nels,
warmup=2000,
iter=4000,
chains=4,
cores=4,
seed=11121
)
## Compiling Stan program...
## Start sampling
plot(hlm2)
summary(hlm2)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: math ~ 1 + homew + ses + (1 + homew | school.id)
## Data: nels (Number of observations: 519)
## Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~school.id (Number of levels: 23)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 7.41 1.37 5.18 10.57 1.00 1546
## sd(homew) 4.02 0.77 2.76 5.75 1.00 1400
## cor(Intercept,homew) -0.83 0.08 -0.94 -0.62 1.00 1779
## Tail_ESS
## sd(Intercept) 2746
## sd(homew) 2516
## cor(Intercept,homew) 3374
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 47.01 1.70 43.65 50.23 1.00 1535 2879
## homew 1.83 0.91 -0.00 3.64 1.00 1389 2463
## ses 2.76 0.51 1.77 3.76 1.00 6906 6471
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 7.19 0.23 6.75 7.67 1.00 8430 5980
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(conditional_effects(hlm2), ask = FALSE)
#or
# plot(conditional_effects(hlm2), points=TRUE, ask = FALSE)
pp_check(hlm2) # shows dens_overlay plot by default
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
There are many possible posterior predictive checks. I suggest checking the brms manual. Just some of them are below.
``{r eval=FALSE} pp_check(hlm2, type = “error_hist”, ndraws = 11) pp_check(hlm2, type = “scatter_avg”, ndraws = 100) pp_check(hlm2, type = “stat_2d”) pp_check(hlm2, type = “loo_pit”)
# Level 2: Multiple Predictors, Factors and Cross-level Interactions
brm handles factors like lm or glm (i.e., dummy codes). We need to declare the variables as factors.
```r
nels$urban <- as.factor(nels$urban)
nels$schtyp <- as.factor(nels$schtype)
nels$classtr <- as.factor(nels$classtr)
Then just are above,
hlm3 <- brm(math ~ 1 + homew + ses + urban + ses*urban + (1 + homew| school.id),
data=nels,
warmup=2000,
iter=4000,
chains=4,
cores=4,
seed=11121
)
## Compiling Stan program...
## Start sampling
plot(hlm3) # trace plots & density
summary(hlm3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: math ~ 1 + homew + ses + urban + ses * urban + (1 + homew | school.id)
## Data: nels (Number of observations: 519)
## Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~school.id (Number of levels: 23)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 7.63 1.37 5.34 10.75 1.00 2607
## sd(homew) 3.92 0.71 2.75 5.57 1.00 2385
## cor(Intercept,homew) -0.81 0.09 -0.93 -0.58 1.00 2599
## Tail_ESS
## sd(Intercept) 4302
## sd(homew) 3713
## cor(Intercept,homew) 4497
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 48.22 2.19 43.92 52.52 1.00 2484 3616
## homew 1.86 0.89 0.10 3.65 1.00 2388 3632
## ses 1.43 0.79 -0.14 2.98 1.00 4691 5137
## urban2 -2.90 2.47 -7.70 2.03 1.00 2763 3779
## urban3 -1.03 2.37 -5.70 3.70 1.00 2606 3448
## ses:urban2 2.14 1.31 -0.42 4.77 1.00 5664 6167
## ses:urban3 2.61 1.20 0.20 4.93 1.00 6732 6231
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 7.15 0.23 6.71 7.63 1.00 9954 5841
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(conditional_effects(hlm3), ask = FALSE)
pp_check(hlm3) # shows dens_overlay plot by default
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
real normal_lccdf(reals y | reals mu, reals sigma)
The log of the complementary cumulative normal distribution of y given location mu and scale sigma; normal_lccdf will overflow to 0 for y−μσ below -37.5 and underflow to −∞ for y−μσ above 8.25; see above for discussion of Phi_approx as an alternative.
By default centering is used. If you want QR decomposition (which is especially good for correlated predictors), this can be requested using brmsformula. I am including this because we talked about it for Stan.
The model formula can be specified separately and then put into the brm call. There are a number of options and two that I added here are center (which is TRUE by default) and QR decomposition.
# Options for the forumla
fm3 <- brmsformula(math ~ 1 + homew + ses + urban + ses*urban + (1 + homew| school.id),
center=TRUE,
decomp="QR"
)
The rest is repeating what we did above, except using fm3 as the formula.
hlm3.alt <- brm(fm3,
data=nels,
warmup=2000,
iter=4000,
chains=4,
cores=4,
seed=11121
)
## Compiling Stan program...
## Start sampling
plot(hlm3.alt) # trace plots & density
summary(hlm3.alt)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: math ~ 1 + homew + ses + urban + ses * urban + (1 + homew | school.id)
## Data: nels (Number of observations: 519)
## Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~school.id (Number of levels: 23)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 7.63 1.35 5.38 10.61 1.00 2208
## sd(homew) 3.94 0.72 2.74 5.60 1.00 2028
## cor(Intercept,homew) -0.81 0.09 -0.93 -0.57 1.00 1978
## Tail_ESS
## sd(Intercept) 3902
## sd(homew) 3904
## cor(Intercept,homew) 3346
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 48.28 2.15 44.09 52.50 1.00 1912 3556
## homew 1.85 0.87 0.13 3.56 1.00 2082 3695
## ses 1.44 0.78 -0.07 2.99 1.00 5423 5439
## urban2 -3.00 2.49 -7.88 1.96 1.00 1826 3007
## urban3 -1.12 2.40 -5.89 3.64 1.00 1843 2913
## ses:urban2 2.13 1.30 -0.42 4.72 1.00 6703 6253
## ses:urban3 2.60 1.16 0.35 4.87 1.00 7134 6215
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 7.15 0.24 6.70 7.64 1.00 8693 6292
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(conditional_effects(hlm3.alt), ask = FALSE)
In terms of criteria to compare models in order worst to best are Bayes Factor, DIC, waic, loo, and psis-loo.
The criterion “waic” (Widely accessible information criterion) is asymptotically equivalent to loo (Leave-one-out-validation), but loo is faster and easier to compute. Some work has shown that loo is the preferred for model comparison. For these data and models, R throws warnings when I ask for “waic” and suggests using loo instead..
loo is noisy and a solution suggested for this is to use Pareto smoothed importance sampling LOO or PSIS-loo (Verhari, Gelman & Gabry, “Practical Bayesian model evaluation using leave-one-out cross-validation, http://www.stat.columbia.edu/~gelman/research/unpublished/loo_stan.pdf)
Since I didn’t get is at first, I can add this. The command below computes PSIS-LOO and compares values for our 3 models.
For more information a good starting place is https://mc-stan.org/loo/reference/loo-glossary.html
Smaller values of looic are better (just like other information criteria), and for elpd larger is better. Note that looic equals −2∗elpd
hlm1 <- add_criterion(hlm1, criterion="loo")
hlm1$criteria$loo
##
## Computed from 8000 by 519 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1889.8 13.7
## p_loo 22.0 1.3
## looic 3779.6 27.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
hlm2 <- add_criterion(hlm2, criterion="loo")
## Warning: Found 2 observations with a pareto_k > 0.7 in model 'hlm2'. It is
## recommended to set 'moment_match = TRUE' in order to perform moment matching for
## problematic observations.
hlm2$criteria$loo
##
## Computed from 8000 by 519 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1779.9 14.6
## p_loo 36.0 2.3
## looic 3559.9 29.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 514 99.0% 1423
## (0.5, 0.7] (ok) 3 0.6% 1943
## (0.7, 1] (bad) 2 0.4% 397
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
hlm3 <- add_criterion(hlm3, criterion="loo")
hlm3$criteria$loo
##
## Computed from 8000 by 519 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1778.8 14.7
## p_loo 39.0 2.5
## looic 3557.6 29.3
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 512 98.7% 1504
## (0.5, 0.7] (ok) 7 1.3% 354
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
If you wan to compare, this is a better
loo(hlm1, hlm2, hlm3, compare=TRUE, moment_match=TRUE, save_psis=TRUE)
## Output of model 'hlm1':
##
## Computed from 8000 by 519 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1889.8 13.7
## p_loo 22.0 1.3
## looic 3779.6 27.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'hlm2':
##
## Computed from 8000 by 519 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1779.9 14.6
## p_loo 36.0 2.3
## looic 3559.9 29.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 514 99.0% 1423
## (0.5, 0.7] (ok) 3 0.6% 1943
## (0.7, 1] (bad) 2 0.4% 397
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'hlm3':
##
## Computed from 8000 by 519 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1778.8 14.7
## p_loo 39.0 2.5
## looic 3557.6 29.3
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 512 98.7% 1504
## (0.5, 0.7] (ok) 7 1.3% 354
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## hlm3 0.0 0.0
## hlm2 -1.2 2.6
## hlm1 -111.0 12.7
Note that
elpd_loo is the Expected Log Pointwise Predictive Density, which is the sum of N individual pointwise log predictive densities.
p_loo is the defective number of parameters (i.e., the difference between elpd_loo and the non-cross validity log posterior predictive density).
elpd_diff is the difference in elpl_loo for two models. If more than 2 models are compared then it is the difference relative to the model with the highest elpd_loo.
For more information a good starting place is https://mc-stan.org/loo/reference/loo-glossary.html
Although most of the time you can modify your models within brms, you might want to modify the Stan code and run it using rstan (e.g., a simulation study where only data changes, or this is a starting point for your own model). Also this is a good way to learn Stan coding.
For the data input to Stan by brms,
data_1 <- standata(hlm1)
and for the model code,
stan_code <- stancode(hlm1)
stan_code
## // generated with brms 2.16.1
## functions {
## }
## data {
## int<lower=1> N; // total number of observations
## vector[N] Y; // response variable
## // data for group-level effects of ID 1
## int<lower=1> N_1; // number of grouping levels
## int<lower=1> M_1; // number of coefficients per level
## int<lower=1> J_1[N]; // grouping indicator per observation
## // group-level predictor values
## vector[N] Z_1_1;
## int prior_only; // should the likelihood be ignored?
## }
## transformed data {
## }
## parameters {
## real Intercept; // temporary intercept for centered predictors
## real<lower=0> sigma; // dispersion parameter
## vector<lower=0>[M_1] sd_1; // group-level standard deviations
## vector[N_1] z_1[M_1]; // standardized group-level effects
## }
## transformed parameters {
## vector[N_1] r_1_1; // actual group-level effects
## r_1_1 = (sd_1[1] * (z_1[1]));
## }
## model {
## // likelihood including constants
## if (!prior_only) {
## // initialize linear predictor term
## vector[N] mu = Intercept + rep_vector(0.0, N);
## for (n in 1:N) {
## // add more terms to the linear predictor
## mu[n] += r_1_1[J_1[n]] * Z_1_1[n];
## }
## target += normal_lpdf(Y | mu, sigma);
## }
## // priors including constants
## target += student_t_lpdf(Intercept | 3, 51, 13.3);
## target += student_t_lpdf(sigma | 3, 0, 13.3)
## - 1 * student_t_lccdf(0 | 3, 0, 13.3);
## target += student_t_lpdf(sd_1 | 3, 0, 13.3)
## - 1 * student_t_lccdf(0 | 3, 0, 13.3);
## target += std_normal_lpdf(z_1[1]);
## }
## generated quantities {
## // actual population-level intercept
## real b_Intercept = Intercept;
## }
If you don’t want to look at Stan code, you may want to know what the priors are that were used to fit a model. For example,
prior_summary(hlm2)
## prior class coef group resp dpar nlpar bound
## (flat) b
## (flat) b homew
## (flat) b ses
## student_t(3, 51, 13.3) Intercept
## lkj_corr_cholesky(1) L
## lkj_corr_cholesky(1) L school.id
## student_t(3, 0, 13.3) sd
## student_t(3, 0, 13.3) sd school.id
## student_t(3, 0, 13.3) sd homew school.id
## student_t(3, 0, 13.3) sd Intercept school.id
## student_t(3, 0, 13.3) sigma
## source
## default
## (vectorized)
## (vectorized)
## default
## default
## (vectorized)
## default
## (vectorized)
## (vectorized)
## (vectorized)
## default
Note that for sigma this is actually a half non-central t-distribution. Other distributions can be put here. Gelman recommends a half Cauchy distribution (a truncated Cauchy,
\[ f(y| \mu, \sigma) = \left\{ \begin{array}{ll} \frac{2}{\pi\sigma} \frac{1}{1+(y-\mu)^2/\sigma^2} & y \ge \mu \\ 0 & \mbox{otherwise} \end{array}\right. \]
How to change your priors? You can define priors and ask brms to create the Stan code.
prior1 <- prior(normal(0, 5), class = "Intercept")
prior2 <- prior(cauchy(0, 1), class = "sd")
hlm1priors <- c(prior1, prior2)
hlm1.stan <- make_stancode(math ~ 1 + (1 | school.id), data=nels, prior=hlm1priors)
data1 <-standata(hlm1)
hlm1.rstan <- stan(model_code = hlm1.stan,
model_name = "Nels23: math ~ 1 + (1 |school.id)",
data = data1,
iter = 4000,
chains = 4,
cores = 4,
verbose = FALSE
)
## Warning: There were 318 divergent transitions after warmup. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 2 chains where the estimated Bayesian Fraction of Missing Information was low. See
## http://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.25, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#tail-ess
Based on the following diagnostics, my changes make things pretty bad. Some of the chains are not mixing well, some large auto-correlations, and strange looking posteriors.
stan_trace(hlm1.rstan)
## 'pars' not specified. Showing first 10 parameters by default.
stan_dens(hlm1.rstan)
## 'pars' not specified. Showing first 10 parameters by default.
stan_hist(hlm1.rstan)
## 'pars' not specified. Showing first 10 parameters by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_ac(hlm1.rstan)
## 'pars' not specified. Showing first 10 parameters by default.
print(hlm1.rstan)
## Inference for Stan model: Nels23: math ~ 1 + (1 |school.id).
## 4 chains, each with iter=4000; warmup=2000; thin=1;
## post-warmup draws per chain=2000, total post-warmup draws=8000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5%
## Intercept 38.73 5.58 12.73 10.76 27.70 45.81 47.80 49.90
## sigma 9.04 0.01 0.29 8.50 8.84 9.03 9.23 9.61
## sd_1[1] 14.61 4.87 13.25 4.26 5.71 7.28 23.65 48.53
## z_1[1,1] 1.32 0.03 0.31 0.72 1.12 1.29 1.51 1.99
## z_1[1,2] 1.25 0.01 0.43 0.42 1.00 1.23 1.49 2.16
## z_1[1,3] 1.08 0.01 0.48 0.04 0.81 1.10 1.37 2.06
## z_1[1,4] 0.41 0.15 0.46 -0.53 0.08 0.45 0.80 1.09
## z_1[1,5] 0.10 0.21 0.58 -1.02 -0.33 0.08 0.65 0.99
## z_1[1,6] 0.99 0.01 0.33 0.29 0.79 1.01 1.20 1.65
## z_1[1,7] 0.59 0.11 0.40 -0.28 0.32 0.63 0.90 1.20
## z_1[1,8] -0.28 0.29 0.76 -1.64 -0.88 -0.38 0.51 0.87
## z_1[1,9] 0.94 0.02 0.31 0.26 0.75 0.97 1.14 1.53
## z_1[1,10] 0.39 0.16 0.47 -0.59 0.05 0.44 0.79 1.10
## z_1[1,11] -0.14 0.26 0.70 -1.40 -0.68 -0.22 0.56 0.92
## z_1[1,12] 0.56 0.12 0.41 -0.31 0.28 0.61 0.89 1.19
## z_1[1,13] 0.18 0.20 0.56 -0.91 -0.24 0.17 0.69 1.02
## z_1[1,14] 1.25 0.02 0.35 0.62 1.03 1.22 1.45 2.02
## z_1[1,15] 1.21 0.02 0.32 0.64 1.01 1.19 1.39 1.89
## z_1[1,16] 0.69 0.09 0.38 -0.17 0.45 0.75 0.97 1.28
## z_1[1,17] -0.41 0.32 0.82 -1.84 -1.06 -0.55 0.47 0.82
## z_1[1,18] 2.11 0.19 0.63 0.97 1.54 2.17 2.58 3.25
## z_1[1,19] 0.54 0.12 0.42 -0.35 0.26 0.60 0.87 1.18
## z_1[1,20] 0.17 0.20 0.55 -0.92 -0.24 0.15 0.68 1.01
## z_1[1,21] 0.73 0.08 0.34 -0.04 0.53 0.79 0.98 1.28
## z_1[1,22] 0.34 0.16 0.49 -0.68 -0.02 0.38 0.77 1.07
## z_1[1,23] 1.08 0.01 0.36 0.36 0.87 1.09 1.29 1.81
## r_1_1[1] 17.26 5.66 12.97 4.96 7.99 10.38 28.36 45.54
## r_1_1[2] 17.10 5.93 13.91 2.31 7.04 10.46 28.85 47.71
## r_1_1[3] 15.67 6.08 13.90 0.21 5.46 9.49 27.02 46.49
## r_1_1[4] 9.63 5.59 12.83 -2.79 0.48 2.96 20.66 38.02
## r_1_1[5] 7.05 5.53 12.72 -5.29 -1.97 0.52 17.69 35.22
## r_1_1[6] 14.63 5.67 13.16 1.53 5.20 7.99 25.82 43.80
## r_1_1[7] 11.13 5.61 12.90 -1.48 1.92 4.51 22.06 39.55
## r_1_1[8] 3.76 5.44 12.56 -8.41 -5.17 -2.59 13.82 31.68
## r_1_1[9] 14.14 5.69 13.05 1.35 4.74 7.45 25.35 42.74
## r_1_1[10] 9.50 5.60 12.90 -3.15 0.28 2.93 20.18 38.29
## r_1_1[11] 5.03 5.52 12.65 -7.26 -3.98 -1.49 15.48 33.11
## r_1_1[12] 10.96 5.59 12.91 -1.65 1.73 4.34 21.99 39.23
## r_1_1[13] 7.70 5.54 12.78 -4.76 -1.39 1.13 18.35 35.97
## r_1_1[14] 16.87 5.78 13.39 3.46 7.24 10.18 27.72 45.93
## r_1_1[15] 16.42 5.70 13.18 3.57 7.00 9.59 27.66 45.25
## r_1_1[16] 12.02 5.65 13.00 -0.92 2.70 5.47 23.09 40.71
## r_1_1[17] 2.65 5.43 12.51 -9.58 -6.19 -3.70 12.85 30.48
## r_1_1[18] 23.72 5.67 12.99 11.61 14.43 16.76 35.01 52.12
## r_1_1[19] 10.76 5.57 12.87 -1.87 1.53 4.21 21.42 38.96
## r_1_1[20] 7.63 5.57 12.76 -4.86 -1.41 1.05 18.41 35.76
## r_1_1[21] 12.35 5.64 12.87 -0.21 3.16 5.75 23.58 40.65
## r_1_1[22] 9.05 5.56 12.81 -3.56 -0.15 2.56 19.86 37.16
## r_1_1[23] 15.46 5.76 13.32 1.94 5.79 8.97 27.08 44.32
## b_Intercept 38.73 5.58 12.73 10.76 27.70 45.81 47.80 49.90
## lp__ -1950.70 6.73 16.82 -1971.82 -1963.13 -1957.72 -1934.03 -1917.86
## n_eff Rhat
## Intercept 5 1.39
## sigma 962 1.00
## sd_1[1] 7 1.30
## z_1[1,1] 89 1.04
## z_1[1,2] 844 1.01
## z_1[1,3] 1961 1.00
## z_1[1,4] 9 1.17
## z_1[1,5] 7 1.22
## z_1[1,6] 727 1.01
## z_1[1,7] 13 1.11
## z_1[1,8] 7 1.25
## z_1[1,9] 234 1.03
## z_1[1,10] 9 1.16
## z_1[1,11] 7 1.25
## z_1[1,12] 12 1.12
## z_1[1,13] 8 1.20
## z_1[1,14] 369 1.01
## z_1[1,15] 344 1.01
## z_1[1,16] 17 1.08
## z_1[1,17] 7 1.27
## z_1[1,18] 11 1.19
## z_1[1,19] 12 1.12
## z_1[1,20] 8 1.21
## z_1[1,21] 19 1.08
## z_1[1,22] 9 1.17
## z_1[1,23] 1305 1.00
## r_1_1[1] 5 1.38
## r_1_1[2] 5 1.36
## r_1_1[3] 5 1.37
## r_1_1[4] 5 1.38
## r_1_1[5] 5 1.38
## r_1_1[6] 5 1.37
## r_1_1[7] 5 1.38
## r_1_1[8] 5 1.37
## r_1_1[9] 5 1.38
## r_1_1[10] 5 1.37
## r_1_1[11] 5 1.38
## r_1_1[12] 5 1.37
## r_1_1[13] 5 1.37
## r_1_1[14] 5 1.37
## r_1_1[15] 5 1.37
## r_1_1[16] 5 1.38
## r_1_1[17] 5 1.37
## r_1_1[18] 5 1.38
## r_1_1[19] 5 1.37
## r_1_1[20] 5 1.38
## r_1_1[21] 5 1.38
## r_1_1[22] 5 1.37
## r_1_1[23] 5 1.37
## b_Intercept 5 1.39
## lp__ 6 1.33
##
## Samples were drawn using NUTS(diag_e) at Thu Nov 04 16:13:30 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).