To make this run but will drop when put into master set of examples.
library(rstan)
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
As rstan recommends
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
Get the data
setwd("C:/Users/cja/Dropbox/edps 590BAY/Lectures/10 Ham_Stan_brms")
nels <- read.table("nels_school_62821.txt",header=TRUE)
names(nels)
## [1] "schid" "studid" "sex" "race" "homework" "schtyp"
## [7] "ses" "paredu" "math" "clastruc" "size" "urban"
## [13] "geograph" "perminor" "rati0"
Rather than list out all the regression parameters, we can make use vectors (regression coefficients) and matrices (design matrix which contains the explanatory/predictor variables)
#############################################
# stan model definition #
#############################################
mlreg <- "
data {
int<lower=0> N; // number of students
int<lower=0> K; // number of predictors
matrix[N,K] x; // predictor matrix
vector[N] y; // outcome vector
}
parameters {
real b0; // intercept
vector[K] beta; // coefficients for predictors
real<lower=0> sigma; // error scale
}
model {
target += normal_lpdf(y | b0 + x*beta, sigma);
}
"
In the model you need to make sure that matrices are conformable in the specification of the model for y.
We could have added the intercept to beta and a column of ones in x.
For the data list we need to create the design matrix, x, and include the number of columns of x (i.e., number of predictors and coefficients for the parameter vector beta). x should be of type matrix.
N <- nrow(nels)
y = nels$math
x <- matrix(cbind(nels$homework,nels$ses),nrow=N)
K <- ncol(x)
dat <- list(N = N, K=K, y = y, x = x);
And to run rstan, nothing really new here.
#########################################
# Get estimates #
#########################################
start.time <- Sys.time()
fitLRsimple <- stan(
model_code = mlreg ,
model_name = "Multiple linear regression",
data = dat,
iter = 2000,
chains = 4,
cores = 4,
warmup = floor(2000/2),
verbose = FALSE,
sample_file = file.path(tempdir(), 'nels_mlreg.txt'))
end.time <- Sys.time()
end.time-start.time
## Time difference of 57.97207 secs
After the model runs, check for convergence and ask for what ever statistics (summaries) and graphics to help you here.
print(fitLRsimple, probs=c(.025,.50,.975), digits=2)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 50% 97.5% n_eff Rhat
## b0 60.62 0.05 2.06 56.49 60.67 64.61 1757 1
## beta[1] 1.11 0.01 0.39 0.36 1.11 1.87 2377 1
## beta[2] -1.40 0.03 1.48 -4.21 -1.41 1.65 1929 1
## sigma 5.51 0.01 0.52 4.62 5.46 6.65 2462 1
## lp__ -206.89 0.04 1.47 -210.51 -206.60 -205.06 1319 1
##
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:37:39 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_trace(fitLRsimple)
stan_dens(fitLRsimple)
stan_hist(fitLRsimple)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_ac(fitLRsimple)
And if all looks good
print(fitLRsimple)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## b0 60.62 0.05 2.06 56.49 59.25 60.67 62.01 64.61 1757 1
## beta[1] 1.11 0.01 0.39 0.36 0.85 1.11 1.38 1.87 2377 1
## beta[2] -1.40 0.03 1.48 -4.21 -2.38 -1.41 -0.44 1.65 1929 1
## sigma 5.51 0.01 0.52 4.62 5.15 5.46 5.83 6.65 2462 1
## lp__ -206.89 0.04 1.47 -210.51 -207.55 -206.60 -205.81 -205.06 1319 1
##
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:37:39 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(fitLRsimple)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
nels$chomework <- nels$homework - mean(nels$homework)
nels$cses <- nels$ses - mean(nels$ses)
N <- nrow(nels)
y = nels$math
x <- matrix(cbind(nels$chomework,nels$cses),nrow=N)
K <- ncol(x)
cdat <- list(N = N, K=K, y = y, x = x);
start.time <- Sys.time()
fitLRcenter <- stan(
model_code = mlreg ,
model_name = "Multiple linear regression",
data = cdat,
iter = 2000,
chains = 4,
cores = 4,
warmup = floor(2000/2),
verbose = FALSE,
sample_file = file.path(tempdir(), 'nels_mlreg.txt'))
end.time <- Sys.time()
end.time-start.time
## Time difference of 7.617656 secs
stan_trace(fitLRcenter)
stan_dens(fitLRcenter)
stan_hist(fitLRcenter)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_ac(fitLRcenter)
print(fitLRcenter, probs=c(.025,.50,.975), digits=2)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 50% 97.5% n_eff Rhat
## b0 62.81 0.01 0.67 61.48 62.81 64.13 4563 1
## beta[1] 1.11 0.01 0.39 0.38 1.11 1.90 4335 1
## beta[2] -1.40 0.02 1.47 -4.27 -1.40 1.52 4166 1
## sigma 5.49 0.01 0.49 4.63 5.46 6.53 4088 1
## lp__ -206.82 0.03 1.44 -210.60 -206.49 -205.07 1733 1
##
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:37:51 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
see https://mc-stan.org/docs/2_28/stan-users-guide/QR-reparameterization.html
mlreg3 <- "
data {
int<lower=0> N; // number of students
int<lower=0> K; // number of predictors
matrix[N,K] x; // predictor matrix
vector[N] y; // outcome vector
}
transformed data {
matrix[N,K] Q_ast;
matrix[K,K] R_ast;
matrix[K,K] R_ast_inverse;
Q_ast = qr_thin_Q(x) * sqrt(N-1); // thin and scale QR decomposition
R_ast = qr_thin_R(x) / sqrt(N-1);
R_ast_inverse = inverse(R_ast);
}
parameters {
real alpha;
vector[K] theta;
real<lower=0> sigma;
}
model {
y ~ normal(Q_ast * theta + alpha, sigma);
}
generated quantities {
vector[K] beta;
beta = R_ast_inverse * theta;
}
"
#########################################
# model 3L QR #
#########################################
start.time <- Sys.time()
fitLR3 <- stan(
model_code = mlreg3,
model_name = "Multiple linear regression",
data = dat,
iter = 2000,
chains = 4,
cores = 4,
warmup = floor(2000/2),
verbose = FALSE
)
end.time <- Sys.time()
end.time - start.time
## Time difference of 1.145867 mins
After the model runs, check for convergence and ask for what ever statistics (summaries) and graphics to help you here.
stan_trace(fitLR3)
stan_dens(fitLR3)
stan_hist(fitLR3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_ac(fitLR3)
And if all looks good
print(fitLR3)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## alpha 60.63 0.05 2.05 56.61 59.27 60.61 62.01 64.56 1480
## theta[1] 2.85 0.05 1.97 -1.03 1.53 2.84 4.16 6.67 1485
## theta[2] -0.91 0.02 0.98 -2.79 -1.57 -0.91 -0.26 1.04 1780
## sigma 5.52 0.01 0.51 4.65 5.16 5.48 5.82 6.62 2041
## beta[1] 1.11 0.01 0.41 0.29 0.85 1.11 1.39 1.91 2608
## beta[2] -1.39 0.04 1.49 -4.25 -2.39 -1.39 -0.40 1.59 1780
## lp__ -145.35 0.04 1.54 -149.17 -146.10 -144.97 -144.23 -143.50 1316
## Rhat
## alpha 1
## theta[1] 1
## theta[2] 1
## sigma 1
## beta[1] 1
## beta[2] 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:39:03 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(fitLR3)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
We have to center and put centered value in data list. We could use the rescale function but I prefer this.
N <- nrow(nels)
y = nels$math
# center
nels$cx1 <- nels$homework - mean(nels$homework)
nels$cx2 <- nels$ses - mean(nels$ses)
cx <- matrix(cbind(nels$cx1, nels$cx2), nrow=N)
K <- ncol(cx)
dat <- list(N = N, K=K, y = y, x = cx);
By keeping the same name for x we can reuse code. The model is
mlreg4 <- "
data {
int<lower=0> N; // number of students
int<lower=0> K; // number of predictors
matrix[N,K] x; // predictor matrix
vector[N] y; // outcome vector
}
transformed data {
matrix[N,K] Q_ast;
matrix[K,K] R_ast;
matrix[K,K] R_ast_inverse;
Q_ast = qr_thin_Q(x) * sqrt(N-1); // thin and scale QR decomposition
R_ast = qr_thin_R(x) / sqrt(N-1);
R_ast_inverse = inverse(R_ast);
}
parameters {
real alpha;
vector[K] theta;
real<lower=0> sigma;
}
model {
y ~ normal(Q_ast * theta + alpha, sigma);
}
generated quantities {
vector[K] beta;
beta = R_ast_inverse * theta;
}
"
Get our samples,
start.time <- Sys.time()
fitLR4 <- stan(
model_code = mlreg4,
model_name = "Multiple linear regression: Center & QR",
data = dat,
iter = 2000,
chains = 4,
cores = 4,
warmup = floor(2000/2),
verbose = FALSE
)
end.time <- Sys.time()
end.time-start.time
## Time difference of 9.157471 secs
stan_trace(fitLR4)
stan_dens(fitLR4)
stan_hist(fitLR4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_ac(fitLR4)
And if all looks good
print(fitLR4)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## alpha 62.82 0.01 0.68 61.44 62.37 62.82 63.26 64.20 4066
## theta[1] 1.89 0.01 0.66 0.60 1.44 1.89 2.34 3.18 4565
## theta[2] -0.66 0.01 0.68 -2.03 -1.11 -0.65 -0.21 0.68 4744
## sigma 5.49 0.01 0.49 4.61 5.14 5.45 5.80 6.57 4027
## beta[1] 1.11 0.01 0.39 0.36 0.85 1.11 1.37 1.87 4558
## beta[2] -1.43 0.02 1.48 -4.44 -2.42 -1.42 -0.46 1.49 4744
## lp__ -145.27 0.03 1.46 -148.88 -146.03 -144.92 -144.19 -143.50 2143
## Rhat
## alpha 1
## theta[1] 1
## theta[2] 1
## sigma 1
## beta[1] 1
## beta[2] 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:39:17 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(fitLR4)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)