Load the packages needed to make this run; this preamble will be dropped when this is put into the master set of examples.

library(rstan)
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

As rstan recommends

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

Get the data

setwd("C:/Users/cja/Dropbox/edps 590BAY/Lectures/10 Ham_Stan_brms")

nels <- read.table("nels_school_62821.txt",header=TRUE)
names(nels)
##  [1] "schid"    "studid"   "sex"      "race"     "homework" "schtyp"  
##  [7] "ses"      "paredu"   "math"     "clastruc" "size"     "urban"   
## [13] "geograph" "perminor" "rati0"

Model definition for Multiple Regression

Rather than list out all the regression parameters, we can make use of a vector (the regression coefficients) and a matrix (the design matrix, which contains the explanatory/predictor variables).

#############################################
# stan model definition                     #
#############################################
mlreg <- "
data {
  int<lower=0> N;   // number of students
  int<lower=0> K;   // number of predictors
  matrix[N,K] x;   // predictor matrix
  vector[N] y;      // outcome vector
} 
parameters {
  real b0;              // intercept
  vector[K] beta;       // coefficients for predictors
  real<lower=0> sigma;  // error scale
} 
model {
  target += normal_lpdf(y | b0 + x*beta, sigma);
} 
"

In the model you need to make sure the matrices are conformable in the specification of the model for y: x is N x K and beta is K x 1, so x*beta is N x 1 and matches y.

We could instead have absorbed the intercept into beta and added a column of ones to x.
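
For instance, a minimal sketch of that alternative (the model string mlreg_ones and the design matrix x_ones are illustrative names, not used elsewhere in these notes):

mlreg_ones <- "
data {
  int<lower=0> N;   // number of students
  int<lower=0> K;   // number of columns of x, including the intercept column
  matrix[N,K] x;    // first column is all ones
  vector[N] y;      // outcome vector
}
parameters {
  vector[K] beta;       // beta[1] plays the role of b0
  real<lower=0> sigma;  // error scale
}
model {
  target += normal_lpdf(y | x*beta, sigma);
}
"
# x_ones <- cbind(1, nels$homework, nels$ses)  # design matrix with a leading column of ones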

For the data list we need to create the design matrix, x, and include the number of columns of x (i.e., the number of predictors, which is also the length of the parameter vector beta). x should be of type matrix.

N <- nrow(nels) 
y = nels$math
x <- matrix(cbind(nels$homework,nels$ses),nrow=N)
K <- ncol(x)
dat <- list(N = N, K=K, y = y, x = x); 

And now run rstan; nothing really new here.

#########################################
# Get estimates                         #
#########################################
start.time <- Sys.time()
fitLRsimple <- stan(
     model_code = mlreg , 
       model_name = "Multiple linear regression", 
     data = dat, 
       iter = 2000, 
       chains = 4,
       cores = 4, 
       warmup = floor(2000/2),
       verbose = FALSE, 
       sample_file = file.path(tempdir(), 'nels_mlreg.txt')) 
end.time <- Sys.time()
end.time-start.time
## Time difference of 57.97207 secs

After the model runs, check for convergence and ask for whatever statistics (summaries) and graphics help you here.

Output and various graphics

print(fitLRsimple, probs=c(.025,.50,.975), digits=2)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean   sd    2.5%     50%   97.5% n_eff Rhat
## b0        60.62    0.05 2.06   56.49   60.67   64.61  1757    1
## beta[1]    1.11    0.01 0.39    0.36    1.11    1.87  2377    1
## beta[2]   -1.40    0.03 1.48   -4.21   -1.41    1.65  1929    1
## sigma      5.51    0.01 0.52    4.62    5.46    6.65  2462    1
## lp__    -206.89    0.04 1.47 -210.51 -206.60 -205.06  1319    1
## 
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:37:39 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
stan_trace(fitLRsimple)

stan_dens(fitLRsimple)

stan_hist(fitLRsimple)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ac(fitLRsimple)
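
The same diagnostics can also be checked numerically from the fit object; a short sketch (summary() on a stanfit returns a matrix that includes the n_eff and Rhat columns):

fit_sum <- summary(fitLRsimple)$summary
round(fit_sum[, c("n_eff", "Rhat")], 2)   # effective sample sizes and split R-hat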

And if all looks good

print(fitLRsimple)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## b0        60.62    0.05 2.06   56.49   59.25   60.67   62.01   64.61  1757    1
## beta[1]    1.11    0.01 0.39    0.36    0.85    1.11    1.38    1.87  2377    1
## beta[2]   -1.40    0.03 1.48   -4.21   -2.38   -1.41   -0.44    1.65  1929    1
## sigma      5.51    0.01 0.52    4.62    5.15    5.46    5.83    6.65  2462    1
## lp__    -206.89    0.04 1.47 -210.51 -207.55 -206.60 -205.81 -205.06  1319    1
## 
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:37:39 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
plot(fitLRsimple)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
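
If you want the posterior draws themselves for custom summaries or plots, they can be pulled out of the fit object; a sketch using rstan's extract():

draws <- rstan::extract(fitLRsimple)           # list with b0, beta, sigma, lp__
quantile(draws$beta[, 1], c(.025, .5, .975))   # e.g., posterior quantiles for beta[1]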

Center predictors

Centering the predictors reduces the posterior correlation between the intercept and the slopes, which typically makes sampling more efficient; compare the run time and n_eff values below with those for the uncentered fit above.

nels$chomework <- nels$homework - mean(nels$homework)
nels$cses <- nels$ses - mean(nels$ses)
N <- nrow(nels) 
y = nels$math
x <- matrix(cbind(nels$chomework,nels$cses),nrow=N)
K <- ncol(x)
cdat <- list(N = N, K=K, y = y, x = x); 
start.time <- Sys.time()
fitLRcenter <- stan(
     model_code = mlreg , 
       model_name = "Multiple linear regression", 
     data = cdat, 
       iter = 2000, 
       chains = 4,
       cores = 4, 
       warmup = floor(2000/2),
       verbose = FALSE, 
       sample_file = file.path(tempdir(), 'nels_mlreg.txt')) 
end.time <- Sys.time()
end.time-start.time
## Time difference of 7.617656 secs
stan_trace(fitLRcenter)

stan_dens(fitLRcenter)

stan_hist(fitLRcenter)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ac(fitLRcenter)

print(fitLRcenter, probs=c(.025,.50,.975), digits=2)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean   sd    2.5%     50%   97.5% n_eff Rhat
## b0        62.81    0.01 0.67   61.48   62.81   64.13  4563    1
## beta[1]    1.11    0.01 0.39    0.38    1.11    1.90  4335    1
## beta[2]   -1.40    0.02 1.47   -4.27   -1.40    1.52  4166    1
## sigma      5.49    0.01 0.49    4.63    5.46    6.53  4088    1
## lp__    -206.82    0.03 1.44 -210.60 -206.49 -205.07  1733    1
## 
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:37:51 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

QR parameterization

see https://mc-stan.org/docs/2_28/stan-users-guide/QR-reparameterization.html
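
The idea is to decompose the design matrix as x = Q*R*, so that x*beta = Q*(R*beta) = Q*theta; the sampler works with theta (the columns of Q* are orthogonal, which helps sampling), and beta = R*^{-1}*theta is recovered in generated quantities. A quick R sketch of what the transformed data block computes, using the uncentered data list dat built above (not needed to run the model):

qr_x  <- qr(dat$x)
Q_ast <- qr.Q(qr_x) * sqrt(dat$N - 1)   # thin, scaled Q
R_ast <- qr.R(qr_x) / sqrt(dat$N - 1)   # K x K upper-triangular R
max(abs(Q_ast %*% R_ast - dat$x))       # ~0: Q_ast R_ast reproduces x
# beta is recovered from theta as solve(R_ast) %*% theta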

mlreg3 <- "
data {
  int<lower=0> N;   // number of students
  int<lower=0> K;   // number of predictors
  matrix[N,K] x;   // predictor matrix
  vector[N] y;      // outcome vector
} 
transformed data {
   matrix[N,K] Q_ast;
   matrix[K,K] R_ast;
   matrix[K,K] R_ast_inverse;
   
   Q_ast = qr_thin_Q(x) * sqrt(N-1);   // thin and scale QR decomposition
   R_ast = qr_thin_R(x) / sqrt(N-1);
   R_ast_inverse = inverse(R_ast);
}
parameters {
  real alpha;             // intercept
  vector[K] theta;        // coefficients on Q_ast
  real<lower=0> sigma;    // error scale
} 
model {
  y  ~ normal(Q_ast * theta + alpha, sigma);
} 
generated quantities {
  vector[K] beta;
  beta = R_ast_inverse * theta; 
}
"
#########################################
# model 3: QR                           #
#########################################
start.time <- Sys.time()
fitLR3 <- stan(
           model_code = mlreg3, 
             model_name = "Multiple linear regression", 
           data = dat, 
             iter = 2000, 
             chains = 4,
             cores = 4, 
             warmup = floor(2000/2),
             verbose = FALSE
           ) 
end.time <- Sys.time()
end.time - start.time
## Time difference of 1.145867 mins

After the model runs, check for convergence and ask for whatever statistics (summaries) and graphics help you here.

stan_trace(fitLR3)

stan_dens(fitLR3)

stan_hist(fitLR3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ac(fitLR3)

And if all looks good

print(fitLR3)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##             mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff
## alpha      60.63    0.05 2.05   56.61   59.27   60.61   62.01   64.56  1480
## theta[1]    2.85    0.05 1.97   -1.03    1.53    2.84    4.16    6.67  1485
## theta[2]   -0.91    0.02 0.98   -2.79   -1.57   -0.91   -0.26    1.04  1780
## sigma       5.52    0.01 0.51    4.65    5.16    5.48    5.82    6.62  2041
## beta[1]     1.11    0.01 0.41    0.29    0.85    1.11    1.39    1.91  2608
## beta[2]    -1.39    0.04 1.49   -4.25   -2.39   -1.39   -0.40    1.59  1780
## lp__     -145.35    0.04 1.54 -149.17 -146.10 -144.97 -144.23 -143.50  1316
##          Rhat
## alpha       1
## theta[1]    1
## theta[2]    1
## sigma       1
## beta[1]     1
## beta[2]     1
## lp__        1
## 
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:39:03 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
plot(fitLR3)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)

QR and Centering

We have to center the predictors and put the centered values in the data list. We could use a rescale function, but I prefer doing it by hand; a sketch of an alternative using scale() follows the data list below.

N <- nrow(nels) 
y = nels$math
# center  
nels$cx1 <- nels$homework - mean(nels$homework)
nels$cx2 <- nels$ses - mean(nels$ses)
cx <- matrix(cbind(nels$cx1, nels$cx2), nrow=N)
K <- ncol(cx)
dat <- list(N = N, K=K, y = y, x = cx); 
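
As an alternative to the manual centering above, base R's scale() centers each column in one step; a sketch (cx_alt is an illustrative name):

cx_alt <- scale(cbind(nels$homework, nels$ses), center = TRUE, scale = FALSE)
# cx_alt could be passed as x in the data list in place of cx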

By keeping the same name for x we can reuse code. The model is the same as mlreg3 (a sketch that reuses the already-compiled model follows the definition below):

mlreg4 <- "
data {
  int<lower=0> N;   // number of students
  int<lower=0> K;   // number of predictors
  matrix[N,K] x;   // predictor matrix
  vector[N] y;      // outcome vector
} 
transformed data {
   matrix[N,K] Q_ast;
   matrix[K,K] R_ast;
   matrix[K,K] R_ast_inverse;
   
   Q_ast = qr_thin_Q(x) * sqrt(N-1);   // thin and scale QR decomposition
   R_ast = qr_thin_R(x) / sqrt(N-1);
   R_ast_inverse = inverse(R_ast);
}
parameters {
  real alpha;             // intercept
  vector[K] theta;        // coefficients on Q_ast
  real<lower=0> sigma;    // error scale
} 
model {
  y  ~ normal(Q_ast * theta + alpha, sigma);
} 
generated quantities {
  vector[K] beta;
  beta = R_ast_inverse * theta; 
}
"

Get our samples,

start.time <- Sys.time()
fitLR4 <- stan(
           model_code = mlreg4, 
             model_name = "Multiple linear regression: Center & QR", 
           data = dat, 
             iter = 2000, 
             chains = 4,
             cores = 4, 
             warmup = floor(2000/2),
             verbose = FALSE
           )
end.time <- Sys.time()
end.time-start.time
## Time difference of 9.157471 secs
stan_trace(fitLR4)

stan_dens(fitLR4)

stan_hist(fitLR4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ac(fitLR4)

And if all looks good

print(fitLR4)
## Inference for Stan model: Multiple linear regression.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##             mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff
## alpha      62.82    0.01 0.68   61.44   62.37   62.82   63.26   64.20  4066
## theta[1]    1.89    0.01 0.66    0.60    1.44    1.89    2.34    3.18  4565
## theta[2]   -0.66    0.01 0.68   -2.03   -1.11   -0.65   -0.21    0.68  4744
## sigma       5.49    0.01 0.49    4.61    5.14    5.45    5.80    6.57  4027
## beta[1]     1.11    0.01 0.39    0.36    0.85    1.11    1.37    1.87  4558
## beta[2]    -1.43    0.02 1.48   -4.44   -2.42   -1.42   -0.46    1.49  4744
## lp__     -145.27    0.03 1.46 -148.88 -146.03 -144.92 -144.19 -143.50  2143
##          Rhat
## alpha       1
## theta[1]    1
## theta[2]    1
## sigma       1
## beta[1]     1
## beta[2]     1
## lp__        1
## 
## Samples were drawn using NUTS(diag_e) at Wed Oct 19 13:39:17 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
plot(fitLR4)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)