Nels23: Linear Regression via Rstan

This is the 2nd document in the set and uses the Nels 23 schools to estimate a simple linear regression model.

library(rstan)
library(loo)
library(stargazer)

setwd("C:/Users/cja/Dropbox/edps 590BAY/Lectures/10 Ham_Stan_brms")

nels <- read.table("school23_data.txt",header=TRUE)
nels <- nels[order(nels$school,nels$student) , ]

# Get information on numbers of school and students
school <- unique(nels$school)               
N <- length(school)                        
# total number of students
n <- length(nels$math)               
names(nels)

##  [1] "school"   "student"  "sex"      "race"     "homew"    "schtype" 
##  [7] "ses"      "pared"    "math"     "classtr"  "schsize"  "urban"   
## [13] "geo"      "minority" "ratio"

stargazer(nels, type="text")

## 
## ===================================================================
## Statistic  N     Mean     St. Dev.   Min   Pctl(25) Pctl(75)  Max  
## -------------------------------------------------------------------
## school    519 35,488.710 26,056.710 6,053   7,801    62,821  72,991
## student   519   49.863     28.623     0      24.5      74      99  
## sex       519   1.520      0.500      1       1        2       2   
## race      519   3.615      0.797      1       4        4       5   
## homew     519   1.971      1.484      0       1        3       7   
## schtype   519   1.977      1.328      1       1        4       4   
## ses       519   -0.001     0.881    -2.410  -0.620   0.730   1.850 
## pared     519   3.289      1.440      1       2        4       6   
## math      519   51.723     10.709     30      43      61.5     71  
## classtr   519   3.819      0.790      2       3        4       5   
## schsize   519   3.229      1.400      1       2        4       7   
## urban     519   1.832      0.859      1       1        3       3   
## geo       519   2.125      0.704      1       2        3       3   
## minority  519   2.501      2.037      0       1        3       7   
## ratio     519   16.757     4.931      10      13       20      28  
## -------------------------------------------------------------------

#############################################
# Simple Linear Regression in stan model 
#############################################
lr.1 <- "
data{
  int<lower=0> n;
  vector[n] x;   
  vector[n] y;   
}
parameters {
  real beta0;
  real beta1;
  real<lower=0> sigma; 
} 
model {
  target += normal_lpdf(beta0 |0, 30);
  target += normal_lpdf(beta1 |0, 30);
  target += cauchy_lpdf(sigma | 0, 1);
  target += normal_lpdf(y | beta0+beta1*x, sigma);

}
"

   beta0 ~ normal(0,10); 
   beta1 ~ normal(0,10);
   sigma ~ cauchy(0,1);
   y ~ normal(,sigma);

#################################
# Data List                     #
#################################
y <- as.numeric(nels$math)
hmwk <- as.numeric(nels$homew)
n <- nrow(nels) 
dataList <- list(n=n,y=y,x=hmwk)

#################################
# Run rstan                     #
#################################

fit.lr1 <- stan(
      model_code = lr.1, 
      model_name = "Nels23: math ~ 1 + homework",
      data = dataList, 
      iter = 2000,
      chains = 4,
      cores = 4,
      verbose = FALSE)

Some checking the convergence

stan_trace(fit.lr1 )

stan_dens(fit.lr1 )

stan_hist(fit.lr1 )

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ac(fit.lr1 )

stan_scat(fit.lr1 ,pars=c("beta0","beta1"))

stan_scat(fit.lr1 ,pars=c("beta1","sigma"))

pairs(fit.lr1, pars = c("beta0", "beta1", "sigma", "lp__"), las = 1)

And if all looks good

print(fit.lr1)

## Inference for Stan model: Nels23: math ~ 1 + homework.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##           mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff
## beta0    45.53    0.02 0.71    44.11    45.06    45.52    46.00    46.92  1857
## beta1     3.13    0.01 0.29     2.58     2.94     3.13     3.33     3.71  1870
## sigma     9.66    0.01 0.30     9.11     9.46     9.65     9.86    10.28  2474
## lp__  -1927.32    0.03 1.23 -1930.62 -1927.86 -1927.01 -1926.42 -1925.92  1609
##       Rhat
## beta0    1
## beta1    1
## sigma    1
## lp__     1
## 
## Samples were drawn using NUTS(diag_e) at Wed Nov 03 14:45:06 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

plot(fit.lr1 )

## ci_level: 0.8 (80% intervals)

## outer_level: 0.95 (95% intervals)

Nels23: Linear Regression via Rstan

Carolyn J. Anderson

10/27/2021