This is the 2nd document in the set and uses the Nels 23 schools to estimate a simple linear regression model.
library(rstan)
library(loo)
library(stargazer)
setwd("C:/Users/cja/Dropbox/edps 590BAY/Lectures/10 Ham_Stan_brms")
nels <- read.table("school23_data.txt",header=TRUE)
nels <- nels[order(nels$school,nels$student) , ]
# Get information on numbers of school and students
school <- unique(nels$school)
N <- length(school)
# total number of students
n <- length(nels$math)
names(nels)
## [1] "school" "student" "sex" "race" "homew" "schtype"
## [7] "ses" "pared" "math" "classtr" "schsize" "urban"
## [13] "geo" "minority" "ratio"
stargazer(nels, type="text")
##
## ===================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## -------------------------------------------------------------------
## school 519 35,488.710 26,056.710 6,053 7,801 62,821 72,991
## student 519 49.863 28.623 0 24.5 74 99
## sex 519 1.520 0.500 1 1 2 2
## race 519 3.615 0.797 1 4 4 5
## homew 519 1.971 1.484 0 1 3 7
## schtype 519 1.977 1.328 1 1 4 4
## ses 519 -0.001 0.881 -2.410 -0.620 0.730 1.850
## pared 519 3.289 1.440 1 2 4 6
## math 519 51.723 10.709 30 43 61.5 71
## classtr 519 3.819 0.790 2 3 4 5
## schsize 519 3.229 1.400 1 2 4 7
## urban 519 1.832 0.859 1 1 3 3
## geo 519 2.125 0.704 1 2 3 3
## minority 519 2.501 2.037 0 1 3 7
## ratio 519 16.757 4.931 10 13 20 28
## -------------------------------------------------------------------
#############################################
# Simple Linear Regression in stan model
#############################################
lr.1 <- "
data{
int<lower=0> n;
vector[n] x;
vector[n] y;
}
parameters {
real beta0;
real beta1;
real<lower=0> sigma;
}
model {
target += normal_lpdf(beta0 |0, 30);
target += normal_lpdf(beta1 |0, 30);
target += cauchy_lpdf(sigma | 0, 1);
target += normal_lpdf(y | beta0+beta1*x, sigma);
}
"
or
beta0 ~ normal(0,10);
beta1 ~ normal(0,10);
sigma ~ cauchy(0,1);
y ~ normal(,sigma);
#################################
# Data List #
#################################
y <- as.numeric(nels$math)
hmwk <- as.numeric(nels$homew)
n <- nrow(nels)
dataList <- list(n=n,y=y,x=hmwk)
#################################
# Run rstan #
#################################
fit.lr1 <- stan(
model_code = lr.1,
model_name = "Nels23: math ~ 1 + homework",
data = dataList,
iter = 2000,
chains = 4,
cores = 4,
verbose = FALSE)
Some checking the convergence
stan_trace(fit.lr1 )
stan_dens(fit.lr1 )
stan_hist(fit.lr1 )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_ac(fit.lr1 )
stan_scat(fit.lr1 ,pars=c("beta0","beta1"))
stan_scat(fit.lr1 ,pars=c("beta1","sigma"))
pairs(fit.lr1, pars = c("beta0", "beta1", "sigma", "lp__"), las = 1)
And if all looks good
print(fit.lr1)
## Inference for Stan model: Nels23: math ~ 1 + homework.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## beta0 45.53 0.02 0.71 44.11 45.06 45.52 46.00 46.92 1857
## beta1 3.13 0.01 0.29 2.58 2.94 3.13 3.33 3.71 1870
## sigma 9.66 0.01 0.30 9.11 9.46 9.65 9.86 10.28 2474
## lp__ -1927.32 0.03 1.23 -1930.62 -1927.86 -1927.01 -1926.42 -1925.92 1609
## Rhat
## beta0 1
## beta1 1
## sigma 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Wed Nov 03 14:45:06 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(fit.lr1 )
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)