# Edps/Psych/Stat 587 # Spring 2020 # Carolyn J. Anderson # # R-code to reproduce material in Lecture notes on Random Intercept and Slope Models # for NELS N=23 data # # Variables and coding of nels23 data # # school = 'SCHOOL ID' # id = 'STUDENT ID' # gender = 'STUDENT GENDER' 1 = 'MALE' 2 = 'FEMALE' . = 'MISSING' # race = 'STUDENT RACE' 1 = 'ASIAN/PI' 2 = 'HISPANIC' 3 = 'BLACK' # 4 = 'WHITE' 5 = 'NATIVE AMERICAN' . = 'MISSING' # homew = 'TIME ON MATH HOMEWORK' # 0 = 'NONE' 1 = 'LT 1 HOUR' 2 = '1 HOUR' 3 = '2 HOURS' 4 = '3 HOURS' 5 = '4 TO 6 HOURS' # 6 = '7 TO 9 HOURS' 7 = 'GE 10 HOURS' . = 'MISSING'; # schtyp = 'SCHOOL TYPE' # 1 = 'PUBLIC' 2 = 'CATHOLIC' 3 = 'PRIVATE RELIGIOUS' 4 = 'PRIVATE' . = 'MISSING' # ses = 'SOCIOECONOMIC STATUS' # pared = 'PARENTAL EDUCATION' # 1 = 'NO HS DEGREE' 2 = 'HS GRAD / GED' 3 = 'HS / LT 4 YEAR COLLEGE' # 4 = 'COLLEGE GRAD' 5 = "MASTER'S" 6 = 'DOCTORATE' . = 'MISSING' # math = 'MATH SCORE' # classstr = 'CLASS STRUCTURE' # schsize = 'SCHOOL SIZE' # 1 = '1 TO 199' 2 = '200 TO 399' 3 = '400 TO 599' 4 = '600 TO 799' # 5 = '800 TO 999' 6 = '1000 TO 1199' 7 = '1200+' . = 'MISSING' # urban = 'URBANICITY' 1 = 'URBAN' 2 = 'SUBURBAN' 3 = 'RURAL' . = 'MISSING' # geo = 'GEOGRAPHIC REGION' # 1 = 'NORTHEAST' 2 = 'NORTH CENTRAL' 3 = 'SOUTH' 4 = 'WEST' . = 'MISSING' # minority = 'PERCENT MINORITY' # 0 = '0%' 1 = '1-5%' 2 = '6-10%' 3 = '11-20%' 4 = '21-40%' # 5 = '41-60%' 6 = '61-90%' 7 = '91-100%' . = 'MISSING' # ratio = 'STUDENT-TEACHER RATIO' 10 = ' LE 10' . = 'MISSING' # public = 'PUBLIC SCHOOL BINARY' 0 = 'ALL OTHERS' 1 = 'PUBLIC' . = 'MISSING' # # Load packages that we'll need # library(lme4) library(lmerTest) library(lattice) library(texreg) # nice lmer output # Set working dircetory setwd('D/Dropbox/edps587/lectures/4 randomslope') # Read in data nels23 <- read.table(file="school23_data.txt", header=FALSE) names(nels23) <- c('id', 'student', 'gender', 'race', 'homework', 'schtype', 'ses', 'pared', 'math', 'classstr', 'schsize', 'urban', 'geo', 'minority', 'ratio') nels23 <- nels23[order(id),] # # Names of variables and Look at first 23 # names(nels23) head(nels23,25) # # compute mean ses and center ----------- *********schtyp gets messed up somehow********* # meanses <- aggregate(ses~id, data=nels23, "mean") names(meanses) <- c('id','meanses') nels23 <- merge(nels23,meanses, by = c('id')) cses <- nels23$ses - nels23$meanses nels23 <- cbind(nels23,cses) # # Recheck data after making new variables---do this # everytime after you muck with the data. # names(nels23) head(nels23) # # Recode race as a binary variable in new column # create new column nels23$white <- NA # The recoding nels23$white[nels23$race==4] <- 1 nels23$white[nels23$race<=3] <- 0 nels23$white[nels23$race>=5] <- 0 # # ReCode schtype to private # nels23$private <- NA nels23$private[nels23$schtype==1] <- 0 nels23$private[nels23$schtype>=2] <- 1 names(nels23) head(nels23) ##################################################### # Result in lecture notes ##################################################### ##################################### # First look at data: one per school# ##################################### par(mfrow=c(1,1)) xyplot(nels23$math ~ nels23$homework | as.factor(nels23$id),type=c('p','r')) # # All regressions in one # # You only need to do the following once...this gives you various things in # Data set-up # sort by schools: nels23 <- nels23[order(nels23$id) , ] # Get information on number schools & create consecutive integer school.id school <- unique(nels23$id) ## N <- length(school) ## need N and school.id school.int <- as.data.frame(cbind(school,seq(1:N))) ## for dataList & model names(school.int) <- c("id", "school.id") ## nels23 <- merge(nels23,school.int,by="id") ## # total number of students n <- length(nels23$math) ## need n for dataList & model # Double-check set-up head(nels23,n=50) # You don't need this but might want it (sample size per school) nj <- matrix(999,nrow=N,ncol=1) for (j in 1:N) { nj[j] <- nrow(subset(nels23,nels23$school.id==j)) } # Graph to illustrate need for random intercept and slope plot(nels23$homew,nels23$math, type="n", col="blue", lwd=2, main="NELS: Linear Regression by School \n(for where there is data)", xlab="Time Spent Doing Homework", ylab="Math Scores" ) for (j in 1:N) { sub <- subset(nels23,nels23$school.id==j) fitted <- fitted(lm(math~homework,sub)) lines(sub$homework,fitted,col=j) } ######################## require(lmerTest) ####################### #p25 --- number of schools nschools <- length(unique(nels23$id)) nschools # # page 27 Null model = random effects ANOVA # # Fit model and get default summary) summary(model.null <- lmer(math ~ 1 + (1|id), data=nels23, REML=FALSE)) # Nicer (summary) # - with se for coeffiecents screenreg(model.null,single.row=TRUE,custom.model.names=c("Null")) # - with 95% CI rather than se for coefficients screenreg(model.null,single.row=TRUE,custom.model.names=c("Null"),ci.force=TRUE) # for more information on using screenreg see # https://www.rdocumentation.org/packages/texreg/versions/1.36.18/topics/texreg # # Computing ICC (one of many) # vars <- as.data.frame(VarCorr(model.null))[4] total <- sum(vars) tau00 <- vars[1,1] icc.0 <- tau00/total round(icc.0,digits=4) # # page 30 Random intercept: homework # summary(model.ri <- lmer(math ~ 1 + homework + (1|id), data=nels23, REML=FALSE)) vars <- as.data.frame(VarCorr(model.ri))[4] total <- sum(vars) tau00 <- vars[1,1] icc1 <- tau00/total round(icc1,digits=4) # #p 35 Random intercept and slope homework # require(lmerTest) summary(model.ris <- lmer(math ~ 1 + homework + (1 + nels23$homew|id), data=nels23, REML=FALSE)) # if you want the rather than correlation: cov(U0j,U1j) = corr(U0j,U1j)*std(U0j)*st(U1j) covU0U1 <- -.83*7.700*4.097 covU0U1 # Or if you don't want to compute cov(U0j,U1j) but want it... as.data.frame(VarCorr(model.ris))[4] # # Comparison and summary so far....note that covariance(U0,U1) is given # screenreg(list(model.null,model.ri,model.ris),single.row=TRUE,custom.model.names=c("Null","Random Intercept", "Random Int & Slope")) # # # #p 40 Plot of school mean math +/1 1 standard deviation # mavg <- aggregate(math~id, data=nels23, "mean") msdev <- aggregate(math~id, data=nels23, "sd") sch.id <- seq(1:23) plot(sch.id, mavg[,2], type='p', ylim=range(c(mavg[,2]-msdev[,2], mavg[,2]+msdev[,2])), pch=19, xlab="Schools", ylab="Mean Math +/- SD", main="School Mean Math +/- 1 Std.Dev." ) # hack: we draw arrows but with very special "arrowheads" # For this graph, thanks to http://stackoverflow.com/questions/13032777/scatter-plot-with-error-bars arrows(sch.id, mavg[,2]-msdev[,2], sch.id, mavg[,2]+msdev[,2], length=0.05, angle=90, code=3) # #page 41 Graph of overall regression # Plot population average regression # intercept <- fixef(model.ris)[1] slope <- fixef(model.ris)[2] random <- ranef(model.ris) student.int <- random$id[1] student.slope <- random$id[2] plot(math ~ homework,type = 'n', data = nels23, cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Population Average Regression " ) # Population average line for(i in min(nels23$homework):max(nels23$homework)){ abline(a=intercept, b=slope, col='black',lwd=4) } # #page 42 Population and School Specific # school.list <- unique(nels23$id) plot(math~ nels23$homew,type = 'n', data = nels23, ylim = c(30, 80), xlim = c(0, 7), cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Population & School Specific Regressions " ) # draw in school specific line for (i in 1:length(school.list)){ abline(a=(intercept+student.int[i,]), b=(slope+student.slope[i,]), type='l', col=i) } # Population average line for(i in min(nels23$homew):max(nels23$homew)){ abline(a=intercept, b=slope, col='black',lwd=4) } # #p 44 School specific for random intercept model # intercept <- fixef(model.ri)[1] slope <- fixef(model.ri)[2] random <- ranef(model.ri) student.int <- random$id[1] plot(math~ homework,type = 'n', data = nels23, ylim = c(30, 80), xlim = c(0, 7), cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Population & School Specific: Random Intercept " ) # draw in school specific line for (i in 1:length(school.list)){ abline(a=(intercept+student.int[i,]), b=(slope), type='l', col=i) } # #page 45 Graph of estimated variances of random intercept and intercept & slope # hmwk <- seq(from=1, to=7, by=.1) vars.ri <- as.data.frame(VarCorr(model.ri))[4] total.ri <- rep(sum(vars.ri), 61) vars.ris <- as.data.frame(VarCorr(model.ris))[4] total.ris<- vars.ris[1,] + 2*vars.ris[3,]*hmwk + vars.ris[2,]*(hmwk**2) + vars.ris[4,] plot(hmwk,total.ris,type = 'l', cex.main = 1.5, col='blue', xlim=c(1,7), ylim=c(0,700), xlab = 'Time Spent Doing Homework', ylab = "Estimated Variance of Math Scores", main = "Variance Function" ) # random inercept abline(a=sum(vars.ri), b=0, col='red') legend(1,690, c('Random Intercept & Slope','Random Intercept'), lty=c(1,1), lwd=c(2.5,2.5), col=c('blue','red')) # # p87 Random intercept and slope homework, but 2 more level 1 variables # summary(model1 <- lmer(math ~ homework + private + white + (1 + homework | id), data=nels23, REML=FALSE)) # and if you want cov(U0j,U1j) ... as.data.frame(VarCorr(model1))[4] # # p92 Random slope for white & homework--- Model 2 # summary(model2 <- lmer(math ~ homework + private + white + (1 + homework + white | id), data=nels23, REML=FALSE)) as.data.frame(VarCorr(model2))[4] # # p96 Random slope for private & homework --- bad model but warning isn't convincing # summary(model3 <- lmer(math ~ homework + private + white + (1 + homework + private | id), data=nels23, REML=FALSE)) as.data.frame(VarCorr(model3))[4] # # p72 --- working with Model 2, plot of average schools for different levels of discrete variables # par(mfrow=c(1,1)) fixed <- fixef(model2) prv_wht <- fixed[1] + fixed[2]*nels23$homework + fixed[3] + fixed[4] plot(nels23$homework, prv_wht, type='l', col='black', ylim = c(40, 70), xlim = c(1,7), xlab='Time Spent Doing Homework', ylab='Fitted Math Scores', main='Population Average Regressions (Model 2)' ) abline(a=(fixed[1] + fixed[4]), b=fixed[2], col='red') abline(a=(fixed[1] + fixed[3]), b=fixed[2], col='blue') abline(a=(fixed[1]), b=fixed[2], col='green') legend(1,70, c('Private, White', 'Public, White', 'Private, Non-White', 'Public, Non-White'), lty=c(1,1,1,1), col=c('black', 'red', 'blue', 'green') ) # # Page 99 --- working with Model 2, plot of the 23 school regressions (HLM) # # Population and School Specific # # These are model based and don't take into acount where the data are (lectures notes # figures are better) # par(mfrow=c(2,2)) random <- ranef(model2, level=0:2) U0j <- random$id[,1] U1j <- random$id[,2] U2j <- random$id[,3] plot(nels23$math ~ nels23$homework, type = 'n', cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Private, White " ) # draw in scho0l specific lines: Private, white for (i in 1:23){ abline(a=(fixed[1]+fixed[3]*homework +fixed[4] + U0j[i]+ U2j[i]), b=(fixed[2]+U1j[i]), type='l', col=i) } plot(nels23$math ~ nels23$homework, type = 'n', cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Private, Non-White " ) # draw in scho0l specific lines: Private, non-white for (i in 1:23){ abline(a=(fixed[1]+fixed[3] + U0j[i]), b=(fixed[2]+U1j[i]), type='l', col='blue') } plot(math ~ homework, type = 'n', cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Public, White " ) # draw in scho0l specific lines: Public, white for (i in 1:23){ abline(a=(fixed[1]+fixed[4] + U0j[i] + U2j[i]), b=(fixed[2]+U1j[i]), type='l', col='blue') } plot(math ~ homework, type = 'n', cex.main = 1.5, xlab = 'Time Spent Doing Homework', ylab = "Fitted Math Scores", main = "Public, non-White " ) # draw in scho0l specific lines: Public, non-white for (i in 1:23){ abline(a=(fixed[1]+ U0j[i]), b=(fixed[2]+U1j[i]), type='l', col='blue') } ################################################################## # Different kinds of centering # ################################################################## nels23$homework.mean <- rep(mean(nels23$homework), 519) # Grand mean center homew nels23$homew.GMC <- nels23$homework - mean(nels23$homework) # Group Mean center homew.GpM <- aggregate(homework~id, data=nels23, "mean") names(homew.GpM) <- c('id','homew.GpM') nels23 <- merge(nels23,homew.GpM, by = c('id')) nels23$homew.GpMC <- nels23$homework - nels23$homew.GpM ############################################################### # Random Intercept Models with different types of centering # ############################################################### # # Raw Score # summary(model.raw <- lmer(math ~ 1 + homework + (1 |id), nels23, REML=FALSE)) (n2lnlike <- -2* logLik(model.raw)) # or (dev.raw <- deviance(model.raw)) # # Grand mean centered # model.GMC <- lmer(math ~ 1 + nels23$homew.GMC + (1 |id), nels23, REML=FALSE) summary(model.GMC) (dev.GMC <- deviance(model.GMC)) # # Group (school) mean centered # model.GpMC <- lmer(math ~ 1 + nels23$homew.GpMC + (1 |id), nels23, REML=FALSE) summary(model.GpMC) deviance(model.GpMC) # # Raw Score + School Mean # model.RGpm <- lmer(math ~ 1 + homework + nels23$homew.GpM + (1 |id), nels23, REML=FALSE) summary(model.RGpm) deviance(model.RGpm) # # School mean centered + School Mean # summary(model.GpMC <- lmer(math ~ 1 + nels23$homew.GpMC + nels23$homew.GpM + (1 |id), nels23, REML=FALSE)) deviance(model.GpMC) ####################################################################### # Random Intercept & Slope Models with different types of centering # ####################################################################### # # Raw Score # summary(model.raw2 <- lmer(math ~ 1 + homework + (1 + homework |id), nels23, REML=FALSE)) # # Grand mean centered # summary(model.GMC <- lmer(math ~ 1 + nels23$homew.GMC + (1 + nels23$homew.GMC|id), nels23, REML=FALSE)) # # Group (school) mean centered # summary(model.GpMC2 <- lmer(math ~ 1 + nels23$homew.GpMC + (1 + nels23$homew.GpMC|id), nels23, REML=FALSE)) # # Raw Score + School Mean # summary(model.RGpm2 <- lmer(math ~ 1 + homework + nels23$homew.GpM + (1 + homework |id), nels23, REML=FALSE)) # # School mean centered + School Mean # summary(model.GpMC2b <- lmer(math ~ 1 + nels23$homew.GpMC + nels23$homew.GpM + (1 + nels23$homew.GpMC|id), nels23, REML=FALSE)) screenreg(list(model.raw2,model.GMC,model.GpMC2,model.RGpm2,model.GpMC2b), custom.model.names=c("Raw Score C","Grand Mean C", "School Mean C","Raw+School Mean","School mean C & mean"))