# Edpsy 590BAY # Fall 2021 # C.J.Anderson # # Comparing 2 proportions: US Presdients 1789-1926 vs 1928-2016 # # Data file has missing values that are coded at 9999 (either uncontested # or height is unknown # # setwd("D:\\Dropbox\\edps 590BAY\\Lectures\\2 Proportion") elections <- read.table(file="all_presidential_data.txt",header=T) elections <- na.omit(elections) # delete the ones with data missing # In the data set missing is indicated # by NA elections$oldnew <- ifelse(elections$year<1928,1,0) # create a variable used to split the data elections$oldnew <- as.factor(elections$oldnew) # new variable needs to be a factor head(elections) # look at data split.data <- split(elections,elections$oldnew) new <- split.data[1] new <- as.data.frame(new) names(new) <- c("year","winner","wcm","loser","lcm","wintaller","winlefty","oldnew") old <- split.data[2] old <- as.data.frame(old) names(old) <- c("year","winner","wcm","loser","lcm","wintaller","winlefty","oldnew") # Find the posteriors table(new$wintaller) y.new <- 16 # looking at result of table n.new <- nrow(new) a.new <- y.new + 1 # posterior is beta with a.new and b.new b.new <- n.new - y.new + 1 theta.new <- (a.new-1)/(a.new+b.new-2) table(old$wintaller) # posterior is beta with a.old and b.old y.old <- 15 n.old <- nrow(old) a.old <- y.new + 1 b.old <- n.old - y.old +1 theta.old <- (a.old-1)/(a.old+b.old-2) # Monte carlo S= 1e4 theta.new <- rbeta(S,a.new, b.new) theta.old <- rbeta(S,a.old, b.old) mean(theta.new>theta.old) cor(theta.new, theta.old) # Take look at the simulated posterior distributions par(mfrow=c(2,2)) hist(theta.new, main="1e4 samples from posterior 1928-2016") hist(theta.old, main="1e4 samples from posterior 1879-1924") plot(theta.new, theta.old, main="scatter plot of simulated values", xlim=c(0,1), ylim=c(0,1)) lines(c(0,1), c(0,1), col="blue") plot.new( ) text(.5,.5,"r(theta1,theta2)=.0112") text(.49,.4,"Pr(theta1>theta2)= .9369") # Find proportion/probability of times that theta1 > theta2 mean(theta1>theta2)