# Edps 590BAY # Fall 2021 # C.J.Anderson # # Anorexia data estimation of mean using Metropolis algorithm # --- sample statistics # --- tuning # --- running chains # --- etc # library(HDInterval) library(MCMCpack) setwd("D:/Dropbox/edps 590BAY/Lectures/\9 Metropolis Hastings") anorex <- read.table("anorexia_data.txt",header=TRUE) # create change anorex$change <- anorex$weight2 - anorex$weight1 ############################################# # Sample statistics ############################################# n=nrow(anorex) ybar = mean(anorex$change) s2 = var(anorex$change) stderr <- sqrt(s2/n) sample.stat <- c(n,ybar,s2,stderr) names(sample.stat) <- c("n","ybar","s2","stderr") sample.stat iter.set <- 5000 ################################################################# # Some tuning: Try different jump sd ################################################################# # --- Function metroLogNorm with graphing removed of trace in real-time removed metroLogNorm <- function(niter, y, current, tau, jump) { x <- matrix(c(current,0),nrow=1,ncol=2) # save the results of random walk iteration <- matrix(1,nrow=1,ncol=1) # to plot accepted values on y-axis } n <- length(y) mu <- mean(y) for (i in 2:niter) { propose <- rnorm(1,mean=x[i-1,1],sd=jump) ln.ratio <- dnorm(propose, mu, tau, log=TRUE) - dnorm(current, mu, tau, log=TRUE) u <- log( runif(1,0,1) ) if (u <= ln.ratio) { current <- propose accept <- 1 } else { accept <- 0 } x <- rbind(x,c(current,accept)) iteration <- c(iteration,i) } return(x) } # --- End function par(mfrow=c(2,2)) # --- For tunings and/or metroLogNorm nchains <- 20 y <- anorex$change min.jump <- .01 max.jump <- 10 njumps <- 50 # ---- for metroLogNorm ybar <- mean(y) stderr <- sd(y)/sqrt(length(y)) n.iter <-1000 # --- for tunning acceptance <- matrix(NA, nrow=njumps, ncol=2) jumps <- seq(from=min.jump, to=max.jump, length.out=njumps) #---- Start of loop through jump standard deviations i <- 1 for (jump.sd in jumps) { chain <- metroLogNorm(niter=n.iter, y, ybar, stderr, jump.sd) jsd <- round(jump.sd, digits=4) plot(seq(1:n.iter), chain[,1], type="l", main=paste("Jump sd =", jsd)) accept <- table(chain[,2]) percent <- accept[2]/n.iter * 100 acceptance[i, 1] <- percent acceptance[i, 2] <- jump.sd i <- i + 1 } # --- Graph them par(mfrow=c(1,1)) plot(acceptance[, 2], acceptance[, 1], type='l', lwd=2, xlab="Jump sd", ylab="Percent Accept", main="Acceptance Rate for Anorexia") lines(c(min.jump,max.jump),c(50,50), lty=2, col="cyan") lines(c(min.jump,max.jump),c(20,20), lty=2, col="cyan") # Examine Table acceptance #################################################################### # Run chains #################################################################### source("D:/Dropbox/edps 590BAY/Lectures/\9 Metropolis Hastings"/logMetroNorm.txt) chain1 <- metroLogNorm(niter=iter.set ,anorex$change ,ybar, stderr, 1.75) chain2 <- metroLogNorm(niter=iter.set, anorex$change ,0, stderr ,1.75) chain3 <- metroLogNorm(niter=iter.set, anorex$change, -4, stderr, 1.75) chain4 <- metroLogNorm(niter=iter.set, anorex$change, 4 ,stderr, 1.75) # # What are acceptance rates # table(chain1[,2]) n1 <-table(chain1[,2]) n1[2]/iter.set * 100 table(chain2[,2]) n2 <-table(chain2[,2]) n2[2]/iter.set * 100 table(chain3[,2]) n3 <-table(chain3[,2]) n3[2]/iter.set * 100 table(chain4[,2]) n4 <-table(chain4[,2]) n4[2]/iter.set * 100 # # First 20 iterations/samples # par(mfrow=c(1,1)) first <- seq(1:20) plot(first,chain1[first,1],type="l",col="blue",lwd=2, main="Early iterations", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) lines(first,chain2[first,1],col="red",lwd=2) lines(first,chain3[first,1],col="green",lwd=2) lines(first,chain4[first,1],col="purple",lwd=2) # # All samples and chains # par(mfrow=c(1,1)) all <- seq(1:length(chain1[,1])) plot(all,chain1[,1],type="l",col="blue",lwd=2, main="All Samples", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) lines(all,chain2[,1],col="red",lwd=2) lines(all,chain3[,1],col="green",lwd=2) lines(all,chain4[,1],col="purple",lwd=2) # # Use 1000-5000 # ch1 <- chain1[1000:5000,] ch2 <- chain2[1000:5000,] ch3 <- chain3[1000:5000,] ch4 <- chain4[1000:5000,] # # Combine distributions to get posterior distribution # post.dist <- rbind(ch1,ch2,ch3, ch4) mean1 <- mean(ch1[,1]) mean2 <- mean(ch2[,1]) mean3 <- mean(ch3[,1]) mean4 <- mean(ch4[,1]) mean.post <- mean(post.dist[,1]) (means <- c(mean.post, mean1,mean2,mean3,mean4)) summary(ch1[,1]) summary(ch2[,1]) summary(ch3[,1]) summary(ch4[,1]) summary(post.dist[,1]) # # Auto-correlations # par(mfrow=c(2,2)) acf(ch1[,1], lag.max =iter.set, plot = TRUE) acf(ch2[,1], lag.max =iter.set, plot = TRUE) acf(ch3[,1], lag.max =iter.set, plot = TRUE) acf(ch4[,1], lag.max =iter.set, plot = TRUE) par(mfrow=c(1,1)) acf(post.dist[,1], lag.max =iter.set, plot = TRUE) # # Densities # std <- sd(post.dist[,1]) par(mfrow=c(2,2)) hist(ch1[,1],main='chain1',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch1[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(ch2[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch2[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(ch3[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch3[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(ch4[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch4[,1] lines(density(x,na.rm=T),col="red",lwd=2) par(mfrow=c(1,1)) hist(post.dist[,1],main='Posterior Distribution',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue',lwd=2) x <- post.dist[,1] lines(density(x,na.rm=T),col="red",lwd=2)