# Edps 590BAY
# Fall 2021
# C.J.Anderson
#
# Anorexia data estimation of mean using Metropolis algorithm
#   --- sample statistics
#   --- tuning
#   --- running chains
#   --- etc
#
library(HDInterval)
library(MCMCpack)


setwd("D:/Dropbox/edps 590BAY/Lectures/\9 Metropolis Hastings")

anorex <- read.table("anorexia_data.txt",header=TRUE)

# create change 
anorex$change <- anorex$weight2 - anorex$weight1

#############################################
# Sample statistics
#############################################
n=nrow(anorex)
ybar = mean(anorex$change)
s2 = var(anorex$change)
stderr <- sqrt(s2/n)
sample.stat <- c(n,ybar,s2,stderr)
names(sample.stat) <- c("n","ybar","s2","stderr")
sample.stat



iter.set <- 5000

#################################################################
# Some tuning:  Try different jump sd
#################################################################

# --- Function metroLogNorm with graphing removed of trace in real-time removed
metroLogNorm <- function(niter, y, current, tau, jump) {

               x <- matrix(c(current,0),nrow=1,ncol=2)  # save the results of random walk 
               iteration <- matrix(1,nrow=1,ncol=1)     # to plot accepted values on y-axis }
               n <- length(y)
                mu <- mean(y)

               for (i in 2:niter) {  
                propose <- rnorm(1,mean=x[i-1,1],sd=jump)				 
                ln.ratio <-  dnorm(propose, mu, tau, log=TRUE) - dnorm(current, mu, tau, log=TRUE) 

      	u <- log( runif(1,0,1) )
               if (u <= ln.ratio) {
                     current <- propose
                     accept <- 1 
                } else {
                     accept <- 0
                }
            
                 x <- rbind(x,c(current,accept))
                 iteration <- c(iteration,i)          
                }
             return(x)
}
# ---  End function


par(mfrow=c(2,2))

# --- For tunings and/or metroLogNorm
nchains <- 20
y <- anorex$change
min.jump <- .01
max.jump <- 10
njumps <- 50

# ----  for metroLogNorm
ybar <- mean(y)
stderr <- sd(y)/sqrt(length(y))
n.iter <-1000

# --- for tunning
acceptance <- matrix(NA, nrow=njumps, ncol=2)
jumps <- seq(from=min.jump, to=max.jump, length.out=njumps)

#----  Start of loop through jump standard deviations
i <- 1
for (jump.sd in jumps) {
   chain <- metroLogNorm(niter=n.iter, y, ybar, stderr, jump.sd)

   jsd <- round(jump.sd, digits=4)
   plot(seq(1:n.iter), chain[,1], type="l", main=paste("Jump sd =", jsd))

   accept <- table(chain[,2])
   percent <- accept[2]/n.iter * 100
   acceptance[i, 1] <- percent
   acceptance[i, 2] <- jump.sd
   i <- i + 1
}   

# --- Graph them
par(mfrow=c(1,1))
plot(acceptance[, 2], acceptance[, 1], type='l', lwd=2, 
     xlab="Jump sd",
	 ylab="Percent Accept",
	 main="Acceptance Rate for Anorexia")
lines(c(min.jump,max.jump),c(50,50), lty=2, col="cyan")
lines(c(min.jump,max.jump),c(20,20), lty=2, col="cyan")

# Examine Table
acceptance


####################################################################
# Run chains
####################################################################
source("D:/Dropbox/edps 590BAY/Lectures/\9 Metropolis Hastings"/logMetroNorm.txt)

chain1 <- metroLogNorm(niter=iter.set ,anorex$change ,ybar, stderr, 1.75)
chain2 <- metroLogNorm(niter=iter.set, anorex$change ,0, stderr ,1.75)
chain3 <- metroLogNorm(niter=iter.set, anorex$change, -4, stderr, 1.75)
chain4 <- metroLogNorm(niter=iter.set, anorex$change, 4 ,stderr, 1.75)

#
# What are acceptance rates
#
table(chain1[,2])
n1 <-table(chain1[,2])
n1[2]/iter.set * 100

table(chain2[,2])
n2 <-table(chain2[,2])
n2[2]/iter.set * 100

table(chain3[,2])
n3 <-table(chain3[,2])
n3[2]/iter.set * 100

table(chain4[,2])
n4 <-table(chain4[,2])
n4[2]/iter.set * 100

#
# First 20 iterations/samples
#
par(mfrow=c(1,1))
first <- seq(1:20)
plot(first,chain1[first,1],type="l",col="blue",lwd=2,
      main="Early iterations",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)
lines(first,chain2[first,1],col="red",lwd=2)
lines(first,chain3[first,1],col="green",lwd=2)
lines(first,chain4[first,1],col="purple",lwd=2)

#
# All samples and chains
#
par(mfrow=c(1,1))
all <- seq(1:length(chain1[,1]))
plot(all,chain1[,1],type="l",col="blue",lwd=2,
      main="All Samples",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)
lines(all,chain2[,1],col="red",lwd=2)
lines(all,chain3[,1],col="green",lwd=2)
lines(all,chain4[,1],col="purple",lwd=2)

#
# Use 1000-5000
#
ch1 <- chain1[1000:5000,]
ch2 <- chain2[1000:5000,]
ch3 <- chain3[1000:5000,]
ch4 <- chain4[1000:5000,]

#
# Combine distributions to get posterior distribution
#
post.dist <- rbind(ch1,ch2,ch3, ch4)

mean1 <- mean(ch1[,1])
mean2 <- mean(ch2[,1])
mean3 <- mean(ch3[,1])
mean4 <- mean(ch4[,1])
mean.post <-  mean(post.dist[,1])
(means <- c(mean.post, mean1,mean2,mean3,mean4))

summary(ch1[,1])
summary(ch2[,1])
summary(ch3[,1])
summary(ch4[,1])
summary(post.dist[,1])

#
# Auto-correlations
#
par(mfrow=c(2,2))
acf(ch1[,1], lag.max =iter.set, plot = TRUE)   
acf(ch2[,1], lag.max =iter.set, plot = TRUE)   
acf(ch3[,1], lag.max =iter.set, plot = TRUE)   
acf(ch4[,1], lag.max =iter.set, plot = TRUE)   


par(mfrow=c(1,1))
acf(post.dist[,1], lag.max =iter.set, plot = TRUE)   

#
# Densities
#

std <- sd(post.dist[,1])

par(mfrow=c(2,2))
hist(ch1[,1],main='chain1',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <- ch1[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(ch2[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <-  ch2[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(ch3[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <-  ch3[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(ch4[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <-  ch4[,1]
lines(density(x,na.rm=T),col="red",lwd=2)




par(mfrow=c(1,1))
hist(post.dist[,1],main='Posterior Distribution',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue',lwd=2)
x <- post.dist[,1]
lines(density(x,na.rm=T),col="red",lwd=2)