# Edps 590BAY
# Fall 2021
# C.J.Anderson
#
# Estimating both (mean) theta and sigma^2
#    o using mathematical results for estimation
#    o graphing bi-variate distribution of parameters estimates
#    o monte carlo sampling from posterior
#       -- plots show how to use greek letters in plots with sub- and superscripts
#       -- how to overlay normal distributions in histogram
#    o bias, variance and MSE 
#    o a little Gibbs with semi-conjugate priors (just one chain)
#
#  Need to change some dnorm so use sd and not var
#
#
library(HDInterval)
library(MCMCpack)

#setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\4 Normal mean & variance")

#setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")
# laptop
setwd("D:\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")

anorex <- read.table("anorexia_data.txt",header=TRUE)

# create change 
anorex$change <- anorex$weight2 - anorex$weight1

#############################################
# Sample statistics
#############################################
n=nrow(anorex)
ybar = mean(anorex$change)
s2 = var(anorex$change)
var.mean <- s2/n
sd.err <- sqrt(var.mean)
sample.stat <- c(n,ybar,s2,var.mean,sd.err)
names(sample.stat) <- c("n","ybar","s2","var.mean","std.err")
sample.stat

###################
# Set priors
###################
mu0      <- 0     
kappa0   <- 1
sigma0sq <- 500
nu0      <- 1

################################################
# Estimates of posterior mean and variance
################################################

kappa.n <- n + kappa0
nu.n <- nu0 + n

mu.n <- (kappa0*mu0 + n*ybar)/kappa.n

sigma2.n <- (nu0*sigma0sq + (n-1)*s2 + (kappa0*n/kappa.n)*(ybar-mu0)**2)/nu.n

tau2.n <- sigma2.n/kappa.n

# summary of first analysis: data, priors and posterior
sample.stat
(priors1 <- c(mu0,kappa0,sigma0sq,nu0))
(posterior1 <- c(mu.n,kappa.n,sigma2.n,tau2.n))


####################################################
# Overlay of prior and posterior
####################################################
# Theta (mean)
par(mfrow=c(1,1))

theta <- seq(from=-15 ,to=20, length.out=1000)

Prior <- dnorm(theta, mu0, sqrt(sigma0sq/kappa0))
Posterior <- dnorm(theta,mu.n,sqrt(tau.n))
plot(theta,Posterior,type="l",main="Prior and Posterior for Anorexia:  Mean",
	 xlab="Mean",
	 col="blue"
	 )
lines(theta,Prior) 
text(9,.375,"N(2.7260,0.9444)",col="blue")
text(10,.04,"N(0,500)",col="black")
legend(-15,.375,legend=c("Posterior","Prior"), col=c("blue","black"),lwd=2)
text(-11,.24,"Sample Statistics:")
text(-10,.22,"n = 72")
text(-10,.2,"ybar = 2.7638")
text(-10,.18,"var/n = 0.8852")


# Sigma2
sigma2 <- seq(from=0, to=150, length.out=1000)

PriorS2 <- dinvgamma(sigma2, nu0/2, nu0*s2/2)
PostS2 <- dinvgamma(sigma2, nu.n/2, nu.n*sigma2.n/2)
mode <- (nu.n*sigma2.n/2)/(nu.n/1+nu.n/2)

plot(sigma2,PostS2,type="l",main="Prior and Posterior: Variance of y",
	 xlab="Sigma2",ylab="Density",
	 col="blue"
	 )
lines(sigma2,PriorS2)
text(27,.027,"IGamma(36.50,2516.46)",col="blue")
text(18,.009,"IGamma(0.50,31.87)",col="black")
legend(6,.036,legend=c("Posterior","Prior"), col=c("blue","black"),lwd=2)
text(126,.03,"s2 = 68.7378")
text(115,.028,"sigma2_post = 68.9441")
text(115,.026,"mode of post = 67.1056")


ranss <- rinvgamma(2000,nu.n/2,nu.n*sigma2.n/2)
hist(ranss,breaks=30,main="Random Draws from Posterior of variance (Sigma2n")
lines(sigma2,PostS2* 10040)
text(100,325,"...and Posterior Overlaid")


#######################################################
#  For graphing the posterior bi-variate distributions
#  -- not used in lecture
#######################################################
theta <- seq(from=-6 ,to=12,length.out=100)
sigma2 <- seq(from=0, to=100, length.out=100)
precision <- seq(from=0, to=0.03, length.out=100)

grid1 <- matrix(0,nrow=100,ncol=100)
grid2 <- matrix(0,nrow=100,ncol=100)

par(mfrow=c(2,2))

### marginal theta.post 
theta.post <- matrix(0,nrow=100,ncol=1)

for (i in 1:100){
# cja changed
#  theta.post[i] <- dnorm(theta[i], mu.n,s2_theta.n)
 theta.post[i] <- dnorm(theta[i], mu.n,sqrt(tau.n))
}
mt <- round(mu.n,2)
vt <- round(tau.n,2)

#vt <- round(sigma2.n,2)
plot(theta,theta.post,type='l',main=paste("Posterior theta: N(",mt,",",vt,")"), ylab="Density",lwd=2)


### marginal sigma.post
sigma2.post <- matrix(0,nrow=100,ncol=1)
for (i in 1:100) {
   sigma2.post[i] = dinvgamma(sigma2[i], nu.n/2, nu.n*sigma2.n/2)
}
round(nu.n/2,2)
round(nu.n*sigma2.n/2,2)
plot(sigma2,sigma2.post,type="l",lwd=2 ,
     main=expression(paste("Posterior ",sigma^2," IGamma(36.50,2316.46)")),
	 ylab=expression(paste("Posterior ",sigma^2))
)

# For precision

for (i in 1:100) { 
  for (j in 1:100) {
#     grid1[i,j] <- dnorm(theta[i],mu.n,sqrt(tau.n)) * dgamma(precision[j], nu.n/2, nu.n*sigma2.n/2)
     grid1[i,j] <- dnorm(theta[i],mu.n,sqrt(tau.n)) * dgamma(precision[j], nu.n/2, nu.n*sigma2.n/2)
     grid2[i,j] <- dnorm(theta[i],mu.n,sqrt(tau.n)) * dinvgamma(sigma2[j], nu.n/2, nu.n*sigma2.n/2)
   }
   } 
contour(x=theta,y=precision,z=grid1, 
         main="bi-variate distribution: precision x theta",
         xlab=expression(theta),
         ylab=expression(1/sigma^{2}),
         ylim=c(0.0075,.0225))

contour(x=theta,y=sigma2,z=grid2, 
         main="bi-variate distribution: sigma2 x theta",
         xlab=expression(theta),
         ylab=expression(sigma^{2}),
         ylim=c(40,90))

################################################
# monte carlo sampling from posterior
################################################
set.seed(2093)                  # or get different result each time I run
# First draw values of sigma2 (and/or precision) from posterior
sigma2.mc <- 1/rgamma(10000, nu.n/2, nu.n*sigma2.n/2)
precision.mc <- rgamma(10000, nu.n/2, nu.n*sigma2.n/2)

# Use values of sigma2 to draw from random draws from normal
theta.mc <- rnorm(10000,mu.n, sqrt(sigma2.mc/n)) 
 
par(mfrow=c(2,2))

summary(theta.mc)
summary(sigma2.mc)
summary(precision.mc)

layout(matrix(c(1,1,0,0,1,1,3,3,2,2,3,3,2,2,0,0),4,4,byrow=FALSE))
hist(sigma2.mc,main=expression(paste(sigma^2, " via Monte Carlo Sampling")),
     xlab=expression(paste("Variance of y: ",sigma[n]^2)))
text(110,1500,"mean = 70.69",cex=1.5)
text(107,1350,"median = 69.48",cex=1.5)

hist(precision.mc,main=expression(paste("1/",sigma^2, " via Monte Carlo sampling")),
     xlab=expression(paste("Precision:  1/",sigma[n]^2)))
text(0.0225,1550,"mean = 0.0145",cex=1.5)
text(0.022,1390,"median = 0.0143",cex=1.5)

hist(theta.mc,main=expression(paste("Mean via Monte Carlo Sampling: ", theta," = ",my[n])),
     xlab=expression(paste("Mean: ", theta," = ",mu[n])))
text(-.1,1860,"mean = 2.621",cex=1.5)
text(-.3,1660,"median = 2.618",cex=1.5)
text(-.45,1450,expression(paste(tau['n'] ^2," = 0.9773")),cex=1.5)

###############################################
# compare fitted and data
###############################################
par(mfrow=c(1,1))
x <- seq(from=-15, to=25, length.out=1000)

hist(anorex$change, main="Data and Fitted from Monte Carlo", freq=FALSE,ylim=c(0,.08),breaks=20,
    xlab="Change in Weight",ces.axis=1.5)
curve(dnorm(x,mu.n,sqrt(sigma2.n)), add=TRUE,col="blue",lwd=2)
text(15,.05,"N(2.71,70.69)",col="blue",cex=1.5)
text(-10,.06,"ybar=2.76")
text(-10,.055,"var(y) = 63.74")

#################################################
# Bias, variance and MSE and influence of kappa0
#################################################
# Compute different values under different values of kappa
post.save <- matrix(99,nrow=101,ncol=5)
# Set priors
mu0      <- 0   
sigma0sq <- 500
nu0      <- 1
k <- seq(from=0, to=100, by=1)
i <- 1
for (kappa0 in k) { 

    kappa.n <- n + kappa0
    nu.n <- nu0 + n
	post.save[i,1] <- kappa0
    post.save[i,2] <- (kappa0*mu0 + n*ybar)/kappa.n
	post.save[i,3] <- kappa.n
    post.save[i,4] <- (nu0*sigma0sq + (n-1)*s2 + (kappa0*n/kappa.n)*(ybar-mu0)**2)/nu.n
    post.save[i,5] <- sigma2.n/kappa.n
	i <- i + 1
}
# summary of first analysis: data, priors and posterior
#sample.stat
#(priors1 <- c(mu0,kappa0,sigma0sq,nu0))
#(posterior1 <- c(mu.n,kappa.n,sigma2.n,tau2.n))

post.save <- as.data.frame(post.save)
names(post.save) <- c("kappa0","mu.n","kappa.n","sigma2.n","tau2.n")

ybar.101 <- rep(ybar,101)
var.mean101 <- rep(var.mean,101)
var.101 <- rep(s2,101)

par(mfrow=c(2,2))
plot(post.save$kappa0,post.save$mu.n, type="l",
     main="Estimate of mean",
	 col="blue",
	 lwd=2,
	 xlab=expression(kappa[0]),
	 ylab=expression(mu[n])
	 )
lines(post.save$kappa0,ybar.101, type="l",col="grey")
text(60,2.65,"Sample mean")

plot(post.save$kappa0,post.save$sigma2.n,type="l",
     main="Variance of population",
	 col="blue",
	 xlab=expression(kappa[0]),
	 ylab=expression(sigma[n]^2),
	 lwd=2,
	 ylim=c(60,75)
	 )
lines(post.save$kappa0,var.101, type="l",col="grey")
text(60,65,"Sample Variance of Polulation")

plot(post.save$kappa0,post.save$tau2.n,type="l",
     main="Variance of Mean",
	 col="blue",
	 xlab=expression(kappa[0]),
	 ylab=expression(tau[n]^2),
	 lwd=2
	 )
lines(post.save$kappa0,var.mean101, type="l",col="grey")
text(60,.915,"Sample Variance of Mean")

plot.new( ) 
text(.5,1.0,'Computed Using:')
text(.48,0.9,"n = 72")
text(.5,0.8,"ybar = 2.7639")
text(.529,0.7,"s2 = 63.7378")
text(.5,.6,"s2/n = 0.8852")
text(.43,.5,"muo = 0")
text(.4,.4,"sigma2_o = 500")
#
	



################################################
# A quick gibbs with semi-conjugate priors 
# (dependent sequences)
################################################

# set up 
n=nrow(anorex)
ybar = mean(anorex$change)
s2 = var(anorex$change)
sample.stat <- c(n,ybar,s2)
names(sample.stat) <- c("n","ybar","s2")
sample.stat
mu0      <- 0     
kappa0   <- 1
sigma0sq <- 500
nu0      <- 1
t02 <- 100


S <- 2000                        # vary this an see what happens (# number of iterations)
Phi <- matrix(nrow=S,ncol=2)     # row for each sample
                                 # columns for mu and sigma2
Phi[1,] <- phi <- c(ybar,1/s2)

# Gibbs sampling
set.seed(222)
for (s in 2:S){
  
 # new theta from full conditional
   mu.n <- ( mu0/t02 + n*ybar*phi[2] ) / ( 1/t02 + n*phi[2] )   # note:phi[2]=1/s2 when s=2
                                                                # and =1/s2.n when s>2
   t2n <- 1/(1/t02 + n*phi[2]) 
   phi[1] <- rnorm(1,mu.n, sqrt(t2n) )

# generate new 1/sigma2
  nu.n <- nu0 +  n
  s2.n <- ( nu0*sigma0sq + (n-1)*s2 + n*(ybar-phi[1])**2 ) /   nu.n
  phi[2] <- rgamma(1, nu.n/2, nu.n*s2.n/2)

  Phi[s,] <- phi 
}

layout(matrix(c(1,1,0,0,1,1,3,3,2,2,3,3,2,2,0,0),4,4,byrow=FALSE))
hist(Phi[,2],main="Pricision via Gibbs",xlab="precision",breaks=20)
hist(1/Phi[,2],main="Sigma2 via Gibbs",xlab='sigma2',breaks=20)
hist(Phi[,1],main="Theta from Gibbs",xlab='theta',breaks=20)

summary(Phi[,1])
summary(Phi[,2])
summary(1/Phi[,2])

ss <- seq(1,S)
# did is converge for theta?
layout(matrix(c(1,1,0,0,1,1,3,3,2,2,3,3,2,2,0,0),4,4,byrow=FALSE))
#par(mfrow=c(1,1))
plot(ss,Phi[,1],type='l', main="Sequence of thetas",xlab="Iteration",ylab="theta")
plot(ss,Phi[,2],type='l', main="Sequence of 1/sigma2",xlab="Iteration",ylab="1/sigma^2")
plot(ss,1/Phi[,2],type='l', main="Sequence of sigma2",xlab="Iteration",ylab="sigma^2")

# Note:
par(mfrow=c(1,1))
cor(Phi[,1],Phi[,2])
plot(Phi[,1],Phi[,2],type='l')