# edps 590BAY  ---- post this one
# Fall 2021
# C.J.Anderson
#
#
# Illustration of simple linear regression...and a couple of predictors
#          rjags     
#          run.jags 
#          jagsUI    (most verbose output)
#
library(coda)
library(rjags)

 setwd("D:/Dropbox/edps 590BAY/Lectures/6 Linear Regression")
 #setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\7 Linear Regression")

#source("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\DBDA2E-utilities.txt")

ano <- read.table("anorexia_data.txt",header=T)

ano$change <- ano$weight2 - ano$weight1

# This is just to check which treatement is which
ano$Rx <- ano$rx
chk<-aggregate(change~Rx,data=ano, FUN="mean")
chk
table(ano$rx)

# Create dummy codes ctrl vs others
ano$altRx <- ifelse(ano$rx==2,0,1)

# Mean for each level of altRx
aggregate(change~altRx,data=ano,FUN="mean")

# Ordinary least square regression
ols.lm <- lm(change~altRx, data=ano)
summary(ols.lm)

# or just a two independent sample t-test
t.test(change~altRx, data=ano)


############################################################################################
set.seed(234590)
  

# This is just to check which treatement is which
ano$Rx <- ano$rx
chk<-aggregate(change~Rx,data=ano, FUN="mean")
chk
table(ano$rx)

# Create dummy codes for treatments

ano$family <- ifelse(ano$rx==3,1,0)
ano$cogn   <- ifelse(ano$rx==1,1,0)

# Mean for each level of dummies--chk

aggregate(change~family,data=ano,FUN="mean")
aggregate(change~cogn,data=ano,FUN="mean")

### Ordinary least square regression
ols.lm <- lm(change~ family + cogn, data=ano)
summary(ols.lm)

############################# Bayesian Modeling @@@@@@@@@@@@@@@@@@@@@@@@
### Data
dataList <- list(y=ano$change, 
                 xfamily=ano$family,
                 xcogn=ano$cogn,
                 N=length(ano$change),
                 sdY = sd(ano$change)
                 )

anoModel1 <- "model {  
## Likelihood                                  
                 for (i in 1:N){
                  y[i] ~ dnorm(mu[i], precision)
                  mu[i] <- b0 + bfamily*xfamily[i] + bcogn*xcogn[i]  
                  yhat[i] <- b0 + bfamily*xfamily[i] + bcogn*xcogn[i]

                  res[i] <- y[i] - mu[i]
                  emp.new[i] ~ dnorm(mu[i], precision)   
                  res.new[i] <- emp.new[i] - mu[i]   
     
                 }
## Priors
                 b0 ~ dnorm(0 , 1/(10*sdY^2) )   
                 bfamily ~ dnorm(0 , 1/(10*sdY^2) )   
                 bcogn ~ dnorm(0 , 1/(10*sdY^2) )   
                 precision ~ dgamma(.01, .01)               
## Derived paramters
                 fit <- sum(res[])   
                 fit.new <- sum(res.new[])            
               }"

writeLines(anoModel1, con="anoModel1.txt")

b0Init = mean(ano$change)
b1Init = 0
b2Init = 0
precInit = sd(ano$change)

initsList = list(b0=b0Init, bfamily=b1Init, bcogn=b2Init, precision=precInit )

#create an object representing a Bayesian graphical model, 
jagsModel1 <- jags.model(file="anoModel1.txt",     # compiles and intializes model
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=500)            # adaptive phase for 500 iterations

# gets samples from all chains 
# Only get the basic model parameters to assess convergence
Samples <- coda.samples(jagsModel1, variable.names=c("b0","bfamily","bcogn","precision"), n.iter=15000)

plot(Samples)

gelman.diag(Samples)

gelman.plot(Samples)

autocorr.plot(Samples,auto.layout=TRUE)

geweke.diag(Samples,frac1=0.1,frac2=0.5)

effectiveSize(Samples)

cumuplot(Samples,probs=c(.25,.50,.75),lwd=c(1,2),lty=c(2,1),col=c("blue","red"))

HPDinterval(Samples)

# output summary information
summary(Samples)


############ Re-run to get other statistics ################


## Extract fit and new.fit
Samples2 <- coda.samples(jagsModel1, variable.names=c("b0","bfamily","bcogn","precision","fit","fit.new","yhat"), n.iter=15000)

yhat1 <- Samples2[[1]]                    # From chain 1
ypred1 <- as.matrix(yhat1[ ,7:78])
ypost1 <- matrix(ypred1,ncol=1)

yhat2 <- Samples2[[2]]                    # From chain 2
ypred2 <- as.matrix(yhat2[ ,7:78])
ypost2 <- matrix(ypred2,ncol=1)

yhat3 <- Samples2[[3]]                    # From chain 3
ypred3 <- as.matrix(yhat3[ ,7:78])
ypost3 <- matrix(ypred3,ncol=1)

yhat4 <- Samples2[[4]]                    # From chain 4
ypred4 <- as.matrix(yhat4[ ,7:78])
ypost4 <- matrix(ypred4,ncol=1)

ypost <- cbind(ypost1,ypost2,ypost3,ypost4)

par(mfrow=c(2,2))
hist(ano$change,breaks=20,main="Data")
hist(ypost,breaks=20,main="Posterior Predictive")


# Girls within treatement have the same yhat so we could just look
# at one girl's posterior distribution
s <- summary(Samples)
a <- as.array(s)

y1 <- Samples2[[1]]           ####### I will just look at 1 chain
y11 <- as.matrix(y1)
yhat.rx1 <- y11[,7]
yhat.rx2 <- y11[,40]
yhat.rx3 <- y11[,76]


par(mfrow=c(2,2))
hist(yhat.rx1,main="Treatment 1: Family",breaks=20)

hist(yhat.rx2,main="Treatment 2: Standard",breaks=20)

hist(yhat.rx3,main="Treatment 3: Cognitive Behavioral",breaks=20)
hist(ypost,breaks=20,main="All chains, all girls")



girl.rx1 <- ano[which(ano$rx==1),]
girl.rx2 <- ano[which(ano$rx==2),]
girl.rx3 <- ano[which(ano$rx==3),]


par(mfrow=c(2,2))
xmean <- mean(yhat.rx1)
sd <- sd(yhat.rx1)
x <- yhat.rx1
hist(girl.rx1$change,main="Treatement 1: Cognitive\n 29 observations", freq=FALSE,ylim=c(0,.14),
     xlim=c(-15,25), breaks=20,
    xlab="Change in Weight")
	
curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2)


xmean <- mean(yhat.rx2)
sd <- sd(yhat.rx2)
x <- yhat.rx2
hist(girl.rx2$change,main="Treatement 2: Standard \n (26 observations)", freq=FALSE,ylim=c(0,.14),breaks=20,
    xlim=c(-15,25), xlab="Change in Weight")
	
curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2)


xmean <- mean(yhat.rx3)
sd <- sd(yhat.rx3)
x <- yhat.rx3
hist(girl.rx3$change,main="Treatement 3: Family \n (17 observations)", freq=FALSE,ylim=c(0,.14),breaks=20,
   xlim=c(-15,25), xlab="Change in Weight",ces.axis=1.5)
   
curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2)


table(ano$rx)

##############################################################
# FOR ANOTHER POSTERIOR PREDICTIVE CHECK                     #
##############################################################
library(jagsUI)

out.jagsUI <- jags(data=dataList, inits=NULL, parameters.to.save=c("b0","b1","sigma","fit","fit.new"),
                   model.file="anoModel1.txt", n.chains=4, n.iter=2000, n.burnin=1000)

print(out.jagsUI)


#Posterior predictive check plot---- this should be in jagsUI
# The p is Bayeisan p-value

pp.check(out.jagsUI, observed = 'fit', simulated = 'fit.new')