## Code for Book: Applied Statistical Analysis in R
## A Graduate course for Psychology, Human factors, and Data Science
## (c) 2018 Shane T. Mueller, Ph.D.
## Michigan Technological University
## shanem@mtu.edu
##
## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210
##
##
## This file contains example software code relevant for Chapter 8.

#Some practice questions:
##look at the iris data set.
## see picture of the iris
## Is Sepal Length different from Sepal Width?
## Is Petal Width smaller than Petal Length?
## Is sepal length for versicolor smaller than virginica?

par(mfrow=c(2,1))
hist(rnorm(1000,mean=0,sd=1),col="gold",xlim=c(-3,3), main="Histogram of data")

numexps <- 100000
means <- rep(0,numexps)
sds <- rep(0,numexps)
expsize <- 15
for(i in 1:numexps)
{
  #Generate one experiment:
  data <- rnorm(expsize,mean=0,sd=1)
  #estimate its mean
  means[i] <- mean(data)
  sds[i] <- sd(data)
}

hist(means,xlim=c(-3,3),col="grey20", main="Histogram of means")
mean(means)
sd(means)

#pdf("c8-montecarlo.pdf",width=9,height=5)
par(mfrow=c(1,2))
x <- hist(means,col="gold",breaks=-20:20/10,main="Histogram of obtained means\n under null hypothesis")
abline(v=0.5,lwd=3,col="cadetblue3")
plot(x$mids,cumsum(x$density)/10,type="o",ylab="Cumulative density",xlab="Bin midpoint",pch=16,cex=.5)
abline(v=0.5,lwd=3,col="cadetblue3")
#dev.off()

## this compares the null distribution versus one where the mean is +.5.
par(mfrow=c(1,1))
cadblue <- rgb(122/255,197/255,205/255,.7)
x <- hist(means,col="gold",breaks=-20:20/10,main="Histogram of obtained means\n under null hypothesis")
hist(means+.5,col=cadblue,add=T)
abline(v=0,lwd=3,col="red")
abline(v=.5,lwd=3,col="red")
abline(v=.25,lwd=3,col="red")
abline(v=.7,lwd=3,col="red")

###Example t distribution
#pdf("c8-tdist.pdf",width=9,height=5)
par(mfrow=c(1,1))
vals <- -500:500/100
plot(vals,dnorm(vals),type="l",lty=1,lwd=4,col="gold",
     main="t distribution",ylim=c(0,.5),xlab="Mean")
points(vals,dt(vals,2),type="l")
points(vals,dt(vals,5),type="l")
points(vals,dt(vals,15),type="l")
points(vals,dt(vals,50),type="l")
abline(v=2)
#dev.off()

1- pt(2.0,2)  ## 2 df/3 observations
1- pt(2.0,5)  ## 5 df/ 6 observations
1- pt(2.0,15) ## 15 df/ 16 observations
1- pt(2.0,49) ## 49 df/ 50 observations

##one-sample t-test by hand
set.seed(1000)
x0 <- rnorm(30,0)
x1 <- rnorm(20,.2)
x1a <- rnorm(200,.2)
x2 <- rnorm(20,.5)
x3 <- rnorm(20,mean=.2,sd=.2)
x4 <- exp(rnorm(100))-1

library(vioplot)
#pdf("c8-vioplots.pdf",width=9,height=6)
vioplot(x0,x1,x1a,x2,x3,x4,col="gold",
        names=c("x0","x1","x1a","x2","x3","x4"))
abline(0,0)
#dev.off()

qqnorm(x0)

##one-sample t-test by hand:
mu <- mean(x1)
sd <- sd(x1)
se <- sd/sqrt(length(x1))
t <- mu/se
1-pt(t,19)
(1- pt(t,19))*2

t.test(x0,alternative="greater")
t.test(x1,alternative="greater")
t.test(x1,alternative="two.sided")
t.test(x1a,alternative="greater")
t.test(x2,alternative="greater")
t.test(x3,alternative="greater")
t.test(x4)

##############################################################
###non-parametric one-sample test: sign test/binomial test
binom.test(sum(x0>0),length(x0),p=.5)
binom.test(sum(x1>0),length(x1),p=.5)

##The BSDA library has a simpler-to-use version:
#install.packages("BSDA")
library(BSDA)
SIGN.test(x0)
binom.test(sum(x1>0),length(x1),p=.5)
SIGN.test(x1)
SIGN.test(x1a)
SIGN.test(x2)
SIGN.test(x3)
SIGN.test(x4)

###########################
### Bayesian one-sample
library(BayesFactor)
x <- ttestBF(x1)
print(x)
ttestBF(x0)

## Extract samples to get posterior distributions
samples <- ttestBF(x1,iterations=10000,posterior=TRUE)
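## Optional extra (not from the original chapter code): a quick look at the
## posterior draws before plotting. This assumes the object returned with
## posterior=TRUE behaves like a coda mcmc matrix with one named column per
## parameter, as in current versions of BayesFactor.
colnames(samples)   ## which parameters were sampled
summary(samples)    ## posterior means, SDs, and quantiles for each parameter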
#pdf("c8-posteriors.pdf",width=8,height=5)
par(mfrow=c(1,2))
hist(samples[,1],breaks=50,main="Posterior distribution of mu",xlab="Sampled values",col="gold")
hist(samples[,2],breaks=50,main="Posterior distribution of sigma2",xlab="Sampled Values",col="gold")
#dev.off()

##95% credible interval on the posterior samples of the mean:
quantile(samples[,1],c(.025,.975))

ttestBF(x2)

#####################################################################################
###End of (Chapter 8-part 1)

######################################################
### Chapter 8, part 2
######################################################

##We will re-use these data sets:
set.seed(1000)
x0 <- rnorm(30,0)
x1 <- rnorm(20,.2)
x1a <- rnorm(200,.2)
x2 <- rnorm(20,.5)
x3 <- rnorm(20,mean=.2,sd=.2)
x4 <- exp(rnorm(100))-1
x0b <- x0 + runif(length(x0),min=-.1,max=.2)

######################################################
###Paired tests:
##t.test
t.test(x0,x0b,paired=T)
t.test(x0,x0b,paired=T,alternative="less")
t.test(x0-x0b)   ##one-sample version
t.test(x0,x0b)   ##this is wrong, and not a paired t-test

##non-parametric paired tests:
diff <- x0-x0b
binom.test(sum(diff>0),length(diff))
library(BSDA)
SIGN.test(diff)
wilcox.test(diff,mu=0)
wilcox.test(x0b,x0,paired=T)
wilcox.test(x0b,x0,paired=T,alternative="greater")

##calculate wilcox.test by hand for one-sample
nums <- runif(100)-.4
v <- sum(rank(abs(nums))[nums>0])
1-psignrank(v,length(nums))
wilcox.test(nums,alternative="greater")

##Bayes factor paired t-tests
ttestBF(x0,x0b,paired=T)
ttestBF(x0-x0b)   ##use a difference instead of pairing
ttestBF(x0-x0b,nullInterval=c(-Inf,0))  ##one-sided tests require specifying an interval hypothesis

######################################################
##Independent samples tests

##Independent samples t-test by hand:
muX <- mean(x1)
muY <- mean(x1a)
sdX <- sd(x1)
sdY <- sd(x1a)

se.pooled <- function(x,y)
{
  ## standard error of the difference in means, using each group's own variance
  varx <- var(x)
  vary <- var(y)
  nx <- length(x)
  ny <- length(y)
  sqrt(varx/nx + vary/ny)
}

t <- (muX-muY)/se.pooled(x1,x1a)
t
pt(t,218,lower.tail=T)     ## df = 20 + 200 - 2
pt(t,23.719,lower.tail=T)  ## Welch-corrected df

## independent-samples t-test; one-sided using the t.test function:
t.test(x1,x1a,alternative="less")

##non-parametric Wilcoxon / Mann-Whitney U test:
wilcox.test(x1,x1a,alternative="less")

##Wilcoxon test for independent samples
set.seed(100)
x <- runif(20)
y <- runif(20)+.1
pairs <- outer(x,y,">=")
pairs
sum(pairs)
length(pairs)
plot(pwilcox(0:400,20,20))
plot(dwilcox(0:400,20,20))
1-pwilcox(115,20,20)
wilcox.test(x,y)
wilcox.test(x,y,alternative = "less")
wilcox.test(log(10+x),log(10+y),alternative = "less")
wilcox.test(x1,x1a)

##alternative way to execute the test:
groups <- rep(1:2,each=length(x))
values <- c(x,y)
w <- wilcox.test(values~groups)

#pdf("c8wilcox.pdf",width=8,height=4)
par(mfrow=c(1,2))
plot(pwilcox(0:400,20,20),type="s",xlab="U/W statistic",ylab="Cumulative Probability",main="Wilcox Rank Sum Distribution")
plot(dwilcox(0:400,20,20),type="s",xlab="U/W statistic",ylab="Density",main="Wilcox Rank Sum Distribution")
#dev.off()

##bayesian independent samples tests:
library(BayesFactor)
ttestBF(x1,x1a)
ttestBF(x,y)
ttestBF(x,y,nullInterval=c(-.1,.1))

##############################################################
## Computing covariance and correlation
##
set.seed(100)
data <- runif(1000)
var.sample <- function(x){ sum((x-mean(x))^2) / (length(x)-1) }

##these are the same:
var(data)
var.sample(data)

##factor the formula into two parts
var.sample2 <- function(x){ sum((x-mean(x)) * (x-mean(x))) / (length(x)-1) }
var.sample2(data)
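## Optional check (not from the original chapter code): confirm that the
## refactored function matches R's built-in var() up to floating-point tolerance.
all.equal(var.sample2(data), var(data))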
##separate the first and the second:
var.sample3 <- function(x,y){ sum((x-mean(x))*(y-mean(y))) / (length(x)-1) }
var.sample3(data,data)

x <- -500:500/100
y <- rnorm(1001)*7-x
var.sample <- function(x,y){ sum((x-mean(x))*(y-mean(y))) / (length(x)-1) }
var.sample(x,y)

#pdf("c8-scatter1.pdf",width=5,height=4)
plot(x,y,cex=.5,col="cadetblue3",pch=16)
text(0,22,paste("Covariance: ", round(var.sample(x,y),3)))
#dev.off()

var.sample(x,y)
var.sample(x,-y)
var.sample(-x,x)

cov(x,y)
cov(10*x,y)

cov.normalized <- function(x,y){
  cov <- sum((x-mean(x))*(y-mean(y)))/(length(x)-1)
  var1 <- sum((x-mean(x))^2)/(length(x)-1)
  var2 <- sum((y-mean(y))^2)/(length(y)-1)
  cov/sqrt(var1)/sqrt(var2)
}

cov.normalized(x,y)
cov.normalized(x,y*1000)

x <- runif(10000)
y <- runif(10000)
z <- x + y
cor(x,y)
cor(x,z)
cor(y,z)
cor(x,z)^2
sqrt(.5)

set.seed(1000)
x <- -500:500/100
y <- rnorm(1001)*5-x
cor.test(x,y)

sub <- 1:100
cor.test(x[sub],y[sub])
sub2 <- sample(1:1000,100)
cor.test(x[sub2],y[sub2])

# pdf("c8scatter2.pdf",width=5,height=5)
par(mfrow=c(1,1))
plot(x,y)
points(x[sub],y[sub],col="red",pch=16)
points(x[sub2],y[sub2],col="blue",pch=16)
# dev.off()

#pdf("c8corsubsample.pdf",width=6,height=6)
par(mfrow=c(2,2))
sub <- sample(1:1000,50)
test <- cor.test(x[sub],y[sub])
plot(x[sub],y[sub], main=paste("Correlation =",round(test$estimate,3)))

sub <- sample(1:1000,50)
test <- cor.test(x[sub],y[sub])
plot(x[sub],y[sub], main=paste("Correlation =",round(test$estimate,3)))

sub <- sample(1:1000,50)
test <- cor.test(x[sub],y[sub])
plot(x[sub],y[sub], main=paste("Correlation =",round(test$estimate,3)))

sub <- sample(1:1000,50)
test <- cor.test(x[sub],y[sub])
plot(x[sub],y[sub], main=paste("Correlation =",round(test$estimate,3)))
#dev.off()

set.seed(100)
cors <- rep(0,10000)
for(i in 1:10000)
{
  sub <- sample(1:1000,20)
  cors[i] <- cor.test(x[sub],y[sub])$estimate
}

#pdf("c8hist.pdf", width=5,height=4)
hist(cors,col="gold",xlab="Sampled correlation estimates")
#dev.off()

###############################
## Non-parametric correlation estimates:
x <- runif(100)
y <- runif(100)
cor.test(x,y)

##Change one value:
x[2] <- 5
y[2] <- 5
cor.test(x,y)
cor.test(x,y,method="spearman")
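## Optional extra (not from the original chapter code): Kendall's tau is another
## rank-based correlation that resists the single outlier; cor.test supports it
## via method="kendall".
cor.test(x, y, method="kendall")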
library(BayesFactor)
#Bayes factor test still fooled.
print(correlationBF(x,y))

x <- -500:500/100
y <- rnorm(1001)*7 +x
x2 <- exp(x)

pdf("c8-skew.pdf",width=6,height=4)
plot(x2,y,pch=16,col="cadetblue3",cex=1.3)
points(x2,y,pch=1,col="grey20",cex=1.3)
text(75,-20,paste("Pearson R = ",round(cor(x2,y),3)))
text(75,-25,paste("Spearman Rho = ",round(cor(x2,y,method="spearman"),3)))
dev.off()

cor(x,y)
cor(x,y)
cor(x2,y)
cor(x2,y,method="spearman")
cor.test(rank(x2),rank(y))

data(iris)
pairs(iris)
#pdf("c8-iris.pdf",width=8,height=8)
pairs(iris,col=iris[,5],pch=16)
#dev.off()

library(GGally)
library(ggplot2)
pdf("c8-pairs.pdf",width=12,height=12)
ggpairs(iris,ggplot2::aes(colour=Species))
dev.off()

cor(iris[,1:4])

for(i in 1:3)
{
  for (j in (i+1):4)
  {
    cat("Comparing",i,"to",j,"\n")
    print(cor.test(iris[,i],iris[,j]))
  }
}

##Bayes factor tests:
for(i in 1:3)
{
  for (j in (i+1):4)
  {
    cat("Comparing",i,"to",j,"\n")
    print(correlationBF(iris[,i],iris[,j]))
  }
}

#####################################
###Part 3 of Chapter 8
######################################
## Categorical data:

print(HairEyeColor)

#pdf("c8-hair1.pdf",width=8,height=4)
par(mfrow=c(1,2))
barplot(HairEyeColor[,,1],main="Male")
barplot(HairEyeColor[,,2],main="Female")
#dev.off()

#pdf("c8-hair2.pdf",width=8,height=4)
par(mfrow=c(1,2))
barplot(t(HairEyeColor[,,1]),col=c("brown4","blue","khaki","darkgreen"),main="Male eye color",xlab="Hair color")
barplot(t(HairEyeColor[,,2]),col=c("brown4","blue","khaki","darkgreen"),main="Female eye color",xlab="Hair color")
#dev.off()

#pdf("c8-hair3.pdf",width=6,height=4)
hc2 <- t(HairEyeColor[,,1]+HairEyeColor[,,2])
par(mfrow=c(1,1))
barplot(hc2,col=c("brown4","blue","khaki","darkgreen"))
#dev.off()

observed <- hc2[,4]

#Compute the expected proportions for brown versus blond hair:
brownblond <- hc2[,c(2,4)]
expected.prop <- rowSums(brownblond)/sum(brownblond)
expected <- matrix(rep(colSums(brownblond),each=4),nrow=4) *
  matrix(rep(expected.prop,2),nrow=4)

#pdf("c8-expected.pdf",width=6,height=4)
barplot(expected,beside=T,names=c("Brown","Blond"),
        col=c("brown4","blue","khaki","darkgreen"),
        ylab="Expected count",main="Expected Distribution")
#dev.off()

sumdiff <- sum((brownblond - expected)^2/(expected))
sumdiff
pchisq(sumdiff,3,lower.tail=FALSE)   ##p-value for the chi-squared statistic

chisq.test(brownblond)
#Pearson's Chi-squared test
#
#data: brownblond
#X-squared = 85.5966, df = 3, p-value < 2.2e-16

chisq.test(hc2[,2],p=hc2[,4],rescale.p=T)
chisq.test(hc2[,c(2,4)])

##Chi-squared test is sort of a non-parametric test already.
## We can do a bayes factor version:
contingencyTableBF(brownblond, sampleType = "indepMulti", fixedMargin = "cols")

colSums(hc2)

##Alternative uses of a chi-squared test:
##test for equality of outcome for a marginal distribution
colSums(hc2)
chisq.test(colSums(hc2),p=c(.25,.25,.25,.25))

##Test for equality between two subsets:
## look at eye color for black versus brown hair
chisq.test(hc2[,1],p=hc2[,2], rescale.p=T)
chisq.test(hc2[,1:2])
##Notice that the above are different. One asserts that brown hair is the truth;
## the other expects them to be the same and looks for deviations from that sameness.

##############
## Chi-squared test on raw data outcomes rather than tables:
library(BayesFactor)
set.seed(100)
sample1 <- sample(LETTERS[1:6], 1000, prob=c(1,1,1,1,1,.2), replace=T)
sample2 <- sample(LETTERS[1:6], 1000, replace=T)
sample3 <- sample(c("yes","no","maybe"), 1000, replace=T)

chisq.test(sample1,sample2)
chisq.test(sample1,sample3)
chisq.test(table(sample1,sample2))  ##this is the same.
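## Optional extra (not from the original chapter code): the htest object returned
## by chisq.test() stores the expected cell counts under independence, so we can
## see where the observed table deviates. The name tst is just a local example.
tst <- chisq.test(table(sample1, sample2))
round(tst$expected, 1)                 ## expected counts under independence
round(tst$observed - tst$expected, 1)  ## cell-by-cell deviations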
contingencyTableBF(table(sample1,sample2), sampleType = "indepMulti", fixedMargin = "cols")

###Alternative: rather than whether they are independent, test for whether the
###marginal distributions differ
table <- tapply(rep(1,2000), list(c(sample1,sample2), rep(1:2, each=1000)), length)
table[is.na(table)] <- 0
table
chisq.test(table)
contingencyTableBF(table, sampleType = "indepMulti", fixedMargin = "cols")

##So, although the two samples are independent, they do not have the same distributions.
## We can only do this comparison because the set of labels is the same for the two;
## maybe they are choices before and after a treatment.

######################
## Technical issues (mostly on t-tests)
######################

## Non-normal and skewed data
## The non-parametric tests are not impacted by non-normality--this is their main benefit.
## As N increases, the traditional tests are impacted minimally, as they are estimating
## the distribution of the estimate of the mean. Likelihood and Bayesian tests are
## highly dependent on the form of the distribution, and so violations here can have large consequences.
## In these cases, it is usually a good idea to try to transform your data in such a
## way that they end up being roughly normally distributed. But you must also be confident
## that you can interpret the transformed data. The alternative is to model the
## distribution exactly using an alternative distribution. This is often done in
## Bayesian and ML approaches, and even within more traditional approaches in the
## context of generalized linear regression models.

########################
## Different sample size
x1 <- rnorm(20,.2)
y1 <- x1 + .3 + rnorm(20,mean=.3,sd=.2)
t.test(x1,y1)
t.test(c(x1,rnorm(50,.2)),y1)

####################################
## Between versus within
###Exercise
set.seed(1000)
x0 <- rnorm(30,0)
x1 <- rnorm(20,.2)
x1a <- rnorm(50,.2)
y1 <- x1 + rnorm(20,mean=.25)
y1a <- x1a + rnorm(50,mean=.25)

##For this one, it does not matter
t.test(x1,y1,paired=T)
t.test(x1,y1,paired=F)

##for this one, it does matter:
t.test(x1a,y1a,paired=T)
t.test(x1a,y1a,paired=F)

set.seed(1001)
x <- runif(20)
y1 <- x + 1.3 + rnorm(20)*.3
y2 <- x + 1.3 + rnorm(20)*5
par(mfrow=c(1,2))
plot(x,y2)
matplot(cbind(x,y1,y2))
t.test(x,y1,paired=T)
t.test(x,y2,paired=T)

#########################
##Effect sizes:
## Cohen's d for independent samples:
(mean(x)-mean(y1)) / sd(c(x-mean(x),y1-mean(y1)))
(mean(x)-mean(y2)) / sd(c(x-mean(x),y2-mean(y2)))

## Cohen's d for paired samples:
delta1 <- x - y1
delta2 <- x - y2
mean(delta1)/sd(delta1)
mean(delta2)/sd(delta2)

library(effectsize)
##using some examples from earlier:
effectsize(t.test(x,y1,paired=T))
cohens_d(x - y1)   ##paired-samples d, computed on the differences
effectsize(chisq.test(table))

#####
## Example: pre-post testing using paired-samples/one-sample tests
library(vioplot)
set.seed(101)
n <- 40
pretest <- exp(rnorm(n))
posttest <- exp(log(pretest) + rnorm(n)+.3)
difference <- posttest-pretest

vioplot(pretest,posttest,col="gold")
hist(pretest,breaks=20)
hist(posttest,breaks=20)
hist(difference,col="gold")
abline(v=0)

t.test(difference)
binom.test(sum(difference>0),40)
library(BayesFactor)
ttestBF(difference)
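## Optional extra (not from the original chapter code): because pretest and
## posttest were generated as lognormal, the raw difference is skewed; a paired
## test on the log scale matches the data-generating process and illustrates the
## transformation strategy discussed above.
hist(log(posttest) - log(pretest), col="gold")
t.test(log(posttest) - log(pretest))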
##In-class exercises:
##For each of these, answer the question, explaining the result using the
## appropriate statistical test. Accompany the test with a figure showing the
## result.

##For BSDA::Movie, was the difference in moral aptitude positive or negative?
BSDA::Movie

##Compare ToothGrowth data for OJ versus VC, under each dosage.
## Is OJ better or worse than VC?
ToothGrowth

##compare bone density by group for the BSDA::Bones data set
## Medical advice suggests that bone density should be 210 units. Determine
## whether the active group is higher than 210. Determine whether the nonactive group
## is lower than 210.
BSDA::Bones

##Did rankings for dogs in 1998 differ from those in 1992?
## note--treat each dog rating as a separate independent identical observation measured twice.
## We would like to have the raw numbers by which these values were established,
## instead of averages within a class.
BSDA::Dogs
##

###For HairEyeColor,
## Does the proportion of eye color depend on gender?
## Does the proportion of hair color depend on gender?
## Does the combined proportion of hair and eye color depend on gender?
HairEyeColor

## Some examples from class
set.seed(100)
data <- rnorm(25,mean=.2)
data2 <- rnorm(30,mean=-.2)
data3 <- rnorm(30, mean=-5)
data4 <- data + rnorm(25,mean=.2,sd=.3)

t.test(data)
library(BSDA)
SIGN.test(data)
library(BayesFactor)
ttestBF(data)

##independent-samples t-test
t.test(data,data2)
t.test(data2,data3,var.equal = FALSE)
t.test(data2,data3,var.equal = TRUE)
ttestBF(data,data2)

##paired tests
t.test(data,data4,paired=T)
SIGN.test(data,data4)   ##paired sign test
wilcox.test(data,data4,paired=T)
ttestBF(data,data4,paired=T)
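## Optional wrap-up (not from the original chapter code): the standardized effect
## size for the paired comparison above, computed by hand from the differences,
## mirroring the Cohen's d calculations earlier in the chapter.
d <- data4 - data
mean(d) / sd(d)   ## paired-samples Cohen's d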