#################################################################
##  Code for Book: Applied Statistical Analysis in R
##   A Graduate course for Psychology, Human factors, and Data Science
##  (c) 2018-2020 Shane T. Mueller, Ph.D.
##  Michigan Technological University
##  shanem@mtu.edu
##
##  Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210
##
##
## This file contains example software code relevant for Chapter 20:
## Within-subject repeated measures ANOVA and randomized factors
##
##
##
##


##Let's suppose that we run an experiment with 20 participants, and measure response times
## for three conditions with five observations per condition.
## We have 15 observations per subject.
set.seed(101)
x1 <- factor(rep(c("a","b","c"),20*5))
subj <- factor(rep(1:20,each=3*5))
rt <- 200 + runif(20)[subj]*100+ c(-50,0,100)[x1] + rnorm(300) * 30


#pdf("c20-box1.pdf",width=8,height=5)
boxplot(rt~x1)
points(as.numeric(x1),rt,cex=2,col="grey")

##Notice that if we plot the actual subjects, there is a noticeable consistent variation.
rttab <- tapply(rt,list(x1,subj),mean)
matplot(rttab,pch=16,col="black",type="o",add=T,lty=3)
#dev.off()

#* the lines tend not to cross very much.
#*Every subject shows a very similar effect


aov1 <- aov(rt~x1)
summary(aov1)


#A simple way to do this would be to essentially fit a different intercept for each person.
#We coul do this by adding the subject code as a predictor, and if we remove the intercept,
#we won't even have to worry about the effects coding issues:

library(car)

aov2 <- aov(rt~0+subj+x1)
summary(aov2) 
  

Anova(aov2)


##Effect sizes:
library(sjstats)
library(effectsize)
eta_squared(aov2)
omega_squared(aov2)


#pdf("c20-unmeaned.pdf",width=8,height=5)  
boxplot(rt~x1)  
matplot(t(mean(rttab)+ (t(rttab)-(colMeans(rttab)))),pch=16,type="o",col="red",add=T)
#dev.off()


##aggregate before making the model:
dat.agg<-aggregate(rt,list(sub=subj,x1=x1),mean)
aov3 <- aov(x~0+sub+x1,dat=dat.agg)
summary(aov3)


aov4 <- aov(rt~x1*subj)

summary(aov4)
Anova(aov4)


anova(aov(rt~x1),aov(rt~x1:subj))


#Error strata in the Analysis of Variance Table

aov5 <- aov(rt~x1 + Error(subj / x1))
summary(aov5)


summary(aov5)

#Note: Anova(aov5) ##won't work


##Using ez library
library(ez)

data <- data.frame(subj,x1,rt)
ez1 <- ezANOVA(data=data,dv=rt,within=x1,wid=subj,
               detailed=T)

ez1.long <- ezANOVA(data=data,dv=rt,within=x1,wid=subj,
               detailed=T,return_aov=T)


print(ez1)
print(ez1.long)
#Notice that ez1 gives us the same result as the aov with Error() term.


##Example: Baron's Hays data
data1<-c(
  49,47,46,47,48,47,41,46,43,47,46,45,
  48,46,47,45,49,44,44,45,42,45,45,40,
  49,46,47,45,49,45,41,43,44,46,45,40,
  45,43,44,45,48,46,40,45,40,45,47,40) 

Hays.df <- data.frame(rt = data1,
                      subj = factor(rep(paste("subj", 1:12, sep=""), 4)),
                      shape = factor(rep(rep(c("shape1", "shape2"), c(12, 12)), 2)),
                      color = factor(rep(c("color1", "color2"), c(24, 24))))
par(mfrow=c(1,3))
boxplot(rt~shape,data=Hays.df)
boxplot(rt~color,data=Hays.df,col=c("grey40","gold"))
boxplot(rt~shape*color,data=Hays.df)
matplot(tapply(Hays.df$rt,Hays.df[,3:4],mean),type="b")

# Do a full aov model including the main effects's interactions with  subject, and calculate F values by hand by comparing lines of the table
# Build a model using the appropriate Error() term, and verify you can  obtain the same F tests.


################################################
################################################
## Repeated measurement
##

set.seed(500)
x1 <- factor(rep(c("a","b","c"),20*5))
subj <- factor(rep(1:20,each=3*5))
rt <- 200 + runif(20)[subj]*100+ c(-50,0,100)[x1] + rnorm(300) * 30
boxplot(rt~x1)
points(as.numeric(x1),rt,cex=2,col="grey")

set.seed(100)
data2 <- data.frame(subj=rep(subj,each=5),
                   cond = rep(x1,each=5),
                   rt = rep(rt,each=5) + rnorm(length(rt)*5))


data2[1:10,]


#Here, we have just observed each person in each condition five times  In terms of the analysis we do, any repetition
#within the subject x condition table should not increase the power of the test (although it may increase the 
#accuracy of our estimates.)  In a sense, the test we do should be roughly the same as what we get if we'd simply
#aggregate down to one observation per person.  Let's see if we can do this.


data2b  <- aggregate(data2$rt,list(subj=data2$subj,cond=data2$cond),mean)
data2b$rt <- data2b$x


##compare the aggregated vs non-aggergated versions:
aov2.a <- aov(rt~cond+Error(subj/cond),data=data2)
aov2.b <- aov(rt~cond+Error(subj/cond),data=data2b)

summary(aov2.a)
summary(aov2.b)


##post-hoc comparisons of means:
library(multcomp)
library(emmeans)

emmeans(aov2.a,spec="cond")
###EZ library:
library(ez)
ez2.a1 <- ezANOVA(data=data2,dv=rt,within=cond,wid=subj)
ez2.a2 <- ezANOVA(data=data2,dv=rt,within_full=cond,wid=subj) ##This doesn't work
ez2.b <- ezANOVA(data=data2b,dv=rt,within=cond,wid=subj)

ez2.a1
ez2.b


##Notice the effect of condition is identical across all four tests.  
library(nlme)
lme.a <- lme(rt~cond,data=data2,random=~1|subj)
gg <- glht(lme.a,linfct=mcp(cond="Tukey"))

print(gg)
summary(gg)
################################################
################################################
## Mixed designs: between and within variables.
##
data3 <- data.frame(subj=as.factor(c(subj, as.numeric(subj)+20)),
                   group = rep(c("Control","Experimental"),each=300),
                   cond = rep(x1,2),
                   rt = rep(rt,2) + rnorm(600))

head(data3)

#defining the model is as follows: because group is a between-subject factor, it 
#is not nested within the Error() notation, but cond is because it is a within-subject
#variable.


model3<- aov(rt~ group + cond + Error(subj/(cond)),data=data3)
summary(model3)


#interpreting model3 is tricky, because the effects appear in multiple error strata.  To find the right one, it is sometimes
#helpful to take a look at the degrees of freedom and be sure they are sensible.  The rule of thumb is that
#if we are trying to generalize an effect of a condition across people, so we should look at the error 
#related to the condition x person interaction. This is the second strata, and we'd report F(2,78)=1060, p<.001. Note that there were
#40 subjects and 3 conditions; (40-1)*(3-1)=78.


#To examine the group variable, the correct statistic is nested within sthe subj error strata F(1,38)=0.
#This can be confusing.  A way of figuring out the right test to do is to consider the one-way ANOVA you'd
#perform on just the factor you care about, aggregating over subject. The degrees of freedom should be the same, although
#the test statistic could differ.

##test the effect of between-subject group
data3.agg1 <- aggregate(data3$rt,list(group=data3$group,subj=data3$subj),mean)
data3.agg1
summary(aov(x~group,data=data3.agg1))

##aggregate over condition:
data3.agg2 <- aggregate(data3$rt,list(cond=data3$cond,subj=data3$subj),mean)
summary(aov(x~cond+Error(subj/cond),data=data3.agg2))


#Just as above, here the F test is F(2, 78)=2619.  This is exactly the same test in this case. If you have a non-orthogonal design, it probably could differ.
#How about the ezANOVA?
library(ez)


ez3.a <- ezANOVA(data=data3,dv=rt,within=cond,     wid=subj,between=group,detailed=T,return_aov=T)
ez3.b <- ezANOVA(data=data3,dv=rt,within_full=cond,wid=subj,between=group,detailed=T) ##This collapses too much and is wrong
print(ez3.b)
print(ez3.a)

#Notice that we get the same values for group and cond.
summary(model3)


####################
### Testing and correcting for sphericity.
##see textbook

#####################
##post-hoc tests in repeated measures ANOVA
#See textbook for discussion

##########################################################
## additional examples
##########################################################
##########################################################
##>Example 2 from Baron
  data1<-c(
  49,47,46,47,48,47,41,46,43,47,46,45,
  48,46,47,45,49,44,44,45,42,45,45,40,
  49,46,47,45,49,45,41,43,44,46,45,40,
  45,43,44,45,48,46,40,45,40,45,47,40) # across subjects then conditions


Hays.df <- data.frame(rt = data1,
                      subj = factor(rep(paste("subj", 1:12, sep=""), 4)),
                      shape = factor(rep(rep(c("shape1", "shape2"), c(12, 12)), 2)),
                      color = factor(rep(c("color1", "color2"), c(24, 24))))


#For this data, we are interested in knowing if shape and color of stimuli affect RT.

par(mfrow=c(1,3))
boxplot(rt~shape,data=Hays.df)
boxplot(rt~color,data=Hays.df,col=c("grey40","gold"))
boxplot(rt~shape*color,data=Hays.df)
matplot(tapply(Hays.df$rt,Hays.df[,3:4],mean),type="b")

##incorrect:
summary(aov(rt~ shape * color, data=Hays.df))

anova.hays <- aov(rt~color*shape +Error(subj/(color*shape)), data=Hays.df)
summary(anova.hays)

ezANOVA(Hays.df,,dv=rt,within=.(shape,color),wid=subj,detailed=T)


#  Exercise: compute aov() and ezANOVA models for each of Baron's examples:
#  Baron example 2 (Maxwell & Delaney,)
  
#Two within-subject variables:
  
MD.rt <- matrix(c( 
420, 420,  480, 480, 600, 780,
420, 480, 480, 360, 480, 600,
480, 480, 540, 660, 780, 780,
420, 540,540, 480, 780, 900,
540, 660,540, 480, 660, 720,
360, 420, 360, 360, 480, 540,
480, 480,600, 540, 720, 840,
480, 600, 660, 540, 720, 900,
540, 600,540, 480, 720, 780,
480, 420,540, 540, 660, 780),
ncol = 6,byrow = T) # byrow=T so the matrix’s layout is exactly like this


##More examples:
MD.df <- data.frame(
  rt  = as.vector(MD.rt),
  subj = factor(rep(paste("s", 1:10, sep=""), 6)),
  deg  = factor(rep(rep(c(0,4,8), c(10, 10, 10)), 2)),
  noise = factor(rep(c("no.noise", "noise"), c(30, 30))))

#Here, rt is the dependent variable, subj is a subject code, deg is visual tilt (0,4, 8 degrees), and noise is whether 
#pixel noise was involved.  We'd like to know whether deg and noise have effects.  Fit models treating subj as a randomized factor and 
#deg and noise and within-subject variables, using both aov and ezANOVA.


#Example 3: 

subj <- gl(10, 32, 320) # 10 subjects, each tested 32 times, total length 320
a <- gl(2, 16, 320) # first 16 trials with a1 then next 16 with a2
b <- gl(2,
8, 320) # first 8 triasl with b1, then next 8 with b2, etc.
c <- gl(2,
4, 320)
x <- rnorm(320)
d1 <- data.frame(subj, a, b, c, x)
d2 <- aggregate(x, list(a = a, b = b, c = c, subj = subj), mean)

#Test the a x b x c interaction, using etiher Error(subj/(a*b*c) or Error(subj/(a+b+c))), 
#on both d1 (raw data) and d2 (aggregated by subject over repetetitions)

summary(a1 <- aov(x~ a * b * c + Error(subj/(a*b*c)), d2))
summary(a2 <- aov(x~ a * b * c + Error(subj/(a+b+c)), d2))
summary(a3 <- aov(x~ a * b * c + Error(subj/(a*b*c)), d1))
summary(a3 <- aov(x~ a * b * c + Error(subj/(a+b+c)), d1))


#Try to test the same models using ezANOVA.


#Example 4:
#on random effect (subject) and one fixed effect (drug)
data <- c(
30,14,24,38,26,
28,18,20,34,28,
16,10,18,20,14,
34,22,30,44,30)
Stv.df <- data.frame(rt=data,
subj = factor(rep(paste("subj", 1:5, sep=""), 4)),
drug = factor(rep(paste("drug", 1:4, sep=""), c(5,5,5,5))))


#Example 5:
Ela.mat <-matrix(c(
19,22,28,16,26,22,
11,19,30,12,18,28,
20,24,24,24,22,29,
21,25,25,15,10,26,
18,24,29,19,26,28,
17,23,28,15,23,22,
20,23,23,26,21,28,
14,20,29,25,29,29,
16,20,24,30,34,36,
26,26,26,24,30,32,
22,27,23,33,36,45,
16,18,29,27,26,34,
19,21,20,22,22,21,
20,25,25,29,29,33,
21,22,23,27,26,35,
17,20,22,23,26,28), nrow = 16, byrow = T)

Ela.mul <- cbind.data.frame(subj=1:16, gp=factor(rep(1:2,rep(8,2))), Ela.mat)
dimnames(Ela.mul)[[2]] <-
c("subj","gp","d11","d12","d13","d21","d22","d23") # d12 = drug 1, dose 2
Ela.uni <- data.frame(effect = as.vector(Ela.mat),
subj = factor(paste("s", rep(1:16, 6), sep="")),
gp = factor(paste("gp", rep(rep(c(1, 2), c(8,8)), 6), sep="")),
drug = factor(paste("dr", rep(c(1, 2), c(48, 48)), sep="")),
dose=factor(paste("do", rep(rep(c(1,2,3), rep(16, 3)), 2), sep="")),
row.names = NULL)

#This has one between (grp) and two within-subject (drug,dose) variables
#data set: Ela.uni.  Make models with subject as a random factor.