#################################################################
##  Code for Book: Applied Statistical Analysis in R
##   A Graduate course for Psychology, Human factors, and Data Science
##  (c) 2018-2022 Shane T. Mueller, Ph.D.
##  Michigan Technological University
##  shanem@mtu.edu
##
##  Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210
##
##
## This file contains example software code relevant for Chapter 20:
## Within-subject repeated measures ANOVA and randomized factors
##
##
##
##


##The experiment works like this: people visit a web site we have created, and it contains the different types of advertisements we have identified.
##For each person, we end up with a server log indicating whether they clicked on or hovered their mouse over each type of advertisement.  We want to know whether
##the social ads (ones sensitive to their on-line identity) were better than the non-social advertisements.

##Let's suppose there is actually no difference. Suppose that the different advertisements we sample from actually differ in their "true" click-through
##rate, but not between social versus normal ads.  Simulate the population of click-through rates like this:

##Population of "true" click-through rates for each ad type.  Social and normal
##ads share the same range (0 to 1); animated ads run from 0.6 to 1.
ads.social <- seq.int(0, 100) / 100
ads.normal <- seq.int(0, 100) / 100
ads.animated <- seq.int(150, 250) / 250


#Design of the simulated experiment: four advertisements are sampled for each of
#the three types (twelve ads in total), and each of 50 people either does or does
#not follow each link.  Ignoring within-subject effects for the moment, the data
#set is built like this:

set.seed(100)
social.eff <- sample(ads.social, size = 4)
normal.eff <- sample(ads.normal, size = 4)
animated.eff <- sample(ads.animated, size = 4)

##One uniform draw per person x ad, compared against that ad's rate: 200 rows per
##type, 600 in all.  The four rates recycle along each run of 200 draws, so the
##ad index cycles 1,2,3,4 within a type.
clicks <- unlist(lapply(
  list(social.eff, normal.eff, animated.eff),
  function(eff) runif(50 * 4) < eff
))
type <- factor(rep(c("A", "B", "C"), each = 200))   ##ad type, in blocks of 200
ad <- factor(rep(1:4, times = 150))                 ##ad index 1-4, reused across types
##ad id that is unique across types (1-4, 11-14, 21-24); in blocks of type.
ad2 <- factor(rep(1:4, times = 150) + rep(c(0L, 10L, 20L), each = 200))

##Subject id: each subject contributes four consecutive rows within every type.
sub <- factor(rep(1:50, each = 4, times = 3))

##Sum-to-zero coding for every factor.
contrasts(type) <- contr.sum(nlevels(type))
contrasts(ad) <- contr.sum(nlevels(ad))
contrasts(ad2) <- contr.sum(nlevels(ad2))
contrasts(sub) <- contr.sum(nlevels(sub))

ads <- data.frame(sub, type, ad, ad2, clicks)

head(ads, 10)


##Classical (ANOVA-based) analyses of the click data.

##Mean click rate per ad type, without considering any random factors:
aggregate(ads$clicks, list(ads$type), mean)

##Treat subject as a randomized factor, with type varying within subject:
model1 <- aov(clicks ~ type + Error(sub / type), data = ads)
summary(model1)
##We can't run a post-hoc test on the Error() fit itself, so fall back to pairwise
##t-tests on the raw observations.  pairwise.t.test() has no `data` argument, so
##the columns are passed explicitly rather than picked up from the workspace.
pairwise.t.test(ads$clicks, ads$type)


#type is a between-ad variable; treat ad as the randomized factor instead.
#`data = ads` keeps the fit tied to the data frame rather than to global vectors.
model2 <- aov(clicks ~ type + Error(ad), data = ads)

summary(model2)

#But what do you do?  According to Clark, you should use minF:

#fmin = F1 x F2/(F1+F2)
#NOTE(review): 76.6 and 7.7 are hard-coded; presumably they are the F values for
#type read off the two summaries above - re-check them if the simulation changes.
76.6*7.7/(76.6+7.7)


###########################################################
##Modern approach: mixed effects models:


library(nlme)
##Baseline: intercept only, with a random intercept for each subject.
am1 <- lme(fixed = clicks ~ 1, random = ~ 1 | sub, data = ads)
##Same random structure, with ad type added as a fixed effect.
am2 <- lme(fixed = clicks ~ type, random = ~ 1 | sub, data = ads)

library(lme4)
##lme4 version of am2: the random intercept is written inside the formula.
lmer2 <- lmer(formula = clicks ~ type + (1 | sub), data = ads)


##Print the two fits one after the other for comparison.
summary(am2)
summary(lmer2)


##multcomp supplies glht(), used below to test linear combinations of coefficients.
library(multcomp)

##Print the coding matrix attached to the type factor; custom contrast weights
##have to be written in terms of these columns.
contrasts(ads$type)
##The weights you pick depend on how the factor was coded!
## We used sum-to-zero contrasts: A is represented as (1, 0), B as (0, 1),
## and C as (-1, -1).  With coefficients (intercept, b1, b2) the level means are
## A = intercept + b1, B = intercept + b2, and C = intercept - b1 - b2.
## So A versus B is b1 - b2, A versus C is 2*b1 + b2, and B versus C is b1 + 2*b2.

#> contrasts(type)
#[,1] [,2]
#A    1    0
#B    0    1
#C   -1   -1
##Each row of the matrix below gives the weights on (intercept, b1, b2).

##Custom hypotheses for a model with sum-to-zero coded type.  One row per
##hypothesis; the three columns weight the intercept and the two type coefficients.
contrasts <- matrix(
  c(1,  0,  0,
    1,  1,  0,
    1,  0,  1,
    1, -1, -1,
    0,  1, -1,
    0,  2,  1,
    0,  1,  2),
  ncol = 3, byrow = TRUE,
  dimnames = list(
    c("Grand mean/intercept",
      "A alone", "B alone", "C alone",
      "A to B", "A to C", "B to C"),
    NULL
  )
)

##Test every row of the custom contrast matrix, for the nlme and the lme4 fit.
summary(glht(am2, linfct = contrasts))
summary(glht(lmer2, linfct = contrasts))

##Let multcomp build the pairwise type comparisons itself.
tukey <- glht(am2, linfct = mcp(type = "Tukey"))
print(tukey)
summary(tukey)

##compare to rows 5-7 of the custom matrix (the three pairwise rows):
summary(glht(am2, linfct = contrasts[5:7, ]))


##From here on keep only the three pairwise comparisons.
contrasts <- matrix(
  c(0, 1, -1,
    0, 2,  1,
    0, 1,  2),
  ncol = 3, byrow = TRUE,
  dimnames = list(c("A to B", "A to C", "B to C"), NULL)
)
summary(glht(lmer2, linfct = contrasts))

##Both ad and subject as random factors.
##NOTE(review): nlme reads a list of grouping formulas as nested levels (in list
##order), while the two lmer terms are crossed - confirm the fits are meant to differ.
am4 <- lme(clicks ~ type, random = list(~ 1 | ad, ~ 1 | sub), data = ads)
lmer4 <- lmer(clicks ~ type + (1 | ad) + (1 | sub), data = ads)


summary(am4)
summary(lmer4)


##Custom contrast rows applied to each fit.
summary(glht(am4, linfct = contrasts))
summary(glht(lmer4, linfct = contrasts))


##Pairwise type comparisons generated by multcomp for the lme4 fit.
summary(glht(lmer4, linfct = mcp(type = "Tukey")))


##Same models as am4/lmer4 but without an intercept (0 + type), so each fixed
##coefficient corresponds to one ad type.
am4b <- lme(clicks~0+type,random=list((~1|ad),(~1|sub)),data=ads) ##both
lmer4b <- lmer(clicks~0+type + (1|ad) + (1|sub),data=ads)

##these two are essentially the same:
summary(am4b)

library(multcomp)


##The original referred to am5 here, which is not fitted until later in the file,
##so the script stopped with "object 'am5' not found".  The custom contrast matrix
##was written for the intercept + sum-to-zero coding, so it is applied to am4; the
##Tukey comparison uses am4b, mirroring the lmer4 / lmer4b calls below.
summary(glht(am4,contrasts))
summary(glht(am4b,linfct=mcp(type="Tukey")))

glht(lmer4,contrasts)
summary(glht(lmer4,contrasts))

summary(glht(lmer4b, linfct=mcp(type="Tukey")))


#We might be concerned that we have allowed advertisement and subject to both be random effects,
#but have not accounted for the fact that advertisement is nested within type.  The right thing to do depends on the design.
#Because we had four products with equivalent advertisements, am4/lmer4 is the right model.  But if we had simply sampled four ads of each type
#and they were not the same across types, then advertisement is nested within type (the ad2 coding).


##Both random factors again, but ads are identified by ad2, so each ad belongs
##to exactly one type (ad nested within type).
am5 <- lme(
  clicks ~ type,
  random = list(~ 1 | ad2, ~ type | ad2, ~ 1 | sub),
  data = ads
)
lmer5 <- lmer(clicks ~ type + (1 | ad2) + (type | ad2) + (1 | sub), data = ads)


#Now only A to C is significantly different.  This makes sense because in the
#first case we have repeated measures of the product we are advertising, but in
#this case we do not.  We would need to sample many more products (more than 4)
#to get a good estimate of whether ad type had an effect.
summary(am5)
summary(lmer5)

##Custom pairwise contrasts for each fit.
summary(glht(am5, linfct = contrasts))
summary(glht(lmer5, linfct = contrasts))


##Pairwise comparisons generated by multcomp for the lme4 fit.
summary(glht(lmer5, linfct = mcp(type = "Tukey")))

##################
##Chick weight example:
##outcome is log(wt), which we previously found was somewhat linear
##Fixed effects: time, diet
## random effects:
## 1. chick is randomly sampled
## 2. Chick*time
## (possibly) chick--intercept of chick; baseline value differs


##Working copy of the built-in ChickWeight data with a log-weight outcome column.
cw <- ChickWeight
cw$logwt <- log(cw$weight)
##Polynomial coding for Diet (columns .L, .Q, .C for its four levels).
contrasts(cw$Diet) <- contr.poly(nlevels(cw$Diet))


##This allows each chick to have its own intercept.  The Time slope is not random:
##it is a fixed effect that, through Time*Diet, can differ between diets, as can
##the mean intercept.
lmer.cw0 <- lmer(logwt~Time*Diet + (1|Chick), data=cw )
summary(lmer.cw0)

##Additive version (no Time-by-Diet interaction), then compare the two fits.
lmer.cw1 <- lmer(logwt~Time+Diet + (1|Chick),data=cw)
summary(lmer.cw1)
anova(lmer.cw0,lmer.cw1)

coef(lmer.cw0)
##Anova() lives in the car package, which this script never attaches; the call is
##namespace-qualified so it no longer fails with "could not find function".
car::Anova(lmer.cw0)

#This allows each chick to have its own intercept and slope:
lmer.cw2 <- lmer(logwt~Time*Diet + (1+Time|Chick),data=cw)

##NOTE(review): (Time|Chick) already includes a random intercept, so the extra
##(1|Chick) term looks redundant - confirm this variant is intended.
lmer.cw2b <- lmer(logwt~Time*Diet + (Time|Chick)+  (1|Chick) ,data=cw)

summary(lmer.cw2)
summary(glht(lmer.cw2, linfct=mcp(Diet="Tukey")))
summary(glht(lmer.cw2b, linfct=mcp(Diet="Tukey")))


summary(lmer.cw2)
pr.cw <- profile(lmer.cw2)
confint(pr.cw)

##Difference in Time slope between diet 3 and diet 2.  Diet is not treatment
##coded here, so the weights on the three Time:Diet coefficients are taken from
##the factor's own coding matrix (row for diet 3 minus row for diet 2).  The old
##hand-written weights (1, -1, 0) compared Time:Diet.L with Time:Diet.Q instead.
##stats:: is spelled out because `contrasts` is also a matrix in this script.
##The fixed effects are ordered: intercept, Time, three Diet terms, three Time:Diet terms.
diet.codes <- stats::contrasts(cw$Diet)
slope.3v2 <- unname(diet.codes["3", ] - diet.codes["2", ])
contrasts.cw <- rbind("Diet 3 vs diet 2 by time"=c(0,0,0,0,0, slope.3v2))
summary(glht(lmer.cw2,contrasts.cw))


##Store the logical clicks as numeric 0/1 for ezMixed.
ads$clicks1 <- as.numeric(ads$clicks)
library(ez)
##Mixed model via ez: type is fixed; subject and ad are random.
ezm <- ezMixed(
  data = ads,
  dv = .(clicks1),
  random = .(sub, ad),
  fixed = .(type)
)
print(ezm)
##Pull out the unrestricted model for the type effect and inspect it directly.
mz <- ezm$models$type$unrestricted
summary(mz)
summary(glht(mz, linfct = mcp(type = "Tukey")))


########################################################
## Additional example:
##Load the pooled stem-completion data and keep only rows with lengthcond above zero.
data <- read.csv(file = "pooled-stemcomp.csv")
data <- data[data$lengthcond > 0, ]


##First, aggregate over the grouping columns at positions 2, 6, 7 and 1, keeping
##the maximum uniquecount in each cell (stored in column x).
##NOTE(review): presumably those positions are subnum, stem, stemcond and
##lengthcond, the names used by the models below - confirm against the CSV header.
dat2 <- aggregate(x = data$uniquecount, by = data[, c(2, 6, 7, 1)], FUN = max)
##Both condition columns are treated as categorical.
dat2[c("lengthcond", "stemcond")] <-
  lapply(dat2[c("lengthcond", "stemcond")], as.factor)

library(lme4)

##Random intercept for subject only.
stemmodel <- lmer(x ~ lengthcond + stemcond + (1 | subnum), data = dat2)
##Add a random intercept for stem.
stemmodel2 <- lmer(x ~ lengthcond + stemcond + (1 | subnum) + (1 | stem),
                   data = dat2)
##Allow lengthcond and stemcond to interact.
stemmodel3 <- lmer(x ~ lengthcond * stemcond + (1 | subnum) + (1 | stem),
                   data = dat2)

##Drop lengthcond, then compare against the model that includes it.
stemmodel4 <- lmer(x ~ stemcond + (1 | subnum) + (1 | stem), data = dat2)
anova(stemmodel4, stemmodel2)

summary(stemmodel)
summary(stemmodel2)
summary(stemmodel3)
summary(stemmodel4)

##Per-model tables, then comparisons between successive models.
anova(stemmodel)
anova(stemmodel2)
anova(stemmodel, stemmodel2)
anova(stemmodel2, stemmodel3)


##Diagnostic plots of the estimated random effects.
plot(ranef(stemmodel))
plot(ranef(stemmodel2))

par(mfrow=c(1,2))
##ranef()$subnum is a one-column data frame; qqnorm() needs the numeric column
##itself, so extract it rather than passing the data frame.
qqnorm(ranef(stemmodel2)$subnum[[1]])
##dotplot() and qqmath() are lattice generics and lattice is never attached in
##this script, so call them through the namespace.
##NOTE(review): lattice plots are grid-based and presumably ignore par(mfrow) - confirm layout.
lattice::dotplot(ranef(stemmodel2))
lattice::qqmath(ranef(stemmodel2))
plot(ranef(stemmodel2,whichel="subnum"))
plot(ranef(stemmodel2,whichel="stem"))