################################################################# ## Code for Book: Applied Statistical Analysis in R ## A Graduate course for Psychology, Human factors, and Data Science ## (c) 2018 Shane T. Mueller, Ph.D. ## Michigan Technological University ## shanem@mtu.edu ## ## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210 ## ## ## This file contains example software code relevant for Chapter 20: ## Within-subject repeated measures ANOVA and randomized factors ## ## ## ## ##The experiment works like this: people visit a web site we have created, and it contains different types of advertisements we have identified. ##For each person, we get end up getting a server log indicating whether they clicked on or hovered their mouse over each type of advertisement. We want to know whether ##the social ads (ones sensitive to their on-line identity) were better than the non-social advertisements. ##Let's suppose there is actually no difference. Suppose that different advertisements we sample from actually differ in their ''true'' click-through ##rate, but not between social versus normal ads. Simulate the population of click-through rates like this: ads.social <- 0:100/100 ads.normal <- 0:100/100 ads.animated <- 150:250/250 #Now, suppose that in an experiment, we have selected four advertisements of each type, and created a web page including all eight #advertisements. In the study, each of 50 people either does or does not follow each link. Ignoring for the moment #within-subject effects, we have a data set like this: set.seed(100) social.eff <- sample(ads.social,4) normal.eff <- sample(ads.normal,4) animated.eff <- sample(ads.animated,4) ##100 of each ad type clicks <- c(runif(50*4) contrasts(type) #[,1] [,2] #A 1 0 #B 0 1 #C -1 -1 ##contrasts are set up so that contrasts <- rbind( "Grand mean/intercept"=c(1,0,0), "A alone"= c(1,1,0), "B alone"= c(1,0,1), "C alone"= c(1,-1,-1), "A to B" = c(0,1,-1), "A to C" = c(0,2,1), "B to C" = c(0,1,2)) summary(glht(am2,contrasts)) summary(glht(lmer2,contrasts)) tukey <- glht(am2, linfct=mcp(type="Tukey")) print(tukey) summary(tukey) contrasts <- rbind( "A to B" = c(0,1,-1), "A to C" = c(0,2,1), "B to C" = c(0,1,2)) summary(glht(lmer2,contrasts)) am4 <- lme(clicks~type,random=list((~1|ad),(~1|sub)),data=ads) ##both lmer4 <- lmer(clicks~type + (1|ad) + (1|sub),data=ads) summary(am4) summary(lmer4) summary(glht(am4,contrasts)) summary(glht(lmer4,contrasts)) summary(glht(lmer4, linfct=mcp(type="Tukey"))) am4b <- lme(clicks~0+type,random=list((~1|ad),(~1|sub)),data=ads) ##both lmer4b <- lmer(clicks~0+type + (1|ad) + (1|sub),data=ads) ##these two are essentially the same: summary(am4b) library(multcomp) summary(glht(am5,contrasts)) summary(glht(am5,linfct=mcp(type="Tukey"))) glht(lmer4,contrasts) summary(glht(lmer4,contrasts)) summary(glht(lmer4b, linfct=mcp(type="Tukey"))) #We might be concerned that we have really just allowed advertisement and subject to both be random effects, #but we have not accounted for the fact that advertisement is nested within type. the right thing to do depends on the design. #because we had four products with equivalent advertisements, am4/lmer4 is the right model. But if we had simply sampled four of each type # and they were not the same, we have advertisement am5 <- lme(clicks~type,random=list(~1|ad2,~type|ad2,~1|sub),data=ads) ##both, ad nested within type lmer5 <- lmer(clicks~ type + (1|ad2) + (type|ad2) + (1|sub),data=ads) #Now, only a to c is signifiantly different. This makes sense because in the first case, we have repeated measures of the product we are advertising, ##but in this case we do not. we would need to sample many more products (more than 4) to get a good estimate of whether ad-type had an effect. summary(am5) summary(lmer5) summary(glht(am5,contrasts)) summary(glht(lmer5,contrasts)) summary(glht(lmer5, linfct=mcp(type="Tukey"))) ################## ##Chick weight example: ##outcome is log(wt), which we previously found was somewhat linear ##Fixed effects: time, diet ## random effects: ## 1. chick is randomly sampled ## 2. Chick*time ## (possibly) chick--intercept of chick; baseline value differs cw <- ChickWeight cw$logwt <- log(ChickWeight$weight) contrasts(cw$Diet) <- contr.poly(levels(cw$Diet)) ##this allows each chick to have its own intercept, but a common slope for each diet and a mean intercept for each diet. lmer.cw0 <- lmer(logwt~Time*Diet + (1|Chick), data=cw ) summary(lmer.cw0) lmer.cw1 <- lmer(logwt~Time+Diet + (1|Chick),data=cw) summary(lmer.cw1) anova(lmer.cw0,lmer.cw1) coef(lmer.cw0) Anova(lmer.cw0) #This allows each chick to have its own intercept and slope: lmer.cw2 <- lmer(logwt~Time*Diet + (1+Time|Chick),data=cw) lmer.cw2b <- lmer(logwt~Time*Diet + (Time|Chick)+ (1|Chick) ,data=cw) summary(glht(lmer.cw2, linfct=mcp(Diet="Tukey"))) summary(glht(lmer.cw2b, linfct=mcp(Diet="Tukey"))) summary(lmer.cw2) pr.cw <- profile(lmer.cw2) confint(pr.cw) contrasts.cw <- rbind("Diet 3 vs diet 2 by time"=c(0,0,0,0, 0,1,-1,0)) summary(glht(lmer.cw2,contrasts.cw)) ##make clicks numeric ads$clicks1 <- ads$clicks+0 library(ez) ezm <- ezMixed(data=ads,dv=.(clicks1), random=.(sub,ad), fixed=.(type)) print(ezm) mz <- (ezm$models$type$unrestricted) summary(mz) summary(glht(mz, linfct=mcp(type="Tukey"))) ######################################################## ## Additional example: data <- read.csv("pooled-stemcomp.csv") data <- data[data$lengthcond>0,] ##First, aggregate by subject, stem, stemcond, dat2 <- aggregate(data$uniquecount,data[,c(2,6,7,1)],max) dat2$lengthcond <- as.factor(dat2$lengthcond) dat2$stemcond <- as.factor(dat2$stemcond) library(lme4) stemmodel<-lmer(x~lengthcond + stemcond + (1|subnum),data=dat2) stemmodel2<-lmer(x~lengthcond + stemcond + (1|subnum)+(1|stem),data=dat2) stemmodel3<-lmer(x~lengthcond * stemcond + (1|subnum)+(1|stem),data=dat2) stemmodel4<-lmer(x~ stemcond + (1|subnum)+(1|stem),data=dat2) anova(stemmodel4,stemmodel2) summary(stemmodel) summary(stemmodel2) summary(stemmodel3) summary(stemmodel4) anova(stemmodel) anova(stemmodel2) anova(stemmodel,stemmodel2) anova(stemmodel2,stemmodel3) plot(ranef(stemmodel)) plot(ranef(stemmodel2)) par(mfrow=c(1,2)) qqnorm(ranef(stemmodel2)$subnum) dotplot(ranef(stemmodel2)) qqmath(ranef(stemmodel2)) plot(ranef(stemmodel2,whichel="subnum")) plot(ranef(stemmodel2,whichel="stem"))