################################################################# ## Code for Book: Applied Statistical Analysis in R ## A Graduate course for Psychology, Human factors, and Data Science ## (c) 2018-2020 Shane T. Mueller, Ph.D. ## Michigan Technological University ## shanem@mtu.edu ## ## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210 ## ## ## This file contains example software code relevant for Chapter 20: ## Within-subject repeated measures ANOVA and randomized factors ## ## ## ## ##Let's suppose that we run an experiment with 20 participants, and measure response times ## for three conditions with five observations per condition. ## We have 15 observations per subject. set.seed(101) x1 <- factor(rep(c("a","b","c"),20*5)) subj <- factor(rep(1:20,each=3*5)) rt <- 200 + runif(20)[subj]*100+ c(-50,0,100)[x1] + rnorm(300) * 30 #pdf("c20-box1.pdf",width=8,height=5) boxplot(rt~x1) points(as.numeric(x1),rt,cex=2,col="grey") ##Notice that if we plot the actual subjects, there is a noticeable consistent variation. rttab <- tapply(rt,list(x1,subj),mean) matplot(rttab,pch=16,col="black",type="o",add=T,lty=3) #dev.off() #* the lines tend not to cross very much. #*Every subject shows a very similar effect aov1 <- aov(rt~x1) summary(aov1) #A simple way to do this would be to essentially fit a different intercept for each person. #We coul do this by adding the subject code as a predictor, and if we remove the intercept, #we won't even have to worry about the effects coding issues: library(car) aov2 <- aov(rt~0+subj+x1) summary(aov2) Anova(aov2) ##Effect sizes: library(sjstats) library(effectsize) eta_squared(aov2) omega_squared(aov2) #pdf("c20-unmeaned.pdf",width=8,height=5) boxplot(rt~x1) matplot(t(mean(rttab)+ (t(rttab)-(colMeans(rttab)))),pch=16,type="o",col="red",add=T) #dev.off() ##aggregate before making the model: dat.agg<-aggregate(rt,list(sub=subj,x1=x1),mean) aov3 <- aov(x~0+sub+x1,dat=dat.agg) summary(aov3) aov4 <- aov(rt~x1*subj) summary(aov4) Anova(aov4) anova(aov(rt~x1),aov(rt~x1:subj)) #Error strata in the Analysis of Variance Table aov5 <- aov(rt~x1 + Error(subj / x1)) summary(aov5) summary(aov5) #Note: Anova(aov5) ##won't work ##Using ez library library(ez) data <- data.frame(subj,x1,rt) ez1 <- ezANOVA(data=data,dv=rt,within=x1,wid=subj, detailed=T) ez1.long <- ezANOVA(data=data,dv=rt,within=x1,wid=subj, detailed=T,return_aov=T) print(ez1) print(ez1.long) #Notice that ez1 gives us the same result as the aov with Error() term. ##Example: Baron's Hays data data1<-c( 49,47,46,47,48,47,41,46,43,47,46,45, 48,46,47,45,49,44,44,45,42,45,45,40, 49,46,47,45,49,45,41,43,44,46,45,40, 45,43,44,45,48,46,40,45,40,45,47,40) Hays.df <- data.frame(rt = data1, subj = factor(rep(paste("subj", 1:12, sep=""), 4)), shape = factor(rep(rep(c("shape1", "shape2"), c(12, 12)), 2)), color = factor(rep(c("color1", "color2"), c(24, 24)))) par(mfrow=c(1,3)) boxplot(rt~shape,data=Hays.df) boxplot(rt~color,data=Hays.df,col=c("grey40","gold")) boxplot(rt~shape*color,data=Hays.df) matplot(tapply(Hays.df$rt,Hays.df[,3:4],mean),type="b") # Do a full aov model including the main effects's interactions with subject, and calculate F values by hand by comparing lines of the table # Build a model using the appropriate Error() term, and verify you can obtain the same F tests. ################################################ ################################################ ## Repeated measurement ## set.seed(500) x1 <- factor(rep(c("a","b","c"),20*5)) subj <- factor(rep(1:20,each=3*5)) rt <- 200 + runif(20)[subj]*100+ c(-50,0,100)[x1] + rnorm(300) * 30 boxplot(rt~x1) points(as.numeric(x1),rt,cex=2,col="grey") set.seed(100) data2 <- data.frame(subj=rep(subj,each=5), cond = rep(x1,each=5), rt = rep(rt,each=5) + rnorm(length(rt)*5)) data2[1:10,] #Here, we have just observed each person in each condition five times In terms of the analysis we do, any repetition #within the subject x condition table should not increase the power of the test (although it may increase the #accuracy of our estimates.) In a sense, the test we do should be roughly the same as what we get if we'd simply #aggregate down to one observation per person. Let's see if we can do this. data2b <- aggregate(data2$rt,list(subj=data2$subj,cond=data2$cond),mean) data2b$rt <- data2b$x ##compare the aggregated vs non-aggergated versions: aov2.a <- aov(rt~cond+Error(subj/cond),data=data2) aov2.b <- aov(rt~cond+Error(subj/cond),data=data2b) summary(aov2.a) summary(aov2.b) ##post-hoc comparisons of means: library(multcomp) library(emmeans) emmeans(aov2.a,spec="cond") ###EZ library: library(ez) ez2.a1 <- ezANOVA(data=data2,dv=rt,within=cond,wid=subj) ez2.a2 <- ezANOVA(data=data2,dv=rt,within_full=cond,wid=subj) ##This doesn't work ez2.b <- ezANOVA(data=data2b,dv=rt,within=cond,wid=subj) ez2.a1 ez2.b ##Notice the effect of condition is identical across all four tests. library(nlme) lme.a <- lme(rt~cond,data=data2,random=~1|subj) gg <- glht(lme.a,linfct=mcp(cond="Tukey")) print(gg) summary(gg) ################################################ ################################################ ## Mixed designs: between and within variables. ## data3 <- data.frame(subj=as.factor(c(subj, as.numeric(subj)+20)), group = rep(c("Control","Experimental"),each=300), cond = rep(x1,2), rt = rep(rt,2) + rnorm(600)) head(data3) #defining the model is as follows: because group is a between-subject factor, it #is not nested within the Error() notation, but cond is because it is a within-subject #variable. model3<- aov(rt~ group + cond + Error(subj/(cond)),data=data3) summary(model3) #interpreting model3 is tricky, because the effects appear in multiple error strata. To find the right one, it is sometimes #helpful to take a look at the degrees of freedom and be sure they are sensible. The rule of thumb is that #if we are trying to generalize an effect of a condition across people, so we should look at the error #related to the condition x person interaction. This is the second strata, and we'd report F(2,78)=1060, p<.001. Note that there were #40 subjects and 3 conditions; (40-1)*(3-1)=78. #To examine the group variable, the correct statistic is nested within sthe subj error strata F(1,38)=0. #This can be confusing. A way of figuring out the right test to do is to consider the one-way ANOVA you'd #perform on just the factor you care about, aggregating over subject. The degrees of freedom should be the same, although #the test statistic could differ. ##test the effect of between-subject group data3.agg1 <- aggregate(data3$rt,list(group=data3$group,subj=data3$subj),mean) data3.agg1 summary(aov(x~group,data=data3.agg1)) ##aggregate over condition: data3.agg2 <- aggregate(data3$rt,list(cond=data3$cond,subj=data3$subj),mean) summary(aov(x~cond+Error(subj/cond),data=data3.agg2)) #Just as above, here the F test is F(2, 78)=2619. This is exactly the same test in this case. If you have a non-orthogonal design, it probably could differ. #How about the ezANOVA? library(ez) ez3.a <- ezANOVA(data=data3,dv=rt,within=cond, wid=subj,between=group,detailed=T,return_aov=T) ez3.b <- ezANOVA(data=data3,dv=rt,within_full=cond,wid=subj,between=group,detailed=T) ##This collapses too much and is wrong print(ez3.b) print(ez3.a) #Notice that we get the same values for group and cond. summary(model3) #################### ### Testing and correcting for sphericity. ##see textbook ##################### ##post-hoc tests in repeated measures ANOVA #See textbook for discussion ########################################################## ## additional examples ########################################################## ########################################################## ##>Example 2 from Baron data1<-c( 49,47,46,47,48,47,41,46,43,47,46,45, 48,46,47,45,49,44,44,45,42,45,45,40, 49,46,47,45,49,45,41,43,44,46,45,40, 45,43,44,45,48,46,40,45,40,45,47,40) # across subjects then conditions Hays.df <- data.frame(rt = data1, subj = factor(rep(paste("subj", 1:12, sep=""), 4)), shape = factor(rep(rep(c("shape1", "shape2"), c(12, 12)), 2)), color = factor(rep(c("color1", "color2"), c(24, 24)))) #For this data, we are interested in knowing if shape and color of stimuli affect RT. par(mfrow=c(1,3)) boxplot(rt~shape,data=Hays.df) boxplot(rt~color,data=Hays.df,col=c("grey40","gold")) boxplot(rt~shape*color,data=Hays.df) matplot(tapply(Hays.df$rt,Hays.df[,3:4],mean),type="b") ##incorrect: summary(aov(rt~ shape * color, data=Hays.df)) anova.hays <- aov(rt~color*shape +Error(subj/(color*shape)), data=Hays.df) summary(anova.hays) ezANOVA(Hays.df,,dv=rt,within=.(shape,color),wid=subj,detailed=T) # Exercise: compute aov() and ezANOVA models for each of Baron's examples: # Baron example 2 (Maxwell & Delaney,) #Two within-subject variables: MD.rt <- matrix(c( 420, 420, 480, 480, 600, 780, 420, 480, 480, 360, 480, 600, 480, 480, 540, 660, 780, 780, 420, 540,540, 480, 780, 900, 540, 660,540, 480, 660, 720, 360, 420, 360, 360, 480, 540, 480, 480,600, 540, 720, 840, 480, 600, 660, 540, 720, 900, 540, 600,540, 480, 720, 780, 480, 420,540, 540, 660, 780), ncol = 6,byrow = T) # byrow=T so the matrix’s layout is exactly like this ##More examples: MD.df <- data.frame( rt = as.vector(MD.rt), subj = factor(rep(paste("s", 1:10, sep=""), 6)), deg = factor(rep(rep(c(0,4,8), c(10, 10, 10)), 2)), noise = factor(rep(c("no.noise", "noise"), c(30, 30)))) #Here, rt is the dependent variable, subj is a subject code, deg is visual tilt (0,4, 8 degrees), and noise is whether #pixel noise was involved. We'd like to know whether deg and noise have effects. Fit models treating subj as a randomized factor and #deg and noise and within-subject variables, using both aov and ezANOVA. #Example 3: subj <- gl(10, 32, 320) # 10 subjects, each tested 32 times, total length 320 a <- gl(2, 16, 320) # first 16 trials with a1 then next 16 with a2 b <- gl(2, 8, 320) # first 8 triasl with b1, then next 8 with b2, etc. c <- gl(2, 4, 320) x <- rnorm(320) d1 <- data.frame(subj, a, b, c, x) d2 <- aggregate(x, list(a = a, b = b, c = c, subj = subj), mean) #Test the a x b x c interaction, using etiher Error(subj/(a*b*c) or Error(subj/(a+b+c))), #on both d1 (raw data) and d2 (aggregated by subject over repetetitions) summary(a1 <- aov(x~ a * b * c + Error(subj/(a*b*c)), d2)) summary(a2 <- aov(x~ a * b * c + Error(subj/(a+b+c)), d2)) summary(a3 <- aov(x~ a * b * c + Error(subj/(a*b*c)), d1)) summary(a3 <- aov(x~ a * b * c + Error(subj/(a+b+c)), d1)) #Try to test the same models using ezANOVA. #Example 4: #on random effect (subject) and one fixed effect (drug) data <- c( 30,14,24,38,26, 28,18,20,34,28, 16,10,18,20,14, 34,22,30,44,30) Stv.df <- data.frame(rt=data, subj = factor(rep(paste("subj", 1:5, sep=""), 4)), drug = factor(rep(paste("drug", 1:4, sep=""), c(5,5,5,5)))) #Example 5: Ela.mat <-matrix(c( 19,22,28,16,26,22, 11,19,30,12,18,28, 20,24,24,24,22,29, 21,25,25,15,10,26, 18,24,29,19,26,28, 17,23,28,15,23,22, 20,23,23,26,21,28, 14,20,29,25,29,29, 16,20,24,30,34,36, 26,26,26,24,30,32, 22,27,23,33,36,45, 16,18,29,27,26,34, 19,21,20,22,22,21, 20,25,25,29,29,33, 21,22,23,27,26,35, 17,20,22,23,26,28), nrow = 16, byrow = T) Ela.mul <- cbind.data.frame(subj=1:16, gp=factor(rep(1:2,rep(8,2))), Ela.mat) dimnames(Ela.mul)[[2]] <- c("subj","gp","d11","d12","d13","d21","d22","d23") # d12 = drug 1, dose 2 Ela.uni <- data.frame(effect = as.vector(Ela.mat), subj = factor(paste("s", rep(1:16, 6), sep="")), gp = factor(paste("gp", rep(rep(c(1, 2), c(8,8)), 6), sep="")), drug = factor(paste("dr", rep(c(1, 2), c(48, 48)), sep="")), dose=factor(paste("do", rep(rep(c(1,2,3), rep(16, 3)), 2), sep="")), row.names = NULL) #This has one between (grp) and two within-subject (drug,dose) variables #data set: Ela.uni. Make models with subject as a random factor.