## Code for Book: Applied Statistical Analysis in R for Psychology and Human factors ## (c) Shane T. Mueller, Ph.D. ## Michigan Technological University ## shanem@mtu.edu ## ## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210 ## ## ## This file contains example software code relevant for Chapter 4 (Graphics). ## ## The code in this file may be used and shared freely with or without attribution. ## Data from Pursuit Rotor Task: ## Read in the data: dat2 <- read.csv("pooled-pursuitrotor.csv") dim(dat2) head(dat2) table(dat2$subnum) table(dat2$subnum,dat2$trial) ##select on a specific trial/participant tmp <- dat2[dat2$trial==1&dat2$subnum==12818,] dim(dat2) dim(tmp) ##see how tmp is about 90 lines, but dat2 is 43791. ##tmp now consists of a single time series, sampled at roughly equal times. Let's see how often and how regular: table(tmp$tdiff) #pdf("c4-prtiming.pdf",width=9,height=6) par(mfrow=c(1,3)) plot(tmp$timeElapsed, main= paste("Mean:", round(mean(tmp$tdiff),3))) plot(tmp$tdiff) hist(tmp$tdiff) abline(v=mean(tmp$tdiff)) #dev.off() ################### ## Plotting the mouse coordinates: pdf("c4-prtpos.pdf",width=9,height=6) matplot(cbind(tmp$targX,tmp$targY), type="l",bty="L",las=1,lty=2, xlab="Time step",ylab="Pixel location", ylim=c(0,1500)) legend(550,1500,c("X position","Y position"),lty=1:2,col=1:2) matplot(acbind(tmp$mouseX,tmp$mouseY),add=T, type="l",las=1,lty=1) dev.off() #pdf("c4-f2.pdf",width=9,height=9) xmid <- mean(tmp$targX) ## find the x center ymid <- mean(tmp$targY) ## find they center plot(tmp$targX-xmid,ymid-tmp$targY,pch=1,cex=.2, xlim=c(-300,300),ylim=c(-300,300), xlab="Horizontal pixel",ylab="Vertical pixel") #mousex, mousey specify the mouse coordinates: Plot them too: points(tmp$mouseX-xmid,ymid-tmp$mouseY,type="l",col="grey") ##two circles around. Let's connect the target to the mouse at each step. This is 568 line segments, ##but it can be done with one line: segments(tmp$targX-xmid,ymid-tmp$targY,tmp$mouseX-xmid, ymid-tmp$mouseY,col="grey") ##This gives a nice visualizeation. The ontarget will mark when the cursor was on-target. ##lets draw these as circles; I'll do this with a for loop, checking each time. ##we could probably do this in a on-liner as well. for(i in 1:nrow(tmp)) { if(tmp[i,]$ontarget) { points(tmp$mouseX[i]-xmid,ymid-tmp$mouseY[i],col="red",cex=.3) }else{ points(tmp$mouseX[i]-xmid,ymid-tmp$mouseY[i],col="grey",cex=.3) } } #dev.off() ##put it in a function: plottrial <- function(tmp,header="") { xmid <- mean(tmp$targX) ymid <- mean(tmp$targY) meanoffset <- round(mean(tmp$diff),2) plot(tmp$targX-xmid,ymid-tmp$targY,pch=1,cex=.2, xlim=c(-300,300),ylim=c(-300,300), # main=paste(header,"\n","mean offset =", meanoffset), xlab="",ylab="",axes=F) #mousex, mousey specify the mouse coordinates: Plot them to: points(tmp$mouseX-xmid,ymid-tmp$mouseY,type="l",col="grey") segments(tmp$targX-xmid,ymid-tmp$targY,tmp$mouseX-xmid,ymid-tmp$mouseY,col="grey") ##I'll use a faster points method than we did above: points(tmp$mouseX-xmid,ymid-tmp$mouseY, col=c("grey","red")[tmp$ontarget+1],cex=.3) } plottrial(tmp,"test") ##how to we loop through all the trials: ##this does not work: sub1 <- dat2[dat2$subnum==12887,] for(i in levels(sub1$trial)) { print(i) } levels(sub1$trial) names(table(sub1$trial)) levels(as.factor(sub1$trial)) ###########iterate on the trials as levels of a factor: for(i in levels(as.factor(sub1$trial))) { print(i) } #pdf("c4-f3.pdf",width=6,height=7) sub1 <- dat2[dat2$subnum==12887,] par(mfrow=c(2,2)) for(i in levels(as.factor(sub1$trial))) { tmp <- sub1[sub1$trial==i,] plottrial(tmp) } #dev.off() ##Let's iterate through each subject and each trial. ##this can be slow within RStudio, so let's write it to a pdf #we could also do this within an .rmd file for similar effect. tapply(dat2$ontarget,list(dat2$sub,dat2$trial),mean) pdf("pr-experiment.pdf",width=7,height=8) for(sub in levels(as.factor(dat2$subnum))) { par(mfrow=c(2,2)) tmp <- dat2[dat2$subnum==sub,] for(trial in levels(as.factor(tmp$trial))) { print(paste("plotting subject:", sub, " trial: ", trial)) tmp2 <- tmp[tmp$trial==trial,] plottrial(tmp2,header=paste("plotting subject:", sub, " trial: ", trial)) } } dev.off() ############################################ ##New plotting functions: #histogram #pdf("c4-hists.pdf",width=8,height=7) pr.agg <- aggregate(dat2$diff,list(dat2$subnum,dat2$trial),mean) par(mfrow=c(2,2)) hist(pr.agg$x,xlab="Mean deviation") hist(pr.agg$x,xlab= "Mean deviation", col="navy",breaks=20) hist(pr.agg$x,xlab= "Mean deviation", col="red",breaks=c(0:40)) ##plots density instead of counts: hist(pr.agg$x,xlab= "Mean deviation", col="darkgreen",breaks=c(0,10:20,25,30,40),freq=F ) #dev.off() ##-------------------------- ##exercise here ##-------------------------- View(InsectSprays) ## Notice this is in long data format. ## Not included in book boxplot(InsectSprays$count~InsectSprays$spray) #pdf("c4bp1.pdf",width=6,height=5) boxplot(count ~ spray, data = InsectSprays, col = "lightgray", xlab="Spray treatment", ylab="Mortality count") ##this adds a layer: boxplot(count ~ spray, data = InsectSprays, notch = TRUE, add = TRUE, col = "blue") #dev.off() View(OrchardSprays) #pdf("c4bp2.pdf",width=6,height=5) boxplot(decrease ~ treatment, data = OrchardSprays, col = "darkgreen",xlab="Treatment",ylab="Observed Decrease") ##overlay the raw data: points(OrchardSprays$treatment,OrchardSprays$decrease,cex=.5) #dev.off() ## This is not really helpful, because we have only on observation per design boxplot(decrease~treatment+rowpos,data=OrchardSprays) #pdf("c4-boxplots2.pdf",width=9,height=5) par(mfrow=c(1,2)) ##boxplot also works with a data.frame, plotting each column. mallsales<-data.frame(Maurices=1000*exp(runif(365)), JCPenney =2000*exp(runif(365)), Sears = 3000*exp(runif(365))) boxplot(mallsales) birthdate <- round(1950 + runif(100)*40) tenure <- (2010-birthdate - 20 ) - runif(100)* 10 boxplot(data.frame(birthdate,tenure),main="Age and years worked at ACME corporation") #dev.off() ############################### ## image() plots ##an image plots a bitmap of a matrix of values: #pdf("c4image1.pdf",width=5,height=5) layout <- tapply(OrchardSprays$decrease, list(OrchardSprays$rowpos,OrchardSprays$colpos),mean) layout image(1:8,1:8,(layout),xlab="Row",ylab="Column") text(OrchardSprays$rowpos,OrchardSprays$colpos,OrchardSprays$decrease) #dev.off() ##you can do this by hand: data <- matrix(c(8,1,1,1,1,1,8, 1,0,0,0,0,0,1, 1,0,3,3,3,0,1, 1,0,0,0,0,0,1, 1,0,0,4,0,0,1, 1,0,0,0,0,0,1, 1,0,5,0,5,0,1, 1,0,0,0,0,0,1, 8,1,1,1,1,1,8) ,7) #pdf("c4image2.pdf",width=5,height=5) image(data,xlab="",ylab="") #dev.off() ##plot correlation matrices: data(iris) cor(iris[,1:4]) ##this is a correlation among measures on a bunch of irises. doing image() helps ##to visualize groups that go together: iris.cor <- cor(t(iris[,1:4])) round(iris.cor,2) #pdf("c4-iriscor.pdf",width=9,height=9) image(iris.cor, main="Correlation map of iris data") #dev.off() ##2D histogram: #pdf("c4image3.pdf",width=9,height=6) x <- rnorm(500,4,1.5) y <- rnorm(500,2,1.5) + x*.2 par(mfrow=c(1,2)) plot(x,y) ##Create a 2-D histogram with 1-wide cell grids. txy <- table(ceiling(x),ceiling(y)) txy image(as.numeric(rownames(txy))-.5, as.numeric(colnames(txy))-.5,txy,xlab="X value",ylab="Y value") ##we can even overplot the original data: points(x,y,col="yellow",pch=".",cex=3) #dev.off() ##--------------------------- ## Exercise here ##--------------------------- ################################ ## Bar plots #pdf("c4bar1.pdf",width=9,height=6) par(mfrow=c(1,2)) barplot(1:10) plot(1:10,type="h",lwd=12) #dev.off() #pdf("c4bar2.pdf",width=9,height=9) par(mfrow=c(2,2)) barplot(c(1,11,4,5),names=c("one","three","five","eight")) barplot(c(1,11,4),names=c("one","three","five"),las=1) #barplot(c(1,11,4),names=c("one","three","five"),las=2) xs <- barplot(c(1,11,4),names=c("one","three","five"), col=c("red","green","black"),las=2, lwd=3) text(xs,c(9,9,9),c("A","B","C")) barplot(c(1,11,4),names=c("one","three","five"), col=c("red","green","black"),las=1, lwd=3,horiz=T) #dev.off() #pdf("c4bar3.pdf",width=9,height=4) ##A problem with barplot axes par(mfrow=c(1,3)) barplot(-5:5,names=LETTERS[1:11]) barplot(-5:-10,name=LETTERS[5:10]) barplot(-5:-10,name=LETTERS[5:10],yaxt="n") axis(2,0:-10,0:10) #dev.off() ##there, the values are very close together so you can't see the differences: #pdf("c4bar4.pdf",width=9,height=4) par(mfrow=c(1,3)) barplot(1000:1010) barplot(1000:1010,ylim=c(1000,1010))##ugh barplot(1000:1010,ylim=c(999,1010),names=1000:1010, col=c("orange","black"),xpd=F)## chops off, but might be OK #dev.off() ###barplot can plot multiple series set.seed(100) data <- data.frame(q1=sample(letters[1:5],100,replace=T, prob=c(1,1,3,2,5)), q2=sample(letters[1:5],100,replace=T,prob=c(3,1,3,2,1)), q3=sample(letters[1:5],100,replace=T,prob=c(1,5,1,2,1)), q4=sample(letters[1:5],100,replace=T,prob=c(1,2,8,2,2)), q5=sample(letters[1:5],100,replace=T,prob=c(1,5,2,4,3))) datatable<-apply(data,2,table) datatable library(gplots)##needed for colorpanel #pdf("c4stackedbar.pdf",width=9,height=5) par(mfrow=c(1,2),mar=c(5,8,1,2)) ##change margins barplot(datatable,names=paste("Question",1:5),col=1:5,horiz=T,las=1, xlab="Proportion of each response", cex.names=.5) par(mar=c(5,10,1,2)) ##change margins ##uses gplots to create a color sequence: barplot(datatable,names=paste("Question",1:5), col=colorpanel(5,"grey","navy")[1:5],horiz=T,las=1, xlab="Proportion of each response") #dev.off() ### Stacked bar with uneven numbers ### juvy <- read.table(text="State White Black Hispanic Native Asian Other Alabama 300 507 27 0 3 9 Alaska 78 30 3 75 3 21 Arizona 237 114 255 54 9 51 Arkansas 198 315 33 0 6 3 California 900 1863 3729 42 138 54",header=T) cols=c("darkgreen","bisque","navy","red","violet","orange") juvymat <- as.matrix(juvy[,-1]) rownames(juvymat) <- juvy[,1] ##normalize within each state. juvymat.normed <- juvymat/rowSums(juvymat)*100 #pdf("c4-juvy.pdf",width=9,height=5) par(mfrow=c(1,2),mar=c(8,5,3,0)) barplot(t(juvymat),las=3,legend=T, args.legend=list(x=4,bty="n"), ylab="Number of residents", col=cols, main="Juvenile Residential Placements") barplot(t(juvymat.normed),las=3,legend=F, ylab="Percentage of residents", col=cols, main="Juvenile Residential Placements") #dev.off() set.seed(100) data2 <- data.frame(q1=sample(letters[1:10],100,replace=T), q2=sample(letters[1:10],100,replace=T), q3=sample(letters[1:10],100,replace=T), q4=sample(letters[1:10],100,replace=T), q5=sample(letters[1:10],100,replace=T)) datatable2<-apply(data2,2,table) ###barplot can plot multiple series ##plot by question or by answer #pdf("c4bar5.pdf",width=9,height=6) par(mfrow=c(2,2),mar=c(3,4,1,1)) barplot(datatable,col=1:5,beside=T) barplot(t(datatable),beside=T,col=1:5) barplot(datatable,col=1:5,beside=F) par(mar=c(5,10,1,2)) barplot(datatable,names=paste("Question",1:5),col=1:5,horiz=T,las=1, xlab="Proportion of each response") #dev.off() ##Plot stacked--after all, each answer must add up to 100% ##------------------------- ## barplot exercise here ##-------------------------- ##################################################################### ##################################################################### #Exercises: ## Exercise: pursuit rotor plot # Remove the x and y labels from the plot by setting xlab and ylab # Remove the x and y axes using xaxt/yaxt argument # Change the box type using the bty argument # Set the main title after you create the plot so that it tells which trial ## To find out about the parameter settings for plots, try help(par) ## plottrial <- function(tmp,header="") { xmid <- mean(tmp$targX) ymid <- mean(tmp$targY) meanoffset <- round(mean(tmp$diff),2) plot(tmp$targX-xmid,ymid-tmp$targY,pch=1,cex=.2, xlim=c(-300,300),ylim=c(-300,300), main=paste(header,"\n","mean offset =", meanoffset), xlab="",ylab="", xaxt="n",yaxt="n", bty="n") #mousex, mousey specify the mouse coordinates: Plot them to: points(tmp$mouseX-xmid,ymid-tmp$mouseY,type="l",col="grey") segments(tmp$targX-xmid,ymid-tmp$targY,tmp$mouseX-xmid,ymid-tmp$mouseY,col="grey") ##I'll use a faster points method than we did above: points(tmp$mouseX-xmid,ymid-tmp$mouseY,col=c("grey","red")[tmp$ontarget+1],cex=.3) } dat2 <- read.csv("pooled-pursuitrotor.csv") sub1 <- dat2[dat2$subnum==12887,] par(mfrow=c(2,2)) for(i in levels(as.factor(sub1$trial))) { trial <- i tmp <- sub1[sub1$trial==i,] plottrial(tmp,header=paste("Trial",i)) } #################################################################### ###Exercise: ##Error values like diff are often skewed, because they cannot be smaller than zero, they ##are difficult to make smaller once they are small, but they can get very large. Transform ##diff to its natural logarithm using log(), then make two histograms side-by-side # ##(using par(mfrow=c(1,2))) that have a gold filled color, one with raw diff scores and one ##with the transformed diff sores. Make the histogram filled with gold color, and specify ##the breaks argument to most clearly see the distributions. par(mfrow=c(1,2)) hist(dat2$diff,breaks=50,col="gold") hist(log(dat2$diff),breaks=50,col="gold") ###################################################################### ## Exercise: ## Add gridlines to an image plot using the \texttt{segments} command. #let's create an interesting grid. tmpx <- rnorm(10000,mean=8,sd=1.7) tmpy <- tmpx/2 + 3 + rnorm(10000,mean=3,sd=.7) grid <- table(round(tmpx),round(tmpy)) image(1:nrow(grid),1:ncol(grid),grid,col=grey(1:100/100)) segments(1:20,0,1:20,10,col="red") segments(0,1:20,20,1:20,col="red") ##this doesn't quite look right. The gridlines are down the center of each cell. Adjust by.5 image(1:nrow(grid),1:ncol(grid),grid,col=grey(1:100/100)) segments(1:20+.5,0,1:20+.5,15,col="red") segments(0,1:20+.5,20,1:20+.5,col="red") ################################################### ## Exercise: ## For the OrchardSprays data set, compute using tapply the average effect of each treatment, each row, and each column position. ## Plot average decrease by each of these variables, on a single 1x3 plot. ## Use tapply to compute the average effect of each treatment x colpos and treatment x rowpos. ## Make two stacked barplots with treatment as the main axis, and either row or column position as the division within each bar (use the matrix just created) ## Make two stacked barplots with row or column position as the main axis, and treatment as the division within each bar. Use the same matrix you just created. ## Look across the 7 plots you created and describe the relative importance of the spray, versus the row and column position in the orchard grid. par(mfrow=c(1,3)) barplot(tapply(OrchardSprays$decrease, list(OrchardSprays$treatment),mean), main="Decrease by treatment", col=1:8) barplot(tapply(OrchardSprays$decrease, list(OrchardSprays$rowpos),mean), main="Decrease by Row position", col=1:8) barplot(tapply(OrchardSprays$decrease, list(OrchardSprays$colpos),mean), main="Decrease by Column position", col=1:8) print(tapply(OrchardSprays$decrease, list(OrchardSprays$rowpos,OrchardSprays$treatment),mean)) tab2r <- tapply(OrchardSprays$decrease, list(OrchardSprays$rowpos,OrchardSprays$treatment),mean) tab2c <- tapply(OrchardSprays$decrease, list(OrchardSprays$colpos,OrchardSprays$treatment),mean) par(mfrow=c(2,2)) ##first by treatment, then by row/column barplot(tab2r,col=1:8,main="Treatment") barplot(tab2c,col=1:8,main="Treatment") ##first by row/column, then by treatment: barplot(t(tab2r),col=1:8,main="Row") barplot(t(tab2c),col=1:8,main="Column")