## Code for Book: Applied Statistical Analysis in R for Psychology and Human factors ## (c) 2018 Shane T. Mueller, Ph.D. ## Michigan Technological University ## shanem@mtu.edu ## ## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210 ## ## ## This file contains example software code relevant for Chapter 6. ## ##basic color representation: rgb(.1,.1,.9) #pdf("c6col1.pdf",width=9,height=5) dat <- c(10,3,5,9,12,6,8,3,5,9) par(mfrow=c(1,2)) barplot(dat,col=1:10,main="Colors 1:10") barplot(dat,col=1:10+1,main="Colors 2:11") #dev.off() colors() #pdf("c6col2.pdf",width=9,height=5) seacolors <- colors()[substr(colors(),1,3)=="sea"] seacolors par(mfrow=c(1,3)) barplot(11:20,col=seacolors) ##using the palette: palette(seacolors[order(runif(10))]) matplot(matrix(runif(200),40,5),type="b",lwd=2,pch=16) barplot(11:20,col=1:10) #dev.off() ##Built-in color palettes #dat <- c(10,3,5,9,12,6,8,3,5,9) #pdf( "c6col3.pdf",width=8,height=7) par(mfrow=c(2,2)) barplot(dat,col=heat.colors(10),main="Heat colors") barplot(dat,col=terrain.colors(10),main="Terrain colors") barplot(dat,col=topo.colors(10), main ="Topo colors") barplot(dat,col=cm.colors(10), main = "Cyan to magenta") #dev.off() #These are usually not what you want for barplots, but ##they can be useful in other contexts: x <- c(3,2.5,0,12,15, 18, 22,33,26,23,18,17,8,9,5,11) y <- c(14, 8,2,15, 3, 1, 6, 9, 12,11,12,7,8,6,1,5) plot(x,y,type="b",pch=16) ##there is a start and an end--let's adjust the color: #pdf("c6col4.pdf",width=9,height=4) par(mfrow=c(1,3)) plot(x,y,type="b",pch=16, col = cm.colors(16),cex=3) text(x,y,1:16,cex=.8,col="black") plot(x,y,type="b",pch=16, col = terrain.colors(16),cex=3) text(x,y,1:16,cex=.8,col="white") ##Greyscale plot(x,y,type="l",cex=2) points(x,y,type="p",cex=3, col = grey(1:16/16),pch=16) points(x,y,type="p",cex=3,col="black") text(x,y,1:16,cex=.8,col= rep(c("white","black"),each=8)) ##notice reverse colors #dev.off() ## gplots color gradients: library(gplots) #pdf("c6col5.pdf",width=9,height=4) plot(1:50,rep(10,50),pch=15,cex=2,xlim=c(-20,50),ylim=c(0,12),col=redgreen(50), xaxt="n",yaxt="n",xlab="",ylab="",bty="n") text(-12,10,"redgreen:") points(1:50,rep(9,50),pch=15,cex=2,col=greenred(50)) text(-12,9,"greenred:") points(1:50,rep(8,50),pch=15,cex=2,col=bluered(50)) text(-12,8,"bluered:") points(1:50,rep(7,50),pch=15,cex=2,col=redblue(50)) text(-12,7,"redblue:") points(1:50,rep(6,50),pch=15,cex=2,col=colorpanel(50,"orange","red")) text(-12,6,"colorpanel: orange red") points(1:50,rep(5,50),pch=15,cex=2,col=colorpanel(50,"black","green")) text(-12,5,"colorpanel: black green") points(1:50,rep(4,50),pch=15,cex=2,col=colorpanel(50,"blue","green")) text(-12,4,"colorpanel: blue green") points(1:50,rep(3,50),pch=15,cex=2,col=colorpanel(50,"pink","red")) text(-12,3,"colorpanel: pink red") points(1:50,rep(2,50),pch=15,cex=2,col=colorpanel(50,"yellow","orange","brown")) text(-12,2,"colorpanel: yel-or-brn") points(1:50,rep(1,50),pch=15,cex=2,col=rich.colors(50)) text(-12,1,"richcolors:") points(1:50,rep(0,50),pch=15,cex=2,col=rich.colors(50,palette="blues")) text(-12,0,"richcolors: blues") #dev.off() ## a 2-d color scheme: #pdf("c6col6.pdf",width=6,height=6) n <- 1000 x<-rnorm(n)*10 + 15 y <- rnorm(n)*10+ 15 ##Color by distance from (3,2); use alpha channels so we can see overlap dist <- sqrt((x-5)^2+(y-3)^2) plot(x,y,col=rev(rich.colors(50,alpha=.5))[1+floor(dist)],pch=18,cex=3) #dev.off() ######################################################## ## Colorbrewer palettes ## The Martha Stewart of Statistical Computing ## #install.packages("RColorBrewer") library(RColorBrewer) ##Check out a few palettes available: display.brewer.all() display.brewer.all(type="seq") display.brewer.all(type="qual") display.brewer.all(type="div") display.brewer.pal(10,"Blues") display.brewer.pal(6,"Greens") display.brewer.pal(12,"BrBG") display.brewer.pal(7,"Accent") #pdf("c6col7.pdf",width=9,height=4) par(mfrow=c(1,3)) palette <- brewer.pal(11,"Spectral") palette(palette) barplot(runif(10),col=1:10,main="Spectral colors") barplot(runif(22),col=brewer.pal(11,"BrBG"),main="Brown-Green palette") ##or, something more meaningful dat <- runif(25) pal <- rev(brewer.pal(10,"YlOrRd")) col <- floor(dat*10)+1 barplot(dat,col=pal[col],main="Color depends on value") #dev.off() ##make a basic topographic map: #pdf("c6col8.pdf",width=9,height=6) x <- rnorm(2500)*3 y <- x + sqrt(rnorm(2500)^2) *8 tab <- table(round(x),round(y)) par(mfrow=c(2,3)) image(tab) image(tab,col=brewer.pal(11,"Oranges")) image(tab,col=brewer.pal(11,"Blues"),legend=T) #] "#FFF5EB" "#FEE6CE" "#FDD0A2" "#FDAE6B" "#FD8D3C" "#F16913" "#D94801" "#A63603" # "#7F2704" ## Use some built-in color schemes: ## image(tab,col=topo.colors(50)) image(tab,col=heat.colors(50)) image(tab,col=terrain.colors(50)) #dev.off() ######################################################### ## ColorRamps is another package that builds color palettes: #install.packages("colorRamps") library("colorRamps") #pdf("c6col9.pdf",width=9,height=5) par(mfrow=c(1,3)) image(tab,col=blue2green(50)) image(tab,col=blue2yellow(50)) barplot(runif(20),col=primary.colors(20)) #dev.off() ############################################ ## Building colors from RGB space ## ## What if we are concerned about colorblind readers? #pdf("c6-colorblind.pdf",width=9,height=5) par(mfrow=c(1,2)) colorblind <- c(rgb(0,0,0,maxColorValue=255), rgb(230,159,0,maxColorValue=255), rgb(86,180,233,maxColorValue=255), rgb(0,158,115,maxColorValue=255), rgb(240,228,66,maxColorValue=255), rgb(0,114,178,maxColorValue=255), rgb(213,94,0,maxColorValue=255), rgb(204,121,167,maxColorValue=255)) palette(colorblind) barplot(1:10,col=1:10) ##use density and angle to overlay a stipple pattern barplot(1:10,col=1:10) barplot(1:10,density=10,angle=runif(10)*180,add=T) barplot(1:10,density=10,angle=runif(10)*180,add=T) #dev.off() ############################################# ## Some thoughts on color schemes ## ## Using a consistent color scheme across plots in a publication ## adds unity and coherence, especially when consistently mapped ## onto the same groups/IVs set.seed(101) dat <- t(matrix(runif(20),5,4) * (1:5)^2) cats =c("Fr","So","Ju","Sr","Gr") #pdf("c6-colorscheme1.pdf",width=9,height=4) par(mfrow=c(1,4)) palette(c("midnightblue","gold","darkgreen","maroon","dodgerblue")) matplot(dat,type="l",lwd=3,lty=1,xaxt="n",ylim=c(1,25)) legend(2,25,rev(cats),lty=1,lwd=3,col=5:1) pie(c(1,13,5,2,1),labels=c("Fr","So","Ju","Sr","Gr"),col=1:5) dotchart(c(1,13,5,2,1),labels=c("Fr","So","Ju","Sr","Gr"),col=1:5,pch=16,pt.cex=3) barplot(c(10,5,8,4,6),names=cats,col=1:5) #dev.off() #pdf("c6-colorscheme2.pdf",width=9,height=4) par(mfrow=c(1,4)) ##Whoops, somebody didn't like the color scheme. palette( brewer.pal(9,"Set1")) matplot(dat,type="l",lwd=3,lty=1,xaxt="n",ylim=c(1,25)) legend(2,25,rev(cats),lty=1,lwd=3,col=5:1) pie(c(1,13,5,2,1),labels=c("Fr","So","Ju","Sr","Gr"),col=1:5) dotchart(c(1,13,5,2,1),labels=c("Fr","So","Ju","Sr","Gr"),col=1:5,pch=16,pt.cex=3) barplot(c(10,5,8,4,6),names=cats,col=1:5) #dev.off() ######################################################### ## Use related colors to plot data versus summary stats ## or related sub-levels of a factor #pdf("c6-colorpairings.pdf",width=8,height=5) x <- runif(1000)*50 xbin <- round(x) y <- x*(x/5-22)+runif(1000)*300 y.agg <- aggregate(y,list(xbin),mean) par(mfrow=c(1,2)) plot(x,y,col="seagreen3") points(y.agg$Group.1,y.agg$x,col="darkgreen",pch=16) boxplot(len~supp*dose,data=ToothGrowth, col=brewer.pal(6,"Paired"),main="Paired Plot colors") #dev.off() ######################################################### ## Use transparency to highlight overlap ## #pdf("c6-linda.pdf",width=7,height=7) par(mfrow=c(1,1),mar=c(0,0,0,0)) cols <- c(rgb(240,0,5,maxColorValue=255,alpha=120), rgb(230,225,5,maxColorValue=255,alpha=255), rgb(12,100,250,maxColorValue=255,alpha=120)) ##In a random sample, of women, we might have 40 self-identified feminists, ##and 10 employed in the financial industry plot(0,type="n",xlim=c(0,10),ylim=c(0,10),bty="n",xaxt="n",yaxt="n",xlab="",ylab="") segments(0:10,0,0:10,10,lty=3) segments(0,0:10,10,0:10,lty=3) rect(0,0,10,10,col=cols[3]) rect(0,6,10,7,col=cols[2]) rect(1,1,6,9,col=cols[1]) text(3.5,5,"Feminists (40)") text(3.5,6.5,"Lindas (5)") text(8,6.5,"Bank\nworkers (10)") text(7,.5,"Women (100)") #dev.off() ######################################################## ### Balloon plots ### ### From the gplots library library(gplots) catnames <- c("izzy", "dilly", "spooky", "bernard", "jack") catcolors <- c("black", "orange", "gray", "brown") datavals <- round(exp(runif(20)*5)) ##random 600 numbers with a mean of 76 and a standard deviation of 6.9 data <- data.frame(Cat=rep(catnames,4), Color=rep(catcolors, each=5), Count=datavals ) #pdf("c6balloon1.pdf",width=6,height=6) balloonplot( data$Cat, data$Color, data$Count, colmar=2,ylab="Color",xlab="Cat name") #dev.off() #pdf("c6balloon2.pdf",width=6,height=5,fonts=c("ZapfDingbats","Helvetica","Helvetica-Bold")) balloonplot( data$Cat, data$Color, data$Count, colmar=2, label=F,ylab="Color",xlab="Cat name", dotcol=rep(catcolors,each=5),dotsize=12,scale.range="absolute") #dev.off() #pdf("c6balloon3.pdf",width=6,height=5) balloonplot( data$Cat, data$Color, data$Count, colmar=2, dotchar=1,label.color="blue", ylab="",xlab="",main="Counts of Cat Color by Cat Name", dotsize=15,colsrt=90,scale.range="relative",dotcol=rep(catcolors,each=5)) #dev.off() ######################################################## ### gap.plot ### ### From the plotrix library library(plotrix) #create 40 random numbers twogrp<-c(rnorm(10)+5,rnorm(10)+200,rnorm(10)*3+5,rnorm(10)+205) #create color for each group gpcol<-rep(c(2,3,4,5),each=10) xvals <- rep(c(1,2,3,4),each=10) #this is a graph when using "plot" command #pdf("c6gap1.pdf", width=5,height=5) plot(xvals,twogrp,col=gpcol,xlab="Index",ylab="Group values",main="No Gap on Y axis") #dev.off() #pdf("c6gap2.pdf",width=8,height=5) par(mfrow=c(1,2)) gap.plot(xvals,twogrp,gap=c(10,190),xlab="Index",ylab="Group values", main="Gap on Y axis",col=gpcol) axis.break(2,10) gap.plot(xvals,twogrp,gap=c(11,194),xlab="Index",ylab="Group values", main="Gap on Y axis",col=gpcol,ytics=seq(-5,220,5)) axis.break(4,11) #dev.off() par(mfrow=c(1,2)) gap.plot(xvals,twogrp,gap=c(10,190),xlab="Index",ylab="Group values", main="Gap on Y axis",col=gpcol, breakcol="white") axis.break(2,10,breakcol="black") axis.break(4,10) gap.plot(xvals,twogrp,gap=c(11,194),xlab="Index",ylab="Group values", breakcol="white", main="Gap on Y axis",col=gpcol,ytics=seq(-5,220,5)) axis.break(2,11) axis.break(4,11) #pdf("c6gap3.pdf",width=8,height=5) dat<-rnorm(40) par(mfrow=c(1,2)) #plot(twogrp,dat,xlab="X values",xtics=c(4,7,17,20),ylab="Y values", # main="Gap on X axis with added lines") gap.plot(twogrp,dat,gap=c(11,194),gap.axis="x",xlab="X values", xtics=c(-2,8,2,seq(190,220,5)) ,ylab="Y values",main="Gap on X axis with added lines") gap.plot(c(seq(0,7.5,by=0.5),seq(16.5,22.5,by=0.5)), rnorm(22),gap=c(11,194),gap.axis="x",type="l",add=TRUE,col=2) twogrp2 <- twogrp+1 twogrp2[31:40] <- twogrp2[31:40]+50 #plot(twogrp,xlab="X values",ylab="Y values", # main="Test two gap plot with the lot", # lty=c(rep(1,10),rep(2,10)),pch=c(rep(2,10),rep(3,10)), # col=c(rep(2,10),rep(3,10)),type="b") gap.plot(twogrp2,gap=c(11,190,220,240), xlab="X values",ylab="Y values", main="Test of gap.plot with two gaps",xtics=seq(0,40,by=5), ytics=c(seq(0,300,5)), lty=c(rep(1,10),rep(2,10)), pch=c(rep(2,10),rep(3,10)), col=c(rep(2,10),rep(3,10)), type="b") #dev.off() ######################################################## ### the barplot2 function ### ### From the gplots library hh <- t(VADeaths[ 5:1,]) ##color for error bars later (can change colors) mybarcol <- "blue" ## I added a command to change the color of the title mymaincol<-"darkgreen" ##creates confidence interval for the lower side ## can change based off of confidence interval where it should be centered around--> currently centered ##around 1.00 to show the ci <- sqrt(hh/1000 * (1- hh/1000) / 1000)*1000 ##creates 95% confidence interval for the upper side ci.l <- hh -ci*1.96 ci.u <- hh +ci*1.96 pdf("c6bar1.pdf",width=6,height=4) mp <- barplot2(hh, beside = TRUE, ##TRUE plots them different series beside each other ##assigns color to the bar graph (can change colors and orders) col = c("lightblue", "mistyrose", "lightcyan", "lavender"), ##Creates legend using the column names as ##the lengend entries and gives min and max for y-axis legend = colnames(VADeaths), ylim = c(0, 100), main = "Death Rates in Virginia (per 1000)", font.main = 4, #Font size of main title col.main="darkgreen", #color of main title ##Change the border color of the bars: border=c(1:4), ##Creates sub title on x-axis, gives color to subtitle ## using mybarcol command, sub = "99 percent error bars", col.sub = "lightblue", cex.names = 1.5, ##Plot the confidence intervals: plot.ci = TRUE, ci.l = ci.l, ci.u = ci.u, ##Plot gridlines plot.grid = TRUE) ##Add margin text to provide summary mean values: mtext(side = 1, at = colMeans(mp), line = 2, text = paste("Mean", formatC(colMeans(hh))), col = "red") ## Also, add a box around the whole graphic box(col="purple",lwd=4) #dev.off() ######################################################## ### the bandplot function ### ### From the gplots library # fixed mean, changing variance x<-1:1000 y<-x + .003*x^2 + rnorm(1000,mean=0,sd=1+x^2/1000) #pdf("c6band1a.pdf",width=4,height=5) bandplot(x,y,main="Example bandplot\nVariance proportional to square of value") #dev.off() n<-1000 x <- round(runif(n, 0,100)) y <- x- (x-25)*(x-50)+ rnorm(n)*600 #pdf("c6band1b.pdf",width=4,height=5) # 0 stands for mean bandplot(x,y,sd=c(-1,0,1),col="grey",sd.col=c(1,4,1),sd.lty=c(3,1,3),sd.lwd=c(2,4,2), main = "Band plot with only +/- 1 s.d.") #dev.off() # "smooth"? using intervals that include the nearest 1/5 of the data (wapply) # fixed varance, changing mean n <- 150 x <- runif(n)*50 y <- rnorm(n, mean=x/10, sd=1) #pdf("c6band1c.pdf",width=4,height=5) bandplot(x,y,main="Example bandplot\nMean related to x value") #dev.off() # the 3rd problem in problem set 2 n<-10000 x <- round(runif(n, 0,100)) y <- (x-25)*(x-50)+ rnorm(n)*600 #pdf("c6band1d.pdf",width=4,height=5) bandplot(x,y,main="Example bandplot\nCustom graphing parameters", col="green3",sd.col=rep("darkgreen",5),sd.lwd=c(1,3,8,3,1)) #dev.off() ######################################################## ### the pyramid.plot ### From the plotrix library ##Pyramid plot: #pdf("c6pyr1.pdf",width=6,height=4) par(mfrow=c(1,2)) pyramid.plot(VADeaths[,1],VADeaths[,2]) pyramid.plot(VADeaths[,3],VADeaths[,4]) #dev.off() #pdf("c6pyr2.pdf",width=4,height=4) pyramid.plot(VADeaths[,1],VADeaths[,2], top.labels=c(colnames(VADeaths)[1],"",colnames(VADeaths)[2]), gap=3) #dev.off() #pdf("c6pyr3.pdf",width=4,height=4) pyramid.plot(VADeaths[,1],VADeaths[,2], top.labels=c(colnames(VADeaths)[1],"Age",colnames(VADeaths)[2]), gap=15, main="Death rate (per 1000) by age", labels=rownames(VADeaths)) #dev.off() #pdf("c6pyr4.pdf",width=8,height=4) par(mfrow=c(1,2)) pyramid.plot(VADeaths[,1],VADeaths[,2],xlim=c(80,80),gap=18, top.labels=c("Male","Age","Female" ), main="Death rate by age in Rural VA",lxcol="darkgreen",rxcol="navy", laxlab=c(0:3*25),raxlab=c(0:3*25), labels=rownames(VADeaths),unit="Deaths/1000") pyramid.plot(VADeaths[,3],VADeaths[,4],xlim=c(80,80),gap=18, top.labels=c("Male","Age","Female"), main="Death rate by age in Urban VA",lxcol="darkgreen",rxcol="navy", laxlab=c(0:3*25),raxlab=c(0:3*25), labels=rownames(VADeaths),unit="Deaths/1000") #dev.off() ##Other examples: ################################ ## Pyramid Plot ## ##Read CSV into workspace ##Assign Faculty and Industry responses to separate variables IvF <- read.csv("Industry_Faculty_Responses.csv") #IvF <- Industry_Faculty.Responses ix <- IvF$Industry fx <- IvF$Faculty library(plotrix) pyramid.plot(fx,ix) pyramid.plot(fx,ix,labels=IvF$X) pyramid.plot(fx,ix,labels=IvF$X,top.labels=c("Faculty","Skill Category","Industry")) pyramid.plot(fx,ix,labels=IvF$X,top.labels=c("Faculty","Skill Category","Industry"),main="Response Means") pyramid.plot(fx,ix,labels=IvF$X,top.labels=c("Faculty","Skill Category","Industry"),main="Response Means",gap=1,unit="5-point Likert",ppmar=c(4,1,4,1)) pyramid.plot(fx,ix,labels=IvF$X,top.labels=c("Faculty","Skill Category","Industry"),main="Response Means",gap=2,unit="5-point Likert",ppmar=c(4,1,4,1)) ##simpler example for balloonplot dogs <- data.frame( size = sample(c("s","m","l"),replace=T,p=c(.1,.3,.6),1000), type= sample(LETTERS[1:8],p=8:1,replace=T,1000) ) tab <- table(dogs$size,dogs$type) balloonplot(tab) # #Exercise: ###Exercise: ##Create three series of 5 numbers (i.e., a matrix with 3 columns and 5 rows). Create a side-by-side matplot and a stacked barplot with those values, and default color settings. Then, Pick a set of at least five named colors. ##Set the palette to these colors, and remake the same plots, so that they use these colors. data <- matrix((1:15)[order(runif(15))] ,ncol=3) par(mfrow=c(2,2)) palette("default") matplot(data,type="b") barplot(data) palette(c("seagreen","orange","gold","navy","red4")) matplot(data,type="b") barplot(data,col=palette()) library(gplots) vals <- matrix(runif(100),10,10) image(vals,col=colorpanel(50,"black","green")) image(vals,col=redblue(50)) ##Exercise: # Use a 'divisive' color palette from color brewer. Use it to make a non-stacked barplot from a data table having two columns and seven rows. pal <- brewer.pal(5,"RdYlBu") data <- cbind(c(5,2,3,2,1),c(9,1,3,4,1)) barplot(data,beside=T,col=pal) ##Exercise: x <- sample(1:10,100,replace=T) y <- round(x + rnorm(100)*2) mycol <- rgb(.8,.2,.3,.5) par(mfrow=c(1,2)) plot(x,y) plot(x,y,col=mycol,cex=2,pch=16) ##Anne's example par(mfrow=c(1,6),mar=c(1,5,1,0)) barplot(runif(20),horiz=T,xaxt="n",col="blue",names=LETTERS[1:20],las=1) par(mar=c(1,0,1,0)) barplot(runif(20),horiz=T,xaxt="n",col="salmon") barplot(runif(20),horiz=T,xaxt="n",col="limegreen") barplot(runif(20),horiz=T,xaxt="n",col="orange") barplot(runif(20),horiz=T,xaxt="n",col="purple") barplot(runif(20),horiz=T,xaxt="n",col="orange4") v2<- matrix(runif(20*8),ncol=8) rownames(v2) <-paste("COMPANY", LETTERS[1:20]) v2s <- v2/max(v2)*.8 ##max value is 1.0 top <- t(t(v2s)+1:8) par(mfrow=c(1,1),mar=c(4,10,1,1)) plot(0,type="n",xlim=c(0,ncol(v2))+1,ylim=c(0,nrow(v2)+1),bty="n",axes=F,xaxt="n",yaxt="n",ylab="",xlab="", main="Demonstration of many barplots") axis(2,1:20,rownames(v2s),las=1) left <- matrix(rep(1:8,each=20),ncol=8) rect(left,1:20+.35,top, 1:20-.35,col=rep(1:8,each=20)) segments(1:8,0,1:8,20) axis(1, 1:8+.4,paste("Sector",1:8),tick=F,line=1) for(i in 1:8) axis(1, i+ c(0,.45,.8), c(0,.5,1),line=-1,cex=.4)