##  Code for Book: Applied Statistical Analysis in R for Psychology and Human factors
##  (c) Shane T. Mueller, Ph.D.
##  Michigan Technological University
##  shanem@mtu.edu
##
##  Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210
##
##
## This file contains example software code relevant for Chapter 4 (Graphics).
##
## The code in this file may be used and shared freely with or without attribution.

## Data from Pursuit Rotor Task:
## Read in the data:
dat2 <- read.csv("pooled-pursuitrotor.csv")
dim(dat2)
head(dat2)
table(dat2$subnum)
table(dat2$subnum,dat2$trial)

##select on a specific trial/participant
tmp <- dat2[dat2$trial==1&dat2$subnum==12818,]
dim(dat2)
dim(tmp)

##see how tmp is about 90 lines, but dat2 is 43791.
##tmp now consists of a single time series, sampled at roughly equal times. Let's see how often and how regular:

table(tmp$tdiff)


#pdf("c4-prtiming.pdf",width=9,height=6)
par(mfrow=c(1,3))
plot(tmp$timeElapsed, main= paste("Mean:", round(mean(tmp$tdiff),3)))
plot(tmp$tdiff)
hist(tmp$tdiff)
abline(v=mean(tmp$tdiff))
#dev.off()


###################
## Plotting the mouse coordinates:
pdf("c4-prtpos.pdf",width=9,height=6)
matplot(cbind(tmp$targX,tmp$targY),
        type="l",bty="L",las=1,lty=2,
        xlab="Time step",ylab="Pixel location",
        ylim=c(0,1500))
legend(550,1500,c("X position","Y position"),lty=1:2,col=1:2)

matplot(acbind(tmp$mouseX,tmp$mouseY),add=T,
        type="l",las=1,lty=1)
        
dev.off()


#pdf("c4-f2.pdf",width=9,height=9)
xmid <- mean(tmp$targX) ## find the x center
ymid <- mean(tmp$targY) ## find they center
plot(tmp$targX-xmid,ymid-tmp$targY,pch=1,cex=.2,
     xlim=c(-300,300),ylim=c(-300,300),
     xlab="Horizontal pixel",ylab="Vertical pixel")


#mousex, mousey specify the mouse coordinates: Plot them too:
points(tmp$mouseX-xmid,ymid-tmp$mouseY,type="l",col="grey")

##two circles around. Let's connect the target to the mouse at each step. This is 568 line segments,
##but it can be done with one line:
segments(tmp$targX-xmid,ymid-tmp$targY,tmp$mouseX-xmid,
         ymid-tmp$mouseY,col="grey")

##This gives a nice visualizeation. The ontarget will mark when the cursor was on-target. 
##lets draw these as circles; I'll do this with a for loop, checking each time. 
##we could probably do this in a on-liner as well.

for(i in 1:nrow(tmp))
{
  if(tmp[i,]$ontarget)
  {
    points(tmp$mouseX[i]-xmid,ymid-tmp$mouseY[i],col="red",cex=.3)
  }else{
    points(tmp$mouseX[i]-xmid,ymid-tmp$mouseY[i],col="grey",cex=.3)
    
  }
}
#dev.off()

##put it in a function:
plottrial <- function(tmp,header="")
{
  
  xmid <- mean(tmp$targX)
  ymid <- mean(tmp$targY)
  
  meanoffset <- round(mean(tmp$diff),2)

    plot(tmp$targX-xmid,ymid-tmp$targY,pch=1,cex=.2,
       xlim=c(-300,300),ylim=c(-300,300),
#       main=paste(header,"\n","mean offset =", meanoffset),
       xlab="",ylab="",axes=F)

  #mousex, mousey specify the mouse coordinates: Plot them to:
  points(tmp$mouseX-xmid,ymid-tmp$mouseY,type="l",col="grey")
  
  segments(tmp$targX-xmid,ymid-tmp$targY,tmp$mouseX-xmid,ymid-tmp$mouseY,col="grey")
  
  ##I'll use a faster points method than we did above:
  points(tmp$mouseX-xmid,ymid-tmp$mouseY,
         col=c("grey","red")[tmp$ontarget+1],cex=.3)
  
  
}
plottrial(tmp,"test")


##how to we loop through all the trials:
##this does not work:
sub1 <- dat2[dat2$subnum==12887,]
for(i in levels(sub1$trial))
{
  print(i)
}

levels(sub1$trial)
names(table(sub1$trial))
levels(as.factor(sub1$trial))


###########iterate on the trials as levels of a factor:
for(i in levels(as.factor(sub1$trial)))
{
  print(i)
}

#pdf("c4-f3.pdf",width=6,height=7)
sub1 <- dat2[dat2$subnum==12887,]
par(mfrow=c(2,2))
for(i in levels(as.factor(sub1$trial)))
{
  
  tmp <- sub1[sub1$trial==i,]  
  plottrial(tmp)
}

#dev.off()


##Let's iterate through each subject and each trial. 
##this can be slow within RStudio, so let's write it to a pdf
#we could also do this within an .rmd file for similar effect.
tapply(dat2$ontarget,list(dat2$sub,dat2$trial),mean)


pdf("pr-experiment.pdf",width=7,height=8)

for(sub in levels(as.factor(dat2$subnum)))
{
  par(mfrow=c(2,2))
  tmp <- dat2[dat2$subnum==sub,]
  for(trial in levels(as.factor(tmp$trial)))
  {
    print(paste("plotting subject:", sub, " trial: ", trial))
    tmp2 <- tmp[tmp$trial==trial,]
    plottrial(tmp2,header=paste("plotting subject:", sub, " trial: ", trial))
  }
}
dev.off()


############################################
##New plotting functions:


#histogram
#pdf("c4-hists.pdf",width=8,height=7)
pr.agg <- aggregate(dat2$diff,list(dat2$subnum,dat2$trial),mean)
par(mfrow=c(2,2))
hist(pr.agg$x,xlab="Mean deviation")
hist(pr.agg$x,xlab= "Mean deviation", col="navy",breaks=20)
hist(pr.agg$x,xlab= "Mean deviation", col="red",breaks=c(0:40))
##plots density instead of counts:
hist(pr.agg$x,xlab= "Mean deviation", col="darkgreen",breaks=c(0,10:20,25,30,40),freq=F )
#dev.off()

##--------------------------
##exercise here
##--------------------------

View(InsectSprays)
## Notice this is in long data format. 
## Not included in book
boxplot(InsectSprays$count~InsectSprays$spray)

#pdf("c4bp1.pdf",width=6,height=5)
boxplot(count ~ spray, data = InsectSprays,
        col = "lightgray",  xlab="Spray treatment",
        ylab="Mortality count")
##this adds a layer:
boxplot(count ~ spray, data = InsectSprays,
        notch = TRUE, add = TRUE, col = "blue")
#dev.off()


View(OrchardSprays)
#pdf("c4bp2.pdf",width=6,height=5)
boxplot(decrease ~ treatment, data = OrchardSprays,
        col = "darkgreen",xlab="Treatment",ylab="Observed Decrease")
##overlay the raw data:
points(OrchardSprays$treatment,OrchardSprays$decrease,cex=.5)
#dev.off()


## This is not really helpful, because we have only on observation per design
boxplot(decrease~treatment+rowpos,data=OrchardSprays)

#pdf("c4-boxplots2.pdf",width=9,height=5)
par(mfrow=c(1,2))

##boxplot also works with a data.frame, plotting each column.

mallsales<-data.frame(Maurices=1000*exp(runif(365)),
                      JCPenney =2000*exp(runif(365)),
                      Sears = 3000*exp(runif(365)))
boxplot(mallsales)

birthdate <- round(1950 + runif(100)*40)
tenure <- (2010-birthdate - 20 ) - runif(100)* 10
boxplot(data.frame(birthdate,tenure),main="Age and years worked at ACME corporation")

#dev.off()


###############################
##  image() plots
##an image plots a bitmap of a matrix of values:
#pdf("c4image1.pdf",width=5,height=5)
layout <-   tapply(OrchardSprays$decrease,
                   list(OrchardSprays$rowpos,OrchardSprays$colpos),mean)
layout
image(1:8,1:8,(layout),xlab="Row",ylab="Column")      
text(OrchardSprays$rowpos,OrchardSprays$colpos,OrchardSprays$decrease)

#dev.off()

##you can do this by hand:
data <- matrix(c(8,1,1,1,1,1,8,
                 1,0,0,0,0,0,1,
                 1,0,3,3,3,0,1,
                 1,0,0,0,0,0,1,
                 1,0,0,4,0,0,1,
                 1,0,0,0,0,0,1,
                 1,0,5,0,5,0,1,
                 1,0,0,0,0,0,1,
                 8,1,1,1,1,1,8)
               ,7)

#pdf("c4image2.pdf",width=5,height=5)
image(data,xlab="",ylab="")
#dev.off()

##plot correlation matrices:
data(iris)
cor(iris[,1:4])
##this is a correlation among measures on a bunch of irises. doing image() helps
##to visualize groups that go together:
iris.cor <- cor(t(iris[,1:4]))
round(iris.cor,2)
#pdf("c4-iriscor.pdf",width=9,height=9)
image(iris.cor,
      main="Correlation map of iris data")
#dev.off()

##2D histogram:

#pdf("c4image3.pdf",width=9,height=6)
x <- rnorm(500,4,1.5)
y <- rnorm(500,2,1.5) + x*.2


par(mfrow=c(1,2))
plot(x,y)

##Create a 2-D histogram with 1-wide cell grids.
txy <- table(ceiling(x),ceiling(y))
txy
image(as.numeric(rownames(txy))-.5,
      as.numeric(colnames(txy))-.5,txy,xlab="X value",ylab="Y value")

##we can even overplot the original data:
points(x,y,col="yellow",pch=".",cex=3)

#dev.off()
##---------------------------
## Exercise here
##---------------------------

################################
## Bar plots

#pdf("c4bar1.pdf",width=9,height=6)
par(mfrow=c(1,2))
barplot(1:10)
plot(1:10,type="h",lwd=12)
#dev.off()

#pdf("c4bar2.pdf",width=9,height=9)
par(mfrow=c(2,2))
barplot(c(1,11,4,5),names=c("one","three","five","eight"))
barplot(c(1,11,4),names=c("one","three","five"),las=1)
#barplot(c(1,11,4),names=c("one","three","five"),las=2)
xs <- barplot(c(1,11,4),names=c("one","three","five"),
              col=c("red","green","black"),las=2, lwd=3)
text(xs,c(9,9,9),c("A","B","C"))
barplot(c(1,11,4),names=c("one","three","five"),
        col=c("red","green","black"),las=1, lwd=3,horiz=T)

#dev.off()

#pdf("c4bar3.pdf",width=9,height=4)
##A problem with barplot axes
par(mfrow=c(1,3))
barplot(-5:5,names=LETTERS[1:11])
barplot(-5:-10,name=LETTERS[5:10])
barplot(-5:-10,name=LETTERS[5:10],yaxt="n")
axis(2,0:-10,0:10)
#dev.off()


##there, the values are very close together so you can't see the differences:
#pdf("c4bar4.pdf",width=9,height=4)
par(mfrow=c(1,3))
barplot(1000:1010)
barplot(1000:1010,ylim=c(1000,1010))##ugh
barplot(1000:1010,ylim=c(999,1010),names=1000:1010,
        col=c("orange","black"),xpd=F)## chops off, but might be OK
#dev.off()


###barplot can plot multiple series
set.seed(100)
data <- data.frame(q1=sample(letters[1:5],100,replace=T,
                             prob=c(1,1,3,2,5)),
                   q2=sample(letters[1:5],100,replace=T,prob=c(3,1,3,2,1)),
                   q3=sample(letters[1:5],100,replace=T,prob=c(1,5,1,2,1)),
                   q4=sample(letters[1:5],100,replace=T,prob=c(1,2,8,2,2)),
                   q5=sample(letters[1:5],100,replace=T,prob=c(1,5,2,4,3)))


datatable<-apply(data,2,table)

datatable
library(gplots)##needed for colorpanel

#pdf("c4stackedbar.pdf",width=9,height=5)
par(mfrow=c(1,2),mar=c(5,8,1,2)) ##change margins
barplot(datatable,names=paste("Question",1:5),col=1:5,horiz=T,las=1,
        xlab="Proportion of each response",
        cex.names=.5)

par(mar=c(5,10,1,2)) ##change margins
##uses gplots to create a color sequence:
barplot(datatable,names=paste("Question",1:5),
        col=colorpanel(5,"grey","navy")[1:5],horiz=T,las=1,
        xlab="Proportion of each response")

#dev.off()


### Stacked bar with uneven numbers
###
juvy <- read.table(text="State White 	Black 	Hispanic 	Native	Asian 	Other
Alabama  	300 	507 	27 	0 	3 	9
Alaska 	 78 	30 	3 	75 	3 	21
Arizona 	237 	114 	255 	54 	9 	51
Arkansas 	198 	315 	33 	0 	6 	3
California 	900 	1863 	3729 	42 	138 	54",header=T)

cols=c("darkgreen","bisque","navy","red","violet","orange")
juvymat <- as.matrix(juvy[,-1])
rownames(juvymat) <- juvy[,1]
##normalize within each state.
juvymat.normed <- juvymat/rowSums(juvymat)*100

#pdf("c4-juvy.pdf",width=9,height=5)
par(mfrow=c(1,2),mar=c(8,5,3,0))

barplot(t(juvymat),las=3,legend=T,
        args.legend=list(x=4,bty="n"),
        ylab="Number of residents",
        col=cols, main="Juvenile Residential Placements")


barplot(t(juvymat.normed),las=3,legend=F,
        ylab="Percentage of residents",
        col=cols, main="Juvenile Residential Placements")

#dev.off()


set.seed(100)
data2 <- data.frame(q1=sample(letters[1:10],100,replace=T),
                    q2=sample(letters[1:10],100,replace=T),
                    q3=sample(letters[1:10],100,replace=T),
                    q4=sample(letters[1:10],100,replace=T), 
                    q5=sample(letters[1:10],100,replace=T))

datatable2<-apply(data2,2,table)


###barplot can plot multiple series


##plot by question or by answer

#pdf("c4bar5.pdf",width=9,height=6)
par(mfrow=c(2,2),mar=c(3,4,1,1))
barplot(datatable,col=1:5,beside=T)
barplot(t(datatable),beside=T,col=1:5)
barplot(datatable,col=1:5,beside=F)

par(mar=c(5,10,1,2))
barplot(datatable,names=paste("Question",1:5),col=1:5,horiz=T,las=1,
        xlab="Proportion of each response")
#dev.off()
##Plot stacked--after all, each answer must add up to 100%

##-------------------------
## barplot exercise here
##--------------------------


#####################################################################
#####################################################################
#Exercises:
  

## Exercise: pursuit rotor plot

# Remove the x and y labels from the plot by setting xlab and ylab
# Remove the x and y axes using xaxt/yaxt argument
# Change the box type using the bty argument
#  Set the main title after you create the plot so that it tells which trial

## To find out about the parameter settings for plots, try help(par)
##

plottrial <- function(tmp,header="")
{
  xmid <- mean(tmp$targX)
  ymid <- mean(tmp$targY)
  
  meanoffset <- round(mean(tmp$diff),2)
  plot(tmp$targX-xmid,ymid-tmp$targY,pch=1,cex=.2,
       xlim=c(-300,300),ylim=c(-300,300),
       main=paste(header,"\n","mean offset =", meanoffset),
       xlab="",ylab="",
       xaxt="n",yaxt="n",
       bty="n")
  
  #mousex, mousey specify the mouse coordinates: Plot them to:
  points(tmp$mouseX-xmid,ymid-tmp$mouseY,type="l",col="grey")
  
  segments(tmp$targX-xmid,ymid-tmp$targY,tmp$mouseX-xmid,ymid-tmp$mouseY,col="grey")
  
  ##I'll use a faster points method than we did above:
  points(tmp$mouseX-xmid,ymid-tmp$mouseY,col=c("grey","red")[tmp$ontarget+1],cex=.3)
  
  
}
dat2 <- read.csv("pooled-pursuitrotor.csv")
sub1 <- dat2[dat2$subnum==12887,]
par(mfrow=c(2,2))
for(i in levels(as.factor(sub1$trial)))
{
  trial <- i
  tmp <- sub1[sub1$trial==i,]  
  plottrial(tmp,header=paste("Trial",i))
}


####################################################################
###Exercise:
##Error values like diff are often skewed, because they cannot be smaller than zero, they
##are difficult to make smaller once they are small, but they can get very large.  Transform 
##diff to its natural logarithm using log(), then make two histograms side-by-side  #
##(using par(mfrow=c(1,2))) that have a gold filled color, one with raw diff scores and one 
##with the transformed diff sores.   Make the histogram filled with gold color, and specify 
##the breaks argument to most clearly see the distributions.

par(mfrow=c(1,2))
hist(dat2$diff,breaks=50,col="gold")
hist(log(dat2$diff),breaks=50,col="gold")

######################################################################
## Exercise:
##  Add gridlines to an image plot using the \texttt{segments} command.
#let's create an interesting grid.
tmpx <- rnorm(10000,mean=8,sd=1.7)
tmpy <- tmpx/2 + 3 + rnorm(10000,mean=3,sd=.7)
grid <- table(round(tmpx),round(tmpy))
image(1:nrow(grid),1:ncol(grid),grid,col=grey(1:100/100))
segments(1:20,0,1:20,10,col="red")
segments(0,1:20,20,1:20,col="red")


##this doesn't quite look right. The gridlines are down the center of each cell. Adjust by.5
image(1:nrow(grid),1:ncol(grid),grid,col=grey(1:100/100))
segments(1:20+.5,0,1:20+.5,15,col="red")
segments(0,1:20+.5,20,1:20+.5,col="red")


###################################################
## Exercise:
##  For the OrchardSprays data set, compute using tapply the average effect of each treatment, each row, and each column position. 
## Plot average decrease by each of these variables, on a single 1x3 plot.
##  Use tapply to compute the average effect of each treatment x colpos and treatment x rowpos.
##  Make two stacked barplots with treatment as the main axis, and either row or column position as the division within each bar (use the matrix just created)
##  Make two stacked barplots with row or column position as the main axis, and treatment as the division within each bar. Use the same matrix you just created.
##  Look across the 7 plots you created and describe the relative importance of the spray, versus the row and column position in the orchard grid.
par(mfrow=c(1,3))
barplot(tapply(OrchardSprays$decrease,
               list(OrchardSprays$treatment),mean),
        main="Decrease by treatment",
        col=1:8)
barplot(tapply(OrchardSprays$decrease,
               list(OrchardSprays$rowpos),mean),
        main="Decrease by Row position",
        col=1:8)
barplot(tapply(OrchardSprays$decrease,
               list(OrchardSprays$colpos),mean),
        main="Decrease by Column position",
        col=1:8)

print(tapply(OrchardSprays$decrease,
             list(OrchardSprays$rowpos,OrchardSprays$treatment),mean))

tab2r <- tapply(OrchardSprays$decrease,
                list(OrchardSprays$rowpos,OrchardSprays$treatment),mean)

tab2c <- tapply(OrchardSprays$decrease,
                list(OrchardSprays$colpos,OrchardSprays$treatment),mean)

par(mfrow=c(2,2))
##first by treatment, then by row/column
barplot(tab2r,col=1:8,main="Treatment")
barplot(tab2c,col=1:8,main="Treatment")

##first by row/column, then by treatment:
barplot(t(tab2r),col=1:8,main="Row")
barplot(t(tab2c),col=1:8,main="Column")


tmp <- data.frame(participant=rep(1:100,each=26), 
                  trial=rep(1:26,100), 
                  cond=rep(LETTERS,100), 
                  x=runif(2600)*1:26,
                  even=1:2)