##  Code for Book: Applied Statistical Analysis in R for Psychology and Human factors
##  (c) Shane T. Mueller, Ph.D.
##  Michigan Technological University
##  shanem@mtu.edu
##
##  Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210
##
##
## This file contains example software code relevant for Chapter 3.
##


#defining functions:
se <- function(x)
{
   stdev <- sd(x)
   se <-  stdev/sqrt(length(x))
   return(se)
}

se <- function(x)
{
   stdev <- sd(x)
   return(stdev/sqrt(length(x))  )
}

se <- function(x)
{
   stdev <- sd(x)
   stdev/sqrt(length(x))
}

se <- function(x) {sd(x)/sqrt(length(x))}


# In each of the cases, you would call the function on a data set like this:
 y <- runif(20)
 se(y)

 
##Wrapper function:
cleanmean <- function(x) 
    {
       mean(x, na.rm=T)
    }

x <- runif(1000)
x[x>.95] <- NA  ##Mark the highest ones as NA=missing

 mean(x)
 cleanmean(x)

 
 #Specifying default arguments
 
x <- runif(500)
y <- 2 *x + 25  - 2.5*x^2 + .2*rnorm(500)
plotnice  <- function(x,y, col="red", 
                    lty=3, bty="L", pch=16,las=1)
 {
   plot(x,y,col=col,lty=lty,bty=bty,pch=pch,las=las)
 }
#pdf("c3fun.pdf",width=9,height=5)
par(mfrow=c(1,2))
plotnice(y=y,x=x)
plotnice(x,y,col="blue")
#dev.off()

##Optional and Default values
increment <- function(value, addend=1)
{
 return(value + addend)
}


plotletters <- function(values,labels=LETTERS[1:length(values)])
  {
    barplot(values,names=labels)
  }
plotletters(runif(5))


##Dealing with missing/Null values
addup <- function(x,y=NULL)
{
  if(is.NULL(y))
  {
     return(x)
  }else{
   return( x+y)
  }
}

plotxy <- function(x,y)
{
  if(missing(y))
  {
   plot(x)
  } else{
  plot(x,y)
  }
}
plotxy(runif(10))
plotxy(runif(10),runif(10))


plotlabeled <- function(x,y,...)
{
   plot(x,y,main="Special plot",...)
}

par(mfrow=c(1,2))
plotlabeled(runif(10),runif(10))
plotlabeled(runif(10),runif(10),pch=16,cex=2)

mix <- "ERROR"
threevals <- function(x)
{
  minx <- min(x)
  medx <- median(x)
  maxx <- max(x)
 return(list(minx,medx,maxx))
}


a <- 1
incrementa <- function(){a <- a + 1; print(a)}
incrementa()

if(runif(1)<.5)
{
  print("YES")
}else 
  {
    if(runif(1)<.5){
        print("NO")
     } else {
  
       print("REALLY NO")
     }
  }
##Wrapping a function

q05 <- function(x){quantile(x,.05)}
 q05(runif(1000))

 
mat <- matrix(runif(100),4,25) ##a 4 row x 25 column matrix of random numbers
apply(mat,1,q05)               ##get the 5-percentile from each row:


set.seed(111)
a <- runif(10)
b <- runif(10)+a
c <- runif(10)+ b
x2 <- data.frame(a=a,b=b,c=c)

lapply(x2, function(x){  x * 2})
sapply(x2, function(x){  x * 2})

lapply(x2, function(x){mean(x)})
sapply(x2, function(x){mean(x)})


##Conditional Branching

if( 0 )
{
  print("This will never print")
}

if(1)
{
  print("But this will")
}


choice <- sample(c(T,F),1)

a <- NA
b <- NA

if(choice)
{
    print("choice was true")
    a <- 1100
   
} else {

     print("choice was false")
     b <- 13200
}
 a
 b
 
 
 ## type = hist, scatter
 ##
 x <- exp(rnorm(500)/3)
myplot <-  function(data,type="scatter")
 {
  if(type=="scatter")
  {
     plot(data)
  } else if(type=="hist"){
     hist(data)  
  }  else {
    warning(paste("Plot type",type, "Unknown"))
  }
}

myplot(x)
myplot(x,"hist")
myplot(x,"points")

 ##Chaining multiple if statements:

 test <- function()
{
  
 choice <- sample(c(1,2,3,16,40,80),1)

 print(paste("choice was:",choice))
 if(choice > 25 & choice < 90)
 {
  print("one")
 }else if(choice < 50)
 {
  print("two") 
 }else if(choice==3){
 
   print("three")
 }else{
     warning("This code should never execute")
  }
}

 test()
 test()
 test()
 test()
 test()
 test()

 
#Alternatives to if statements
x <- sample(c(1,2),1)

##The hard way:
if(x==1)
 {
    y <- "one"
 } else {
    y <- "two"#
 }


if(x==1)
{
  y <- "one"
} else {
  y <- "two"#
}


##The easy way:
y <- ifelse(x==1,"one","two")


#Recoding vectors using ifelse
 x <- sample(c(1,2),10, replace=T)
  print(x)
y <- ifelse(x==1,"one","two")
 y

 
#pdf("c3f1.pdf",width=9,height=6) 
x <- exp(rnorm(10000)) #Create a fake RT distribution
par(mfrow=c(1,2))
hist(x,breaks=0:70,main="RT Distribution")
hist(ifelse(x>10,10,x),breaks=0:60,xlim=c(0,15),
      main="Truncated \nRT Distribution")
#dev.off()


x <- sample(1:5,1)
print(x)
switch(x, "one","two","three","four","five")

#this breaks, because unlike ifelse, it will only process a single
##argument not a vector.
if(0)
{
x <- sample(letters[1:5],2)
print(x)
switch(x, a="one",b="two",c="three",d="four",e="five")
}


#Iteration and Looping

continue <- T
i <- 1
while(continue)
{
   x <- runif(1)
   print(paste(i,"--", x))
   if(x>.99)
   {
     continue <- F 
   }

   i <- i + 1  
}
##or this:


while(T)
{
   x <- runif(1)
   print(paste(i,"--", x))
   if(x>.99)
   {
      break
   }
  i <- i + 1  
}
print(i)


##or this:
repeat
{
   if(runif(1)<.1)
     {
        break
     } else {
      cat(".")
     } 
}

#The for loop

x <- rnorm(100)
for(i in 1:100)
{
  print(paste(i,x[i]))
}


x <- c("one","two","three","four","five")
for(i in x)
{
  print(i)
}  

##iterate a data frame:
a <- runif(100)
b <- runif(100)+a
c <- runif(100)+ b
x2 <- data.frame(a=a,b=b,c=c)

for(i in x2)
  print(mean(i))


##Iterate through subsets of a factor

x2$group <- as.factor((x2$c>1) +(x2$c>2) + (x2$c>2.5))

par(mfrow=c(2,2))
for(i in levels(x2$group))
{
    tmp <- x2[x2$group==i,]
    plot(tmp$a,tmp$b,ylim=c(0,2),xlim=c(0,1))
}

##other loops:
a <- runif(100)
b <- runif(100)+a
c <- runif(100)+ b
x3 <- data.frame(a=a,b=b,c=c)
lapply(x3,mean)
sapply(x3,mean)

##Exercise: Functions
mmm  <- function(x)
{
  c(min(x),median(x),max(x))
}

##See  how  these  change  when  you  have  more  samples:
mmm(1/runif (1000))
mmm(1/runif (100))
data  <- runif (100) + 1/rnorm (100)
mmm(data)

##Exercise: Conditionals
myplot <- function(x,type="scatter")
{  
  if(type=="scatter")
  {
    ##Plot a regular plot here
    plot(x)    
  }else if(type=="histogram")
  {
    ##Plot a histogram here
    hist(x)  
  }else{
    warning("Unable to create specified plot type")
  }
}

myplot(x,"histogram")
myplot(x,"scatter")


## Exercise: Conditionals


#  Write a new mean function that does not
#  return an error when given a factor.  Rather, it returns
#  the modal (most common) value of that factor.  Then use
#  that function in the lapply and sapply on x2.


newmean <- function(x)
{
  if(is.factor(x))
  {
    return(sort(table(x))[length(table(x))])
    
  } else {
    return(mean(x))
  }
  
}


x2 <- data.frame(a=runif(100),b=runif(100),c= as.factor(sample(LETTERS,100,replace=T)))
newmean <- function(data)
{
  if(is.factor(data))
  {
    tab <- table(data)
    names(tab)[which.max(tab)]
  } else {
    return(mean(data) )
  }
}

newmean(x2$a)
newmean(x2$c)
lapply(x2,newmean)
sapply(x2,newmean)

#########################################
##Exercise Looping:
#Create a series of 1,000,000 letters of the alphabet using 

items <- sample(letters,1000000,replace=T)
##Write functions that will replace all `a' values with an `A', and b with a `B'. 
##Write one that uses an \texttt{if} statement, on that uses \texttt{ifelse}, 
##and one that uses \texttt{which}.  
##Run the function inside a system.time() statement, and see which is the fastest.

items <- sample(letters,1000000,replace=T)
f1 <- function(items)
{
   for(i in 1:length(items))
   {
     if(items[i]=="a")
       items[i] <- "A"
     if(items[i] =="b")
       items[i] <- "B"
   }
  return(items)
}
f2 <- function(items)
{
  
 return (  ifelse(items=="a","A",
                  ifelse(items=="b","B",items)))
}


f3 <- function(items)
{
  items[which(items=="a")] <- "A"
  items[which(items=="b")] <- "B"
  return (items)
}

system.time(f1(items ))
system.time(f2(items))
system.time(f3(items))


x <- runif(1) * 20 - 10
if(x <0)
{
  "negative"

}else if(y<1){
  
  "small"
  
}else{
  "big"
}

a <- sample(1:5)
print(switch(a,{1+1;"one"},"two","three","four","five" ))


df <- data.frame(sub=rep(1:10,each=100),x=1:1000,y=runif(1000
                                                         ))