## Code for Book: Applied Statistical Analysis in R for Psychology and Human factors ## (c) Shane T. Mueller, Ph.D. ## Michigan Technological University ## shanem@mtu.edu ## ## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210 ## ## ## This file contains example software code relevant for Chapter 3. ## #defining functions: se <- function(x) { stdev <- sd(x) se <- stdev/sqrt(length(x)) return(se) } se <- function(x) { stdev <- sd(x) return(stdev/sqrt(length(x)) ) } se <- function(x) { stdev <- sd(x) stdev/sqrt(length(x)) } se <- function(x) {sd(x)/sqrt(length(x))} # In each of the cases, you would call the function on a data set like this: y <- runif(20) se(y) ##Wrapper function: cleanmean <- function(x) { mean(x, na.rm=T) } x <- runif(1000) x[x>.95] <- NA ##Mark the highest ones as NA=missing mean(x) cleanmean(x) #Specifying default arguments x <- runif(500) y <- 2 *x + 25 - 2.5*x^2 + .2*rnorm(500) plotnice <- function(x,y, col="red", lty=3, bty="L", pch=16,las=1) { plot(x,y,col=col,lty=lty,bty=bty,pch=pch,las=las) } #pdf("c3fun.pdf",width=9,height=5) par(mfrow=c(1,2)) plotnice(y=y,x=x) plotnice(x,y,col="blue") #dev.off() ##Optional and Default values increment <- function(value, addend=1) { return(value + addend) } plotletters <- function(values,labels=LETTERS[1:length(values)]) { barplot(values,names=labels) } plotletters(runif(5)) ##Dealing with missing/Null values addup <- function(x,y=NULL) { if(is.NULL(y)) { return(x) }else{ return( x+y) } } plotxy <- function(x,y) { if(missing(y)) { plot(x) } else{ plot(x,y) } } plotxy(runif(10)) plotxy(runif(10),runif(10)) plotlabeled <- function(x,y,...) { plot(x,y,main="Special plot",...) } par(mfrow=c(1,2)) plotlabeled(runif(10),runif(10)) plotlabeled(runif(10),runif(10),pch=16,cex=2) mix <- "ERROR" threevals <- function(x) { minx <- min(x) medx <- median(x) maxx <- max(x) return(list(minx,medx,maxx)) } a <- 1 incrementa <- function(){a <- a + 1; print(a)} incrementa() if(runif(1)<.5) { print("YES") }else { if(runif(1)<.5){ print("NO") } else { print("REALLY NO") } } ##Wrapping a function q05 <- function(x){quantile(x,.05)} q05(runif(1000)) mat <- matrix(runif(100),4,25) ##a 4 row x 25 column matrix of random numbers apply(mat,1,q05) ##get the 5-percentile from each row: set.seed(111) a <- runif(10) b <- runif(10)+a c <- runif(10)+ b x2 <- data.frame(a=a,b=b,c=c) lapply(x2, function(x){ x * 2}) sapply(x2, function(x){ x * 2}) lapply(x2, function(x){mean(x)}) sapply(x2, function(x){mean(x)}) ##Conditional Branching if( 0 ) { print("This will never print") } if(1) { print("But this will") } choice <- sample(c(T,F),1) a <- NA b <- NA if(choice) { print("choice was true") a <- 1100 } else { print("choice was false") b <- 13200 } a b ## type = hist, scatter ## x <- exp(rnorm(500)/3) myplot <- function(data,type="scatter") { if(type=="scatter") { plot(data) } else if(type=="hist"){ hist(data) } else { warning(paste("Plot type",type, "Unknown")) } } myplot(x) myplot(x,"hist") myplot(x,"points") ##Chaining multiple if statements: test <- function() { choice <- sample(c(1,2,3,16,40,80),1) print(paste("choice was:",choice)) if(choice > 25 & choice < 90) { print("one") }else if(choice < 50) { print("two") }else if(choice==3){ print("three") }else{ warning("This code should never execute") } } test() test() test() test() test() test() #Alternatives to if statements x <- sample(c(1,2),1) ##The hard way: if(x==1) { y <- "one" } else { y <- "two"# } if(x==1) { y <- "one" } else { y <- "two"# } ##The easy way: y <- ifelse(x==1,"one","two") #Recoding vectors using ifelse x <- sample(c(1,2),10, replace=T) print(x) y <- ifelse(x==1,"one","two") y #pdf("c3f1.pdf",width=9,height=6) x <- exp(rnorm(10000)) #Create a fake RT distribution par(mfrow=c(1,2)) hist(x,breaks=0:70,main="RT Distribution") hist(ifelse(x>10,10,x),breaks=0:60,xlim=c(0,15), main="Truncated \nRT Distribution") #dev.off() x <- sample(1:5,1) print(x) switch(x, "one","two","three","four","five") #this breaks, because unlike ifelse, it will only process a single ##argument not a vector. if(0) { x <- sample(letters[1:5],2) print(x) switch(x, a="one",b="two",c="three",d="four",e="five") } #Iteration and Looping continue <- T i <- 1 while(continue) { x <- runif(1) print(paste(i,"--", x)) if(x>.99) { continue <- F } i <- i + 1 } ##or this: while(T) { x <- runif(1) print(paste(i,"--", x)) if(x>.99) { break } i <- i + 1 } print(i) ##or this: repeat { if(runif(1)<.1) { break } else { cat(".") } } #The for loop x <- rnorm(100) for(i in 1:100) { print(paste(i,x[i])) } x <- c("one","two","three","four","five") for(i in x) { print(i) } ##iterate a data frame: a <- runif(100) b <- runif(100)+a c <- runif(100)+ b x2 <- data.frame(a=a,b=b,c=c) for(i in x2) print(mean(i)) ##Iterate through subsets of a factor x2$group <- as.factor((x2$c>1) +(x2$c>2) + (x2$c>2.5)) par(mfrow=c(2,2)) for(i in levels(x2$group)) { tmp <- x2[x2$group==i,] plot(tmp$a,tmp$b,ylim=c(0,2),xlim=c(0,1)) } ##other loops: a <- runif(100) b <- runif(100)+a c <- runif(100)+ b x3 <- data.frame(a=a,b=b,c=c) lapply(x3,mean) sapply(x3,mean) ##Exercise: Functions mmm <- function(x) { c(min(x),median(x),max(x)) } ##See how these change when you have more samples: mmm(1/runif (1000)) mmm(1/runif (100)) data <- runif (100) + 1/rnorm (100) mmm(data) ##Exercise: Conditionals myplot <- function(x,type="scatter") { if(type=="scatter") { ##Plot a regular plot here plot(x) }else if(type=="histogram") { ##Plot a histogram here hist(x) }else{ warning("Unable to create specified plot type") } } myplot(x,"histogram") myplot(x,"scatter") ## Exercise: Conditionals # Write a new mean function that does not # return an error when given a factor. Rather, it returns # the modal (most common) value of that factor. Then use # that function in the lapply and sapply on x2. newmean <- function(x) { if(is.factor(x)) { return(sort(table(x))[length(table(x))]) } else { return(mean(x)) } } x2 <- data.frame(a=runif(100),b=runif(100),c= as.factor(sample(LETTERS,100,replace=T))) newmean <- function(data) { if(is.factor(data)) { tab <- table(data) names(tab)[which.max(tab)] } else { return(mean(data) ) } } newmean(x2$a) newmean(x2$c) lapply(x2,newmean) sapply(x2,newmean) ######################################### ##Exercise Looping: #Create a series of 1,000,000 letters of the alphabet using items <- sample(letters,1000000,replace=T) ##Write functions that will replace all `a' values with an `A', and b with a `B'. ##Write one that uses an \texttt{if} statement, on that uses \texttt{ifelse}, ##and one that uses \texttt{which}. ##Run the function inside a system.time() statement, and see which is the fastest. items <- sample(letters,1000000,replace=T) f1 <- function(items) { for(i in 1:length(items)) { if(items[i]=="a") items[i] <- "A" if(items[i] =="b") items[i] <- "B" } return(items) } f2 <- function(items) { return ( ifelse(items=="a","A", ifelse(items=="b","B",items))) } f3 <- function(items) { items[which(items=="a")] <- "A" items[which(items=="b")] <- "B" return (items) } system.time(f1(items )) system.time(f2(items)) system.time(f3(items)) x <- runif(1) * 20 - 10 if(x <0) { "negative" }else if(y<1){ "small" }else{ "big" } a <- sample(1:5) print(switch(a,{1+1;"one"},"two","three","four","five" )) df <- data.frame(sub=rep(1:10,each=100),x=1:1000,y=runif(1000 ))