## Code for Book: Applied Statistical Analysis in R for Psychology and Human factors
## (c) Shane T. Mueller, Ph.D.
## Michigan Technological University
## shanem@mtu.edu
##
## Textbook and videos available at http://pages.mtu.edu/~shanem/psy5210
##
##
## This file contains example software code relevant for Chapter 1 (Introduction)
## The code in this file may be used and shared freely with or without attribution.

## Some basic arithmetic:
3 + 3
9 * 6 * 3.45
9 + 6 * 3.45
(9 + 6) * 3.45
(9 + 6)^3.45
(9 + 6)^3.45 / 33.0
(9 + 6)^3.45 / 0  # division by zero yields Inf rather than an error

## Numbers and vectors
3 + 1
c(1, 3, 5)
1:20

## First graphics
par(mfcol = c(1, 3), las = 1)  # three panels side by side
x <- runif(100)
plot(x)
barplot(x)
hist(x)

## Second example.
par(mfrow = c(1, 1), las = 1)  # reset viewport
tmp <- runif(100) * .2
tmp2 <- tmp + 3
y <- x + tmp2
plot(x, y)
cor(x, y)

##############################
## Functions
## Store the value returned by a function in a variable, then display it.
xycorr <- cor(x, y)
xycorr
plot(x, y, pch = 16)

##################################
## Data arrays, frames, matrices:
m <- matrix(runif(100), 10, 10)
print(round(m, 3))

## change to a data frame:
n <- as.data.frame(m)
print(n)
## View() opens a spreadsheet-style viewer, so only call it when a
## person is driving the session (e.g. not under Rscript).
if (interactive()) {
  View(n)
}

## Accessing Sub-elements:
print(x[20])
print(m[1, ])   ## get first row
print(m[, 3])   ## get third column
print(m[1, 3])  ## get a specific element
print(n$V2)
print(n[, 2])

## Access by name and recoding:
lookup <- c(A = "Alfa", B = "Bravo", C = "Charlie", D = "Delta")
lookup["B"]
lookup[c("A", "A", "C", "B", "D")]

## add new data column:
n$new <- 43  # This puts 43 in each row
print(n)
n$new2 <- c(44, 45)  # this recycles the shorter vector to fill all 10 rows
print(n)
n$new3 <- 1:10

## Data types:
c(1, 3, 3, 44, 5)
typeof(5)
typeof(as.integer(5))
33.0
1.223
c(1, 3, 3, 44.3, 5)
typeof(33.0)

## character strings:
bb <- "HELLO"
c("one", "two", "three")
typeof("H")

## logicals:
a <- c(TRUE, FALSE)
typeof(TRUE)

## Factors:
factor1 <- as.factor(c("greg", "jan", "marsha", "bobby",
                       "tom", "cindy", "peter"))

## changing types:
bb <- as.factor(c(5.5, 1, 2, 3.2, 3.3))
as.character(bb)
as.numeric(bb)                # gives the integer level codes, not the values
as.numeric(as.character(bb))  # go through character to recover the values

## transforming between numbers and factors:
doses <- c(0, 2000, 50, 2000, 50, 2000, 0, 50, 50)
dosefactor <- as.factor(doses)
benefit <- c(5, 6, 20, 3, 18, 2, 4, 22, 17)
dosefactor

## simple filtering:
set.seed(1111)
x <- rnorm(100)
y <- x + runif(100, -.3, .3)
tmp <- (x > .5)  # logical filter: TRUE where x exceeds .5
x[tmp]
x[!tmp]

## Using filtering to make complex plots:
plot(x[tmp], y[tmp], col = "darkgreen", xlim = c(0, 1), ylim = c(0, 1))
points(x[!tmp], y[!tmp], col = "red", pch = 16)

## add some additional lines:
plot(x[tmp], y[tmp], col = "darkgreen", xlim = c(0, 1), ylim = c(0, 1),
     xaxt = "n", yaxt = "n", xlab = "", ylab = "")
points(x[!tmp], y[!tmp], col = "red", pch = 16)
axis(1, 0:10 / 10)
axis(2, 0:10 / 10, las = 1)
abline(0, 1, lty = 1)    # identity line (intercept 0, slope 1)
abline(.5, 0, lty = 3)   # horizontal line at y = .5
abline(v = .5, lty = 3)  # vertical line at x = .5

## Using filters to specify aspects of a plot:
## Adding 1 to a logical vector gives indices 1 (FALSE) and 2 (TRUE).
c("red", "darkgreen")[tmp + 1]
plot(x, y, pch = 18, col = c("red", "darkgreen")[tmp + 1])
plot(x, y, col = c("red", "darkgreen")[tmp + 1],
     pch = c(1, 16)[1 + (y > .5)])

##############################################################
## Exercises:
## Compute your height in inches, by multiplying your height in feet by 12 and adding the remainder
## Compute your height in meters, using the identity that 1 inch = .0254 meters.
## Compute the average height in meters of at least three people (one who is 5 foot 2, one
## who is 6 feet 5 inches, one who is 4 feet 8.5 inches).

## Suppose I were 5 feet, 9 inches tall.
12 * 5 + 9            ## height in inches
(12 * 5 + 9) * .0254  ## height in meters
((5 * 12 + 2) + (6 * 12 + 5) + (4 * 12 + 8.5)) / 3 * .0254

##############################################
## Exercise 2.
## Create z1, a set of 100 random numbers like we did above, but make it less highly correlated
## with x (try to aim for a correlation of .8).
x <- runif(100)
z1 <- runif(100) * .5 + x
cor(z1, x)
plot(x, z1)

x <- runif(100)
z1 <- runif(100) * 1 + 1.5 * x
cor(z1, x)
plot(z1, x)

## Create z2, a set of 100 random numbers, but make them negatively correlated with x.
## First attempt: subtracting independent noise from x still leaves z2
## positively correlated with x.
z2 <- x - runif(100) * .5
cor(z2, x)

## Second attempt: negating x makes the correlation negative.
z2 <- runif(100) * .5 - x
cor(z2, x)

## Compute the correlation between z1 and z2
cor(z1, z2)

##############################################
### Exercise
## Create a 10x20 matrix of uniform random numbers.
x <- matrix(runif(10 * 20), 10, 20)
x

##############################################
### Exercise
## The cex argument sets the size of the symbol. Use the distance from the origin to impact the size.

## The distance to the origin (0,0) can be computed using the Pythagoras theorem:
x <- runif(100)
y <- runif(100)
plot(x, y)
abline(0, 0)   # horizontal axis through the origin
abline(v = 0)  # vertical axis through the origin
distance <- sqrt(x^2 + y^2)

## We could use `distance` directly as the cex argument because the distance
## will always be less than sqrt(2):
plot(x, y, cex = distance)

## However, we want to do this with two thresholds. The simplest way to do that is to create a vector of three cex
## sizes to use: c(.5,1,3), and then a vector to select which ones.
choosepoint1 <- (distance > .4)
choosepoint2 <- (distance > .8)
## Summing the two logicals and adding 1 gives an index of 1, 2, or 3.
choosepoint <- choosepoint1 + choosepoint2 + 1
plot(x, y, cex = c(.5, 1, 3)[choosepoint])

## Use the functions plot, points, and lines to create 500 random normal, and highlight the outliers more than 2.5 units from the mean.
## First, let's create the numbers, plus an 'index' vector that we will use later.
nums <- rnorm(500)
index <- 1:500
## Now, plot the numbers with open circles:
plot(index, nums)
## Now, create a filter like we have done before, and overplot the ones we care about.
## Here, we filter both the index and the numbers with the filter. Also pch=16 plots in a solid point.
filter <- abs(nums) > 2.5
points(index[filter], nums[filter], col = "red", pch = 16)
## Finally, mark the +/- 2.5 thresholds:
segments(0, 2.5, 500, 2.5)
segments(0, -2.5, 500, -2.5)

##############################################
### Exercise
## Create an R Markdown document. In the document, incorporate the following: section header,
## section subheader, numeric bulleted lists, italic and bold text, R code that calculates the mean
## of four numbers, and R code that makes a barplot of those four numbers.

# The following file should address each of these.

# ---
# title: "R Markdown Example"
# author: "Shane Mueller"
# date: "08/31/2016"
# output: word_document
# ---
# # Top-level header
#
# ### Subsection title
# This is normal text. You can also do:
#
# * **bold**
# * *italic*
#
# Or:
# 1. First
# 2. Second
# 3. Third
#
# ## Display and execute code
# This will both display and execute the code:
# ```{r}
# (33+55+192+12)/4
# barplot(c(33,55,192,12))
# ```

## Issues/questions from class discussion forum

## basics of RStudio interface:
## * differences between run, source, source/echo/etc.
## * managing panes of rstudio
## breakpoints/debugging
##
#####
## more on factors:
x <- factor(sample(letters, 5))
x2 <- x[1:4]
x2
## compare these:
x3 <- as.factor(x2)
x4 <- factor(x2)
x5 <- factor(x, levels = letters)

## what will this do?
as.numeric(x2)
## what will this do?
as.numeric(x5)
str(x2)

## selecting/filtering
## we will cover this again next week:
tmp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE)
colors <- c("red", "darkgreen")
colors[c(1)]
colors[c(1, 1)]
colors[c(1, 2, 2, 1)]
colors[tmp]
as.numeric(tmp)
as.numeric(tmp) + 1
tmp + 1
colors[tmp + 1]

## filter by value
x <- runif(10)
x
x < .5
x[x < .5]