scoreme <- function(word) { lets <- strsplit(splus2R::upperCase(word),"")[[1]] data <- matrix(0, ncol = 4, nrow = length(lets)) for(i in 1:length(lets)) { index <- which(lets[i] == LETTERS) data[i,1] <- lf.table$freq[index] data[i,2] <- lf.table$points[index] data[i,3] <- lf.table$ntiles[index] } list(suminvfreq = sum(1/data[,1]), points = sum(data[,2]), meantiles = mean(data[,3]), length = length(lets)) } #Function Call horses <- scoreme("horses") horses #Detailed Word Analysis # These are individual metrics that're also available in 'Function Call' above. horses$suminvfreq horses$points horses$meantiles horses$length #Exercise 2 / Part 3 - Corpus Load # This chunk creates the cword corpus for data analysis. #Objects: # test: table containing rank frequency, word, and total frequency (in corpus). # Fields bound with end c/vector statement. #Table Creation test <- read.table(text='rank word frequency 1081 CUP 1441306 2310 FOUND 573305 5285 BUTTERFLY 171410 7371 brew 94904 11821 CUMBERSOME 39698 17331 useable 17790 18526 WHITTLE 15315 25416 SPINY 7207 27381 uppercase 5959 37281 halfnaked 2459 47381 bellhop 1106 57351 tetherball 425 7309 attic 2711 17311 tearful 542 27303 tailgate 198 37310 hydraulically 78 47309 unsparing 35 57309 embryogenesis 22 ',header=T)[,c(2,1,3)] ##reorder colums #Table Modification # Columns meantiles, suminvfreq, points, and length are added to the # text dataframe and set to null (NA). test$meantiles <- NA test$suminvfreq <- NA test$points <- NA test$length <- NA test scoreme("CUP") #test$meantiles <- scoreme("CUP") test for(i in 1:length(lets)) { index <- which(lets[i] == LETTERS) data[i,1] <- lf.table$freq[index] data[i,2] <- lf.table$points[index] data[i,3] <- lf.table$ntiles[index] } #The following creates intermingled distributions whose mean and variability are cor- # related. Thta is, as the mean increases, the standard deviation does as well. # Create a violin plot showing the distribution of the four conditions set.seed(100) conds <- sample(1:4,1000,replace=T) data <- (rnorm(1000,mean=10+conds*5,sd=conds+1)) vioplot(data[conds==1],data[conds==2],data[conds==3], data[conds==4]) boxplot ( len ~ dose * supp , data = ToothGrowth , col =( rep ( c ( " gold " ," grey20 " ) , each =3) ) , main = " Tooth Growth " , xlab = " Supplement and Dose " ) boxplot ( len ~ dose * supp , data = ToothGrowth , col =( rep ( c ( "gold" ,"grey20" ,"hotpink" ) ,2) ) , main = " Tooth Growth " , xlab = " Supplement and Dose " ) memscore <- (runif(200)<.8) + 0 subj <- factor(rep( (1:10),each=20)) cond <- factor(rep(rep(c("A","B"),each=10),10)) trial <- rep (1:10,20) data<- data.frame(subj,cond,memscore) for(i in levels(data$subj)) { tmp <- data[data$subj==i,] print(i) print(mean(data$memscore[data$subj==i])) } aggregate(memscore,list(subj=subj),mean) tapply(memscore,list(subj=subj),mean) aggregate(memscore,list(cond=cond,subj=subj),mean) x <- tapply(memscore,list(subj=subj,cond),mean) matplot(x) tapply(memscore,list(trial, subj,cond),mean) vals <- (sapply( ceiling(runif(10000)*10),function(x){rnorm(1,x*25,x)})) hist(vals,breaks=100) ?invisible setwd("~/Dropbox/courses/5210-2018/web-5210/psy5210/Projects/Chapter5") ## Pie charts considered harmful ##blech: pie(c(3,5,9),c("Dogs","Cats","Fish")) cols <- c("red","red3","orange","gold","yellow","white") ## what's worse than comparing category sizes between pie charts? Comparing them between two pie charts #pdf("c5-pets1.pdf",width=6,height=4) par(mfrow=c(1,2)) pie(c(3,5,9,3,5,9),c("Dogs","Cats","Fish","Gerbils","Horses","Rocks"),main="Pet Cemetery", col=cols) pie(c(5,5,2,3,3,1),c("Dogs","Cats","Fish","Gerbils","Horses","Rocks"),main="Pet Cremetorium", col=cols) ############################## ## 3D pie chart ## This section does not appear in the textbook, because ## pie charts ar bad, and 3D pie charts are terrible. library(plotrix) ############################## ## 3D pie chart ## This section does not appear in the textbook, because ## pie charts ar bad, and 3D pie charts are terrible. library(plotrix) pie<- c(2,4,6,8) pielabels<-c("we hate\n pies","we oppose\n pies", "we dont\n care", "we love pies") pie3D(pie) pie3D(pie,labels=pielabels) pie3D(pie,labels=pielabels,theta=.4) pie3D(pie,labels=pielabels,theta=pi/6,radius=.8) pie3D(pie,labels=pielabels,theta=pi/6,radius=.9,border="white") pie3D(pie,labels=pielabels,theta=pi/6,radius=.9,border="black",labelcol="black") pie3D(pie,labels=pielabels,theta=pi/6,radius=.9,border="black",labelcex=1.3) pie3D(pie,labels=pielabels,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.2) color<-c("white","grey50","gray70","red") pie3D(pie,labels=pielabels,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.4,col=color) pie3D(pie,labels=pielabels,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.4,col=c("white","grey50","gray70","grey90")) color12<-heat.colors(4) pie3D(pie,labels=pie,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.4,col=color12) labe<-c(pie/sum(pie)*100) pie3D(pie,labels=labe,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.4,col=color12) lab1<-paste(labe,"%") pie3D(pie,labels=lab1,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.4,col=color12) lab2<-paste(labe,"%",sep="") pie3D(pie,labels=lab2,theta=pi/6,radius=.9,border="black",labelcex=1.3,shade=.4,col=color12,main="Pie3D") legend(-.9,1.2,pielabels,cex=.6,fill=color12) #pdf("c5-pets2.pdf",width=7,height=7) ##stacked bar compares area, but is easier to judge: data <- cbind(c(3,5,9,3,5,9),c(5,6,3,4,3,4)) rownames(data) <- c("Dogs","Cats","Fish","Gerbils","Horses","Rocks") colnames(data) <- c("Cemetery","Cremetorium") data data <- cbind(c(3,5,9,3,5,9),c(5,6,3,4,3,4)) rownames(data) <- c("Dogs","Cats","Fish","Gerbils","Horses","Rocks") colnames(data) <- c("Cemetery","Cremetorium") data xs <- barplot(data,col=cols, ylim=c(0,40), main="Pet Burial Methods", legend=T, args.legend=list(x=2,y=40,bty="n")) xs <- barplot(data,col=cols, ylim=c(0,40), main="Pet Burial Methods", legend=T, args.legend=list(x=2,y=40,bty="n")) xs <- barplot(data,col=cols, ylim=c(0,40), main="Pet Burial Methods", legend=F, args.legend=list(x=2,y=40,bty="n")) yvals <- apply(data,2,cumsum) yv2 <- (rbind(yvals,0)+rbind(0,yvals))[1:6,]/2 text(xs[1],yv2[,1],rownames(yvals),cex=.8) text(xs[2],yv2[,2],rownames(yvals),cex=.8) matplot(1:6,data,pch=16,type="o") data matplot(data,1:6,pch=16,xlim=c(0,10),ylim=c(0,8), yaxt="n",xaxt="n", cex=1.5, xlab="Number of pets",ylab="", type="o",line=3) segments(0,1:6,10,1:6,lty=3) axis(2,1:6,c("Dogs","Cats","Fish", "Gerbils","Horses","Rocks"),las=1) legend(1,8,c("Pet cemetery","Pet cremotorium"), lty=1:2,pch=16,col=1:2,bty="n") title(ylab="Pet type",line=4) axis(1,0:10) dotchart(data,labels=c("Dogs","Cats","Figs","Gerbils","Horses","Rocks")) x <- read.table("aflcio-votes.txt") par(mfrow=c(2,2)) votes <- x[,3:21] senator <- paste(x$V1,x$V2) votes2 <- rowSums(votes=="R") ##Recode for voting ???Right??? dotchart(votes2) dotchart(votes2,labels=senator) dotchart(votes2,labels=senator,groups=x$V2) dotchart(votes2,labels=senator,groups=x$V2,cex=.5) votes x votes <- x[,3:21] senator <- paste(x$V1,x$V2) votes2 <- rowSums(votes=="R") ##Recode for voting ???Right??? votes2 dotchart(votes2) dotchart(votes2,labels=senator) dotchart(votes2,labels=senator,groups=x$V2) dotchart(votes2,labels=senator,groups=x$V2,cex=.5) set.seed(100) x <- rep(1:5,each=25) y <- x *3 + sqrt(x)*rnorm(25*5)*8 + runif(25*5)*3 se <- function(x){sd(x)/sqrt(length(x))} means <- aggregate(y,list(x),mean) sds <- aggregate(y,list(x),sd) ses <- aggregate(y,list(x),se) ##plot the means: plot(x,y,pch=16,cex=.8,col="darkgrey",xlim=c(0,5)) ##plot the means: plot(x,y,pch=16,cex=.8,col="darkgrey",xlim=c(0,5)) points(1:nrow(means),means$x,cex=1.2,col="red",pch=16) arrows(1,means[1,]$x+ses[1,]$x,1,means[1,]$x- ses[1,]$x,code=3,angle=90,length=.1,lwd=2) arrows(1,means[1,]$x+sds[1,]$x,1,means[1,]$x- sds[1,]$x,code=3,angle=90,col="red",lwd=2) ##In one fell swoop: plot(x,y,pch=16,cex=.8,col="darkgrey",xlim=c(0,5)) points(1:nrow(means),means$x,cex=1.2,col="red",pch=16) arrows(1:nrow(means),means$x+ses$x,1:nrow(means),means$x- ses$x,code=3,angle=90,length=.1,lwd=2,col="blue") ##more of a box-plot with +/- 1 sd: diff <- .2 plot(x,y,pch=16,cex=.2,type="n",xlim=c(0,nrow(means)+1),xaxt="n") rect(1:nrow(means)-diff,means$x-sds$x,1:nrow(means)+diff,means$x+sds$x,col="grey") points(x,y,cex=.8,pch=16) segments(1:nrow(means)-diff,means$x,1:nrow(means)+diff,means$x,lwd=3) axis(1,1:nrow(means)) ############################ ## Some built-in libraries #Run these or install via RStudio interface if necessary: #install.packages("plotrix") #install.packages(c("caTools","gplots")) library(gplots) library(plotrix) ##notice that they both have a function called plotCI: ??plotCI plotrix::plotCI(1:nrow(means),means$x,ses$x,main="plotrix plotCI") # like this one better gplots::plotCI(1:nrow(means),means$x,ses$x,main="gplots plotCI") # this one not as good plotrix::plotCI(1:nrow(means),means$x,ses$x,add=F,lwd=2,cex=1.2,sfrac=.04,col=1:4, pch=16, main="plotrix plotCI with additions ") ##gplots needs to be loaded: plotmeans(y~x,main="Gplots plotmeans") #give it the original data! #pdf("c5-barerror.pdf",width=9,height=5) par(mfrow=c(1,3)) ##This doesn't quite work: barplot(means$x,col="navy") plotCI(1:nrow(means),means$x,ses$x,add=T) ##This doesn't quite work: barplot(means$x,col="navy") plotCI(1:nrow(means),means$x,ses$x,add=T) ##Try this: newx <-barplot(means$x,names=letters[1:nrow(means)],col="navy",ylim=c(0,max(means$x) + max(ses$x))) newx #I don't like this: plotrix::plotCI(newx,means$x,ses$x,add=T,lwd=2) ##try #3. Add two versions of the error bars with a white outline newx <-barplot(means$x,names=letters[1:nrow(means)],col="navy",ylim=c(0,max(means$x) + max(ses$x))) gplots::plotCI(newx,means$x,ses$x,add=T,lwd=2.5,type="n",gap=0,col="white") gplots::plotCI(newx,means$x,ses$x,add=T,lwd=1.5,type="n",gap=0,col="black") plotMeansandSE <- function(x,conds,main="",xlab="",ylab="", col="grey20",cex=2) { require(gplots) conds <- factor(conds) agg <- aggregate(x,list(conds),mean) agg$sd <- aggregate(x,list(conds),sd)$x agg$n <- aggregate(x,list(conds), length)$x agg$se <- agg$sd/sqrt(agg$n) xvals <- as.numeric(agg$Group.1) xs <- barplot(agg$x,names=agg$Group.1, ylim=c(0,max(agg$x+agg$se*2)), col=col, main=main,xlab=xlab,ylab=ylab) gplots::plotCI(xs,agg$x,agg$se,add=T,lwd=.5,type="n",gap=0,col="black") } plotMeansandSE <- function(x,conds,main="",xlab="",ylab="", col="grey20",cex=2) { require(gplots) conds <- factor(conds) agg <- aggregate(x,list(conds),mean) agg$sd <- aggregate(x,list(conds),sd)$x agg$n <- aggregate(x,list(conds), length)$x agg$se <- agg$sd/sqrt(agg$n) xvals <- as.numeric(agg$Group.1) xs <- barplot(agg$x,names=agg$Group.1, ylim=c(0,max(agg$x+agg$se*2)), col=col, main=main,xlab=xlab,ylab=ylab) gplots::plotCI(xs,agg$x,agg$se,add=T,lwd=.5,type="n",gap=0,col="black") } set.seed(100) conds <- sample(1:4,100,replace=T) data <- (rnorm(100,mean=1+conds*5,sd=conds+1)) plotMeansandSE(data,conds,main="Stinky",col="darkgreen", xlab="Condition",ylab="Observed ability") plotMeans plotmeans ############################################################## ## Images in plots #Install if necessary: #install.packages(c("jpeg","pixmap")) library(jpeg) library(pixmap) bradys <- c("marsha","carol","greg","jan","alice", "peter","brady","cindy","mike","bobby") images <- c() for(i in bradys) { ##This comes from the jpeg library img <- readJPEG(paste("images/",i,".jpg",sep="")) ##Convert to a pixmap using from the pixmap library: img2<-pixmapRGB(img) images <- c(images,img2) } ##randomize the order ord <- order(runif(length(bradys))) par(mfrow=c(1,1)) plot(0,type="n",xlim=c(0,800),ylim=c(0,600),xaxt="n",yaxt="n", xlab="",ylab="",bty="n") rect(0,0,800,600,col="black") x <- 0 images for(i in images[ord]) { y <- round(runif(1)*500) addlogo(i,c(x,x+100),c(y,y+60),asp=1) ##addlogo is in pixmaps library x <- x + 70 } v1 <- tapply(OrchardSprays$decrease, list( row=OrchardSprays$rowpos, treatment=OrchardSprays$treatment), mean) v1 library(vioplot) library(violinmplot) vioplot(v1) vioplot(v1[,1]) vioplot(v1[,1],v1[,2],v1[,3],v1[,4],v1[,5],v1[,6],v1[,7],v1[,8],col="gold", names=LETTERS[1:8],ylim=c(0,200)) title(main="OrchardSprays violin plot with point overlay: vioplot", ylab="Decrease in bees",xlab="Treatment") matplot(t(v1),add=T,pch=1,col="grey30",cex=1) vioplot(v1[,1],v1[,2],v1[,3],v1[,4],v1[,5],v1[,6],v1[,7],v1[,8],col="gold", names=LETTERS[1:8],h=2,ylim=c(0,200)) title(main="OrchardSprays violin plot: vioplot\nsmoothing kernel h=2", ylab="Decrease in bees",xlab="Treatment") par(mfrow=c(1,1)) violinmplot(decrease~treatment,data=OrchardSprays,horizontal=F,violin.col="gold", ylim=c(0,200), main="Violin plot of OrchardSprays treatments using violinmplot") conds <- sample (1:4 ,1000 , replace = T ) data <- ( rnorm (1000 , mean = conds * 5 , sd = conds +1) ) set.seed(1000) levs <-c("Don't Know","Strongly Disagree", "Disagree","Neutral", "Agree", "Strongly Agree") gender=as.factor(sample(c("Men","Women"),1000,replace=T)) vals1 <- pmax(0,floor(rnorm(1000,mean=2.5,sd=.8) - as.numeric(gender)*.5))+1 vals1[sample(1000,20)]<- 0 ##ad some don't know responses resps <- factor(levs[vals1+1],levels=levs) data <- data.frame(gender=gender, value=vals1, resps) data[1:5,] #pdf("c5-likertviolin1.pdf",width=9,height=5) par(mfrow=c(1,3)) vioplot(data$value[data$gender=="Men"], data$value[data$gender=="Women"],col="gold") vioplot(data$value[data$gender=="Men"], data$value[data$gender=="Women"],h=1.2,col="gold") vioplot(data$value[data$gender=="Men"], data$value[data$gender=="Women"],h=.2,col="gold") vplot2 <- function (x, ..., range = 1.5, h = NULL, ylim = NULL,ylabs=NULL, names = NULL, title="", horizontal = FALSE, col = "gold", border = "black", lty = 1, lwd = 1, rectCol = "black", colMed = "white", pchMed = 19, at, add = FALSE, wex = 1, drawRect = TRUE,adjust=0, crit=3, ignore=c(0) ) { datas <- list(x, ...) n <- length(datas) if (missing(at)) at <- 1:n upper <- vector(mode = "numeric", length = n) lower <- vector(mode = "numeric", length = n) q1 <- vector(mode = "numeric", length = n) q3 <- vector(mode = "numeric", length = n) med <- vector(mode = "numeric", length = n) means <- vector(mode="numeric",length=n) base <- vector(mode = "list", length = n) height <- vector(mode = "list", length = n) Ns <- vector(mode="numeric",length=n) pcts <- vector(mode="numeric",length=n) baserange <- c(Inf, -Inf) args <- list(display = "none") if (!(is.null(h))) args <- c(args, h = h) data.max <- min(datas[[1]],na.rm=T) data.min <- max(datas[[1]],na.rm=T) for (i in 1:n) { data <- datas[[i]] data.min <- min(data,data.min,na.rm=T) data.max <- max(data,data.max,na.rm=T) q1[i] <- quantile(data, 0.25,na.rm=T) q3[i] <- quantile(data, 0.75,na.rm=T) med[i] <- median(data,na.rm=T) data1 <- data[!is.element(data,ignore) & !is.na(data)] Ns[i] <- sum(!is.na(data)) means[i] <- mean(data1) pcts[i] <- round(100*mean((data1+adjust)>crit)) # pcts[i] <- round(100*mean((data1+adjust)> crit)) iqd <- q3[i] - q1[i] upper[i] <- min(q3[i] + range * iqd, data.max) lower[i] <- max(q1[i] - range * iqd, data.min) est.xlim <- c(min(lower[i], data.min), max(upper[i], data.max)) # smout <- do.call("sm.density", c(list(data, xlim = est.xlim), # args)) tab <- tapply(data,list(factor(data,levels=data.min:data.max)),length) tab[is.na(tab)] <- 0 hscale <- .4/max(tab)*wex ##this scales by the maximum; ##it might be reasonable to scale by the sum. #hscale <- .8/sum(tab)*wex #this scales by the total number, #so the 'volume' is roughly the same across violins. #This produces skinnier curves, because you need to #scale for the lopsided questions ##hscale <- 0.4/max(smout$estimate) * wex ##base[[i]] <- smout$eval.points base[[i]] <- as.numeric(names(tab)) ##height[[i]] <- smout$estimate * hscale height[[i]] <- tab * hscale t <- range(base[[i]]) baserange[1] <- min(baserange[1], t[1]) baserange[2] <- max(baserange[2], t[2]) } if (!add) { xlim <- if (n == 1) at + c(-0.5, 0.5) else range(at) + min(diff(at))/2 * c(-1, 1) if (is.null(ylim)) { ylim <- baserange } } if (is.null(names)) { label <- 1:n } else { label <- names } boxwidth <- 0.05 * wex if (!add) plot.new() if (!add) { plot.window(xlim = xlim, ylim = ylim) title(title,cex.main=.8,col="darkgrey") if(is.null(ylabs)) { axis(2) }else{ ylength <- length(ylabs) yvals <- seq(data.max-ylength+1,data.max,1) axis(2,yvals,ylabs,las=1) } axis(1, at = at, label = label,tick=F,line=-1,cex.axis=.8) # box(col="grey50") for(i in yvals)abline(i,0,lty=3,col="grey20",lwd=.8) abline( crit-adjust,0,lty=3,col="black",lwd=1) } for (i in 1:n) { polygon(c(at[i] - height[[i]], rev(at[i] + height[[i]])), c(base[[i]], rev(base[[i]])), col = col, border = border, lty = lty, lwd = lwd) if (drawRect) { segments(at[i]-.2, means[i],at[i]+.2, means[i], lwd = lwd*1.5, lty = lty) #rect(at[i] - boxwidth/2, q1[i], at[i] + boxwidth/2, q3[i], col = rectCol) #points(at[i], med[i], pch = pchMed, col = colMed) ##text means: text(at[i],data.max+.6,paste(expression(mu),"=",(round(means[i],2)+adjust),sep=""),cex=.8, col="grey40") ##text counts text(at[i],data.min-.5,paste("N=",Ns[i],sep="" ),cex=.8,col="grey40") ##text of % positive text(at[i],data.max+.2,paste( pcts[i],"%>",crit,sep=""),cex=.8,col="grey40") } } invisible(list(upper = upper, lower = lower, median = med, q1 = q1, q3 = q3, means=(means+adjust), pct=pcts)) } #pdf("c5-likertviolin2.pdf",width=9,height=5) par(mar=c(2,8,3,1),mfrow=c(1,1)) vplot2(data$value, data$value[data$gender=="Men"], data$value[data$gender=="Women"], at=c(1,3,4), names=c("Overall","Men","Women"), drawRect = T, ylim=c(-1,7), ylabs=levs, ignore=c(0), title="Support for Issue among Constituency" )