# Session script, part 1: vector basics, z-score standardization demos, and
# histograms annotated with the arithmetic, harmonic and geometric means.
#
# Every statement sits on its own line so that a trailing `#` comment can no
# longer swallow the code that used to follow it on the same physical line.

# ---- Helpers ----------------------------------------------------------------

# Subtitle text giving the mean and sd of `values`, each rounded to 2 digits.
mean_sd_label <- function(values) {
  paste("mean=", round(mean(values), 2), "sd=", round(sd(values), 2))
}

# Center `values` on their mean and scale by their sd (z-scores).
standardize <- function(values) {
  (values - mean(values)) / sd(values)
}

# Harmonic mean: reciprocal of the mean of the reciprocals.
harmonic_mean <- function(values) {
  1 / mean(1 / values)
}

# Geometric mean, computed on the log scale.
geometric_mean <- function(values) {
  exp(mean(log(values)))
}

# Add a vertical line at `at` to the current plot and print `label` at height
# `y_mid` on a white patch (0.2 wide, 20 tall) so the text hides the bars
# behind it. `border` is forwarded to rect().
mark_mean <- function(at, label, line_col, y_mid, border = NA) {
  abline(v = at, lwd = 3, col = line_col)
  rect(at - .1, y_mid - 10, at + .1, y_mid + 10, col = "white", border = border)
  text(at, y_mid, label)
}

# Annotate the current histogram with all three means, in the colors and at
# the label heights used throughout this section.
mark_three_means <- function(arith, harm, geom) {
  mark_mean(arith, "Arithmetic\nmean", "blue", 70)
  mark_mean(harm, "Harmonic\nmean", "red", 70)
  mark_mean(geom, "Geometric\nmean", "orange", 90)
}

# ---- Vectors, random draws and pairwise relationships -----------------------

x <- c(1, 2, 3, 4, 5, 6, 7)  # combine seven numbers into a numeric vector
y <- mean(x)                 # arithmetic mean of x
x2 <- x - y                  # x with its mean subtracted

a1 <- runif(100)             # 100 uniform random draws (default range 0 to 1)
a2 <- rnorm(100)             # 100 normal random draws (default mean 0, sd 1)
a3 <- a1 * a2                # element-wise product of a1 and a2
a.tab <- cbind(a1, a2, a3)   # matrix with a1, a2 and a3 as its columns
pairs(a.tab)                 # scatterplots of every pair of columns
cor(a.tab)                   # correlation matrix of the columns

# ---- Standardization: before / after ----------------------------------------
# Each example shows the raw values, then their z-scores, side by side.

x1 <- (1:20)
par(mfrow = c(1, 2))
plot(x1, main = "before", sub = mean_sd_label(x1), cex = .5, col = "navy")
x1.norm <- standardize(x1)
plot(x1.norm, main = "after", sub = mean_sd_label(x1.norm),
     cex = .5, col = "navy")
mean(x1.norm)  # expected: 0
sd(x1.norm)    # expected: 1

x2 <- runif(100) + rnorm(100) * 3
hist(x2, main = "before", sub = mean_sd_label(x2), cex = .5, col = "navy")
x2.norm <- standardize(x2)
hist(x2.norm, main = "after", sub = mean_sd_label(x2.norm),
     cex = .5, col = "navy")
mean(x2.norm)  # expected: almost 0
sd(x2.norm)    # expected: 1

x3 <- seq(1, 100, 3)
plot(x3, main = "before", sub = mean_sd_label(x3), cex = .5, col = "navy")
x3.norm <- standardize(x3)
plot(x3.norm, main = "after", sub = mean_sd_label(x3.norm),
     cex = .5, col = "navy")
mean(x3.norm)  # expected: 0
sd(x3.norm)    # expected: 1

x4 <- c(runif(10), 300)  # ten small values plus one extreme value
hist(x4, main = "before", sub = mean_sd_label(x4), cex = .5, col = "navy")
x4.norm <- standardize(x4)
hist(x4.norm, main = "after", sub = mean_sd_label(x4.norm),
     cex = .5, col = "navy")
mean(x4.norm)  # expected: almost 0
sd(x4.norm)    # expected: 1

x5 <- 1 / (runif(50) * 3)
plot(x5, main = "before", sub = mean_sd_label(x5), cex = .5, col = "navy")
x5.norm <- standardize(x5)
plot(x5.norm, main = "after", sub = mean_sd_label(x5.norm),
     cex = .5, col = "navy")
mean(x5.norm)  # expected: almost 0
sd(x5.norm)    # expected: 1

a.tab

# ---- Assignment copies values ------------------------------------------------

a <- c(1, 2, 3, 4, 5)
b <- a
b[3] <- 10  # changes b only
a
b

# ---- Three means of 1:20 -----------------------------------------------------

x <- x1
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)

# The same annotated histogram was drawn three times in the session; only the
# upper y-limit differs. With an upper limit of 20 the labels (drawn at
# heights 70 and 90) lie above the plotted range.
for (y_top in c(120, 120, 20)) {
  hist(x, breaks = 20, ylim = c(0, y_top), col = "gold")
  mark_three_means(x.mean, x.hmean, x.gmean)
}

# ---- Re-run of the opening example (fresh random draws) ---------------------

x <- c(1, 2, 3, 4, 5, 6, 7)
y <- mean(x)
x2 <- x - y
a1 <- runif(100)
a2 <- rnorm(100)
a3 <- a1 * a2
a.tab <- cbind(a1, a2, a3)
pairs(a.tab)
cor(a.tab)

a <- 100
b <- a
a <- 200  # b was assigned before this line and is not reassigned
a <- c(1, 2, 3, 4, 5)
b <- a
b[3] <- 10

# ---- Five samples with different shapes -------------------------------------

x1 <- runif(1000)
x2 <- runif(1000) * 2
x3 <- runif(1000) + 2
x4 <- 5 / (runif(1000) + .04)
x5 <- exp(rnorm(1000))

x <- x1
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)
for (y_top in c(20, 120)) {
  hist(x, breaks = 20, ylim = c(0, y_top), col = "gold")
  mark_three_means(x.mean, x.hmean, x.gmean)
}

# ---- Building the annotation step by step -----------------------------------

hist(x, ylim = c(0, 120), col = "gold")

# 50 bins with the arithmetic-mean line (drawn three times in the session).
for (attempt in seq_len(3)) {
  hist(x, breaks = 50, ylim = c(0, 120), col = "gold")
  abline(v = x.mean, lwd = 3, col = "blue")
}

# Same plot with the label printed directly on the bars, without the white
# background patch (drawn twice in the session).
for (attempt in seq_len(2)) {
  hist(x, breaks = 50, ylim = c(0, 120), col = "gold")
  abline(v = x.mean, lwd = 3, col = "blue")
  text(x.mean, 70, "Arithmetic\nmean")
}
# Session script, part 2: experiments with histogram breaks and mean labels,
# annotated histograms for the five samples, a second pass through the
# standardization demos, and a few notes on round() and paste().
#
# Uses `x`, `x.mean` and `x1` ... `x5` as left by the preceding part of the
# file. The helpers are defined here so this section can be read and run
# without looking anything up.

# ---- Helpers ----------------------------------------------------------------

# Subtitle text giving the mean and sd of `values`, each rounded to 2 digits.
mean_sd_label <- function(values) {
  paste("mean=", round(mean(values), 2), "sd=", round(sd(values), 2))
}

# Center `values` on their mean and scale by their sd (z-scores).
standardize <- function(values) {
  (values - mean(values)) / sd(values)
}

# Harmonic mean: reciprocal of the mean of the reciprocals.
harmonic_mean <- function(values) {
  1 / mean(1 / values)
}

# Geometric mean, computed on the log scale.
geometric_mean <- function(values) {
  exp(mean(log(values)))
}

# Add a vertical line at `at` to the current plot and print `label` at height
# `y_mid` on a white patch (0.2 wide, 20 tall). `border` is forwarded to
# rect().
mark_mean <- function(at, label, line_col, y_mid, border = NA) {
  abline(v = at, lwd = 3, col = line_col)
  rect(at - .1, y_mid - 10, at + .1, y_mid + 10, col = "white", border = border)
  text(at, y_mid, label)
}

# Add a vertical line at `at`, print `label` to the right of
# (`label_x`, `label_y`), and join the label to the line with a horizontal
# segment in the same color. Extra arguments (e.g. `lty`) go to abline().
mark_mean_with_leader <- function(at, label, line_col, label_x, label_y, ...) {
  abline(v = at, lwd = 3, col = line_col, ...)
  text(label_x, label_y, label, pos = 4)
  segments(label_x, label_y, at, label_y, col = line_col, lwd = 2)
}

# ---- Explicit break points ---------------------------------------------------
# Two attempts from the session were dropped because they stop the script:
#   hist(x, breaks = c(1, 5, 22), ...)  - x was assigned from runif() draws
#                                         earlier in the file, so these breaks
#                                         do not span the data and hist() fails
#   hist(x, breaks = 0:5/5,             - unterminated call
# The completed retry follows.

hist(x, breaks = (0:5) / 5, ylim = c(0, 120), col = "gold")
abline(v = x.mean, lwd = 3, col = "blue")

# ---- Building the label step by step ----------------------------------------

# Label printed directly on the bars (drawn twice in the session).
for (attempt in seq_len(2)) {
  hist(x, breaks = 50, ylim = c(0, 120), col = "gold")
  abline(v = x.mean, lwd = 3, col = "blue")
  text(x.mean, 70, "Arithmetic\nmean")
}

# White background patch alone, no text yet.
hist(x, breaks = 50, ylim = c(0, 120), col = "gold")
abline(v = x.mean, lwd = 3, col = "blue")
rect(x.mean - .1, 60, x.mean + .1, 80, col = "white", border = NA)

# Patch plus text, once for each `border` value tried in the session.
for (patch_border in c(TRUE, NA, FALSE)) {
  hist(x, breaks = 50, ylim = c(0, 120), col = "gold")
  mark_mean(x.mean, "Arithmetic\nmean", "blue", 70, border = patch_border)
}

# Kept from the session: drawing the patch again paints over the label on the
# plot above.
rect(x.mean - .1, 60, x.mean + .1, 80, col = "white", border = FALSE)

# ---- Annotated histograms for the five samples ------------------------------

x <- x1
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)
hist(x, breaks = 50, ylim = c(0, 120), col = "gold")
mark_mean(x.mean, "Arithmetic\nmean", "blue", 70, border = FALSE)
mark_mean(x.hmean, "Harmonic\nmean", "red", 70)
mark_mean(x.gmean, "Geometric\nmean", "orange", 90)

x <- x2
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)
hist(x, breaks = 20, ylim = c(0, 120), col = "gold")
mark_mean(x.mean, "Arithmetic\nmean", "blue", 70)
mark_mean(x.hmean, "Harmonic\nmean", "red", 70)
mark_mean(x.gmean, "Geometric\nmean", "orange", 90)

# For x3 the arithmetic and harmonic labels are placed to the right and left
# of their lines (pos = 4 / pos = 2) instead of on a patch.
x <- x3
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)
hist(x, breaks = 20, ylim = c(0, 120), col = "gold")
abline(v = x.mean, lwd = 3, col = "blue")
text(x.mean, 90, "Arithmetic\nmean", pos = 4)
abline(v = x.hmean, lwd = 3, col = "red")
text(x.hmean, 90, "Harmonic\nmean", pos = 2)
mark_mean(x.gmean, "Geometric\nmean", "orange", 70)

# For x4 and x5 the labels sit at a fixed x position and are joined to their
# lines by horizontal segments.
x <- x4
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)
hist(x, breaks = 20, ylim = c(0, 1000), col = "gold")
mark_mean_with_leader(x.mean, "Arithmetic mean", "navy", 20, 900)
mark_mean_with_leader(x.hmean, "Harmonic mean", "red", 20, 700)
mark_mean_with_leader(x.gmean, "Geometric mean", "orange", 20, 800)

# Same plot again, with the harmonic-mean line drawn using lty = 3.
hist(x, breaks = 20, ylim = c(0, 1000), col = "gold")
mark_mean_with_leader(x.mean, "Arithmetic mean", "navy", 20, 900)
mark_mean_with_leader(x.hmean, "Harmonic mean", "red", 20, 700, lty = 3)
mark_mean_with_leader(x.gmean, "Geometric mean", "orange", 20, 800)

x <- x5
x.mean <- mean(x)
x.hmean <- harmonic_mean(x)
x.gmean <- geometric_mean(x)
hist(x, breaks = 40, ylim = c(0, 400), col = "gold")
mark_mean_with_leader(x.mean, "Arithmetic mean", "navy", 5, 360)
mark_mean_with_leader(x.hmean, "Harmonic mean", "red", 5, 300)
mark_mean_with_leader(x.gmean, "Geometric mean", "orange", 5, 330)

# ---- Standardization: before / after (second pass) --------------------------
# Note that x1 ... x5 are overwritten here with new, smaller examples.

x1 <- (1:20)
par(mfrow = c(1, 2))
plot(x1, main = "before", sub = mean_sd_label(x1), cex = .5, col = "navy")
x1.norm <- standardize(x1)
plot(x1.norm, main = "after", sub = mean_sd_label(x1.norm),
     cex = .5, col = "navy")
mean(x1.norm)  # expected: 0
sd(x1.norm)    # expected: 1

x2 <- runif(100) + rnorm(100) * 3
hist(x2, main = "before", sub = mean_sd_label(x2), cex = .5, col = "navy")
x2.norm <- standardize(x2)
hist(x2.norm, main = "after", sub = mean_sd_label(x2.norm),
     cex = .5, col = "navy")
mean(x2.norm)  # expected: almost 0
sd(x2.norm)    # expected: 1

x3 <- seq(1, 100, 3)
plot(x3, main = "before", sub = mean_sd_label(x3), cex = .5, col = "navy")
x3.norm <- standardize(x3)
plot(x3.norm, main = "after", sub = mean_sd_label(x3.norm),
     cex = .5, col = "navy")
mean(x3.norm)  # expected: 0
sd(x3.norm)    # expected: 1

x4 <- c(runif(10), 300)  # ten small values plus one extreme value
hist(x4, main = "before", sub = mean_sd_label(x4), cex = .5, col = "navy")
x4.norm <- standardize(x4)
hist(x4.norm, main = "after", sub = mean_sd_label(x4.norm),
     cex = .5, col = "navy")
mean(x4.norm)  # expected: almost 0
sd(x4.norm)    # expected: 1

x5 <- 1 / (runif(50) * 3)
plot(x5, main = "before", sub = mean_sd_label(x5), cex = .5, col = "navy")
x5.norm <- standardize(x5)
plot(x5.norm, main = "after", sub = mean_sd_label(x5.norm),
     cex = .5, col = "navy")
mean(x5.norm)  # expected: almost 0
sd(x5.norm)    # expected: 1

# ---- How the subtitle text is assembled -------------------------------------

x1
mean(x1)  # the session typed `mean.x1`, which is not a defined object
x1
mean(x2)
round(mean(x2), 4)
paste("mean=", round(mean(x2), 4))             # default separator: a space
paste("mean=", round(mean(x2), 4), sep = "|")
paste0("mean=", round(mean(x2), 4))            # no separator

# ---- round() with positive and negative digits ------------------------------

1525325262
round(1525325262, 3)
round(1525325262.252526262, 3)
round(2.252526262, 3)
round(2.252526262, -3)          # negative digits round to a power of ten
round(252626262.252526262, -3)
round(1525325262.252526262, 3) - 1525325262

# The setwd() call that closed this section was dropped: the next statement in
# the file sets the same working directory again.
# Session script, part 3: reading the chapter data file, then building and
# indexing a small voter-survey data frame.

# ---- Chapter data file -------------------------------------------------------

chapter_dir <- "~/Dropbox/courses/5210-2024/web-5210/psy5210/Projects/Chapter2"
c5data_path <- file.path(chapter_dir, "c5data.txt")

# The session changed directory three times and ended in `chapter_dir`; one
# call is enough. The working directory is still changed because code later in
# the file may read files by relative path (not visible from here).
if (dir.exists(chapter_dir)) {
  setwd(chapter_dir)
} else {
  warning("Chapter directory not found: ", chapter_dir, call. = FALSE)
}

# Read the file by full path. If it is missing, warn and carry on: the survey
# examples below do not use it.
if (file.exists(c5data_path)) {
  data <- read.table(c5data_path)
  c5data <- read.table(c5data_path, quote = "\"", comment.char = "")
  if (interactive()) {
    View(c5data)  # spreadsheet-style viewer; only useful in a live session
  }
} else {
  warning("Data file not found: ", c5data_path, call. = FALSE)
}

# ---- Create a voter database -------------------------------------------------

party <- c("R", "R", "D", "R", "R", "D", "D", "D", "R", "R", "D")
gender <- c("M", "M", "F", "F", "F", "F", "M", "M", "F", "M", "M")
vote <- c("A", "B", "A", "A", "A", "B", "A", "A", "B", "B", "A")
survey <- data.frame(party, gender, vote)
survey

# ---- Selecting a column and counting its values -----------------------------

table(survey[, 1])
(survey[, 1])
table(survey[, 1])
survey[1]        # single bracket with one index: a one-column data frame
survey["party"]  # the same column selected by name
survey$party     # the column itself, as a vector
table(survey$party)
table(survey$party, survey$gender)
table(survey$party, survey$gender, survey$vote)

# ---- Indexing a three-way table ----------------------------------------------

# Dimensions are party x gender x vote.
tmp <- table(survey$party, survey$gender, survey$vote)
tmp
tmp[1, 1, 1]
tmp[1, 2, 1]
tmp[1, , 1]
tmp
survey

# The session rebuilt the same data frame at this point.
party <- c("R", "R", "D", "R", "R", "D", "D", "D", "R", "R", "D")
gender <- c("M", "M", "F", "F", "F", "F", "M", "M", "F", "M", "M")
vote <- c("A", "B", "A", "A", "A", "B", "A", "A", "B", "B", "A")
survey <- data.frame(party, gender, vote)

tmp
tmp[, , "A"]  # by level name ...
tmp[, , 1]    # ... or by position
tmp[, , 2]
tmp[, , "B"]
tmp[, "F", "B"]
tmp["R", "F", "B"]

survey
survey$vote
table(survey$vote)

# ---- Selecting rows ----------------------------------------------------------

survey[, 1]
survey[1, ]
survey[11, ]
1:10
survey[1:10, ]
survey[1:9, ]
survey[2:9, ]
survey[c(2, 3, 4, 5, 7, 8, 9), ]
# The session typed survey[2:5,7:9,], which puts 7:9 in the column position
# of a three-column data frame. Combining both row ranges with c() selects
# the same rows as the line above.
survey[c(2:5, 7:9), ]

c(1:5, 13)
# `c(1:5, 4, 9 1)` (missing comma) was a syntax error; the corrected retry:
c(1:5, 4, 9, 1)
survey[c(1:5, 4, 9, 1), ]  # rows may be repeated and taken in any order
survey[c(1:9, 2, 2, 2, 2), ]
row.names(survey[c(1:9, 2, 2, 2, 2), ])

# ---- Building index vectors with rep() and seq() ----------------------------

rep(2, 112)
1:20
rep(1, 100)
seq(1, 10, 1)
seq(1, 10, 2)
if (interactive()) {
  help("seq")  # was `?seq`; the help page is only useful in a live session
}
seq(111, 493, by = 3)
seq(111, 493, length.out = 50)
# `row.names(survey[c(1:9,rep(2,100),])` (misplaced comma) was a syntax error;
# the corrected retry:
row.names(survey[c(1:9, rep(2, 100)), ])

# ---- Sorting and aggregating -------------------------------------------------

survey
order(survey$party)
survey[order(survey$party), ]

# One random income per row. The session first tried runif(10), which fails
# because the data frame has 11 rows; sizing by nrow() avoids the mismatch.
survey$income <- runif(nrow(survey)) * 100000
survey
order(survey$income)
survey[order(survey$income), ]

# Mean income for each party-by-gender combination.
aggregate(survey$income, list(survey$party, survey$gender), mean)