plotme(x,y,20,4)
plotme(x,y,15,5)
par(mfrow=c(1,1))
manipulate(plotme(x,y,a,b),a=slider(-20,100,step=.1),b=slider(0,10,step=.1))
knitr::opts_chunk$set(echo = TRUE)
library(BayesFactor)
set.seed(1000)
x <- runif(20)
y <- runif(20)
z <- x + .5*y
z1 <- z
z2 <- z+10
z3 <- log(z)
z4 <- z*10
z5 <- z + 20*runif(20) + 5*runif(20)
plot(x,z)
cor(x,z)
cor(x,z)^2
cor.test(x,z1)
cor.test(x,z1,method="spearman")
correlationBF(x,z1)
plot(x,z2,col="navy",pch=16,cex=3,main="Additive transform")
cor.test(x,z2)
cor.test(x,z2,method="spearman")
correlationBF(x,z2)
plot(x,z3,col="darkred",cex=2.5,pch=16,main="log transform")
cor.test(x,z3)
cor.test(x,z3,method="spearman")
correlationBF(x,z3)
plot(x,z5,pch=16,cex=3,col="seashell")
points(x,z5,cex=3)
cor.test(x,z5)
cor.test(x,z5,method="spearman")
correlationBF(x,z5)
set.seed(1000)
indoor<- sample(c("A","B","C","D"),prob=c(5,8,25,6),
size=200,replace=T)
outdoor <-sample(c("A","B","C","D"),prob=c(5,20,3,6),
size=200,replace=T)
tab1 <- table(indoor,outdoor)
tab2 <- table(c(indoor,outdoor),rep(c("I","O"),each=200))
tab1
tab2
library(BayesFactor)
chisq.test(tab1)
contingencyTableBF(tab1,sampleType = "indepMulti", fixedMargin = "cols")
tab1 + diag(4) * c(25,30,15,20)
chisq.test(tab1 + diag(4) * c(25,30,15,20))
tabx <- tab1
tabx[4,2] <-   90
tabx
chisq.test(tabx)
chisq.test(tab2)
contingencyTableBF(tab2,sampleType = "indepMulti", fixedMargin = "cols")
contingencyTableBF(tab2,sampleType = "indepMulti", fixedMargin = "rows")
contingencyTableBF(tab2,sampleType = "indepMulti", fixedMargin = "rows")
library(RColorBrewer)
palette(brewer.pal(8,"Accent"))
tabA <- table(cardat$gender,cardat$type)
cardat <- read.table(text="age gender  type origin origin.last carval carval.last
1   34      M Truck     US      Europe  16400       15900
2   31      M Truck Europe      Europe  16900       15800
3   47      F Sedan Europe      Europe  18800       16600
4   21      F   SUV  Japan       Japan  16000       16200
5   42      F Truck     US          US  16800       16500
6   43      F Truck  Japan       Japan  17200       16200
7   60      M Truck  Japan       Japan  19900       20800
8   37      F Sedan  Japan       Japan  17100       16200
9   46      M   SUV     US          US  16900       16700
10  27      F Sedan     US       Japan  16200       15600
11  50      M   SUV     US      Europe  18800       17800
12  64      F Sedan  Japan       Japan  50700       18000
13  33      M   SUV     US          US  16500       15700
14  39      M   SUV     US          US  17000       17600
15  58      F Truck     US          US  19400       16600
16  53      M Sedan  Japan          US  19200       17700
17  29      F Sedan  Japan          US  16300       15600
18  37      M Truck  Japan      Europe  17300       16200
19  37      M Truck     US          US  18200       16600
20  54      M Sedan     US      Europe  24500       16400
21  46      M Truck  Japan      Europe  18000       17700
22  55      F Truck     US          US  28900       17900
23  46      F Sedan  Japan       Japan  16600       16200
24  57      F Truck Europe      Europe  24300       16600
25  40      F Truck     US          US  16800       16700
26  27      F Sedan  Japan          US  16900       15600
27  58      M Sedan     US          US  20300       19000
28  64      M Sedan     US          US  40600       23600
29  47      F Sedan     US          US  18400       19800
30  32      F   SUV     US      Europe  15900       15800
31  43      F Sedan  Japan      Europe  17200       16300
32  66      M   SUV     US          US  19100       30200
33  36      F   SUV  Japan      Europe  16900       15800
34  68      F Sedan  Japan      Europe  69300       18500
35  54      M Sedan     US          US  17000       17200
36  64      F   SUV     US          US  34900       42900
37  27      F Truck     US       Japan  15800       15500
38  51      F Sedan  Japan       Japan  29000       17900
39  69      F Sedan Europe       Japan  54400       18700
40  25      M Truck  Japan       Japan  15800       15400",header=T)
library(RColorBrewer)
palette(brewer.pal(8,"Accent"))
tabA <- table(cardat$gender,cardat$type)
tabA
chisq.test(tabA)
contingencyTableBF(tabA,sampleType = "indepMulti", fixedMargin = "rows")
barplot(tabA,beside=T,col=1:2, ylab="Number of respondents")
legend(3,12,c("Women","Men"),pch=15,col=1:2,bty="n",cex=2)
##Note that the hardcoded numbers below should be deleted and the ones above (or the code below) should be used post-2022.
cardat <- read.table(text="age gender  type origin origin.last carval carval.last
34      F   SUV     US          US  16400       15800
31      M Truck     US      Europe  16900       16000
47      M Sedan     US          US  18800       17100
21      F Sedan  Japan       Japan  16000       15500
42      M   SUV     US       Japan  16800       16100
43      F   SUV     US          US  17200       16300
60      F Truck Europe      Europe  19900       17800
37      M Truck Europe      Europe  17100       16200
46      F   SUV  Japan       Japan  16900       16300
27      M Sedan     US          US  16200       15700
50      M   SUV     US          US  18800       17100
64      F   SUV  Japan          US  50700       31700
33      M   SUV  Japan       Japan  16500       15900
39      M Truck     US      Europe  17000       16200
58      F Sedan  Japan          US  19400       17500
53      F   SUV     US      Europe  19200       17400
29      F Sedan     US       Japan  16300       15700
37      F Sedan     US          US  17300       16300
37      M   SUV     US       Japan  18200       16700
54      F Sedan  Japan       Japan  24500       19800
46      F   SUV  Japan      Europe  18000       16700
55      F   SUV     US       Japan  28900       21700
46      F Truck     US      Europe  16600       16100
57      M   SUV Europe      Europe  24300       19700
40      M   SUV     US          US  16800       16100
27      M Sedan  Japan          US  16900       16000
58      M   SUV Europe      Europe  20300       17900
64      M Truck     US          US  40600       27100
47      M Truck     US      Europe  18400       16900
32      M Truck     US          US  15900       15600
43      F Sedan  Japan          US  17200       16300
66      M Truck Europe      Europe  19100       17500
36      F   SUV     US       Japan  16900       16100
68      M Truck     US          US  69300       40100
54      F Sedan  Japan          US  17000       16400
64      M Truck  Japan      Europe  34900       24600
27      M   SUV  Japan      Europe  15800       15500
51      F Sedan  Japan       Japan  29000       21700
69      M Sedan     US       Japan  54400       33400
25      F Sedan  Japan       Japan  15800       15500",header=T)
if(0)
{
set.seed(100)
age <- round(runif(40,min=18,max=70))
carval<- round(15000 + age * 25 +  (exp (rnorm(40 ,mean= age/10)*.8))* 50,-2)
carval.last <- round(15000 + (age-10) * 25 +  (exp (rnorm(40 ,mean= (age-10)/10)*.8))* 50,-2)
origin <- sample(c("US","Japan","Europe"),p=c(.5,.4,.1),40,replace=T)
origin.last <- origin
origin.last[sample(1:40,25)] <- sample(c("US","Japan","Europe"),25,replace=T)
type <- sample(c("Sedan","SUV","Truck"),replace=T,40)
gender <- c("F","M")[(as.numeric(as.factor(type)) + rnorm(40)*2  > 2)+ 1]
cardat <-  data.frame(age,gender,type,origin,origin.last,carval,carval.last)
}
library(RColorBrewer)
palette(brewer.pal(8,"Accent"))
tabA <- table(cardat$gender,cardat$type)
tabA
chisq.test(tabA)
contingencyTableBF(tabA,sampleType = "indepMulti", fixedMargin = "rows")
barplot(tabA,beside=T,col=1:2, ylab="Number of respondents")
legend(3,12,c("Women","Men"),pch=15,col=1:2,bty="n",cex=2)
tabA*4
chisq.test(tabA*4)
contingencyTableBF(tabA*4,sampleType = "indepMulti", fixedMargin = "rows")
par(mfrow=c(1,2))
hist(cardat$carval,breaks=20,col=1,
main="Histogram of car value",xlab="Car value (US$)")
qqnorm(cardat$carval,pch=16,col=2,cex=1.5)
wilcox.test(carval~gender,data=cardat)
ttestBF(cardat$carval[cardat$gender=="F"],
cardat$carval[cardat$gender=="M"])
barplot(aggregate(cardat$carval,list(cardat$gender),mean)$x,main="Car purchase price",xlab="Gender",ylab="Purchase price",col=1:2,names=c("Women","Men"),
las=1)
tabc <- table(prior=cardat$origin.last,current=cardat$origin)
tabc
barplot(tabc,col=3:5,ylim=c(0,20),xlab="Current vehicle",ylab="Number of buyers")
legend(0.1,20,levels(cardat$origin),pch=15,col=3:5,cex=1.5,bty="n",title="Previous vehicle")
chisq.test(tabc)
contingencyTableBF(tabc,sampleType = "indepMulti", fixedMargin = "rows")
tabc
chisq.test(tabc)
contingencyTableBF(tabc,sampleType = "indepMulti", fixedMargin = "rows")
par(mfrow=c(1,3))
hist(cardat$age,col=3,xlab="Age of buyer")
hist(cardat$carval,col=4,xlab="Purchase price")
plot(cardat$age,cardat$carval,pch=16,col=5,xlab="Age of buyer",ylab="Purchase price")
cor.test(cardat$age,cardat$carval,method="spearman")
correlationBF(rank(age),rank(carval),data=cardat)
lm1 <- lm(carval~age,data=cardat)
pred1 <- predict(lm1,data.frame(age=1:70))
plot(cardat$age,cardat$carval,pch=16,col=5,xlab="Age of buyer",ylab="Purchase price",ylim=c(0,80000))
points(c(32,52,62),pred1[c(32,52,62)],col=5,cex=2)
abline(lm1$coef)
pred1[c(32,52,62)]
lm2 <- lm(log(carval)~age,data=cardat)
pred2 <- predict(lm2,data.frame(age=1:70))
plot(cardat$age,cardat$carval,pch=16,col=5,ylim=c(0,80000),xlab="Age of buyer",ylab="Purchase price")
points(1:70,exp(pred2),type="l")
points(c(32,52,62),exp(pred2[c(32,52,62)]),col=5,cex=2)
exp(pred2[c(32,52,62)])
mean(cardat$carval[abs(cardat$age-32)<5])
mean(cardat$carval[abs(cardat$age-52)<5])
mean(cardat$carval[abs(cardat$age-62)<5])
par(mfrow=c(1,2))
plot(cardat$carval.last,cardat$carval,xlab="Previous car value", ylab="Current car value",pch=16,col=3)
plot(rank(cardat$carval.last),rank(cardat$carval),xlab="Previous car value", ylab="Current car value",pch=16,col=3)
cor.test(cardat$carval.last,cardat$carval)
cor.test(cardat$carval.last,cardat$carval,method="spearman")
correlationBF(cardat$carval.last,cardat$carval)
correlationBF(rank(cardat$carval.last),rank(cardat$carval))
hist(cardat$carval-cardat$carval.last,col=1)
qqnorm(cardat$carval-cardat$carval.last,pch=16,col=2,cex=1.3)
binom.test(sum((cardat$carval-cardat$carval.last)>0), nrow(cardat))
t.test(cardat$carval,cardat$carval.last,paired=T,direction="less")
ttestBF(cardat$carval,cardat$carval.last,paired=T,nullInterval=c(-Inf,0))
library(vioplot)
cars <- cardat$carval[cardat$type=="Truck"]
suvs <- cardat$carval[cardat$type=="SUV"]
vioplot(cars,suvs,h=5000,col=3,names=c("Truck","SUV"))
title(ylab="Vehicle cost ($US)")
wilcox.test(cars,suvs)
ttestBF(cars,suvs)
runif(1)
runif(1)
set.seed(100)
runif(1)
runif(1)
set.seed(100)
runif(1)
runif(1)
x <- runif(100)*10
y <- x*5 + 15 + rnorm(100)*10
plot(x,y,pch=16,col="cadetblue3")
##
plotme <- function(x,y,a=0,b=1)
{
plot(x,y,col="gold",pch=16,
main=paste("intercept:",a,"slope:",b))
points(x,y)
abline(a,b)
pred <- a + b*x
points(x,pred,pch=1,cex=.5)
rmse <- sqrt(mean((y-pred)^2))
text(4,-10,paste("RMSE=",round(rmse,3)))
}
##You can play with plotme to find values that seem to work:
plotme(x,y,35,0)
plotme <- function(x,y,a=0,b=1)
{
plot(x,y,col="gold",pch=16,
main=paste("intercept:",a,"slope:",b))
points(x,y)
abline(a,b)
pred <- a + b*x
points(x,pred,pch=1,cex=.5)
rmse <- sqrt(mean((y-pred)^2))
mtext(1,3,paste("RMSE=",round(rmse,3)))
}
##You can play with plotme to find values that seem to work:
plotme(x,y,35,0)
plotme <- function(x,y,a=0,b=1)
{
plot(x,y,col="gold",pch=16,
main=paste("intercept:",a,"slope:",b))
points(x,y)
abline(a,b)
pred <- a + b*x
points(x,pred,pch=1,cex=.5)
rmse <- sqrt(mean((y-pred)^2))
mtext(1,3,paste("RMSE=",round(rmse,3)))
}
##You can play with plotme to find values that seem to work:
plotme(x,y,35,0)
plotme <- function(x,y,a=0,b=1)
{
plot(x,y,col="gold",pch=16,
main=paste("intercept:",a,"slope:",b))
points(x,y)
abline(a,b)
pred <- a + b*x
points(x,pred,pch=1,cex=.5)
rmse <- sqrt(mean((y-pred)^2))
mtext(1,3,paste("RMSE=",round(rmse,3)))
}
##You can play with plotme to find values that seem to work:
plotme(x,y,35,0)
plotme <- function(x,y,a=0,b=1)
{
plot(x,y,col="gold",pch=16,
main=paste("intercept:",a,"slope:",b))
points(x,y)
abline(a,b)
pred <- a + b*x
points(x,pred,pch=1,cex=.5)
rmse <- sqrt(mean((y-pred)^2))
title(sub=paste("RMSE=",round(rmse,3)))
}
##You can play with plotme to find values that seem to work:
plotme(x,y,35,0)
plotme(x,y,36,0)
##You can play with plotme to find values that seem to work:
plotme(x,y,35,0)
plotme(x,y,36,0)
plotme(x,y,37,0)
plotme(x,y,37,5)
plotme(x,y,20,5)
plotme(x,y,5,10)
plotme(x,y,5,8)
plotme(x,y,14,5.2)
plotme(x,y,15,5)
par(mfrow=c(1,1))
manipulate(plotme(x,y,a,b),a=slider(-20,100,step=.1),b=slider(0,10,step=.1))
n <- 100
beta1 <- (sum(x*y) - sum(x) * sum(y)/n)  /(sum(x^2) - sum(x)^2/n)
beta2 <- mean(y) - beta1 * mean(x)
beta2
beta2
beta1
xs <- cbind(1,x)
solve(t(xs) %*% xs) %*% t(xs) %*% y
x <- 1:1000000
y <- x + runif(1000000)
lm1 <- lm(y~x)
lm1 <- lm(y~x)
lm1 <- lm(y~x)
plot(x,y,pch=16,cex=1.5,col="gold",
main=paste("Best-fitting line\n",
"y = ",round(model$coef[1] ,2) ," + ", round(model$coef[2],3) , " * x",sep=""))
plot(x,y,pch=16,cex=1.5,col="gold",
main=paste("Best-fitting line\n",
"y = ",round(model$coef[1] ,2) ," + ", round(model$coef[2],3) , " * x",sep=""))
y <- x^2
plot(y,x)
x <- 1:100
y <- x^2 + runif(100)
plot(x,y)
lm(y~x)
abline(-1717,101)
aa <- lm(y~x)
predict(aa,10)
predict(aa,list(x=10))
predict(aa,list(x=1200))
y <- y + rnorm(100)*100
plot(x,y)
y <- y + rnorm(100)*1000
plot(x,y)
costs <- data.frame(
product=c("apples", "bananas", "eggs", "steak", "potatoes","oranges"),
cost1950=c(0.39,0.14,0.79,0.59,0.07,.23),
cost1990=c(1.98,   0.48,    1.05, 2.99,  0.31,.74)
)
plot(costs$cost1950,costs$cost1990,xlim=c(0,1),cex=.3,ylim=c(0,4),type="n",
xlab="Cost in 1950",ylab="Cost in 1990")
text(costs$cost1950,costs$cost1990,costs$product)
abline(lm(cost1990~cost1950,data=costs)$coef, col="red")
abline(0,lm(cost1990~cost1950+0,data=costs)$coef,col="black")
lm(cost1990~cost1950,data=costs)
lm(cost1990~cost1950+0,data=costs)
setwd("~/Dropbox/courses/5210-2022/web-5210/psy5210/Projects/Chapter9")
set.seed(20)
predictor <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)
outcome <- predictor + rnorm(40)
predictor
outcome
plot(predictor,outcome)
aggregate(outcome,list(predictor),mean)
outcome
predictor
t.test(outcome~predictor)
t.test(outcome[predictor==0],outcome[predictor==1])
cor(outcome,predictor)
lm(outcome~predictor)
summary(lm(outcome~predictor))
8.547^(-2)
sqrt(8.547)
outcome
pred2 <- runif(40)
summary(lm(outcome~predictor+pred2))
summary(lm(outcome~predictor))
summary(lm(outcome~predictor+pred2))
summary(lm(outcome~1))
lm0 <- lm(outcome~1)
lm2 <- lm(outcome~predictor+pred2)
lm1 <- lm(outcome~predictor)
anova(lm1,lm0)
anova(lm2,lm0)
anova(lm2,lm1)
summary(lm2)
pf(5.8,2,30)
pf(5.8,2,60)
pf(5.8,2,1000)
pf(2.8,2,1000)
pf(2.8,2,1000000)
pf(1,2,1000)
pf(1000,2,1000)
library(faraway)
data(stat500)
summary(stat500)
lm1 <- lm(total~midterm+final+hw,data=stat500)
lm2 <- lm(final~midterm,data=stat500)
lm3 <- lm(final~midterm+hw,data=stat500)
stat500
lm1
lm1$coefficients
round( lm1$coefficients,8)
stat500$newscore <- stat500$midterm*.5 + stat500$hw*.25+stat500$final*.25
stat500
lm5 <- lm(newscore~hw+midterm+final,data=stat500)
lm5
round(lm5$coefficients,3)
lm3
summary(lm(final~hw,data=stat500))
summary(lm2)
summary(lm3)
anova(lm3,lm2)
summary(lm2)
sqrt(.299)
summary(lm3)
summary(lm2)
summary(lm3)
lm3
lm3$coefficients
lm3$fitted.values
plot(stat500$midterm,stat500$final)
points(stat500$midterm,lm3$fitted.values,col="red",pch=16)
cor(stat500$midterm,lm3$fitted.values)
cor(stat500$final,lm3$fitted.values)
cor(stat500$final,lm3$fitted.values)^2
cor(lm1$fit,z)^2
q1 <- runif(50)
q2 <- runif(50)
q3 <- runif(50)
q4 <- runif(50)
q5 <- runif(50)
q6 <- runif(50)
q7 <- runif(50)
q8 <- runif(50)
q9 <- runif(50)
q10 <- runif(50)
summary(lm(z~q1))$r.squared
set.seed(1000)
sd <- 10
n <- 50
x <- 1:n/n*10
y <- runif(n)*10
y2 <- runif(n) * 3
z <- 10 +3*x + 5*y + rnorm(n)*sd
lm1 <- lm(z~x+y+y2)
lm1
res1 <- sqrt(sum(lm1$resid^2)/(n-4))
print(res1)
summary(lm1)
## Anova is the same as a t-test?
####comparing x+y to just x:
lm2 <- lm(z~x+y)
res2 <-(sum(lm2$resid^2))
lm3 <- lm(z~x)
res3 <-(sum(lm3$resid^2))
summary(lm3)
tval <-  sqrt((res3-res2)/(res2/(50-3)) )
tval
## comparing R^2 versus
##multiple R^2
cor(lm1$fit,z)^2
summary(lm1)
q1 <- runif(50)
q2 <- runif(50)
q3 <- runif(50)
q4 <- runif(50)
q5 <- runif(50)
q6 <- runif(50)
q7 <- runif(50)
q8 <- runif(50)
q9 <- runif(50)
q10 <- runif(50)
summary(lm(z~q1))$r.squared
summary(lm(z~q1))$adj.r.squared
summary(lm(z~q1+q2))$r.squared
summary(lm(z~q1+q2))$adj.r.squared
fruitfly
table(fruitfly$activity)
library(faraway)
data(fruitfly)
summary(fruitfly)
ff.lm0 <- lm(longevity~activity+0,data=fruitfly)
ff.lm <- lm(longevity~thorax,data=fruitfly)
summary(ff.lm)
ff.lm2 <- lm(longevity~thorax+activity+0,data=fruitfly)
summary(ff.lm2)
ff.lm1 <- lm(longevity~thorax+activity,data=fruitfly)
ff.lm1
summary(ff.lm1)
ff.lm <- lm(longevity~thorax,data=fruitfly)
ff.lm
ff.lm1
anova(ff.lm,ff.lm1)
anova(ff.lm1)
ff.lm0 <- lm(longevity~activity+0,data=fruitfly)
ff.lm2 <- lm(longevity~thorax+activity+0,data=fruitfly)
summary(ff.lm2)
?fruitfly
