## Leftover from an earlier session: 'bmodel' is not defined anywhere in
## this file, so these two lines error unless it already exists in the
## workspace -- NOTE(review): confirm where bmodel comes from.
plot(head(bmodel))
head(bmodel)
##########################################################
#######The effect of parameter selection on prediction
##normal prediction
## Simulate 50 people: age, income, and a digit-span score that depends
## on both (plus unit-normal noise), then fit the two-predictor model.
set.seed(100)
age <- 20+runif(50) * 80
income <- 20000+ runif(50)*50000
digitspan <-  ( age * .5 + income * .003)/20 + rnorm(50)
data=data.frame(age,income,digitspan)
model <- lm(digitspan~age+income,data=data)
## broom::tidy() returns the coefficient table as a data frame, which is
## easier to program with than summary() output.
library(broom)
tidy(model)
summary(model)
tt <- tidy(model)
print(tt)
## Dot products as a test of orthogonality: these two unit vectors are
## perpendicular, and their dot product is 0.
a <- c(1,0)
b <- c(0,1)
a %*% b
plot(0,0,type="n")
segments(0,0,1,0)
segments(0,0,0,1)
## Another orthogonal pair.  (Naming a vector 'c' shadows the name of
## the c() function for variable lookup -- it still works because R
## searches functions separately, but it is a risky choice.)
c<- c(-1,1)
d <- c(-1,-1)
c %*% d
## (The next two repeats are console-history duplicates of the demo above.)
c<- c(-1,1)
d <- c(-1,-1)
c %*% d
c<- c(-1,1)
d <- c(-1,-1)
c %*% d
plot(0,0,type="n")
segments(0,0,-1,1)
segments(0,0,-1,-1)
## Combined figure: dotted grid, heavy axes, white disks to host the
## labels, then all four vectors drawn as arrows from the origin.
plot(0,type="n",bty="n",xlab="x",ylab="y",xlim=c(-1.5,1.5),ylim=c(-1.50,1.5))
segments(-1.5,-3:3/2,1.5,-3:3/2,lty=3)
segments(-3:3/2,1.5,-3:3/2,-1.5,lty=3)
abline(0,0,lty=1,lwd=3,col="grey20")
abline(v=0,lty=1,lwd=3,col="grey20")
points(c(.5,.1,-.5,-.5),c(.1,.5,.6,-.6),pch=16,cex=5,col="white")
text(.5,.1,"a")
text(.1,.5,"b")
text(-.5,.6,"c")
text(-.5,-.6,"d")
arrows(0,0,1,0,lwd=3,col="red")
arrows(0,0,0,1,lwd=3,col="red")
arrows(0,0,-1,1,lwd=3,col="darkgreen")
arrows(0,0,-1,-1,lwd=3,col="darkgreen")
## Orthogonal (zero dot product) is not the same thing as uncorrelated
## (zero correlation): the examples below show each without the other.
x <- c(1,3,3, 4,-4,3)
y <- c(3,1,3,-5, 1,3)
plot(x,y,col="gold",pch=16,cex=1.2,
main="Orthogonal predictors x and y") ##this
grid()
points(x,y,cex=1.2)
arrows(0,0,x,y,length=.1)
#dev.off()
x %*% y
x
y
## Orthogonal (dot product 0) but nonzero correlation:
orthx1 <- c(1,-5,3,-1)
orthx2 <- c(5,1,1,3)
cor(orthx1,orthx2)
orthx1 %*% orthx2
## Uncorrelated (cor = 0) but not orthogonal (dot product != 0):
uncorx1 <- c(0,0,1,1)
uncorx2 <- c(1,0,1,0)
cor(uncorx1,uncorx2)
(uncorx1)%*%(uncorx2)
## (console-history duplicate of the four lines above)
uncorx1 <- c(0,0,1,1)
uncorx2 <- c(1,0,1,0)
cor(uncorx1,uncorx2)
(uncorx1)%*%(uncorx2)
## After mean-centering, uncorrelated vectors become orthogonal as well:
uncorx1b <- uncorx1 - mean(uncorx1)
uncorx2b <- uncorx2 - mean(uncorx2)
cor(uncorx1b,uncorx2b)
uncorx1b%*% uncorx2b
## With orthogonal predictors, the no-intercept coefficients are the
## same whether the predictors are fit jointly or one at a time; with
## merely uncorrelated (non-orthogonal) predictors they differ.
orthy <- c(.2,.4,.8,1)
lm(orthy~orthx1+orthx2+0)$coef
lm(orthy~orthx1+0)$coef
lm(orthy~orthx2+0)$coef
lm(orthy~uncorx1+uncorx2+0)
lm(orthy~uncorx1+0)
lm(orthy~uncorx2+0)
## A fully-crossed 10x10 factorial design: the raw codes are
## uncorrelated but not orthogonal; centering them makes the dot
## product 0 too.
set.seed(100)
x1.raw <- rep(1:10,each=10)
x2.raw <- rep(1:10,10)
x1.raw %*% x2.raw
cor(x1.raw,x2.raw)
x1 <- x1.raw-mean(x1.raw)
x2 <- x2.raw-mean(x2.raw)
x1 %*% x2
cor(x1,x2)
y <- x1 + x2 + rnorm(100)
## stat500 (faraway package): exam scores.  NOTE(review): 'total'
## appears to be a combination of the other score columns, which is why
## including it alongside them demonstrates collinearity -- confirm
## against the faraway documentation.
library(faraway)
stat500
stat500$newtest <- stat500$final + runif(55)
stat500
s500 <- lm(newtest~midterm+final+hw+total,data=stat500)
s500
summary(s500)
## Same predictors in a different order: with collinear predictors the
## term entered later is the one lm() aliases out.
s500b <- lm(newtest~total+midterm+final+hw,data=stat500)
s500b
## Multicollinearity in mtcars: many of the predictors are strongly
## intercorrelated, so the all-predictor model has unstable,
## individually non-significant coefficients.
mtcars
cor(mtcars)
round(cor(mtcars),3)
lm1 <- lm(mpg~.,data=mtcars)
lm1
summary(lm1)
## mctest reports collinearity diagnostics for a fitted lm.
library(mctest)
mctest
lm1
?mctest
mctest(lm1)
## Bayes-factor comparison over all predictor subsets.
library(BayesFactor)
lm1BF <- regressionBF(mpg~.,data=mtcars)
lm1BF
head(lm1BF)
## A compact two-predictor model for comparison against the full model.
lm2 <- lm(mpg~cyl+wt,data=mtcars)
mctest(lm2)
summary(lm1)
summary(lm2)
anova(lm1,lm2)
lm1
lm2
?mtcars
round(cor(mtcars),3)
## BUG FIX: the original read lm(mgp~wt+disp,...), a typo for 'mpg'
## that errors with "object 'mgp' not found".
lm(mpg~wt+disp,data=mtcars)
lm3 <- lm(mpg~wt+disp,data=mtcars)
lm3
summary(lm3)
summary(lm2)
## Regress the global response y on the first n columns of the global
## predictor matrix x, then plot observed vs. fitted values with the
## identity line for reference.
fitx <- function(n=1)
{
  submodel <- lm(y ~ x[, 1:n])
  plot(y, fitted(submodel), ylim = c(0, 1), xlim = c(0, 1))
  abline(0, 1)
}
## Overfitting demo: 10 pure-noise predictors for 10 random responses.
## With all 10 columns the fit is perfect despite there being no signal.
y <- runif(10)
x <- matrix(runif(100),10,10)
model <- lm(y~x)
summary(model)
## (console-history duplicate of the fitx definition above)
fitx <- function(n=1)
{
model <- lm(y~x[,1:n])
plot(y,model$fit,ylim=c(0,1),xlim=c(0,1))
abline(0,1)
}
fitx()
fitx()
y
x
fitx(2)
fitx(3)
fitx(5)
## manipulate only works inside RStudio.  The slider variable here is
## also named x; it supplies n to fitx(), while inside fitx the global
## predictor matrix x is still the one used in the formula.
library(manipulate)
manipulate(fitx(x), x=slider(1,10))
cor(x)
round(cor(x),3)
plot(y,lm(y~x[,1])$fit)
library(manipulate)
manipulate(fitx(x), x=slider(1,10))
## Redefine fitx so the plot title shows R-squared for the chosen n.
fitx <- function(n=1)
{
model <- lm(y~x[,1:n])
plot(y,model$fit,ylim=c(0,1),xlim=c(0,1),main=round(cor(model$fit,y)^2,3))
abline(0,1)
}
manipulate(fitx(x), x=slider(1,10))
?mctest
## R Markdown chunk defaults -- this section looks like console history
## from knitting one or more .Rmd documents.
knitr::opts_chunk$set(echo = TRUE)
summary(cars)
## my answer here
plot(pressure)
plot(pressure)
plot(pressure)
summary(cars)
## my answer here
knitr::kable(summary(cars))
knitr::opts_chunk$set(echo = TRUE)
## NOTE(review): this first model1 fit runs before kanga2 is created
## below, so executed top-to-bottom it errors; the identical call is
## repeated successfully after kanga2 exists.
model1 <- lm(zygomatic.width~sex+species+basilar.length+occipitonasal.length+palate.length+palate.width+nasal.length+nasal.width+squamosal.depth+lacrymal.width+orbital.width+.rostral.width+ occipital.depth+crest.width+foramina.length+mandible.length+mandible.width+mandible.depth+ramus.height,
data=kanga2)
knitr::opts_chunk$set(echo = TRUE)
## kanga (faraway package): kangaroo skull measurements.  Keep only rows
## with no missing values across measurement columns 3:20.
library(faraway)
data(kanga)
kanga2 <- kanga[!is.na(rowSums(kanga[,3:20])),]
## Full model: predict zygomatic width from every other variable.
model1 <- lm(zygomatic.width~sex+species+basilar.length+occipitonasal.length+palate.length+palate.width+nasal.length+nasal.width+squamosal.depth+lacrymal.width+orbital.width+.rostral.width+ occipital.depth+crest.width+foramina.length+mandible.length+mandible.width+mandible.depth+ramus.height,
data=kanga2)
summary(model1)
## Reduced model with a hand-picked subset of predictors; anova() tests
## whether dropping the rest significantly hurts fit.
model2 <- lm(zygomatic.width~species+palate.width+squamosal.depth+.rostral.width+ crest.width+mandible.length+mandible.width+mandible.depth+ramus.height,
data=kanga2)
anova(model1,model2)
## (console-history duplicate of the model2 fit above)
model2 <- lm(zygomatic.width~species+palate.width+squamosal.depth+.rostral.width+ crest.width+mandible.length+mandible.width+mandible.depth+ramus.height,
data=kanga2)
summary(model2)
summary(model1)
anova(model1,model2)
## Stepwise selection by AIC (trace=0 suppresses the step log).
library(MASS)
smallAIC <- stepAIC(model1,direction="both",trace=0)
summary(smallAIC)
?stepAIC
## Stepwise AIC again, once with trace=1 to show each step, then BIC
## selection via the penalty k=log(n).
library(MASS)
smallAIC <- stepAIC(model1,direction="both",trace=1)
summary(smallAIC)
library(MASS)
smallAIC <- stepAIC(model1,direction="both",trace=0)
summary(smallAIC)
smallBIC <- stepAIC(model1,direction="both",trace=0,k=log(nrow(kanga2)))
summary(smallBIC)
## NOTE(review): in file order these next three lines run before
## library(BayesFactor) is loaded and before best.nospecies exists, so
## they error here; the same calls are repeated successfully below.
bayes.withspecies <- lmBF(zygomatic.width ~  species+ palate.width + squamosal.depth +
crest.width + mandible.length + mandible.width + ramus.height,data=kanga2)
lm.withspecies <- lm(zygomatic.width ~  species+ palate.width + squamosal.depth +
crest.width + mandible.length + mandible.width + ramus.height,data=kanga2)
bayes.withspecies/best.nospecies
library(BayesFactor)
##regressionBF does not accept categorical predictors
#bayes.nospecies <- regressionBF(zygomatic.width ~  species+ palate.width + squamosal.depth +
#    .rostral.width + crest.width + mandible.length + mandible.width +
#    mandible.depth + ramus.height,data=kanga2)
##do one without species
bayes.nospecies <- regressionBF(zygomatic.width ~  palate.width + squamosal.depth +
.rostral.width + crest.width + mandible.length + mandible.width +
mandible.depth + ramus.height,data=kanga2)
best.nospecies <- head(bayes.nospecies)[1]
best.nospecies
## (console-history duplicate of the regressionBF call above)
bayes.nospecies <- regressionBF(zygomatic.width ~  palate.width + squamosal.depth +
.rostral.width + crest.width + mandible.length + mandible.width +
mandible.depth + ramus.height,data=kanga2)
best.nospecies <- head(bayes.nospecies)[1]
best.nospecies
## Add species via lmBF (which does accept factors); dividing the two
## BayesFactor objects compares with-species vs. best no-species model.
bayes.withspecies <- lmBF(zygomatic.width ~  species+ palate.width + squamosal.depth +
crest.width + mandible.length + mandible.width + ramus.height,data=kanga2)
lm.withspecies <- lm(zygomatic.width ~  species+ palate.width + squamosal.depth +
crest.width + mandible.length + mandible.width + ramus.height,data=kanga2)
bayes.withspecies/best.nospecies
bayes.withspecies/best.nospecies
## Observed vs. fitted scatterplots for each selected model.
plot(kanga2$zygomatic.width,model1$fitted.values,main='Full model')
plot(kanga2$zygomatic.width,model2$fitted.values,main='ANOVA-selected model')
plot(kanga2$zygomatic.width,smallAIC$fitted.values,main="AIC-selected model")
plot(kanga2$zygomatic.width,smallBIC$fitted.values,main="BIC-selected model")
plot(kanga2$zygomatic.width,lm.withspecies$fitted.values,main="Bayes factor selected model")
## Orthogonality of two crossed contrast codes for a 3x3 design.
training <- rep(c(1,1,1,0,0,0,-1,-1,-1),each=3)
feedback <- rep(c(1,0,-1,1,0,-1,1,0,-1),each=3)
cor(training, feedback)
training %*% feedback
## A third code crossed with both of the above.
new <- rep(-1:1,9)
cor(new,training)
cor(new,feedback)
new %*% training
new %*% feedback
training
feedback
## Candidate grouping codes to compare against the two design vectors.
group1 <- 1:27
group2 <- rep(-1:1, 9)
group3 <- 1:27-mean(1:27)
group4 <- rep(1:3, 9)
group5 <- rep(-1:1,each=9)
group6 <- runif(27)-.5   ## random and not seeded, so its output varies run to run
group7 <- rep(1:9,each=3)
## Print correlation and dot product of each group code against both
## design vectors.  (Replaces 14 copy-pasted cat() calls with a loop;
## the printed output is byte-identical to the original.)
groups <- list(group1, group2, group3, group4, group5, group6, group7)
for (i in seq_along(groups)) {
  cat(paste0("group ", i, " v training: "), cor(groups[[i]],training), "  " , groups[[i]] %*% training , "\n")
  cat(paste0("group ", i, " v feedback: "), cor(groups[[i]],feedback), "  " , groups[[i]] %*% feedback , "\n")
}
## Build a small panel: 9 buyers (group7) each observed in 3 time frames
## (group4), with purchase amount driven by buyer baseline, age, and
## time frame plus noise.
data <- data.frame(buyerid = group7,
                   timeframe = group4)
set.seed(103)
buyerbase <- runif(9)
timebase <- c(1,2,2.5)
agebase  <- 20+runif(9)* 40
data$age <- round(agebase[data$buyerid])
data$purchase <- round( 25 + buyerbase[data$buyerid] * 50 +
                          data$age * .5+
                          timebase[data$timeframe] * 7 +
                          rnorm(27)*4,2)
## Coefficients shift between the joint and single-predictor models
## because timeframe and age are confounded in this design.
lm(purchase~timeframe+age,data=data)$coef
lm(purchase~timeframe,data=data)$coef
lm(purchase~age,data=data)$coef
## FIX: the original used data$time, which only works through $'s
## partial name matching (the column is named 'timeframe'); spell the
## column name out so the code survives a rename or a tibble conversion.
data$timeframe %*% data$age
cor(data$timeframe,data$age)
## Center both predictors; the centered versions are orthogonal-ish and
## the joint/separate coefficients agree more closely.
data$orthotime <- data$timeframe- mean(data$timeframe)
data$orthoage <- data$age - mean(data$age)
data$orthotime %*% data$orthoage
cor(data$orthotime,data$orthoage)
lm(purchase~orthotime+orthoage,data=data)$coef
lm(purchase~orthotime,data=data)$coef
lm(purchase~orthoage,data=data)$coef
## Polynomial regression: add a quadratic (then cubic) term and compare
## fits with anova().
set.seed(1004)
x <- 1:10
y <- runif(10)
#pdf("c13-curve.pdf",width=8,height=5)
plot(x,y,pch=16,col="gold",main="Linear and quadratic models")
points(x,y)
##add the squared term:
x2 <- x^2
lm1 <- lm(y~x)
lm2 <- lm(y~x+x2)
anova(lm1,lm2)
## Prediction grid 0..10 in steps of .1, with the squared term supplied
## under the same name the model used.
preddat <- data.frame(x=0:100/10,x2=(0:100/10)^2)
points(preddat$x,predict(lm1,preddat),type="l")
points(preddat$x,predict(lm2,preddat),type="l")
## (console-history duplicates of the plotting and fitting above)
plot(x,y,pch=16,col="gold",main="Linear and quadratic models")
points(x,y)
x2 <- x^2
lm1 <- lm(y~x)
lm2 <- lm(y~x+x2)
lm1 <- lm(y~x)
lm2 <- lm(y~x+x2)
anova(lm1,lm2)
points(preddat$x,predict(lm1,preddat),type="l")
points(preddat$x,predict(lm2,preddat),type="l")
lm2
## Cubic model.  NOTE(review): the first predict(lm3,preddat) below runs
## while preddat still lacks an x3 column, so it errors; the next line's
## x3=(0:100)^2 is also a mistake, corrected on the line after it.
x3 <- x^3
lm3 <- lm(y~x+x2+x3)
points(preddat$x,predict(lm3,preddat),type="l")
preddat <- data.frame(x=0:100/10,x2=(0:100/10)^2,x3=(0:100)^2)
preddat <- data.frame(x=0:100/10,x2=(0:100/10)^2,x3=(0:100/10)^3)
points(preddat$x,predict(lm1,preddat),type="l")
points(preddat$x,predict(lm2,preddat),type="l")
points(preddat$x,predict(lm3,preddat),type="l")
## Fit polynomials of order 1-9 to the same 10 points and plot them in a
## 3x3 panel grid.  Using I(x^k) terms means predict() only needs x.
## With 10 points, the 9th-order fit passes through every point exactly.
r2 <- 0:100/10  #identify a range to predict with
par(mfrow=c(3,3),mar=c(2,3,2,1))
poly1 <- lm(y~x)
plot(x,y,pch=16,main="1st order polynomial fit")
points(r2,predict(poly1,list(x=r2)),type="l")
## (console-history duplicate of the five lines above)
r2 <- 0:100/10  #identify a range to predict with
par(mfrow=c(3,3),mar=c(2,3,2,1))
poly1 <- lm(y~x)
plot(x,y,pch=16,main="1st order polynomial fit")
points(r2,predict(poly1,list(x=r2)),type="l")
x2 <- x^2
poly2 <- lm(y ~ x + I(x^2))
plot(x,y,pch=16,main="2nd order polynomial fit")
points(x,poly2$fit,type="p")
points(r2,predict(poly2,list(x=r2)),type="l")
poly3 <- lm(y~x + I(x^2) + I( x^3))
plot(x,y,pch=16,main="3rd order polynomial fit")
points(x,poly3$fit,type="p")
points(r2,predict(poly3,list(x=r2)),type="l")
poly4 <- lm(y~x + I(x^2) + I( x^3) + I(x^4))
plot(x,y,pch=16,main="4th order polynomial fit")
points(x,poly4$fit,type="p")
points(r2,predict(poly4,list(x=r2)),type="l")
poly5 <- lm(y~x + I(x^2) + I( x^3) + I(x^4) + I(x^5))
plot(x,y,pch=16,main="5th order polynomial fit")
points(x,poly5$fit,type="p")
points(r2,predict(poly5,list(x=r2)),type="l")
poly6 <- lm(y~x + I(x^2) + I( x^3) + I(x^4) + I(x^5) +I(x^6))
plot(x,y,pch=16,main="6th order polynomial fit")
points(x,poly6$fit,type="p")
points(r2,predict(poly6,list(x=r2)),type="l")
poly7 <- lm(y~x + I(x^2) + I( x^3) + I(x^4) + I(x^5) +I(x^6) + I(x^7))
plot(x,y,pch=16,main="7th order polynomial fit")
points(x,poly7$fit,type="p")
points(r2,predict(poly7,list(x=r2)),type="l")
poly8 <- lm(y~x + I(x^2) + I( x^3) + I(x^4) + I(x^5) +I(x^6) + I(x^7) + I(x^8))
plot(x,y,pch=16,main="8th order polynomial fit")
points(x,poly8$fit,type="p")
points(r2,predict(poly8,list(x=r2)),type="l")
poly9 <- lm(y~x + I(x^2) + I( x^3) + I(x^4) + I(x^5) +I(x^6) + I(x^7) + I(x^8) + I(x^9))
plot(x,y,pch=16,main="9th order polynomial fit")
points(x,poly9$fit,type="p")
points(r2,predict(poly9,list(x=r2)),type="l")
## Centered moving average of x with window width n.  Uses stats::filter
## explicitly so the code still works when dplyr::filter masks filter()
## (the original bare filter() call breaks after library(dplyr)).
## Returns a plain vector the same length as x, with NA at the ends
## where the window is incomplete.
ma <- function(x, n = 5) {
  stats::filter(x, rep(1 / n, n), sides = 2)[1:length(x)]
}
## Moving averages of the WWWusage time series at two window sizes.
WWWusage
plot(WWWusage)
plot(WWWusage)
points(ma(WWWusage,n=5),type="o",col= "blue")
points(ma(WWWusage,n=15),type="o",col="green")
plot(WWWusage)
## Add heavy noise, then smooth with progressively wider windows (each
## red line overplots the previous one).
newdat <- WWWusage + rnorm(100)*20
points(newdat)
points(ma(newdat,3),col="red",type="l",lwd=3)
points(ma(newdat,5),col="red",type="l",lwd=3)
points(ma(newdat,6),col="red",type="l",lwd=3)
points(ma(newdat,10),col="red",type="l",lwd=3)
points(ma(newdat,20),col="red",type="l",lwd=3)
##moving average with triangular kernel
## loess smoothing of a noisy nonlinear function; smaller span gives a
## wigglier (less smoothed) fit.
set.seed(100)
datx <- runif(50)*5
daty <- sin(datx*5) + ((datx-2)/3)^2 + rnorm(50)*.25
plot(datx,daty)
lmodel <- loess(daty~datx)
xs <- 0:500/100
points(xs,predict(lmodel,xs),type="l",col="red",lwd=3)
lmodel2 <- loess(daty~datx,span=.3)
points(xs,predict(lmodel2,xs),type="l",col="blue",lwd=3)
lmodel2 <- loess(daty~datx,span=.05)
points(xs,predict(lmodel2,xs),type="l",col="blue",lwd=3)
lmodel2 <- loess(daty~datx,span=.3)
points(xs,predict(lmodel2,xs),type="l",col="blue",lwd=3)
## (console-history duplicate of the default and span=.3 fits above)
plot(datx,daty)
lmodel <- loess(daty~datx)
xs <- 0:500/100
points(xs,predict(lmodel,xs),type="l",col="red",lwd=3)
lmodel2 <- loess(daty~datx,span=.3)
points(xs,predict(lmodel2,xs),type="l",col="blue",lwd=3)
## GAM on ChickWeight: smooth of Time plus a Diet effect.
## NOTE(review): this first gam() call runs before library(gam) is
## loaded, so it errors; the call is repeated after loading below.
cw <- ChickWeight
g <-  gam(weight~s(Time)+ Diet,data=cw)
library(gam)
cw <- ChickWeight
g <-  gam(weight~s(Time)+ Diet,data=cw)
cw$fit <- g$fitted.values
library(ggplot2)
summary(g)
plot(g)
#pdf("c13-gam1.pdf")
## NOTE(review): aes() here names group= twice (Diet and Chick) --
## likely only one takes effect; confirm which grouping was intended.
ggplot(cw,aes(x=Time,y=fit,group=Diet,color=Diet,group=Chick)) +geom_point() +geom_line() +theme_bw()
#dev.off()
## Refit with a separate loess smooth of Time per chick.
g <-  gam(weight~lo(Time)*Chick+ Diet,data=cw)
cw$fit2 <- g$fitted.values
#pdf("c13-gam2.pdf")
ggplot(cw,aes(x=Time,y=fit2,group=Chick,color=Diet)) +geom_point() + geom_line()+ theme_bw()
#dev.off()
g
summary(g)
## NOTE(review): poly(10) and the degree-10 fits error here, since
## poly() requires the degree to be less than the number of unique x
## values (x is 1:10); the degree=3 and degree=2 fits below succeed.
poly(10)
lm(y~ poly(x,10))
?poly
lm(y~ poly(x,degree=10))
lm(y~ poly(x,degree=3))
lm(y~ poly(x,degree=2))
## Tukey ladder-of-powers and Box-Cox transformations of skewed data.
## NOTE(review): library(rcompanion) is called before install.packages,
## so on a machine without the package the first line errors.
library(rcompanion)
install.packages("rcompanion")
## z is right-skewed by construction (reciprocal of x plus noise);
## zinv is roughly uniform/linear in x.
x <-runif(100) * 20
z <- .1/x +runif(100)*.1
zinv <- (x+runif(100)/.2)
hist(z)
qqnorm(z)
## (console-history duplicate of the five lines above)
x <-runif(100) * 20
z <- .1/x +runif(100)*.1
zinv <- (x+runif(100)/.2)
hist(z)
qqnorm(z)
hist(zinv)
qqnorm(zinv)
## transformTukey searches power transforms for the one that best
## normalizes z.
z2 <- transformTukey(z,plotit=T)
library(rcompanion)
##Alternate: Box-Cox transform
library(MASS)
par(mfrow=c(1,2))
bc<-boxcox(z~x,plotit=T)
## NOTE(review): 'census' is never defined in this file, so the bc2
## lines error unless that data frame exists in the workspace.
bc2 <- boxcox(census$respop72011~census$GEO.id2,plotit=T)
par(mfrow=c(1,2))
bc<-boxcox(z~x,plotit=T)
bc2 <- boxcox(census$respop72011~census$GEO.id2,plotit=T)
bc2
bc
## Polynomial fits to the WWWusage series.  NOTE(review): these first
## five plot/points calls run before x is reset to 1:100 and before g1
## is fit just below, so in file order they use stale objects or error.
plot(x,WWWusage)
points(g1$fit,type="l",col="red")
plot(x,WWWusage)
points(g1$fit,type="l",col="red")
points(g1$fit,type="l",col="red")
## Orthogonal polynomial fits of order 1 through 7, overplotted in red.
x<- 1:100
g1 <- lm(WWWusage~poly(x,1))
g2 <- lm(WWWusage~poly(x,2))
g3 <- lm(WWWusage~poly(x,3))
g4 <- lm(WWWusage~poly(x,4))
g5 <- lm(WWWusage~poly(x,5))
g6 <- lm(WWWusage~poly(x,6))
g7 <- lm(WWWusage~poly(x,7))
plot(x,WWWusage)
points(g1$fit,type="l",col="red")
points(g2$fit,type="l",col="red")
points(g3$fit,type="l",col="red")
points(g4$fit,type="l",col="red")
points(g5$fit,type="l",col="red")
points(g6$fit,type="l",col="red")
points(g7$fit,type="l",col="red")
##consider the following two data sets. We can assess how good the model is by looking at R^2:
set.seed(302)
## NOTE(review): hard-coded absolute Dropbox path -- this source() only
## works on the author's machine.
source("~/Dropbox/courses/5210-2024/web-5210/psy5210/Projects/Chapter14/Chapter14Walkthrough.R")
modela <- lm(y~x)
anova(modela)
source("~/Dropbox/courses/5210-2024/web-5210/psy5210/Projects/Chapter14/Chapter14Walkthrough.R")
## Same true slope, different noise level: R^2 reflects the noise as
## much as the model.  Three panels side by side.
set.seed(302)
x <- rnorm(1000)
y <- 33 + x + rnorm(1000)
modela <- lm(y~x)
anova(modela)
par(mfrow=c(1,3))
plot(x,y,col="gold",pch=16,ylim=c(10,50),
main=paste("R2=",round(summary(modela)$r.squared,3)))
points(x,y)
abline(modela$coef)
## Panel 2: five times the noise standard deviation.
y2 <- 33 + x + rnorm(1000)*5
modelb <-lm(y2~x)
plot(x,y2,col="gold",pch=16,ylim=c(10,50),
main=paste("R2=",round(summary(modelb)$r.squared,3)))
abline(modelb$coef)
points(x,y2)
summary(lm(y2~x))
## Panel 3: the low-noise data with 15 extreme outliers injected, which
## also drags R^2 down.
set.seed(100)
y3 <- y
noisy <- sample(1000,15)
y3[noisy] <- y3[noisy] + rnorm(15)* 35
modelc <- lm(y3~x)
plot(x,y3,col="gold",pch=16,
main=paste("R2=",round(summary(modelc)$r.squared,3)))
points(x,y3)
abline(modelc$coef)
## Small-sample regression set up for the next section.
set.seed(111)
x <- rnorm(20)
y <- x + rnorm(20)
g1 <- lm(y~x)
