source("~/Dropbox/courses/5210-2022/web-5210/psy5210/Projects/Chapter14/Chapter14Walkthrough.R", echo=TRUE) ##Non-constant variance (and a test for it) set.seed(5) x2 <- runif(200)*10 y2 <- 33 + x2 + rnorm(200)*(x2) ############################# ## Testing the assumptions of the model (normality) set.seed(111) x <- rnorm(20) y <- x + rnorm(20) set.seed(111) x <- rnorm(20) y <- x + rnorm(20) resid <- rep(0,20) for(i in 1:20) { keep <- rep(T,20) keep[i] <- F xtmp <- x[keep] model <- lm(y[keep]~xtmp) resid[i] <-(y[i]-predict(model,list(xtmp=x[i])))/sd(model$resid) } plot(resid/sd(resid),main="Jackknife on standardized residuals") model1 <- lm(y~x) int <- rep(0,20) beta <- rep(0,20) for(i in 1:20) { keep <- rep(T,20) keep[i] <- F xtmp <- x[keep] model <- lm(y[keep]~xtmp) int[i] <- model1$coef[1]-model$coef[1] beta[i] <- model1$coef[1]-model$coef[2] } #pdf("c14-jackknife2.pdf",width=8,height=5) par(mfrow=c(2,2),mar=c(2,2,2,2)) plot(int,main="Jackknife on Intercept") plot(beta,main="Jackknife on slope") plot(x,y,main="Jackknife on linear model") for(i in 1:20)abline(model1$coef[1]+int[i],beta[i]+model1$coef[2]) plot(x,y,main="Jackknife on beta diagnosticity") fits <- dfbetas(model1) for(i in 1:20)abline(fits[i,],col="red") #pdf("c14-comparison.pdf",width=8,height=6) plot(hatvalues(model1),type="o",col="red",ylim=c(-1,2)) points(cooks.distance(model1),type="o",col="darkgreen") points(dffits(model1),type="o",col="orange") influence.measures(model1) dat <- read.csv("tmt.csv") dat$age <- as.factor(dat$age) par(mfrow=c(1,2)) plot(dat$time~(dat$age)+dat$type) par(mfrow=c(2,2)) plot(lm1) lm1 <- lm(time~age+type,data=dat) qqnorm(lm1$resid) #pdf("c14-tmt-model-lmplot.pdf") par(mfrow=c(2,2)) plot(lm1) hist(lm1$resid) hist(log(lm1$resid-min(lm1$resid)),breaks=20) plot(log(dat$time)~(dat$age)) points(dat$age,log(dat$time)) plot(log(dat$time)~(dat$type)) lm2 <- lm(log(time)~age+type,data=dat) qqnorm(lm2$resid) ##This is a bit better; the fastest time was around 50 lm3 <- lm(log(time-50)~age+type,data=dat) qqnorm(lm3$resid) hist(lm3$resid) summary(lm1) summary(lm2) summary(lm3) #########Downsides: lm1.int <- lm(time~age*type,data=dat) lm3.int <- lm(log(time-50)~age*type,data=dat) anova(lm1.int) anova(lm3.int) agg1 <- tapply(dat$time,list(dat$age,dat$type),mean) agg3 <- tapply(log(dat$time-50),list(dat$age,dat$type),mean) matplot(t(agg1),type="o",ylab="Response time (s)") matplot(t(agg3),type="o",ylab="log(Response time)",yaxt="n",ylim=c(2.5,5)) axis(2,log(c(65,75,100,150,200,250)-50), c(65,75,100,150,200,250),las=1) library(car) ncvTest(lm1.int) ncvTest(lm3.int)