##first, recode the negative codings. answers <- select(big5,contains("Q")) ##mutate the columns with -1 valence: recoded <- answers %>% mutate_if(valence==-1,function(x){6-x}) melted <- melt(mutate(recoded,sub=1:nrow(recoded)), id.vars = c("sub") ) arrange(melted,sub,variable) big5 <- read.csv("bigfive.csv") qtype <- c("E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "O","A","C","O") valence <- c(1,-1,1,1,1, -1,1,-1,-1,1, 1,-1,1,1,1, 1,1,-1,1,1, -1,1,-1,-1,1, 1,-1,1,1,1, -1,1,1,-1,-1, 1,-1,1,1,1, -1,1,-1,1) varnames <- colnames(big5)[2:45] ##first, recode the negative codings. answers <- select(big5,contains("Q")) ##mutate the columns with -1 valence: recoded <- answers %>% mutate_if(valence==-1,function(x){6-x}) ##check this. For negative valence, 2 becomes 4 etc. bind_rows(recoded[1,],answers[1,]) ##create composite subsets b5.e <- select(recoded, one_of(varnames[qtype=="E"])) b5.a <- select(recoded, one_of(varnames[qtype=="A"])) b5.c <- select(recoded, one_of(varnames[qtype=="C"])) b5.n <- select(recoded, one_of(varnames[qtype=="N"])) b5.o <- select(recoded, one_of(varnames[qtype=="O"])) composites1 <- data.frame(e=rowMeans(b5.e,na.rm=T), a=rowMeans(b5.a,na.rm=T), c=rowMeans(b5.c,na.rm=T), n=rowMeans(b5.n,na.rm=T), o=rowMeans(b5.o,na.rm=T) ) dat1 |> group_by(sub) |> summarize( sub=sub,dv=dv, mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) |> mutate(zdv = (dv - mean)/sd) ##Compute a z-score dat1 colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") library(knitr) library(rmdformats) ## Global options options(max.print="75") opts_chunk$set(echo=TRUE, cache=TRUE, prompt=FALSE, tidy=TRUE, comment=NA, message=FALSE, warning=FALSE) opts_knit$set(width=75) library(magrittr) f1 <- function(x){abs(x)} f2 <- function(x){sqrt(x)} a <- f1(-33.2) b <- f2(a) f2( f1(-33.2 ) ) (-33.2) %>% f1 %>% f2 ##Without magrittr--requires () (-33.2) |> f1() |> f2() ##magrittr lets you specify the first argument of a function with a dot rnorm(10) %>% round(.,3) #rnorm(10) |> round(.,3) ##This won't work ##Piping automatically replaces the first argument and any other argumments are bound to later slots set.seed(100) rnorm(10) %>% round(3) ##standard |> works the same here: set.seed(100) rnorm(10) |> round(3) ##put the argument into the second slot: sample(1:5,replace=T,size=10) %>% round(runif(10), . ) runif(100) %>% sd %>% sqrt %>% log -> value value <- runif(100) %>% sd %>% sqrt %>% log value dat0 <- data.frame(sub= c(1,1,1,2,2,2,3,3,3,4,4,4), question = c("a","b","c","a","b","c","a","b","c","a","b","c"), dv = c(5,3,1,2,3,6,4,2,3,1,3,5)) dat <- data.frame(sub = sample(letters,100,replace=T), cond = sample(c("A","B","C"),100,replace=T), group = sample(1:10,100,replace=T), dv1 = runif(100)*5) library(dplyr) data <- dat dplyr::filter(data,sub=="b") ##this just returns the data for use, but does not save data2 <-filter(data,sub=="b") ## re-assign to data head(data) dim(data) dim(data2) data %>% filter(sub=="b") data |> filter(sub=="b") -> data3 ##use a pipe, then assign to data at the end data3 head((dat$sub=="b")) ##shows the first 6 elements of the boolean filter(dat,sub=="b") ##use filter to pick out just the sub == 'b' rows dat |> slice(1) ##first row slice(dat,2:10) ##9 rows after the first dat |> slice(1:20*2) ##even rows 2..40 slice(dat,-1) arrange(dat,sub) arrange(dat,sub,group) dat |> arrange(sub,cond) |> filter(dv1>1) select(dat0,sub,dv) select(dat0,sub:dv) select(dat0,-question) # piping example: filter sub 4 and select just dv value. dat0 |> filter(sub==4) |> select(dv) #select columns that start with s dat0 |> select(starts_with("s")) rename(dat0,participant=sub) |> head() dat0 |> rename(participant2=sub) |> head() dat2 <- data.frame(a=sample(1:10,20,replace=T), b=sample(c(100,200,300),20,replace=T)) distinct(dat2) distinct(dat,sub) distinct(dat,sub) distinct(dat,sub,.keep_all=T) ##distinct pairs of columns: dat |> distinct(cond,sub) ##reverse code a scale dat1 <- mutate(dat0,newdv=6-dv) ##alternative using pipes: dat0 |> mutate(newdv = 6-dv) -> dat1 ##rewrites new data set to dat1 dat0 |> mutate(dv3 = dv*2) #does not add to dat0 dat1$newdv2 = dat1$dv*dat1$newdv mutate(dat1,newdv3 = dv*newdv) dat1[1:5,] transmute(dat1,newdv2 = dv*newdv) dat1$newdv3 <- dat1 |> transmute(newdv2=dv*newdv) dat1 |> mutate(questionUC=toupper(question)) |> head() dat1 |> mutate(question= ifelse(question =="a","A", ifelse(question=="b","B","C"))) |> head() dat1 |> mutate(question= case_when(question=='a'~'A', question=='b'~'B', question=='c'~'C')) dat1 |> rowwise() |> mutate(question= switch((question), 'a'='A','b'='B','c'='C')) dat1 |> rowwise() |> mutate(odd= switch((dv %% 2+1),'even', 'odd')) |> select(sub,question, dv, odd) dat1 |> mutate(question= dplyr::recode(question, a="A", b="B", c='C')) A <- data.frame(sub=c("A","B","C","E"),data1=1:4) B <- data.frame(sub=c("A","B","D","F"),data2=11:14) left_join(A,B, by="sub") right_join(A,B, by="sub") inner_join(A,B, by="sub") full_join(A,B, by="sub") semi_join(A,B, by="sub") anti_join(A,B, by="sub") ##This doesn't make any sense, but it works: bind_rows(left_join(A,B,by="sub"), right_join(A,B,by="sub")) dat1 |> summarize(mean=mean(as.numeric(dv)), sd = sd(as.numeric(dv)), total=mean(dv+newdv)) dat1 |> summarize(mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv)), total=mean(dv+newdv)) dat1 |> summarize(sub=sub,question=question,dv=dv, absdv = sqrt(abs(dv)), mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv)), total=mean(dv+newdv)) -> newdat1 newdat1[1:10,] newdat1$z <- (newdat1$dv-newdat1$mean) / newdat1$sd dat1 dat1 |> group_by(sub) dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) dat1 |> group_by(sub) |> summarize( sub=sub,dv=dv, mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) |> mutate(zdv = (dv - mean)/sd) ##Compute a z-score dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), N=n()) dat0$coding <- rep(c(-1,1),6) d1<-mutate(filter(dat0,coding==1),newdv=dv) d2<-mutate(filter(dat0,coding==-1),newdv=6-dv) dat0b <- bind_rows(d1,d2) arrange(dat0b,sub,question) dat0 |> mutate(newdv=ifelse(coding==1,dv,6-dv)) big5 <- read.csv("bigfive.csv") qtype <- c("E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "O","A","C","O") valence <- c(1,-1,1,1,1, -1,1,-1,-1,1, 1,-1,1,1,1, 1,1,-1,1,1, -1,1,-1,-1,1, 1,-1,1,1,1, -1,1,1,-1,-1, 1,-1,1,1,1, -1,1,-1,1) library(reshape2) dat2 <- read.csv("pooled-survey.csv") head(dat2) dat3 <-dcast(dat2,subcode~question,value.var="answer") head(dat3) colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dat3 <-acast(dat3,subcode~question,value.var="answer") dat2[1:5,] dat3 <-acast(dat2,subcode~question,value.var="answer") dat3[1:5,] dat4 <-dcast(dat2,subcode~question,value.var="timestamp") dat4[1:10,] dat4 <-dcast(dat2,subcode~question,value.var="time") dat4[1:10,] melt(dat2)[1:10,] melt(dat3)[1:10,] melt(dat3,id.vars = c("subcode"))[1:10,] melt(dat3,id.vars = c("subcode","q3"))[1:10,] melt(dat3,id.vars = c("subcode","q3"),value.name="response",variable.name="Question") |> head() library(tidyr) d1<-gather(dat2,key="question",value="answer",q1,q2,q3,q4,q5) library(tidyr) d1<-gather(dat3,key="question",value="answer",q1,q2,q3,q4,q5) dat1 library(tidyr) d1<-gather(dat1,key="question",value="answer",q1,q2,q3,q4,q5) dat2[1:5,] library(knitr) library(rmdformats) ## Global options options(max.print="75") opts_chunk$set(echo=TRUE, cache=TRUE, prompt=FALSE, tidy=TRUE, comment=NA, message=FALSE, warning=FALSE) opts_knit$set(width=75) library(magrittr) f1 <- function(x){abs(x)} f2 <- function(x){sqrt(x)} a <- f1(-33.2) b <- f2(a) f2( f1(-33.2 ) ) (-33.2) %>% f1 %>% f2 ##Without magrittr--requires () (-33.2) |> f1() |> f2() ##magrittr lets you specify the first argument of a function with a dot rnorm(10) %>% round(.,3) #rnorm(10) |> round(.,3) ##This won't work ##Piping automatically replaces the first argument and any other argumments are bound to later slots set.seed(100) rnorm(10) %>% round(3) ##standard |> works the same here: set.seed(100) rnorm(10) |> round(3) ##put the argument into the second slot: sample(1:5,replace=T,size=10) %>% round(runif(10), . ) runif(100) %>% sd %>% sqrt %>% log -> value value <- runif(100) %>% sd %>% sqrt %>% log value dat0 <- data.frame(sub= c(1,1,1,2,2,2,3,3,3,4,4,4), question = c("a","b","c","a","b","c","a","b","c","a","b","c"), dv = c(5,3,1,2,3,6,4,2,3,1,3,5)) dat <- data.frame(sub = sample(letters,100,replace=T), cond = sample(c("A","B","C"),100,replace=T), group = sample(1:10,100,replace=T), dv1 = runif(100)*5) library(dplyr) data <- dat dplyr::filter(data,sub=="b") ##this just returns the data for use, but does not save data2 <-filter(data,sub=="b") ## re-assign to data head(data) dim(data) dim(data2) data %>% filter(sub=="b") data |> filter(sub=="b") -> data3 ##use a pipe, then assign to data at the end data3 head((dat$sub=="b")) ##shows the first 6 elements of the boolean filter(dat,sub=="b") ##use filter to pick out just the sub == 'b' rows dat |> slice(1) ##first row slice(dat,2:10) ##9 rows after the first dat |> slice(1:20*2) ##even rows 2..40 slice(dat,-1) arrange(dat,sub) arrange(dat,sub,group) dat |> arrange(sub,cond) |> filter(dv1>1) select(dat0,sub,dv) select(dat0,sub:dv) select(dat0,-question) # piping example: filter sub 4 and select just dv value. dat0 |> filter(sub==4) |> select(dv) #select columns that start with s dat0 |> select(starts_with("s")) rename(dat0,participant=sub) |> head() dat0 |> rename(participant2=sub) |> head() dat2 <- data.frame(a=sample(1:10,20,replace=T), b=sample(c(100,200,300),20,replace=T)) distinct(dat2) distinct(dat,sub) distinct(dat,sub) distinct(dat,sub,.keep_all=T) ##distinct pairs of columns: dat |> distinct(cond,sub) ##reverse code a scale dat1 <- mutate(dat0,newdv=6-dv) ##alternative using pipes: dat0 |> mutate(newdv = 6-dv) -> dat1 ##rewrites new data set to dat1 dat0 |> mutate(dv3 = dv*2) #does not add to dat0 dat1$newdv2 = dat1$dv*dat1$newdv mutate(dat1,newdv3 = dv*newdv) dat1[1:5,] transmute(dat1,newdv2 = dv*newdv) dat1$newdv3 <- dat1 |> transmute(newdv2=dv*newdv) dat1 |> mutate(questionUC=toupper(question)) |> head() dat1 |> mutate(question= ifelse(question =="a","A", ifelse(question=="b","B","C"))) |> head() dat1 |> mutate(question= case_when(question=='a'~'A', question=='b'~'B', question=='c'~'C')) dat1 |> rowwise() |> mutate(question= switch((question), 'a'='A','b'='B','c'='C')) dat1 |> rowwise() |> mutate(odd= switch((dv %% 2+1),'even', 'odd')) |> select(sub,question, dv, odd) dat1 |> mutate(question= dplyr::recode(question, a="A", b="B", c='C')) A <- data.frame(sub=c("A","B","C","E"),data1=1:4) B <- data.frame(sub=c("A","B","D","F"),data2=11:14) left_join(A,B, by="sub") right_join(A,B, by="sub") inner_join(A,B, by="sub") full_join(A,B, by="sub") semi_join(A,B, by="sub") anti_join(A,B, by="sub") ##This doesn't make any sense, but it works: bind_rows(left_join(A,B,by="sub"), right_join(A,B,by="sub")) dat1 |> summarize(mean=mean(as.numeric(dv)), sd = sd(as.numeric(dv)), total=mean(dv+newdv)) dat1 |> summarize(mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv)), total=mean(dv+newdv)) dat1 |> summarize(sub=sub,question=question,dv=dv, absdv = sqrt(abs(dv)), mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv)), total=mean(dv+newdv)) -> newdat1 newdat1[1:10,] newdat1$z <- (newdat1$dv-newdat1$mean) / newdat1$sd dat1 dat1 |> group_by(sub) dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) dat1 |> group_by(sub) |> summarize( sub=sub,dv=dv, mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) |> mutate(zdv = (dv - mean)/sd) ##Compute a z-score dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), N=n()) dat0$coding <- rep(c(-1,1),6) d1<-mutate(filter(dat0,coding==1),newdv=dv) d2<-mutate(filter(dat0,coding==-1),newdv=6-dv) dat0b <- bind_rows(d1,d2) arrange(dat0b,sub,question) dat0 |> mutate(newdv=ifelse(coding==1,dv,6-dv)) big5 <- read.csv("bigfive.csv") qtype <- c("E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "E","A","C","N","O", "O","A","C","O") valence <- c(1,-1,1,1,1, -1,1,-1,-1,1, 1,-1,1,1,1, 1,1,-1,1,1, -1,1,-1,-1,1, 1,-1,1,1,1, -1,1,1,-1,-1, 1,-1,1,1,1, -1,1,-1,1) library(reshape2) dat2 <- read.csv("pooled-survey.csv") head(dat2) dat3 <-dcast(dat2,subcode~question,value.var="answer") head(dat3) colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dat3 <-acast(dat2,subcode~question,value.var="answer") dat3[1:5,] dat4 <-dcast(dat2,subcode~question,value.var="timestamp") dat4[1:10,] melt(dat1)[1:10,] melt(dat2)[1:10,] colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dat3 <-dcast(dat2,subcode~question,value.var="answer") head(dat3) colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dat3 <-acast(dat2,subcode~question,value.var="answer") dat3[1:5,] acast(dat2,subcode~question,value.var="answer") |> head() colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dat3 <-dcast(dat2,subcode~question,value.var="answer") head(dat3) colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dcast(dat2,subcode~question,value.var="timestamp") |> head() dcast(dat2,subcode~question,value.var="time") |> head() melt(dat3)[1:10,] melt(dat3) |> head() melt(dat3,id.vars = c("subcode")) |> head() melt(dat3,id.vars = c("subcode","q3"))[1:10,] dat2 |> melt(id.vars = c("subcode","q3"), measure.vars=c("q2","q4","q5"), value.name="response",variable.name="Question") |> head() dat3 |> melt(id.vars = c("subcode","q3"), measure.vars=c("q2","q4","q5"), value.name="response",variable.name="Question") |> head() dat3[1:5,] dat3$q1 dat2 colnames(dat3) <- c("subcode","q1","q2","q3","q4","q5") dat3$q1 <- "Missing" ##all the data are empty so we will fill it in acast(dat2,subcode~question,value.var="answer") |> head() dcast(dat2,subcode~question,value.var="timestamp") |> head() dcast(dat2,subcode~question,value.var="time") |> head() melt(dat3) |> head() dat3[1:5,] dat1[1:5,] melt(dat3) |> head() melt(dat3,id.vars = c("subcode")) |> head() dat2[1:5,] melt(dat3,id.vars = c("subcode"), value.name="response",variable.name="Question") |> head() dat3 |> melt(id.vars = c("subcode"), measure.vars=c("q2","q4","q5"), value.name="response",variable.name="Question") |> head() library(tidyr) d1<-gather(dat1,key="question",value="answer",q1,q2,q3,q4,q5) d1<-gather(dat2,key="question",value="answer",q1,q2,q3,q4,q5) dat2[1:5,] gather(dat3,key="question",value="answer",q1,q2,q3,q4,q5) |> head() gather(dat3,key="question",value="answer",q1,q2,q3,q4,q5) |> head() gather(dat3,key="question",value="answer",q2:q5) |>head() #only q1 to q5 gather(dat3,key="question",value="answer",-subcode,-q1) tmp <- gather(dat3,key="question",value="answer",-subcode,-q1) head(tmp) tmp %>% arrange(subcode,question) d1<-pivot_longer(dat2,names_to="question",values_to="answer", cols=c(q1,q2,q3,q4,q5)) pivot_longer(dat3,names_to="question",values_to="answer", cols=c(q1,q2,q3,q4,q5)) |> head() pivot_longer(dat3,names_to="question",values_to="answer", cols=q1:q4) |> head() #only q1 to q5 pivot_longer(dat3,names_to="question",values_to="answer", cols=c(-subcode,-q1)) |> head() pivot_longer(dat3,names_to="question",values_to="answer", cols=c(-subcode,-q1)) |> arrange(subcode, question) |> head() d1.wide <- d1 %>% spread(question,answer) d3 %>% spread(question,answer) d3 %>% spread(question,answer) |> head() d3 |> spread(question,answer) |> head() d1.wide2 <- d1 |> pivot_wider(names_from=question,values_from=answer) d3 |> pivot_wider(names_from=question,values_from=answer) dat1 |> group_by(sub) |> summarize( sub=sub,dv=dv, mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) |> mutate(zdv = (dv - mean)/sd) ##Compute a z-score dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), N=n()) dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), dat1 |> group_by(sub) |> summarize(mean=mean(as.numeric(dv)), N=n()) dat1 |> group_by(sub) |> summarize( sub=sub,dv=dv, mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) |> mutate(zdv = (dv - mean)/sd) ##Compute a z-score dat1 |> group_by(sub) |> reframe( sub=sub,dv=dv, mean=mean(as.numeric(dv)), sd=sd(as.numeric(dv))) |> mutate(zdv = (dv - mean)/sd) ##Compute a z-score dostats <- function(x) {data.frame(mean=mean(x),sd=sd(x),n=length(x))} dat1 |> group_by(sub) |> reframe(dostats(dv)) dostats <- function(x) { mu <- mean(x) sd <- sd(x) z <- (x-mu)/sd n <- length(x) se <- sd/sqrt(n) data.frame(mu,sd,z,n,se) } dat1 |> group_by(sub) |> reframe(dostats(dv)) dostats <- function(x) { mu <- mean(x) sd <- sd(x) z <- (mu)/sd n <- length(x) se <- sd/sqrt(n) data.frame(mu,sd,z,n,se) } dat1 |> group_by(sub) |> reframe(dostats(dv))