# install packages
# devtools::install_github("https://github.com/tdienlin/td@v.0.0.2.5")
# define packages
library(broom.mixed)
library(brms)
library(devtools)
library(GGally)
library(ggplot2)
library(gridExtra)
library(haven)
library(kableExtra)
library(knitr)
library(lavaan)
library(lme4)
library(magrittr)
library(mice)
library(mvnormalTest)
library(PerFit)
library(performance)
library(psych)
library(quanteda.textstats)
library(readxl)
library(semTools)
library(td)
library(tidyverse)
# unload("MASS")
Provide session info to make results reproducible.
# Print R version, platform, and loaded package versions for
# reproducibility; the commented output below records the run used.
sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Vienna
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 tidyverse_2.0.0
## [10] td_0.0.1 semTools_0.5-6 readxl_1.4.2 quanteda.textstats_0.96.2 psych_2.3.3 performance_0.10.4 PerFit_1.4.6 mirt_1.39 lattice_0.21-8
## [19] ltm_1.2-0 polycor_0.8-1 msm_1.7 MASS_7.3-60 mvnormalTest_1.0.0 mice_3.16.0 magrittr_2.0.3 lme4_1.1-33 Matrix_1.5-4.1
## [28] lavaan_0.6-15 knitr_1.43 kableExtra_1.3.4 haven_2.5.2 gridExtra_2.3 GGally_2.1.2 ggplot2_3.4.2 devtools_2.4.5 usethis_2.2.0
## [37] brms_2.19.0 Rcpp_1.0.10 broom.mixed_0.2.9.4
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.2 matrixStats_1.0.0 bitops_1.0-7 insight_0.19.2 httr_1.4.6 webshot_0.5.4 RColorBrewer_1.1-3 numDeriv_2016.8-1.1 profvis_0.3.8 tools_4.3.0 backports_1.4.1 utf8_1.2.3
## [13] R6_2.5.1 DT_0.28 vegan_2.6-4 sm_2.2-5.7.1 mgcv_1.8-42 nortest_1.0-4 jomo_2.7-6 permute_0.9-7 urlchecker_1.0.1 withr_2.5.0 Brobdingnag_1.2-9 prettyunits_1.1.1
## [25] cli_3.6.1 pspline_1.0-19 shinyjs_2.1.0 sass_0.4.6 mvtnorm_1.2-2 pbapply_1.7-0 pbivnorm_0.6.0 systemfonts_1.0.4 StanHeaders_2.26.27 foreign_0.8-84 svglite_2.1.1 parallelly_1.36.0
## [37] sessioninfo_1.2.2 rstudioapi_0.14 generics_0.1.3 shape_1.4.6 gtools_3.9.4 crosstalk_1.2.0 distributional_0.3.2 inline_0.3.19 loo_2.6.0 fansi_1.0.4 abind_1.4-5 lifecycle_1.0.3
## [49] yaml_2.3.7 grid_4.3.0 promises_1.2.0.1 irtoys_0.2.2 crayon_1.5.2 mitml_0.4-5 miniUI_0.1.1.1 pillar_1.9.0 boot_1.3-28.1 fda_6.1.4 estimability_1.4.1 shinystan_2.6.0
## [61] stopwords_2.3 admisc_0.32 codetools_0.2-19 fastmatch_1.1-3 pan_1.6 glue_1.6.2 data.table_1.14.8 remotes_2.4.2 vctrs_0.6.3 cellranger_1.1.0 gtable_0.3.3 cachem_1.0.8
## [73] ks_1.14.0 xfun_0.39 mime_0.12 fds_1.8 pracma_2.4.2 coda_0.19-4 pcaPP_2.0-3 survival_3.5-5 iterators_1.0.14 shinythemes_1.2.0 ellipsis_0.3.2 nlme_3.1-162
## [85] xts_0.13.1 threejs_0.3.3 rstan_2.21.8 tensorA_0.36.2 bslib_0.5.0 Deriv_4.1.3 KernSmooth_2.23-21 rpart_4.1.19 colorspace_2.1-0 Hmisc_5.1-0 nnet_7.3-19 mnormt_2.1.1
## [97] tidyselect_1.2.0 processx_3.8.1 emmeans_1.8.6 moments_0.14.1 compiler_4.3.0 glmnet_4.1-7 rvest_1.0.3 htmlTable_2.4.1 expm_0.999-7 xml2_1.3.4 colourpicker_1.2.0 posterior_1.4.1
## [109] checkmate_2.2.0 scales_1.2.1 dygraphs_1.1.1.6 quadprog_1.5-8 callr_3.7.3 digest_0.6.31 rainbow_3.7 minqa_1.2.5 rmarkdown_2.22 htmltools_0.5.5 pkgconfig_2.0.3 base64enc_0.1-3
## [121] stabledist_0.7-1 fastmap_1.1.1 ADGofTest_0.3 rlang_1.1.1 htmlwidgets_1.6.2 shiny_1.7.4 farver_2.1.1 jquerylib_0.1.4 zoo_1.8-12 jsonlite_1.8.5 mclust_6.0.0 dcurver_0.9.2
## [133] RCurl_1.98-1.12 Formula_1.2-5 bayesplot_1.10.0 munsell_0.5.0 furrr_0.3.1 stringi_1.7.12 plyr_1.8.8 pkgbuild_1.4.0 parallel_4.3.0 listenv_0.9.0 splines_4.3.0 hms_1.1.3
## [145] ps_1.7.5 igraph_1.5.0 markdown_1.7 reshape2_1.4.4 pkgload_1.3.2 rstantools_2.3.1 GPArotation_2023.3-1 evaluate_0.21 RcppParallel_5.1.7 deSolve_1.35 tzdb_0.4.0 nloptr_2.0.3
## [157] foreach_1.5.2 httpuv_1.6.11 quanteda_3.3.1 future_1.32.0 reshape_0.8.9 broom_1.0.5 xtable_1.8-4 later_1.3.1 viridisLite_0.4.2 gsl_2.1-8 nsyllable_1.0.1 copula_1.1-2
## [169] memoise_2.0.1 cluster_2.1.4 timechange_0.2.0 globals_0.16.2 hdrcde_3.4 bridgesampling_1.1-2
Find variables in dataset.
# Find the per-wave variable names of an item in the documentation table.
#
# name: the item's label (matched against the `Label` column).
# data: documentation data frame with columns `Variable` and `Label`;
#       defaults to the global object `doc`.
# Returns a named character vector of variable names, named by the part
# of each variable name before the first underscore.
find_var <- function(name, data = doc) {
  # BUG FIX: the body previously used the global `doc` directly, silently
  # ignoring the `data` argument; the argument is now actually used.
  # (Also: body braces instead of parentheses.)
  data %>%
    filter(Label == name) %>%
    dplyr::select(Variable) %>%
    unlist() %>%
    set_names(sub("\\_.*", "", .))
}
Extract characteristics from fitted lmer models.
# Extract summary characteristics from a fitted lmer model.
#
# model: a fitted lme4 model whose random effects are grouped by `wave`.
# Returns a one-row data.frame with:
#   sd   - residual standard deviation (the "sc" attribute of VarCorr)
#   min  - minimum over the per-wave coefficient table
#   max  - maximum over the per-wave coefficient table
#   mean - mean of the per-wave intercepts
get_specs <- function(model){
  # Get mean, max, and min values
  dat <- coefficients(model)$wave
  specs <- data.frame(
    sd = attr(VarCorr(model), "sc"),
    min = min(dat),
    max = max(dat),
    mean = mean(dat$`(Intercept)`)
  )
  # BUG FIX: the assignment above was previously the last expression, so
  # the result was only returned invisibly; return it explicitly.
  specs
}
Get data from fitted lmer objects for descriptives.
# Turn a fitted lmer model's per-wave coefficients into a tidy data frame
# with an integer `wave` column and a numeric `value` column (the
# per-wave intercepts), for use in descriptives.
get_dat <- function(model){
  coefs <- coefficients(model)$wave
  out <- tibble::rownames_to_column(coefs, "wave")
  out <- rename(out, value = "(Intercept)")
  mutate(out, wave = as.integer(wave), value = as.numeric(value))
}
Determine average reliability for measures across waves
# Average reliability of a measure across waves.
#
# data: a lavaan CFA fitted with one group per wave.
# waves: number of groups/waves in the fitted model (default 24).
# Returns a data.frame with one column `omega`: the omega reliability
# averaged across waves.
get_rel <- function(data, waves=24){
# extract average reliability from lavaan fitted cfa with several groups
# semTools reliability() yields 5 coefficients per group; reshape the
# flattened vector into a 5 x waves matrix (one column per wave) and
# label the coefficient rows
reliability(data) %>%
unlist() %>%
matrix(5, waves) %>%
as.data.frame() %>%
set_rownames(c("alpha", "omega", "omega2", "omega3", "avevar")) %>%
# average the "omega" row over all waves
summarise(omega = rowMeans(.["omega",])) %>%
return()
}
Make graph of variables’ development.
# Plot the development of variables across waves.
#
# model: a data frame with columns wave/value/dimension, or (if
#   lmer = TRUE) a fitted lmer model converted via get_dat().
# title: plot title; ll, ul: lower/upper y-axis limits.
# line, points: toggle the loess smoother / per-wave points.
# labels: not used anywhere in the body — TODO confirm before removing.
# lmer: treat `model` as a fitted lmer object rather than a data frame.
# legend: if TRUE, place the legend inside the panel; otherwise hide it.
make_graph <- function(model, title, ll, ul, line = TRUE, points = TRUE, labels = FALSE, lmer=FALSE, legend=FALSE){
if(isTRUE(lmer)){
dat <- get_dat(model)
} else{
dat <- model
}
graph <-
ggplot(dat, aes(wave, value, color = dimension)) +
# conditional layers: `{if (cond) layer}` adds the layer only when the
# condition holds (adding NULL otherwise is a no-op in ggplot)
{if(legend) theme(
legend.position = c(0.85, 0.8),
legend.title = element_blank()
)} +
{if(!legend) theme(
legend.position = "none"
)} +
theme(axis.title.x=element_blank()) +
coord_cartesian(ylim = c(ll, ul)) +
ggtitle(title) +
{if(line) geom_smooth(se = FALSE, method = 'loess')} +
{if(points) geom_point()} +
scale_color_manual(values=c("dodgerblue3", "deepskyblue2", "magenta2", "green2", "red"))
graph
}
# Print model results for the online material: keep term, estimate, the
# 95% CI bounds, and a formatted p-value.
#
# object: tidy results coercible to data.frame.
# imputation: if TRUE, the CI columns are already named `2.5 %`/`97.5 %`;
#   otherwise they arrive as conf.low/conf.high and are renamed.
print_res <- function(object, imputation = TRUE){
  dat <- as.data.frame(object)
  if (isTRUE(imputation)) {
    dat <- select(dat, term, estimate, `2.5 %`, `97.5 %`, p.value)
  } else {
    dat <- select(dat, term, estimate, `2.5 %` = conf.low, `97.5 %` = conf.high, p.value)
  }
  # format the p-value with the td helper (shared by both branches)
  mutate(dat, p.value = td::my_round(p.value, "p"))
}
Get data of lmer objects for specific results.
# Assemble effect estimates from the three outcome models (negative
# affect, positive affect, life satisfaction) into one plotting-ready
# data frame for a given variance component and predictor set.
#
# data_aff_neg / data_aff_pos / data_life_sat: fitted "lmerModLmerTest"
#   models (tidied here), or already-tidied data frames with
#   `2.5 %`/`97.5 %` CI columns.
# variance: keep "within" or "between" person effects.
# type: predictor set to filter/relabel; also stored in column `Type`.
#   NOTE(review): the default "channels" (lowercase) matches none of the
#   branches below, so by default no filtering/relabelling happens —
#   confirm this is intended (the branch tests "Channels").
# analysis: label stored in column `Analysis`.
get_dat_res <- function(data_aff_neg, data_aff_pos, data_life_sat, variance = "within", type = "channels", analysis = "Standardized"){
# raw models: tidy each with CIs and stack; otherwise inputs are already
# tidied data frames whose CI columns get renamed to conf.low/conf.high
if(isTRUE(class(data_aff_neg) == "lmerModLmerTest")) {
dat_fig_results <-
broom.mixed::tidy(data_aff_neg, conf.int = T) %>%
mutate(dv = "aff_neg") %>%
rbind(
broom.mixed::tidy(data_aff_pos, conf.int = T) %>%
mutate(dv = "aff_pos")) %>%
rbind(
broom.mixed::tidy(data_life_sat, conf.int = T) %>%
mutate(dv = "life_sat"))
} else{
dat_fig_results <-
data_aff_neg %>%
mutate(dv = "aff_neg") %>%
rbind(data_aff_pos %>%
mutate(dv = "aff_pos")) %>%
rbind(data_life_sat %>%
mutate(dv = "life_sat")) %>%
rename(conf.low = `2.5 %`, conf.high = `97.5 %`)
}
# classify each term as within (suffix _w) vs between, strip the suffix
# to recover the predictor name, and turn labels into ordered factors
dat_fig_results %<>%
mutate(
Variance = ifelse(grepl(".*_w\\>", .$term), "within", "between"),
iv = gsub("_(w|b)\\>", "", .$term)
) %>%
mutate(
Variance = factor(.$Variance,
levels = c("within", "between")),
dv = factor(.$dv,
levels = c("life_sat", "aff_pos", "aff_neg"),
labels = c("Life satisfaction", "Positive affect", "Negative affect")),
Type = type,
Analysis = analysis
) %>%
select(dv, iv, Variance, Type, Analysis, estimate, conf.low, conf.high, p.value) %>%
filter(Variance == variance)
# keep and relabel only the predictors belonging to the requested set
# select Social Media type of activity
if(type == "Activity") {
dat_fig_results %<>%
filter(iv %in% c("soc_med_read", "soc_med_like_share", "soc_med_post")) %>%
mutate(
iv = factor(.$iv,
levels = c("soc_med_post", "soc_med_like_share", "soc_med_read"),
labels = c("Posting", "Liking & Sharing", "Reading"))
)
} else if(type == "Channels"){
dat_fig_results %<>%
filter(iv %in% c("soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw")) %>%
mutate(iv = factor(.$iv, levels = c("soc_med_yt", "soc_med_wa", "soc_med_ig", "soc_med_tw", "soc_med_fb"), labels = c("YouTube", "WhatsApp", "Instagram", "Twitter", "Facebook")))
} else if(type == "Controls"){
dat_fig_results %<>%
filter(iv %in% c("male", "health", "loc_cntrl_int_m",
"act_spo", "sat_dem", "corona_pos")) %>%
mutate(iv = factor(.$iv, levels = c("male", "health", "loc_cntrl_int_m",
"act_spo", "sat_dem", "corona_pos"),
labels = c("Male", "Health", "Internal locus of control",
"Sport", "Satisfaction democracy", "Corona positive")))
} else if(type == "Living\nconditions"){
dat_fig_results %<>%
filter(iv %in% c("health", "corona_pos", "work_h", "work_homeoff", "hh_income")) %>%
mutate(iv = factor(.$iv, levels = c("health", "corona_pos", "work_h", "work_homeoff", "hh_income"),
labels = c("Health", "Corona positive", "Work hours", "Homeoffice", "Household income")))
} else if(type == "News\nuse"){
dat_fig_results %<>%
filter(iv %in% c("med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri")) %>%
mutate(iv = factor(.$iv, levels = c("med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri"),
labels = c("Krone", "Der Standard", "Die Presse", "Österreich", "Kurier", "Salzb. Nachrichten", "Other", "ORF", "Private news")))
} else if(type == "Outdoor\nactivities"){
dat_fig_results %<>%
filter(iv %in% c("act_wrk", "act_spo", "act_frn", "act_sho", "act_pet")) %>%
mutate(iv = factor(.$iv, levels = c("act_wrk", "act_spo", "act_frn", "act_sho", "act_pet"),
labels = c("Working", "Sports", "Friends", "Shopping", "With pets")))
} else if(type == "Psycho-\nlogy"){
dat_fig_results %<>%
filter(iv %in% c("risk_prop", "loc_cntrl_int_m", "sat_dem")) %>%
mutate(iv = factor(.$iv, levels = c("risk_prop", "loc_cntrl_int_m", "sat_dem"),
labels = c("Risk taking", "Internal locus of control", "Satisfaction democracy")))
}
return(dat_fig_results)
}
Make graph of effects
# Forest-style plot of effect estimates with 95% CIs.
#
# data: output of get_dat_res() (estimate, conf.low/conf.high, iv, dv,
#   Variance, Type, Analysis).
# sesoi: smallest-effect-size-of-interest reference lines; "est" draws
#   outcome-specific bounds (+-.3 life satisfaction, +-.15 affect),
#   "std" draws +-.1 for all panels, NULL draws none.
# legend: show the color legend.
# facet: "type" facets by predictor set x outcome, "analysis" by
#   variance component x outcome.
# title: optional plot title.
make_graph_res <- function(data, sesoi = NULL, legend = TRUE, facet = "type", title = NULL){
ggplot(data, aes(x = estimate, y = iv)) +
# NOTE(review): "darkcyan" appears twice in this palette — confirm intended
scale_color_manual(values = c("black", "grey75", "darkcyan", "deepskyblue", "cornflowerblue", "darkcyan", "aquamarine")) +
geom_vline(xintercept = 0, lwd = .75, colour = "darkgrey") +
# CI bars and point estimates, dodged per Analysis
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, color = Analysis),
lwd = .75, height = .2, position = position_dodge(-.7)) +
geom_point(aes(color = Analysis), size = 1.5, position = position_dodge(-.7)) +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv=="Life satisfaction"), aes(xintercept=-.3), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv=="Life satisfaction"), aes(xintercept=.3), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv!="Life satisfaction"), aes(xintercept=-.15), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv!="Life satisfaction"), aes(xintercept=.15), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "std")) geom_vline(aes(xintercept=.1), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "std")) geom_vline(aes(xintercept=-.1), colour="darkgrey", linetype = "dashed")} +
theme(
axis.title.y = element_blank(),
plot.title = element_text(hjust = .5),
panel.spacing = unit(.9, "lines"),
text = element_text(size = 12),
axis.text.y = element_text(hjust = 1)
) +
{if(!is.null(title)) ggtitle(title)} +
{if(!isTRUE(legend)) theme(legend.position = "none")} +
# guides(colour = guide_legend(reverse=T)) +
{if(facet == "type")
facet_grid(rows = vars(Type),
cols = vars(dv),
scales = "free",
space = "free_y")
} +
{if(facet == "analysis")
facet_grid(rows = vars(Variance),
cols = vars(dv),
scales = "free",
space = "free_y")
}
}
Extract factor scores.
# Extract factor scores on the original item scaling.
#
# Rather than exporting latent factor scores, predict the observed
# variables from the fitted lavaan model and average the first three
# item columns row-wise.
get_fs <- function(model) {
  dat_ov <- lavaan::lavPredict(model, type = "ov", assemble = TRUE)
  dat_ov$fs <- rowMeans(dat_ov[1:3])
  dat_ov$fs
}
# Read the raw panel data (SPSS), round-trip through CSV, and re-read.
d_spss_en <- read_spss("data/W1-34_ACPP_V20230303_EN.SAV")
d_spss <- read_spss("data/W1-34_ACPP_V20230303.SAV")
# NOTE(review): `d_spss` (the non-EN file) is written to the *_EN.csv
# path, and `d_spss_en` is not used below — confirm this is intended.
write.csv(d_spss, file = "data/W1-34_ACPP_V20230303_EN.csv")
d_raw <- read_csv("data/W1-34_ACPP_V20230303_EN.csv")
Make file with documentation about variables
# Build the documentation table (variable name + human-readable label)
# from the SPSS variable labels.
# get labels
doc_labels <- map(d_spss, attr, "label")
# reformat into a two-column data frame
# BUG FIX: the data.frame was previously built from the undefined object
# `doc` instead of the label list created above; also define `doc`,
# which find_var() uses as its default documentation table.
doc_raw <-
  data.frame(
    Variable = names(doc_labels),
    Label = unlist(doc_labels, use.names = FALSE)
  )
doc <- doc_raw
# define waves where media use was measured (waves 1, 8, 17, 23, 28)
waves_media_use <- c(1, 8, 17, 23, 28)
Identify the names of each item for each wave.
# identify variable names of items, using custom function;
# each object is a named character vector of per-wave variable names
health <- find_var("Gesundheitszustand") # wasn't collected at wave 21
# negative affect items (depression scale)
aff_neg_1 <- find_var("Depressivitaet: Einsam")
aff_neg_2 <- find_var("Depressivitaet: Aergerlich")
aff_neg_3 <- find_var("Depressivitaet: So niedergeschlagen")
aff_neg_4 <- find_var("Depressivitaet: Sehr nervoes")
aff_neg_5 <- find_var("Depressivitaet: Aengstlich")
aff_neg_6 <- find_var("Depressivitaet: Bedrueckt und traurig")
# positive affect items
aff_pos_1 <- find_var("Depressivitaet: Ruhig und gelassen")
aff_pos_2 <- find_var("Depressivitaet: Gluecklich")
aff_pos_3 <- find_var("Depressivitaet: Voller Energie")
# outdoor activities (reasons for leaving the house)
act_wrk <- find_var("Zuhause verlassen: Arbeit")
act_spo <- find_var("Zuhause verlassen: Sport")
act_frn <- find_var("Zuhause verlassen: Freunde oder Verwandte treffen")
act_sho <- find_var("Zuhause verlassen: Essen einkaufen")
act_pet <- find_var("Zuhause verlassen: Haustier ausfuehren")
sat_dem <- find_var("Demokratiezufriedenheit: Oesterreich")
work_h <- find_var("Arbeitsstunden: Jetzt pro Woche")
work_homeoff <- find_var("Aenderung berufliche Situation: Home-Office")
hh_income <- find_var("Aktuelles Haushaltseinkommen")
# news media use: text outlets and TV news
med_txt_kro <- find_var("Mediennutzung: Kronen Zeitung oder www.krone.at")
med_txt_sta <- find_var("Mediennutzung: Der Standard oder derstandard.at")
med_txt_pre <- find_var("Mediennutzung: Die Presse oder diepresse.com")
med_txt_oes <- find_var("Mediennutzung: Oesterreich oder oe24.at")
med_txt_kur <- find_var("Mediennutzung: Kurier oder kurier.at")
med_txt_slz <- find_var("Mediennutzung: Salzburger Nachrichten oder salzburg.at")
med_txt_son <- find_var("Mediennutzung: Sonstige oesterreichische Tageszeitungen")
med_vid_orf <- find_var("Mediennutzung: ORF (Nachrichten)")
med_vid_pri <- find_var("Mediennutzung: Privatfernsehen (Nachrichten)")
# social media channels and activities
soc_med_fb <- find_var("Soziale Medien: Facebook")
soc_med_tw <- find_var("Soziale Medien: Twitter")
soc_med_ig <- find_var("Soziale Medien: Instagram")
soc_med_yt <- find_var("Soziale Medien: Youtube")
soc_med_wa <- find_var("Soziale Medien: WhatsApp")
soc_med_read <- find_var("Soziale Medien Aktivitaet: Postings zu Corona anderer lesen")
soc_med_like_share <- find_var("Soziale Medien Aktivitaet: Postings liken, teilen oder retweeten")
soc_med_post <- find_var("Soziale Medien Aktivitaet: Selber Postings zu Corona verfassen")
life_sat <- find_var("Lebenszufriedenheit")
risk_prop <- find_var("Risikobereitschaft")
# internal locus of control items
loc_cntrl_int_1 <- find_var("Psychologie: Habe Leben selbst in der Hand")
loc_cntrl_int_2 <- find_var("Psychologie: Belohnung durch Anstrengung")
loc_cntrl_int_3 <- find_var("Psychologie: Fremdbestimmung")
loc_cntrl_int_4 <- find_var("Psychologie: Schicksal")
# trust in institutions
trst_media <- find_var("Vertrauen: ORF")
trst_police <- find_var("Vertrauen: Polizei")
# NOTE(review): this reassignment overwrites the ORF (media) variables
# above with the parliament ones; a separate name (e.g. trst_parl) was
# probably intended — confirm before fixing.
trst_media <- find_var("Vertrauen: Parlament")
trst_hlthsec <- find_var("Vertrauen: Gesundheitswesen")
trst_gov <- find_var("Vertrauen: Bundesregierung")
trst_army <- find_var("Vertrauen: Bundesheer")
# corona diagnosis: respondent indicator plus month of diagnosis
corona_pos <- c(
find_var("Corona-Diagnose: Respondent"),
find_var("Corona-Diagnose: Monat")
)
# Select and rename the analysis variables; the per-wave item columns are
# selected via the name vectors identified above.
d_wide <- d_raw %>%
select(
id = RESPID,
gender = SD_GENDER,
acc_bal = SD_ACCESS_BALCONY,
acc_gar = SD_ACCESS_GARDEN,
year_birth = SD_BIRTHYEAR,
born_aus = SD_BORN_AUSTRIA,
born_aus_prnts = SD_MIGR_BACKGR,
county = SD_BULA,
edu = SD_EDU,
employment = SD_EMPLSTATUS_FEB2020,
hh_adults = SD_HH_ADULTS,
hh_child18 = SD_HH_CHILD18,
hh_child17 = SD_HH_TEENS,
hh_child14 = SD_HH_CHILD14,
hh_child5 = SD_HH_CHILD5,
hh_child2 = SD_HH_CHILD2,
hh_oldfam = SD_HH_OLDERFAM,
hh_outfam = SD_HH_OUTERFAM,
hh_partner = SD_HH_PARTNER,
# hh_income = SD_HHINCOME_FEB2020,
home_sqm = SD_HOME_SQM,
home_owner = SD_HOMEOWNER,
# work_h = SD_WORKHOURS_FEB2020,
health = all_of(health),
life_sat = all_of(life_sat),
aff_neg_1 = all_of(aff_neg_1),
aff_neg_2 = all_of(aff_neg_2),
aff_neg_3 = all_of(aff_neg_3),
aff_neg_4 = all_of(aff_neg_4),
aff_neg_5 = all_of(aff_neg_5),
aff_neg_6 = all_of(aff_neg_6),
aff_pos_1 = all_of(aff_pos_1),
aff_pos_2 = all_of(aff_pos_2),
aff_pos_3 = all_of(aff_pos_3),
act_wrk = all_of(act_wrk),
act_spo = all_of(act_spo),
act_frn = all_of(act_frn),
act_sho = all_of(act_sho),
act_pet = all_of(act_pet),
sat_dem = all_of(sat_dem),
# NOTE(review): duplicate of the sat_dem entry directly above — confirm.
sat_dem = all_of(sat_dem),
work_h = all_of(work_h),
work_homeoff = all_of(work_homeoff),
hh_income = all_of(hh_income),
med_txt_kro = all_of(med_txt_kro),
med_txt_sta = all_of(med_txt_sta),
med_txt_pre = all_of(med_txt_pre),
med_txt_oes = all_of(med_txt_oes),
med_txt_kur = all_of(med_txt_kur),
med_txt_slz = all_of(med_txt_slz),
med_txt_son = all_of(med_txt_son),
med_vid_orf = all_of(med_vid_orf),
med_vid_pri = all_of(med_vid_pri),
soc_med_fb = all_of(soc_med_fb),
soc_med_tw = all_of(soc_med_tw),
soc_med_ig = all_of(soc_med_ig),
soc_med_yt = all_of(soc_med_yt),
soc_med_wa = all_of(soc_med_wa),
soc_med_like_share = all_of(soc_med_like_share),
soc_med_read = all_of(soc_med_read),
soc_med_post = all_of(soc_med_post),
risk_prop = all_of(risk_prop),
loc_cntrl_int_1 = all_of(loc_cntrl_int_1),
loc_cntrl_int_2 = all_of(loc_cntrl_int_2),
loc_cntrl_int_3 = all_of(loc_cntrl_int_3),
loc_cntrl_int_4 = all_of(loc_cntrl_int_4),
trst_media = all_of(trst_media),
trst_police = all_of(trst_police),
# NOTE(review): trst_media selected twice (see the overwrite note where
# trst_media is defined) — confirm.
trst_media = all_of(trst_media),
trst_hlthsec = all_of(trst_hlthsec),
trst_gov = all_of(trst_gov),
trst_army = all_of(trst_army),
corona_pos = all_of(corona_pos),
# participation indicators for the waves with media-use measures
wave_part_1 = W1_PANELIST,
wave_part_8 = W8_PANELIST,
wave_part_17 = W17_PANELIST,
wave_part_23 = W23_PANELIST,
wave_part_28 = W28_PANELIST
)
Make new documentation with selected variables.
# Keep documentation rows only for the variables used above and export
# them; the per-wave name vectors expand to all their wave columns.
doc_selected <- filter(doc_raw, Variable %in% c(
"RESPID",
"SD_GENDER",
"SD_ACCESS_BALCONY",
"SD_ACCESS_GARDEN",
"SD_BIRTHYEAR",
"SD_BORN_AUSTRIA",
"SD_MIGR_BACKGR",
"SD_BULA",
"SD_EDU",
"SD_EMPLSTATUS_FEB2020",
"SD_HH_ADULTS",
"SD_HH_CHILD18",
"SD_HH_TEENS",
"SD_HH_CHILD14",
"SD_HH_CHILD5",
"SD_HH_CHILD2",
"SD_HH_OLDERFAM",
"SD_HH_OUTERFAM",
"SD_HH_PARTNER",
"SD_HHINCOME_FEB2020",
"SD_HOME_SQM",
"SD_HOMEOWNER",
"SD_WORKHOURS_FEB2020",
health,
life_sat,
aff_neg_1,
aff_neg_2,
aff_neg_3,
aff_neg_4,
aff_neg_5,
aff_neg_6,
aff_pos_1,
aff_pos_2,
aff_pos_3,
act_wrk,
act_spo,
act_frn,
act_sho,
act_pet,
sat_dem,
work_h,
work_homeoff,
hh_income,
med_txt_kro,
med_txt_sta,
med_txt_pre,
med_txt_oes,
med_txt_kur,
med_txt_slz,
med_txt_son,
med_vid_orf,
med_vid_pri,
soc_med_fb,
soc_med_tw,
soc_med_ig,
soc_med_yt,
soc_med_wa,
soc_med_like_share,
soc_med_post,
soc_med_read,
risk_prop,
loc_cntrl_int_1,
loc_cntrl_int_2,
loc_cntrl_int_3,
loc_cntrl_int_4,
trst_media,
trst_police,
# NOTE(review): trst_media listed twice; harmless inside %in%, but see
# the overwrite note where trst_media is defined.
trst_media,
trst_hlthsec,
trst_gov,
trst_army,
corona_pos
))
write.csv(doc_selected, "data/used_variables.csv")
# Recode survey missing-value codes (88, 99) to NA and derive demographic
# variables. (mutate_at()/funs() are deprecated; use across() instead.)
d_wide %<>%
  mutate(across(everything(), ~ na_if(., 88))) %>%
  mutate(across(everything(), ~ na_if(., 99))) %>%
  mutate(
    # dummies coded so 1 = male / born in Austria / owns home
    male = 2 - .$gender,
    age = 2021 - .$year_birth,
    # county code 8 = Vienna
    # BUG FIX: removed the trailing comma after .default, which is a
    # parse error inside a function call in R
    res_vienna = recode(.$county, `8` = 1L, .default = 0L),
    born_aus = 2 - .$born_aus,
    home_owner = 2 - .$home_owner,
    employment_fac = factor(.$employment,
      labels = c("Unemployed",
                 "Industrie",
                 "Public service",
                 "Self-employed",
                 "Retired",
                 "Housekeeping",
                 "Student",
                 "Incapacitated",
                 "Parental Leave"),
      levels = c(4, 1:3, 5:8, 10) # make unemployment reference cat
    ),
    edu_fac = factor(.$edu,
      labels = c("No degree",
                 "Middle school",
                 "Vocational school",
                 "Technical school",
                 "High school",
                 "Applied high school",
                 "State college",
                 "Bachelor",
                 "Master",
                 "PhD")
    ))
# Reshape to long format: one row per respondent x wave. The repeated
# per-wave columns carry names of the form item...W<wave> (see the cols
# range below), so split item and wave on the "..." separator, then
# spread the items back out as columns.
d_long <-
d_wide %>%
pivot_longer(
cols = health...W1:corona_pos...W34,
names_to = "item",
values_to = "value"
) %>%
separate(item, c("item", "wave"), sep = "\\.\\.\\.", extra = "merge") %>%
pivot_wider(names_from = "item", values_from = "value")
# recode such that higher values imply more strength / align with wording
# (mutate_at()/funs() are deprecated; replaced by across() with lambdas,
# behavior unchanged)
d_long %<>%
  mutate(across(
    c(
      med_txt_kro:med_vid_pri,
      health,
      sat_dem,
      soc_med_fb:soc_med_post
    ),
    ~ recode(., `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L)
  )) %>%
  mutate(across(
    loc_cntrl_int_1:loc_cntrl_int_4,
    ~ recode(., `1` = 4L, `2` = 3L, `3` = 2L, `4` = 1L)
  )) %>%
  mutate(across(
    born_aus_prnts,
    ~ recode(., `3` = 0L, `2` = 2L, `1` = 1L)
  ))
# recode inverted items
# NOTE(review): loc_cntrl_int_3/4 were already reversed in the block
# above, so reversing them again here is a net no-op — confirm which of
# the two reversals is intended before changing anything.
d_long %<>%
  mutate(across(
    c(loc_cntrl_int_3, loc_cntrl_int_4),
    ~ recode(., `1` = 4L, `2` = 3L, `3` = 2L, `4` = 1L)
  ))
# recode other: wave "W12" -> integer 12, id to integer
d_long %<>%
  mutate(
    wave = gsub("W", "", .$wave) %>% as.integer(),
    id = as.integer(id)
  )
# sort by respondent, then wave
d_long %<>%
  arrange(id, wave)
Next we impute data. Note that the actual imputation below is deactivated and loaded from memory to save time.
Determine amount of missingness per respondent per wave.
vars_used <- c("life_sat", "aff_pos_1", "aff_pos_2", "aff_pos_3", "aff_neg_1", "aff_neg_2", "aff_neg_3", "aff_neg_4", "aff_neg_5", "aff_neg_6", "soc_med_read", "soc_med_like_share", "soc_med_post", "soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw", "age", "male", "born_aus", "born_aus_prnts", "edu_fac", "employment_fac", "health", "res_vienna", "acc_bal", "acc_gar", "home_sqm", "med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri", "risk_prop", "loc_cntrl_int_1", "loc_cntrl_int_2", "loc_cntrl_int_3", "loc_cntrl_int_4", "act_wrk", "act_spo", "act_frn", "act_sho", "act_pet", "sat_dem", "trst_media", "trst_police", "trst_media", "trst_hlthsec", "trst_gov", "trst_army", "corona_pos") # only include vars measured at _all_ points
Filter respondents with more than 50% missing data – only needed for analyses as originally preregistered (see additional analyses).
# Compute the per-row proportion of missingness across the variables used
# and keep rows with less than 50% missing; export the filtered data.
d_long_50 <-
d_long %>%
mutate(na_perc = rowSums(is.na(select(., all_of(vars_used))) / ncol(select(., all_of(vars_used))))) %>%
filter(na_perc < .5)
write_csv(d_long_50, "data/data_50.csv")
Exclude social media use data, because they were measured only on selected waves.
# Social media variables were measured only in selected waves; exclude
# them from the imputation targets.
vars_excl <- c("soc_med_read", "soc_med_like_share", "soc_med_post", "soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw")
# Logical matrix of cells to impute: TRUE where a value is missing,
# forced to FALSE for the excluded social media columns.
# NOTE(review): incl_ma / incl_ma_50 are only referenced in the
# commented-out `where =` arguments of mice() below — confirm whether
# they should be active.
incl_ma <- d_long %>%
mutate(across(.cols = everything(), .fns = is.na))
incl_ma[vars_excl] <- FALSE
# now also for data where 50% missing excluded
incl_ma_50 <- d_long_50 %>%
mutate(across(.cols = everything(), .fns = is.na))
incl_ma_50[vars_excl] <- FALSE
For analyses as originally preregistered (see additional analyses), hence using only participants who provided more than 50% of all data.
# Single imputation (predictive mean matching) for the >50%-complete
# subsample, as used in the originally preregistered analyses.
d_long_50_imp <- mice(d_long_50,
method = "pmm", # use predictive mean matching
m = 1, maxit = 30, # only 1 imputation
# where = incl_ma_50,
seed = 180719, print = FALSE)
# replace the mids object with its completed (filled-in) data set
d_long_50_imp <- mice::complete(d_long_50_imp)
write_csv(d_long_50_imp, "data/data_50_imputed.csv")
Because CFAs aren’t evaluated with multiple imputation, impute single data-set. Missing data was imputed for everyone.
# Single imputation of the full sample; the CFAs use this data set
# because they aren't evaluated with multiple imputation.
d_long_100_imp <- mice(d_long,
method = "pmm", # use predictive mean matching
m = 1, # only 1 imputation
maxit = 30,
# where = incl_ma,
seed = 180719, print = FALSE)
# replace the mids object with its completed (filled-in) data set
d_long_100_imp <- mice::complete(d_long_100_imp)
write_csv(d_long_100_imp, "data/data_100_imputed.csv")
Now, multiple imputation of data-sets (20 imputed data sets, 5 iterations each), used for the final analyses.
# impute missing data with multiple imputation
# (20 imputed data sets, 5 iterations each; used for the final analyses)
d_long_100_mim_mice <- mice(d_long,
method = "pmm", # use predictive mean matching
m = 20, # 20 imputations (comment previously said 5 — code uses 20)
maxit = 5, # 5 iterations
# where = incl_ma,
seed = 180719, print = FALSE)
# long-format stack of all imputations incl. the original data
# (include = TRUE), needed to rebuild a mids object with as.mids() later
d_long_100_mim <- mice::complete(d_long_100_mim_mice, action = "long", include = TRUE)
write_csv(d_long_100_mim, "data/data_100_mim.csv")
To increase speed, you can also load the imputed data from the directory
# Load previously imputed data from disk instead of re-imputing (faster).
d_long_50_imp <- read_csv("data/data_50_imputed.csv") # data where people with >50% missing data were filtered
d_long_100_imp <- read_csv("data/data_100_imputed.csv") # data where all people were included
d_long_100_mim <- read_csv("data/data_100_mim.csv") # data where all people were included
First, create means for scales. Then, create mean values for each person (between), and unique changes per wave (within).
# Scale means per row: positive affect (3 items), negative affect
# (6 items), internal locus of control (4 items).
d_long_100_mim <-
d_long_100_mim %>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Person-level decomposition per variable: _b = person mean (between),
# _w = per-wave deviation from the person mean (within).
d_long_100_mim %<>%
group_by(id) %>%
mutate_at(vars(health:loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
# rebuild a mice mids object from the long-format imputation stack
d_long_100_mim_mice <- as.mids(d_long_100_mim)
# Same scale means for the single-imputation data set.
d_long_100_imp <-
d_long_100_imp %>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Between (_b) / within (_w) decomposition per person, as above.
d_long_100_imp %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
And now the same for the standardized data set.
# Standardize variables within each imputed data set (.imp), skipping
# ids, wave, and the categorical/dummy variables excluded with `-`.
d_long_100_mim_std <-
d_long_100_mim %>%
group_by(.imp) %>%
mutate_at(vars(gender:corona_pos, -id, -wave, -gender, -male, -born_aus, -born_aus_prnts, -edu_fac, -employment_fac, -res_vienna, -acc_bal, -acc_gar), ~ c(scale(.))) %>%
ungroup()
# Scale means recomputed from the standardized items.
d_long_100_mim_std %<>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Between (_b) / within (_w) decomposition per person, as above.
d_long_100_mim_std %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
# Standardized mids object for pooled analyses.
d_long_100_mim_mice_std <- as.mids(d_long_100_mim_std)
And because later we want to rerun the results as originally preregistered, we’ll now do the same for the filtered data (respondents with more than 50% missing data excluded).
# Scale means and between/within decomposition for the imputed
# >50%-complete subsample (preregistered analyses).
d_long_50_imp <-
d_long_50_imp %>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
d_long_50_imp %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m),
funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
And because later we want to rerun the results without imputed data, now the same for data without imputation.
# Scale means and between/within decomposition for the non-imputed
# filtered data (analyses without imputation).
d_long_50 %<>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
d_long_50 %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
# Persist the full workspace for the next script/section.
save.image("data/workspace_1.RData")