# install packages
# devtools::install_github("https://github.com/tdienlin/td@v.0.0.2.5")
# define packages
library(broom.mixed)
library(brms)
library(devtools)
library(GGally)
library(ggplot2)
library(gridExtra)
library(haven)
library(kableExtra)
library(knitr)
library(lavaan)
library(lme4)
library(magrittr)
library(mice)
library(mvnormalTest)
library(PerFit)
library(performance)
library(psych)
library(quanteda.textstats)
library(readxl)
library(semTools)
library(td)
library(tidyverse)
# unload("MASS")
Provide session info to make results reproducible.
# Print R version, platform, and loaded package versions for
# reproducibility; the commented output below records the run used.
sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Vienna
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 tidyverse_2.0.0
## [10] td_0.0.1 semTools_0.5-6 readxl_1.4.2 quanteda.textstats_0.96.2 psych_2.3.3 performance_0.10.4 PerFit_1.4.6 mirt_1.39 lattice_0.21-8
## [19] ltm_1.2-0 polycor_0.8-1 msm_1.7 MASS_7.3-60 mvnormalTest_1.0.0 mice_3.16.0 magrittr_2.0.3 lme4_1.1-33 Matrix_1.5-4.1
## [28] lavaan_0.6-15 knitr_1.43 kableExtra_1.3.4 haven_2.5.2 gridExtra_2.3 GGally_2.1.2 ggplot2_3.4.2 devtools_2.4.5 usethis_2.2.0
## [37] brms_2.19.0 Rcpp_1.0.10 broom.mixed_0.2.9.4
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.2 matrixStats_1.0.0 bitops_1.0-7 insight_0.19.2 httr_1.4.6 webshot_0.5.4 RColorBrewer_1.1-3 numDeriv_2016.8-1.1 profvis_0.3.8 tools_4.3.0 backports_1.4.1 utf8_1.2.3
## [13] R6_2.5.1 DT_0.28 vegan_2.6-4 sm_2.2-5.7.1 mgcv_1.8-42 nortest_1.0-4 jomo_2.7-6 permute_0.9-7 urlchecker_1.0.1 withr_2.5.0 Brobdingnag_1.2-9 prettyunits_1.1.1
## [25] cli_3.6.1 pspline_1.0-19 shinyjs_2.1.0 sass_0.4.6 mvtnorm_1.2-2 pbapply_1.7-0 pbivnorm_0.6.0 systemfonts_1.0.4 StanHeaders_2.26.27 foreign_0.8-84 svglite_2.1.1 parallelly_1.36.0
## [37] sessioninfo_1.2.2 rstudioapi_0.14 generics_0.1.3 shape_1.4.6 gtools_3.9.4 crosstalk_1.2.0 distributional_0.3.2 inline_0.3.19 loo_2.6.0 fansi_1.0.4 abind_1.4-5 lifecycle_1.0.3
## [49] yaml_2.3.7 grid_4.3.0 promises_1.2.0.1 irtoys_0.2.2 crayon_1.5.2 mitml_0.4-5 miniUI_0.1.1.1 pillar_1.9.0 boot_1.3-28.1 fda_6.1.4 estimability_1.4.1 shinystan_2.6.0
## [61] stopwords_2.3 admisc_0.32 codetools_0.2-19 fastmatch_1.1-3 pan_1.6 glue_1.6.2 data.table_1.14.8 remotes_2.4.2 vctrs_0.6.3 cellranger_1.1.0 gtable_0.3.3 cachem_1.0.8
## [73] ks_1.14.0 xfun_0.39 mime_0.12 fds_1.8 pracma_2.4.2 coda_0.19-4 pcaPP_2.0-3 survival_3.5-5 iterators_1.0.14 shinythemes_1.2.0 ellipsis_0.3.2 nlme_3.1-162
## [85] xts_0.13.1 threejs_0.3.3 rstan_2.21.8 tensorA_0.36.2 bslib_0.5.0 Deriv_4.1.3 KernSmooth_2.23-21 rpart_4.1.19 colorspace_2.1-0 Hmisc_5.1-0 nnet_7.3-19 mnormt_2.1.1
## [97] tidyselect_1.2.0 processx_3.8.1 emmeans_1.8.6 moments_0.14.1 compiler_4.3.0 glmnet_4.1-7 rvest_1.0.3 htmlTable_2.4.1 expm_0.999-7 xml2_1.3.4 colourpicker_1.2.0 posterior_1.4.1
## [109] checkmate_2.2.0 scales_1.2.1 dygraphs_1.1.1.6 quadprog_1.5-8 callr_3.7.3 digest_0.6.31 rainbow_3.7 minqa_1.2.5 rmarkdown_2.22 htmltools_0.5.5 pkgconfig_2.0.3 base64enc_0.1-3
## [121] stabledist_0.7-1 fastmap_1.1.1 ADGofTest_0.3 rlang_1.1.1 htmlwidgets_1.6.2 shiny_1.7.4 farver_2.1.1 jquerylib_0.1.4 zoo_1.8-12 jsonlite_1.8.5 mclust_6.0.0 dcurver_0.9.2
## [133] RCurl_1.98-1.12 Formula_1.2-5 bayesplot_1.10.0 munsell_0.5.0 furrr_0.3.1 stringi_1.7.12 plyr_1.8.8 pkgbuild_1.4.0 parallel_4.3.0 listenv_0.9.0 splines_4.3.0 hms_1.1.3
## [145] ps_1.7.5 igraph_1.5.0 markdown_1.7 reshape2_1.4.4 pkgload_1.3.2 rstantools_2.3.1 GPArotation_2023.3-1 evaluate_0.21 RcppParallel_5.1.7 deSolve_1.35 tzdb_0.4.0 nloptr_2.0.3
## [157] foreach_1.5.2 httpuv_1.6.11 quanteda_3.3.1 future_1.32.0 reshape_0.8.9 broom_1.0.5 xtable_1.8-4 later_1.3.1 viridisLite_0.4.2 gsl_2.1-8 nsyllable_1.0.1 copula_1.1-2
## [169] memoise_2.0.1 cluster_2.1.4 timechange_0.2.0 globals_0.16.2 hdrcde_3.4 bridgesampling_1.1-2
Find variables in dataset.
# Find the per-wave variable names of an item in the documentation table.
#
# name: the item's label (matched against the `Label` column).
# data: documentation data frame with columns `Variable` and `Label`;
#       defaults to the global object `doc`.
# Returns a named character vector of variable names, named by the part
# of each variable name before the first underscore.
find_var <- function(name, data = doc) {
  # BUG FIX: the body previously used the global `doc` directly, silently
  # ignoring the `data` argument; the argument is now actually used.
  # (Also: body braces instead of parentheses.)
  data %>%
    filter(Label == name) %>%
    dplyr::select(Variable) %>%
    unlist() %>%
    set_names(sub("\\_.*", "", .))
}
Extract characteristics from fitted lmer models.
# Extract summary characteristics from a fitted lmer model.
#
# model: a fitted lme4 model whose random effects are grouped by `wave`.
# Returns a one-row data.frame with:
#   sd   - residual standard deviation (the "sc" attribute of VarCorr)
#   min  - minimum over the per-wave coefficient table
#   max  - maximum over the per-wave coefficient table
#   mean - mean of the per-wave intercepts
get_specs <- function(model){
  # Get mean, max, and min values
  dat <- coefficients(model)$wave
  specs <- data.frame(
    sd = attr(VarCorr(model), "sc"),
    min = min(dat),
    max = max(dat),
    mean = mean(dat$`(Intercept)`)
  )
  # BUG FIX: the assignment above was previously the last expression, so
  # the result was only returned invisibly; return it explicitly.
  specs
}
Get data from fitted lmer objects for descriptives.
# Turn a fitted lmer model's per-wave coefficients into a tidy data frame
# with an integer `wave` column and a numeric `value` column (the
# per-wave intercepts), for use in descriptives.
get_dat <- function(model){
  coefs <- coefficients(model)$wave
  out <- tibble::rownames_to_column(coefs, "wave")
  out <- rename(out, value = "(Intercept)")
  mutate(out, wave = as.integer(wave), value = as.numeric(value))
}
Determine average reliability for measures across waves
# Average reliability of a measure across waves.
#
# data: a lavaan CFA fitted with one group per wave.
# waves: number of groups/waves in the fitted model (default 24).
# Returns a data.frame with one column `omega`: the omega reliability
# averaged across waves.
get_rel <- function(data, waves=24){
# extract average reliability from lavaan fitted cfa with several groups
# semTools reliability() yields 5 coefficients per group; reshape the
# flattened vector into a 5 x waves matrix (one column per wave) and
# label the coefficient rows
reliability(data) %>%
unlist() %>%
matrix(5, waves) %>%
as.data.frame() %>%
set_rownames(c("alpha", "omega", "omega2", "omega3", "avevar")) %>%
# average the "omega" row over all waves
summarise(omega = rowMeans(.["omega",])) %>%
return()
}
Make graph of variables’ development.
# Plot the development of variables across waves.
#
# model: a data frame with columns wave/value/dimension, or (if
#   lmer = TRUE) a fitted lmer model converted via get_dat().
# title: plot title; ll, ul: lower/upper y-axis limits.
# line, points: toggle the loess smoother / per-wave points.
# labels: not used anywhere in the body — TODO confirm before removing.
# lmer: treat `model` as a fitted lmer object rather than a data frame.
# legend: if TRUE, place the legend inside the panel; otherwise hide it.
make_graph <- function(model, title, ll, ul, line = TRUE, points = TRUE, labels = FALSE, lmer=FALSE, legend=FALSE){
if(isTRUE(lmer)){
dat <- get_dat(model)
} else{
dat <- model
}
graph <-
ggplot(dat, aes(wave, value, color = dimension)) +
# conditional layers: `{if (cond) layer}` adds the layer only when the
# condition holds (adding NULL otherwise is a no-op in ggplot)
{if(legend) theme(
legend.position = c(0.85, 0.8),
legend.title = element_blank()
)} +
{if(!legend) theme(
legend.position = "none"
)} +
theme(axis.title.x=element_blank()) +
coord_cartesian(ylim = c(ll, ul)) +
ggtitle(title) +
{if(line) geom_smooth(se = FALSE, method = 'loess')} +
{if(points) geom_point()} +
scale_color_manual(values=c("dodgerblue3", "deepskyblue2", "magenta2", "green2", "red"))
graph
}
# Print model results for the online material: keep term, estimate, the
# 95% CI bounds, and a formatted p-value.
#
# object: tidy results coercible to data.frame.
# imputation: if TRUE, the CI columns are already named `2.5 %`/`97.5 %`;
#   otherwise they arrive as conf.low/conf.high and are renamed.
print_res <- function(object, imputation = TRUE){
  dat <- as.data.frame(object)
  if (isTRUE(imputation)) {
    dat <- select(dat, term, estimate, `2.5 %`, `97.5 %`, p.value)
  } else {
    dat <- select(dat, term, estimate, `2.5 %` = conf.low, `97.5 %` = conf.high, p.value)
  }
  # format the p-value with the td helper (shared by both branches)
  mutate(dat, p.value = td::my_round(p.value, "p"))
}
Get data of lmer objects for specific results.
# Assemble effect estimates from the three outcome models (negative
# affect, positive affect, life satisfaction) into one plotting-ready
# data frame for a given variance component and predictor set.
#
# data_aff_neg / data_aff_pos / data_life_sat: fitted "lmerModLmerTest"
#   models (tidied here), or already-tidied data frames with
#   `2.5 %`/`97.5 %` CI columns.
# variance: keep "within" or "between" person effects.
# type: predictor set to filter/relabel; also stored in column `Type`.
#   NOTE(review): the default "channels" (lowercase) matches none of the
#   branches below, so by default no filtering/relabelling happens —
#   confirm this is intended (the branch tests "Channels").
# analysis: label stored in column `Analysis`.
get_dat_res <- function(data_aff_neg, data_aff_pos, data_life_sat, variance = "within", type = "channels", analysis = "Standardized"){
# raw models: tidy each with CIs and stack; otherwise inputs are already
# tidied data frames whose CI columns get renamed to conf.low/conf.high
if(isTRUE(class(data_aff_neg) == "lmerModLmerTest")) {
dat_fig_results <-
broom.mixed::tidy(data_aff_neg, conf.int = T) %>%
mutate(dv = "aff_neg") %>%
rbind(
broom.mixed::tidy(data_aff_pos, conf.int = T) %>%
mutate(dv = "aff_pos")) %>%
rbind(
broom.mixed::tidy(data_life_sat, conf.int = T) %>%
mutate(dv = "life_sat"))
} else{
dat_fig_results <-
data_aff_neg %>%
mutate(dv = "aff_neg") %>%
rbind(data_aff_pos %>%
mutate(dv = "aff_pos")) %>%
rbind(data_life_sat %>%
mutate(dv = "life_sat")) %>%
rename(conf.low = `2.5 %`, conf.high = `97.5 %`)
}
# classify each term as within (suffix _w) vs between, strip the suffix
# to recover the predictor name, and turn labels into ordered factors
dat_fig_results %<>%
mutate(
Variance = ifelse(grepl(".*_w\\>", .$term), "within", "between"),
iv = gsub("_(w|b)\\>", "", .$term)
) %>%
mutate(
Variance = factor(.$Variance,
levels = c("within", "between")),
dv = factor(.$dv,
levels = c("life_sat", "aff_pos", "aff_neg"),
labels = c("Life satisfaction", "Positive affect", "Negative affect")),
Type = type,
Analysis = analysis
) %>%
select(dv, iv, Variance, Type, Analysis, estimate, conf.low, conf.high, p.value) %>%
filter(Variance == variance)
# keep and relabel only the predictors belonging to the requested set
# select Social Media type of activity
if(type == "Activity") {
dat_fig_results %<>%
filter(iv %in% c("soc_med_read", "soc_med_like_share", "soc_med_post")) %>%
mutate(
iv = factor(.$iv,
levels = c("soc_med_post", "soc_med_like_share", "soc_med_read"),
labels = c("Posting", "Liking & Sharing", "Reading"))
)
} else if(type == "Channels"){
dat_fig_results %<>%
filter(iv %in% c("soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw")) %>%
mutate(iv = factor(.$iv, levels = c("soc_med_yt", "soc_med_wa", "soc_med_ig", "soc_med_tw", "soc_med_fb"), labels = c("YouTube", "WhatsApp", "Instagram", "Twitter", "Facebook")))
} else if(type == "Controls"){
dat_fig_results %<>%
filter(iv %in% c("male", "health", "loc_cntrl_int_m",
"act_spo", "sat_dem", "corona_pos")) %>%
mutate(iv = factor(.$iv, levels = c("male", "health", "loc_cntrl_int_m",
"act_spo", "sat_dem", "corona_pos"),
labels = c("Male", "Health", "Internal locus of control",
"Sport", "Satisfaction democracy", "Corona positive")))
} else if(type == "Living\nconditions"){
dat_fig_results %<>%
filter(iv %in% c("health", "corona_pos", "work_h", "work_homeoff", "hh_income")) %>%
mutate(iv = factor(.$iv, levels = c("health", "corona_pos", "work_h", "work_homeoff", "hh_income"),
labels = c("Health", "Corona positive", "Work hours", "Homeoffice", "Household income")))
} else if(type == "News\nuse"){
dat_fig_results %<>%
filter(iv %in% c("med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri")) %>%
mutate(iv = factor(.$iv, levels = c("med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri"),
labels = c("Krone", "Der Standard", "Die Presse", "Österreich", "Kurier", "Salzb. Nachrichten", "Other", "ORF", "Private news")))
} else if(type == "Outdoor\nactivities"){
dat_fig_results %<>%
filter(iv %in% c("act_wrk", "act_spo", "act_frn", "act_sho", "act_pet")) %>%
mutate(iv = factor(.$iv, levels = c("act_wrk", "act_spo", "act_frn", "act_sho", "act_pet"),
labels = c("Working", "Sports", "Friends", "Shopping", "With pets")))
} else if(type == "Psycho-\nlogy"){
dat_fig_results %<>%
filter(iv %in% c("risk_prop", "loc_cntrl_int_m", "sat_dem")) %>%
mutate(iv = factor(.$iv, levels = c("risk_prop", "loc_cntrl_int_m", "sat_dem"),
labels = c("Risk taking", "Internal locus of control", "Satisfaction democracy")))
}
return(dat_fig_results)
}
Make graph of effects
# Forest-style plot of effect estimates with 95% CIs.
#
# data: output of get_dat_res() (estimate, conf.low/conf.high, iv, dv,
#   Variance, Type, Analysis).
# sesoi: smallest-effect-size-of-interest reference lines; "est" draws
#   outcome-specific bounds (+-.3 life satisfaction, +-.15 affect),
#   "std" draws +-.1 for all panels, NULL draws none.
# legend: show the color legend.
# facet: "type" facets by predictor set x outcome, "analysis" by
#   variance component x outcome.
# title: optional plot title.
make_graph_res <- function(data, sesoi = NULL, legend = TRUE, facet = "type", title = NULL){
ggplot(data, aes(x = estimate, y = iv)) +
# NOTE(review): "darkcyan" appears twice in this palette — confirm intended
scale_color_manual(values = c("black", "grey75", "darkcyan", "deepskyblue", "cornflowerblue", "darkcyan", "aquamarine")) +
geom_vline(xintercept = 0, lwd = .75, colour = "darkgrey") +
# CI bars and point estimates, dodged per Analysis
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, color = Analysis),
lwd = .75, height = .2, position = position_dodge(-.7)) +
geom_point(aes(color = Analysis), size = 1.5, position = position_dodge(-.7)) +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv=="Life satisfaction"), aes(xintercept=-.3), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv=="Life satisfaction"), aes(xintercept=.3), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv!="Life satisfaction"), aes(xintercept=-.15), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv!="Life satisfaction"), aes(xintercept=.15), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "std")) geom_vline(aes(xintercept=.1), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "std")) geom_vline(aes(xintercept=-.1), colour="darkgrey", linetype = "dashed")} +
theme(
axis.title.y = element_blank(),
plot.title = element_text(hjust = .5),
panel.spacing = unit(.9, "lines"),
text = element_text(size = 12),
axis.text.y = element_text(hjust = 1)
) +
{if(!is.null(title)) ggtitle(title)} +
{if(!isTRUE(legend)) theme(legend.position = "none")} +
# guides(colour = guide_legend(reverse=T)) +
{if(facet == "type")
facet_grid(rows = vars(Type),
cols = vars(dv),
scales = "free",
space = "free_y")
} +
{if(facet == "analysis")
facet_grid(rows = vars(Variance),
cols = vars(dv),
scales = "free",
space = "free_y")
}
}
Extract factor scores.
# Extract factor scores on the original item scaling.
#
# Rather than exporting latent factor scores, predict the observed
# variables from the fitted lavaan model and average the first three
# item columns row-wise.
get_fs <- function(model) {
  dat_ov <- lavaan::lavPredict(model, type = "ov", assemble = TRUE)
  dat_ov$fs <- rowMeans(dat_ov[1:3])
  dat_ov$fs
}
# Read the raw panel data (SPSS), round-trip through CSV, and re-read.
d_spss_en <- read_spss("data/W1-34_ACPP_V20230303_EN.SAV")
d_spss <- read_spss("data/W1-34_ACPP_V20230303.SAV")
# NOTE(review): `d_spss` (the non-EN file) is written to the *_EN.csv
# path, and `d_spss_en` is not used below — confirm this is intended.
write.csv(d_spss, file = "data/W1-34_ACPP_V20230303_EN.csv")
d_raw <- read_csv("data/W1-34_ACPP_V20230303_EN.csv")
Make file with documentation about variables
# Build the documentation table (variable name + human-readable label)
# from the SPSS variable labels.
# get labels
doc_labels <- map(d_spss, attr, "label")
# reformat into a two-column data frame
# BUG FIX: the data.frame was previously built from the undefined object
# `doc` instead of the label list created above; also define `doc`,
# which find_var() uses as its default documentation table.
doc_raw <-
  data.frame(
    Variable = names(doc_labels),
    Label = unlist(doc_labels, use.names = FALSE)
  )
doc <- doc_raw
# define waves where media use was measured (waves 1, 8, 17, 23, 28)
waves_media_use <- c(1, 8, 17, 23, 28)
Identify the names of each item for each wave.
# identify variable names of items, using custom function;
# each object is a named character vector of per-wave variable names
health <- find_var("Gesundheitszustand") # wasn't collected at wave 21
# negative affect items (depression scale)
aff_neg_1 <- find_var("Depressivitaet: Einsam")
aff_neg_2 <- find_var("Depressivitaet: Aergerlich")
aff_neg_3 <- find_var("Depressivitaet: So niedergeschlagen")
aff_neg_4 <- find_var("Depressivitaet: Sehr nervoes")
aff_neg_5 <- find_var("Depressivitaet: Aengstlich")
aff_neg_6 <- find_var("Depressivitaet: Bedrueckt und traurig")
# positive affect items
aff_pos_1 <- find_var("Depressivitaet: Ruhig und gelassen")
aff_pos_2 <- find_var("Depressivitaet: Gluecklich")
aff_pos_3 <- find_var("Depressivitaet: Voller Energie")
# outdoor activities (reasons for leaving the house)
act_wrk <- find_var("Zuhause verlassen: Arbeit")
act_spo <- find_var("Zuhause verlassen: Sport")
act_frn <- find_var("Zuhause verlassen: Freunde oder Verwandte treffen")
act_sho <- find_var("Zuhause verlassen: Essen einkaufen")
act_pet <- find_var("Zuhause verlassen: Haustier ausfuehren")
sat_dem <- find_var("Demokratiezufriedenheit: Oesterreich")
work_h <- find_var("Arbeitsstunden: Jetzt pro Woche")
work_homeoff <- find_var("Aenderung berufliche Situation: Home-Office")
hh_income <- find_var("Aktuelles Haushaltseinkommen")
# news media use: text outlets and TV news
med_txt_kro <- find_var("Mediennutzung: Kronen Zeitung oder www.krone.at")
med_txt_sta <- find_var("Mediennutzung: Der Standard oder derstandard.at")
med_txt_pre <- find_var("Mediennutzung: Die Presse oder diepresse.com")
med_txt_oes <- find_var("Mediennutzung: Oesterreich oder oe24.at")
med_txt_kur <- find_var("Mediennutzung: Kurier oder kurier.at")
med_txt_slz <- find_var("Mediennutzung: Salzburger Nachrichten oder salzburg.at")
med_txt_son <- find_var("Mediennutzung: Sonstige oesterreichische Tageszeitungen")
med_vid_orf <- find_var("Mediennutzung: ORF (Nachrichten)")
med_vid_pri <- find_var("Mediennutzung: Privatfernsehen (Nachrichten)")
# social media channels and activities
soc_med_fb <- find_var("Soziale Medien: Facebook")
soc_med_tw <- find_var("Soziale Medien: Twitter")
soc_med_ig <- find_var("Soziale Medien: Instagram")
soc_med_yt <- find_var("Soziale Medien: Youtube")
soc_med_wa <- find_var("Soziale Medien: WhatsApp")
soc_med_read <- find_var("Soziale Medien Aktivitaet: Postings zu Corona anderer lesen")
soc_med_like_share <- find_var("Soziale Medien Aktivitaet: Postings liken, teilen oder retweeten")
soc_med_post <- find_var("Soziale Medien Aktivitaet: Selber Postings zu Corona verfassen")
life_sat <- find_var("Lebenszufriedenheit")
risk_prop <- find_var("Risikobereitschaft")
# internal locus of control items
loc_cntrl_int_1 <- find_var("Psychologie: Habe Leben selbst in der Hand")
loc_cntrl_int_2 <- find_var("Psychologie: Belohnung durch Anstrengung")
loc_cntrl_int_3 <- find_var("Psychologie: Fremdbestimmung")
loc_cntrl_int_4 <- find_var("Psychologie: Schicksal")
# trust in institutions
trst_media <- find_var("Vertrauen: ORF")
trst_police <- find_var("Vertrauen: Polizei")
# NOTE(review): this reassignment overwrites the ORF (media) variables
# above with the parliament ones; a separate name (e.g. trst_parl) was
# probably intended — confirm before fixing.
trst_media <- find_var("Vertrauen: Parlament")
trst_hlthsec <- find_var("Vertrauen: Gesundheitswesen")
trst_gov <- find_var("Vertrauen: Bundesregierung")
trst_army <- find_var("Vertrauen: Bundesheer")
# corona diagnosis: respondent indicator plus month of diagnosis
corona_pos <- c(
find_var("Corona-Diagnose: Respondent"),
find_var("Corona-Diagnose: Monat")
)
# Select and rename the analysis variables; the per-wave item columns are
# selected via the name vectors identified above.
d_wide <- d_raw %>%
select(
id = RESPID,
gender = SD_GENDER,
acc_bal = SD_ACCESS_BALCONY,
acc_gar = SD_ACCESS_GARDEN,
year_birth = SD_BIRTHYEAR,
born_aus = SD_BORN_AUSTRIA,
born_aus_prnts = SD_MIGR_BACKGR,
county = SD_BULA,
edu = SD_EDU,
employment = SD_EMPLSTATUS_FEB2020,
hh_adults = SD_HH_ADULTS,
hh_child18 = SD_HH_CHILD18,
hh_child17 = SD_HH_TEENS,
hh_child14 = SD_HH_CHILD14,
hh_child5 = SD_HH_CHILD5,
hh_child2 = SD_HH_CHILD2,
hh_oldfam = SD_HH_OLDERFAM,
hh_outfam = SD_HH_OUTERFAM,
hh_partner = SD_HH_PARTNER,
# hh_income = SD_HHINCOME_FEB2020,
home_sqm = SD_HOME_SQM,
home_owner = SD_HOMEOWNER,
# work_h = SD_WORKHOURS_FEB2020,
health = all_of(health),
life_sat = all_of(life_sat),
aff_neg_1 = all_of(aff_neg_1),
aff_neg_2 = all_of(aff_neg_2),
aff_neg_3 = all_of(aff_neg_3),
aff_neg_4 = all_of(aff_neg_4),
aff_neg_5 = all_of(aff_neg_5),
aff_neg_6 = all_of(aff_neg_6),
aff_pos_1 = all_of(aff_pos_1),
aff_pos_2 = all_of(aff_pos_2),
aff_pos_3 = all_of(aff_pos_3),
act_wrk = all_of(act_wrk),
act_spo = all_of(act_spo),
act_frn = all_of(act_frn),
act_sho = all_of(act_sho),
act_pet = all_of(act_pet),
sat_dem = all_of(sat_dem),
# NOTE(review): duplicate of the sat_dem entry directly above — confirm.
sat_dem = all_of(sat_dem),
work_h = all_of(work_h),
work_homeoff = all_of(work_homeoff),
hh_income = all_of(hh_income),
med_txt_kro = all_of(med_txt_kro),
med_txt_sta = all_of(med_txt_sta),
med_txt_pre = all_of(med_txt_pre),
med_txt_oes = all_of(med_txt_oes),
med_txt_kur = all_of(med_txt_kur),
med_txt_slz = all_of(med_txt_slz),
med_txt_son = all_of(med_txt_son),
med_vid_orf = all_of(med_vid_orf),
med_vid_pri = all_of(med_vid_pri),
soc_med_fb = all_of(soc_med_fb),
soc_med_tw = all_of(soc_med_tw),
soc_med_ig = all_of(soc_med_ig),
soc_med_yt = all_of(soc_med_yt),
soc_med_wa = all_of(soc_med_wa),
soc_med_like_share = all_of(soc_med_like_share),
soc_med_read = all_of(soc_med_read),
soc_med_post = all_of(soc_med_post),
risk_prop = all_of(risk_prop),
loc_cntrl_int_1 = all_of(loc_cntrl_int_1),
loc_cntrl_int_2 = all_of(loc_cntrl_int_2),
loc_cntrl_int_3 = all_of(loc_cntrl_int_3),
loc_cntrl_int_4 = all_of(loc_cntrl_int_4),
trst_media = all_of(trst_media),
trst_police = all_of(trst_police),
# NOTE(review): trst_media selected twice (see the overwrite note where
# trst_media is defined) — confirm.
trst_media = all_of(trst_media),
trst_hlthsec = all_of(trst_hlthsec),
trst_gov = all_of(trst_gov),
trst_army = all_of(trst_army),
corona_pos = all_of(corona_pos),
# participation indicators for the waves with media-use measures
wave_part_1 = W1_PANELIST,
wave_part_8 = W8_PANELIST,
wave_part_17 = W17_PANELIST,
wave_part_23 = W23_PANELIST,
wave_part_28 = W28_PANELIST
)
Make new documentation with selected variables.
# Keep documentation rows only for the variables used above and export
# them; the per-wave name vectors expand to all their wave columns.
doc_selected <- filter(doc_raw, Variable %in% c(
"RESPID",
"SD_GENDER",
"SD_ACCESS_BALCONY",
"SD_ACCESS_GARDEN",
"SD_BIRTHYEAR",
"SD_BORN_AUSTRIA",
"SD_MIGR_BACKGR",
"SD_BULA",
"SD_EDU",
"SD_EMPLSTATUS_FEB2020",
"SD_HH_ADULTS",
"SD_HH_CHILD18",
"SD_HH_TEENS",
"SD_HH_CHILD14",
"SD_HH_CHILD5",
"SD_HH_CHILD2",
"SD_HH_OLDERFAM",
"SD_HH_OUTERFAM",
"SD_HH_PARTNER",
"SD_HHINCOME_FEB2020",
"SD_HOME_SQM",
"SD_HOMEOWNER",
"SD_WORKHOURS_FEB2020",
health,
life_sat,
aff_neg_1,
aff_neg_2,
aff_neg_3,
aff_neg_4,
aff_neg_5,
aff_neg_6,
aff_pos_1,
aff_pos_2,
aff_pos_3,
act_wrk,
act_spo,
act_frn,
act_sho,
act_pet,
sat_dem,
work_h,
work_homeoff,
hh_income,
med_txt_kro,
med_txt_sta,
med_txt_pre,
med_txt_oes,
med_txt_kur,
med_txt_slz,
med_txt_son,
med_vid_orf,
med_vid_pri,
soc_med_fb,
soc_med_tw,
soc_med_ig,
soc_med_yt,
soc_med_wa,
soc_med_like_share,
soc_med_post,
soc_med_read,
risk_prop,
loc_cntrl_int_1,
loc_cntrl_int_2,
loc_cntrl_int_3,
loc_cntrl_int_4,
trst_media,
trst_police,
# NOTE(review): trst_media listed twice; harmless inside %in%, but see
# the overwrite note where trst_media is defined.
trst_media,
trst_hlthsec,
trst_gov,
trst_army,
corona_pos
))
write.csv(doc_selected, "data/used_variables.csv")
# Recode survey missing-value codes (88, 99) to NA and derive demographic
# variables. (mutate_at()/funs() are deprecated; use across() instead.)
d_wide %<>%
  mutate(across(everything(), ~ na_if(., 88))) %>%
  mutate(across(everything(), ~ na_if(., 99))) %>%
  mutate(
    # dummies coded so 1 = male / born in Austria / owns home
    male = 2 - .$gender,
    age = 2021 - .$year_birth,
    # county code 8 = Vienna
    # BUG FIX: removed the trailing comma after .default, which is a
    # parse error inside a function call in R
    res_vienna = recode(.$county, `8` = 1L, .default = 0L),
    born_aus = 2 - .$born_aus,
    home_owner = 2 - .$home_owner,
    employment_fac = factor(.$employment,
      labels = c("Unemployed",
                 "Industrie",
                 "Public service",
                 "Self-employed",
                 "Retired",
                 "Housekeeping",
                 "Student",
                 "Incapacitated",
                 "Parental Leave"),
      levels = c(4, 1:3, 5:8, 10) # make unemployment reference cat
    ),
    edu_fac = factor(.$edu,
      labels = c("No degree",
                 "Middle school",
                 "Vocational school",
                 "Technical school",
                 "High school",
                 "Applied high school",
                 "State college",
                 "Bachelor",
                 "Master",
                 "PhD")
    ))
# Reshape to long format: one row per respondent x wave. The repeated
# per-wave columns carry names of the form item...W<wave> (see the cols
# range below), so split item and wave on the "..." separator, then
# spread the items back out as columns.
d_long <-
d_wide %>%
pivot_longer(
cols = health...W1:corona_pos...W34,
names_to = "item",
values_to = "value"
) %>%
separate(item, c("item", "wave"), sep = "\\.\\.\\.", extra = "merge") %>%
pivot_wider(names_from = "item", values_from = "value")
# recode such that higher values imply more strength / align with wording
# (mutate_at()/funs() are deprecated; replaced by across() with lambdas,
# behavior unchanged)
d_long %<>%
  mutate(across(
    c(
      med_txt_kro:med_vid_pri,
      health,
      sat_dem,
      soc_med_fb:soc_med_post
    ),
    ~ recode(., `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L)
  )) %>%
  mutate(across(
    loc_cntrl_int_1:loc_cntrl_int_4,
    ~ recode(., `1` = 4L, `2` = 3L, `3` = 2L, `4` = 1L)
  )) %>%
  mutate(across(
    born_aus_prnts,
    ~ recode(., `3` = 0L, `2` = 2L, `1` = 1L)
  ))
# recode inverted items
# NOTE(review): loc_cntrl_int_3/4 were already reversed in the block
# above, so reversing them again here is a net no-op — confirm which of
# the two reversals is intended before changing anything.
d_long %<>%
  mutate(across(
    c(loc_cntrl_int_3, loc_cntrl_int_4),
    ~ recode(., `1` = 4L, `2` = 3L, `3` = 2L, `4` = 1L)
  ))
# recode other: wave "W12" -> integer 12, id to integer
d_long %<>%
  mutate(
    wave = gsub("W", "", .$wave) %>% as.integer(),
    id = as.integer(id)
  )
# sort by respondent, then wave
d_long %<>%
  arrange(id, wave)
Next we impute data. Note that the actual imputation below is deactivated and loaded from memory to save time.
Determine amount of missingness per respondent per wave.
vars_used <- c("life_sat", "aff_pos_1", "aff_pos_2", "aff_pos_3", "aff_neg_1", "aff_neg_2", "aff_neg_3", "aff_neg_4", "aff_neg_5", "aff_neg_6", "soc_med_read", "soc_med_like_share", "soc_med_post", "soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw", "age", "male", "born_aus", "born_aus_prnts", "edu_fac", "employment_fac", "health", "res_vienna", "acc_bal", "acc_gar", "home_sqm", "med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri", "risk_prop", "loc_cntrl_int_1", "loc_cntrl_int_2", "loc_cntrl_int_3", "loc_cntrl_int_4", "act_wrk", "act_spo", "act_frn", "act_sho", "act_pet", "sat_dem", "trst_media", "trst_police", "trst_media", "trst_hlthsec", "trst_gov", "trst_army", "corona_pos") # only include vars measured at _all_ points
Filter respondents with more than 50% missing data – only needed for analyses as originally preregistered (see additional analyses).
# Compute the per-row proportion of missingness across the variables used
# and keep rows with less than 50% missing; export the filtered data.
d_long_50 <-
d_long %>%
mutate(na_perc = rowSums(is.na(select(., all_of(vars_used))) / ncol(select(., all_of(vars_used))))) %>%
filter(na_perc < .5)
write_csv(d_long_50, "data/data_50.csv")
Exclude social media use data, because they were measured only on selected waves.
# Social media variables were measured only in selected waves; exclude
# them from the imputation targets.
vars_excl <- c("soc_med_read", "soc_med_like_share", "soc_med_post", "soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw")
# Logical matrix of cells to impute: TRUE where a value is missing,
# forced to FALSE for the excluded social media columns.
# NOTE(review): incl_ma / incl_ma_50 are only referenced in the
# commented-out `where =` arguments of mice() below — confirm whether
# they should be active.
incl_ma <- d_long %>%
mutate(across(.cols = everything(), .fns = is.na))
incl_ma[vars_excl] <- FALSE
# now also for data where 50% missing excluded
incl_ma_50 <- d_long_50 %>%
mutate(across(.cols = everything(), .fns = is.na))
incl_ma_50[vars_excl] <- FALSE
For analyses as originally preregistered (see additional analyses), hence using only participants who provided more than 50% of all data.
# Single imputation (predictive mean matching) for the >50%-complete
# subsample, as used in the originally preregistered analyses.
d_long_50_imp <- mice(d_long_50,
method = "pmm", # use predictive mean matching
m = 1, maxit = 30, # only 1 imputation
# where = incl_ma_50,
seed = 180719, print = FALSE)
# replace the mids object with its completed (filled-in) data set
d_long_50_imp <- mice::complete(d_long_50_imp)
write_csv(d_long_50_imp, "data/data_50_imputed.csv")
Because CFAs aren’t evaluated with multiple imputation, impute single data-set. Missing data was imputed for everyone.
# Single imputation of the full sample; the CFAs use this data set
# because they aren't evaluated with multiple imputation.
d_long_100_imp <- mice(d_long,
method = "pmm", # use predictive mean matching
m = 1, # only 1 imputation
maxit = 30,
# where = incl_ma,
seed = 180719, print = FALSE)
# replace the mids object with its completed (filled-in) data set
d_long_100_imp <- mice::complete(d_long_100_imp)
write_csv(d_long_100_imp, "data/data_100_imputed.csv")
Now, multiple imputation of data-sets (20 imputed data sets, 5 iterations each), used for the final analyses.
# impute missing data with multiple imputation
# (20 imputed data sets, 5 iterations each; used for the final analyses)
d_long_100_mim_mice <- mice(d_long,
method = "pmm", # use predictive mean matching
m = 20, # 20 imputations (comment previously said 5 — code uses 20)
maxit = 5, # 5 iterations
# where = incl_ma,
seed = 180719, print = FALSE)
# long-format stack of all imputations incl. the original data
# (include = TRUE), needed to rebuild a mids object with as.mids() later
d_long_100_mim <- mice::complete(d_long_100_mim_mice, action = "long", include = TRUE)
write_csv(d_long_100_mim, "data/data_100_mim.csv")
To increase speed, you can also load the imputed data from the directory
# Load previously imputed data from disk instead of re-imputing (faster).
d_long_50_imp <- read_csv("data/data_50_imputed.csv") # data where people with >50% missing data were filtered
d_long_100_imp <- read_csv("data/data_100_imputed.csv") # data where all people were included
d_long_100_mim <- read_csv("data/data_100_mim.csv") # data where all people were included
First, create means for scales. Then, create mean values for each person (between), and unique changes per wave (within).
# Scale means per row: positive affect (3 items), negative affect
# (6 items), internal locus of control (4 items).
d_long_100_mim <-
d_long_100_mim %>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Person-level decomposition per variable: _b = person mean (between),
# _w = per-wave deviation from the person mean (within).
d_long_100_mim %<>%
group_by(id) %>%
mutate_at(vars(health:loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
# rebuild a mice mids object from the long-format imputation stack
d_long_100_mim_mice <- as.mids(d_long_100_mim)
# Same scale means for the single-imputation data set.
d_long_100_imp <-
d_long_100_imp %>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Between (_b) / within (_w) decomposition per person, as above.
d_long_100_imp %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
And now the same for the standardized data set.
# Standardize variables within each imputed data set (.imp), skipping
# ids, wave, and the categorical/dummy variables excluded with `-`.
d_long_100_mim_std <-
d_long_100_mim %>%
group_by(.imp) %>%
mutate_at(vars(gender:corona_pos, -id, -wave, -gender, -male, -born_aus, -born_aus_prnts, -edu_fac, -employment_fac, -res_vienna, -acc_bal, -acc_gar), ~ c(scale(.))) %>%
ungroup()
# Scale means recomputed from the standardized items.
d_long_100_mim_std %<>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Between (_b) / within (_w) decomposition per person, as above.
d_long_100_mim_std %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
# Standardized mids object for pooled analyses.
d_long_100_mim_mice_std <- as.mids(d_long_100_mim_std)
And because later we want to rerun the results as originally preregistered, we’ll now do the same for the filtered data (respondents with more than 50% missing data excluded).
# Scale means and between/within decomposition for the imputed
# >50%-complete subsample (preregistered analyses).
d_long_50_imp <-
d_long_50_imp %>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
d_long_50_imp %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m),
funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
And because later we want to rerun the results without imputed data, now the same for data without imputation.
# Scale means and between/within decomposition for the non-imputed
# filtered data (analyses without imputation).
d_long_50 %<>%
mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
d_long_50 %<>%
group_by(id) %>%
mutate_at(vars(health:corona_pos, loc_cntrl_int_m), funs(b = mean(., na.rm = TRUE), w = . - mean(., na.rm = TRUE))) %>%
ungroup()
# Persist the full workspace for the next script/section.
save.image("data/workspace_1.RData")