# Build the long-format working data set.
# Only rows from the ERQ assessment waves (Years 3-6, i.e. the four session
# codes listed below) are kept; the result is ordered by participant and,
# within participant, by session.
df_long <- arrange(
  filter(
    abcd_data,
    session_id %in% c("ses-03A", "ses-04A", "ses-05A", "ses-06A")
  ),
  participant_id,
  session_id
)
# Recode the raw columns into analysis variables, keep only those variables,
# and retain complete cases.
#
# Note on missingness: factor() with an explicit `levels` vector turns every
# value that is not listed into NA, and drop_na() without column arguments
# removes a row if ANY selected column is NA. Rows with an unlisted sex or
# race code, or a missing covariate/outcome, are therefore dropped here.
df_long <- df_long %>%
  # Identifiers and design variables
  mutate(
    participant_id = factor(participant_id),
    session_id = factor(
      session_id,
      levels = c("ses-03A", "ses-04A", "ses-05A", "ses-06A"),
      labels = c("Year_3", "Year_4", "Year_5", "Year_6")
    ),
    site = factor(ab_g_dyn__design_site),
    family_id = factor(ab_g_stc__design_id__fam)
  ) %>%
  # Demographic covariates
  mutate(
    age = as.numeric(ab_g_dyn__visit_age),
    sex = factor(
      ab_g_stc__cohort_sex,
      levels = c("1", "2"),
      labels = c("Male", "Female")
    ),
    race = factor(
      ab_g_stc__cohort_race__nih,
      levels = c("2", "3", "4", "5", "6", "7", "8"),
      labels = c(
        "White", "Black", "Asian", "AI/AN", "NH/PI", "Multi-Race", "Other"
      )
    ),
    education = as.numeric(ab_g_dyn__cohort_edu__cgs),
    income = as.numeric(ab_g_dyn__cohort_income__hhold__3lvl)
  ) %>%
  # Outcome: ERQ suppression mean score, rounded to two decimals
  mutate(
    suppression = round(as.numeric(mh_y_erq__suppr_mean), digits = 2)
  ) %>%
  # Keep the analysis variables only, in this column order
  select(
    participant_id, session_id, site, family_id,
    age, sex, race, education, income,
    suppression
  ) %>%
  # Complete-case restriction across all selected columns
  drop_na()
# Baseline covariates taken from the Year_3 rows of `df_long`.
# Continuous covariates are centred on the mean of the Year_3 rows
# (the means are computed after the filter), and sex is dummy-coded as
# `female` (1 = "Female", 0 = otherwise).
baseline_covariates <- df_long %>%
  filter(session_id == "Year_3") %>%
  mutate(
    age_c = age - mean(age, na.rm = TRUE),
    education_c = education - mean(education, na.rm = TRUE),
    income_c = income - mean(income, na.rm = TRUE),
    # TRUE/FALSE -> 1/0; an NA in `sex` stays NA
    female = as.numeric(sex == "Female")
  ) %>%
  # Only the participant key and the derived covariates are returned
  select(participant_id, age_c, female, education_c, income_c)