Descriptives

Published

April 20, 2026

Show/hide the code
# install.packages(c("dplyr", "tidyr", "emmeans", "ggplot2", "car", "knitr"))
options(scipen = 999)
library(here)
library(dplyr) # data manipulation
library(tidyr) # reshaping
library(emmeans) # estimated marginal means for interaction plots
library(ggplot2) # interaction plots
library(car) # vif(), Anova() Type III SS
library(knitr) # kable() for clean table output
library(ggconsort)
library(ggpubr)
# set wd
i_am("descriptives.qmd")

source(here("R/utils.R"))
cohorts <- readRDS(here("R/data/versions/cohorts_script1.RData"))
engagement <- readRDS(engagement)
df_long <- readRDS(df_long)
df <- cohorts |> cohort_pull(randomised)

Of the 484 adolescents who completed the screening (an additional 343 completed but weren’t adolescents), 316 completed the baseline survey and 271 were randomised into the trial conditions. Participants average age was approximately 15.4 years (SD = 1.4), with 67.9% identifying as woman or female, 84.8% as heterosexual.

Show/hide the code
dem <- bl |> 
    filter(!is.na(group)) |> 
    select(
    gender, age, sexuality, english_main, schoolyear, livewithparents) |> 
    tbl_summary(
        statistic = list(
            all_continuous() ~ "{mean} ({sd})",
            all_categorical() ~ "{n} ({p}%)"),
        digits = list(
            all_continuous() ~ c(1, 1), 
            all_categorical() ~ c(0, 1)
        ),
        missing = "no",
    label = list(
        gender = "Gender", 
        age = "Approximate age (years)", 
        sexuality = "Sexuality",
        english_main = "English as main language at home",
        schoolyear = "School year",
        livewithparents = "Lives with parents")
    ) |> 
    modify_header(list(
        label ~ "Characteristic",
        stat_0 ~ "Total (n = 271)"
    )) |> 
        add_stat_label() |> 
        as_flex_table() |> 
        my_flextable()
   
dem
Table 1: Characteristics of randomised sample (n = 271)

Characteristic

Total (n = 271)

Gender, n (%)

Woman or female

184 (67.9%)

Man or male

78 (28.8%)

Non-binary

6 (2.2%)

Other/Prefer not to say

3 (1.1%)

Approximate age (years), Mean (SD)

15.4 (1.4)

Sexuality, n (%)

Straight (heterosexual)

228 (84.8%)

Gay or lesbian

6 (2.2%)

Bisexual

12 (4.5%)

Other/Prefer to self-describe

15 (5.6%)

Prefer not to answer

8 (3.0%)

English as main language at home, n (%)

259 (95.9%)

School year, n (%)

Year 7

7 (2.7%)

Year 8

48 (18.3%)

Year 9

58 (22.1%)

Year 10

55 (21.0%)

Year 11

59 (22.5%)

Year 12

35 (13.4%)

Lives with parents, n (%)

266 (99.3%)

Engagement data was available for 217 participants, and the EI score had a distribution that was statistically significant, as evidenced by a significant shapiro-wilk test (W = 0.98, p = .005). The mean EI score was at 48.9% (SD = 13.7%) and ranged between 12.5% to 75.0%.

Following Taki et al. Interquartile ranges were used for categorising engagement levels, with low engagement defined as below 40.4% (Q1), moderate engagement between 40.4% and 59.1% (Q1 to Q3), and high engagement above 59.1% (Q3). The distribution of EI scores is visualised in Figure 1, with dashed lines indicating the quartiles used for categorisation.

Show/hide the code
q1 <- quantile(engagement$EI_pct, 0.25, na.rm = TRUE)
q3 <- quantile(engagement$EI_pct, 0.75, na.rm = TRUE)

ggplot(engagement, aes(x = EI_pct)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    fill = "#D8A5D8",
    color = "black",
    linewidth = 0.3
  ) +
  geom_density(color = "black", linewidth = 0.7) +
  geom_vline(xintercept = q1, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_vline(xintercept = q3, linetype = "dashed", color = "black", linewidth = 0.8) +
  
  annotate("text", x = q1, y = -Inf, label = "Q1", vjust = 4, size = 4) +
  annotate("text", x = q3, y = -Inf, label = "Q3", vjust = 4, size = 4) +
  
  scale_x_continuous(
    limits = c(0, 100),
    breaks = seq(0, 100, 20)
  ) +
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.05))
  ) +
  coord_cartesian(clip = "off") +
  theme_minimal() +
  theme_pubr() +
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(color = "black"),
    axis.title.x = element_text(margin = margin(t = 22)),
    plot.margin = margin(t = 5, r = 5, b = 30, l = 5)
  ) +
  labs(
    title = "Engagement Index Distribution",
    x = "Engagement Index (%)",
    y = "Density"
  ) 
Figure 1: Distribution of Overall Engagement Index (EI) with quartiles

Participant characteristics associated with engagement are presented in Table 2. Approximate age (p = .056) and sexuality (p = .025) were the only characteristics that showed significant differences across engagement levels.

Show/hide the code
engagement |> 
    select(dyad_id, engagement_quartile)|>
    left_join(bl |> filter(!is.na(group)) |> 
    select(dyad_id, gender, age, sexuality, english_main, schoolyear, livewithparents), by = "dyad_id") |> 
    select(-dyad_id) |> 
    tbl_summary(
        by = engagement_quartile,
        statistic = list(
            all_continuous() ~ "{mean} ({sd})",
            all_categorical() ~ "{n} ({p}%)"),
        digits = list(
            all_continuous() ~ c(1, 1), 
            all_categorical() ~ c(0, 1)
        ),
        missing = "no",
        type = list(
            starts_with("strategy") ~ "categorical",
            schoolyear ~ "categorical"
        ),
    label = list(
        gender = "Gender", 
        age = "Approximate age (years)", 
        sexuality = "Sexuality",
        english_main = "English as main language at home",
        schoolyear = "School year",
        livewithparents = "Lives with parents")
    ) |> 
    add_stat_label() |> 
    add_p() |> 
    modify_footnote(everything() ~ NA) |> 
    as_flex_table() |> my_flextable()
Table 2: Participant characteristics by engagement level

Characteristic

Low
N = 48

Moderate
N = 115

High
N = 54

p-value

Gender, n (%)

0.5

Woman or female

32 (66.7%)

79 (68.7%)

38 (70.4%)

Man or male

14 (29.2%)

33 (28.7%)

13 (24.1%)

Non-binary

1 (2.1%)

1 (0.9%)

3 (5.6%)

Other/Prefer not to say

1 (2.1%)

2 (1.7%)

0 (0.0%)

Approximate age (years), Mean (SD)

15.7 (1.5)

15.3 (1.4)

15.0 (1.2)

0.056

Sexuality, n (%)

0.025

Straight (heterosexual)

43 (91.5%)

94 (81.7%)

40 (75.5%)

Gay or lesbian

0 (0.0%)

4 (3.5%)

2 (3.8%)

Bisexual

1 (2.1%)

9 (7.8%)

1 (1.9%)

Other/Prefer to self-describe

3 (6.4%)

7 (6.1%)

4 (7.5%)

Prefer not to answer

0 (0.0%)

1 (0.9%)

6 (11.3%)

English as main language at home, n (%)

46 (95.8%)

106 (93.0%)

53 (98.1%)

0.4

School year, n (%)

Year 7

3 (6.5%)

4 (3.6%)

0 (0.0%)

Year 8

5 (10.9%)

21 (18.9%)

14 (26.4%)

Year 9

8 (17.4%)

27 (24.3%)

12 (22.6%)

Year 10

9 (19.6%)

19 (17.1%)

15 (28.3%)

Year 11

11 (23.9%)

25 (22.5%)

11 (20.8%)

Year 12

10 (21.7%)

15 (13.5%)

1 (1.9%)

Lives with parents, n (%)

48 (100.0%)

112 (99.1%)

53 (100.0%)

>0.9

(based on Kruskal-Wallis test for age and chi-square tests or Fisher’s exact tests for categorical variables)