Sequence and Cluster Analysis

Sequence Analysis
Cluster Analysis
Alcohol Use
Depression
Sequence and cluster analysis of daily time use data on a nationally representative sample of Australian adolescents
Published

March 20, 2026

to continue moving across from old notes

Introduction

This analysis aims to identify distinct clusters of daily time use patterns and to examine how cluster membership is associated with depression and alcohol use, as well as their co-occurrence.

Using data from the Longitudinal Study of Australian Children (Wave 8, B cohort, aged 14-15), I employed sequence analysis of daily time use diary data to identify homogenous clusters of adolescents with common time use patterns.

Subsequently, I explored membership in these clusters and their associations with depression, alcohol use, and their co-occurrence to gain insight into how daily patterns may relate to the combined challenges of depression and alcohol related issues in adolescence (document to come)

All analyses are split by day type (school day vs non school day)

Descriptives of Sequences

Across both day types, sleep made up the largest proportion of daily time, ranging from 6 – 10.5 hours.

Show/hide the code
# school days
statemeans <- as_tibble(
  round(seqmeant(seqs, serr = TRUE), 1),
  rownames = "State"
) |>
  mutate(State = labels) |>
  mutate(
    Sample = "s",
    Percent = round(Mean / 1440 * 100, 1),
    ifelse(grepl("\\.", Percent), Percent, paste0(Percent, ".0"))
  ) |>
  as.data.frame() |>
  select(State, Mean, Percent, SD = Stdev) |>
  mutate(
    Percent = paste0(Percent, "%"),
    Mean = ifelse(grepl("\\.", Mean), Mean, paste0(Mean, ".0")),
    SD = ifelse(grepl("\\.", SD), SD, paste0(SD, ".0")),
    Mean = paste0(Mean, " (", SD, ")")
  ) |>
  select(-SD) |>
  rename(`Mean minutes (SD)` = Mean, `% of Day` = Percent)

# non school days
statemeann <- as_tibble(
  round(seqmeant(seqn, serr = TRUE), 1),
  rownames = "State"
) |>
  mutate(State = labels) |>
  mutate(
    Sample = "Non-s",
    Percent = round(Mean / 1440 * 100, 1),
    ifelse(grepl("\\.", Percent), Percent, paste0(Percent, ".0"))
  ) |>
  as.data.frame() |>
  select(State, Mean, Percent, SD = Stdev) |>
  mutate(
    Percent = paste0(Percent, "%"),
    Mean = ifelse(grepl("\\.", Mean), Mean, paste0(Mean, ".0")),
    SD = ifelse(grepl("\\.", SD), SD, paste0(SD, ".0")),
    Mean = paste0(Mean, " (", SD, ")")
  ) |>
  select(-SD) |>
  rename(`Mean minutes (SD)` = Mean, `% of Day` = Percent)


# create overall descriptives table
descriptives <- statemeans |> left_join(statemeann, by = "State")
descriptives <- descriptives |>
  rename(
    minss = `Mean minutes (SD).x`,
    percs = `% of Day.x`,
    minsn = `Mean minutes (SD).y`,
    percn = `% of Day.y`,
  )

descriptives_tbl <- descriptives |> flextable()

typology <- data.frame(
  col_keys = names(descriptives_tbl$body$dataset),
  head = c("", rep("School days", 2), rep("Non-school days", 2)),
  head2 = c(
    "Activity",
    rep(c("Mean minutes (SD)", "% of Day"), 2)
  )
)
descriptives_tbl <- descriptives_tbl |>
  set_header_df(mapping = typology, key = "col_keys") |>
  merge_at(i = 1, j = c(2:3), part = "header") |>
  merge_at(i = 1, j = c(4:5), part = "header") |>
  align(align = "center", j = c(2:5), part = "all") |>
  my_flextable() |>
  hline(i = c(1, 2), part = "header", border = fp_border(color = "black")) |>
  hline_top(border = fp_border(color = "black"), part = "all") |>
  hline_bottom(border = fp_border(color = "black"), part = "all")

descriptives_tbl
Table 1: Descriptives of sequences on school and non school days

School days

Non-school days

Activity

Mean minutes (SD)

% of Day

Mean minutes (SD)

% of Day

Sleep

528.8 (65.3)

36.7%

617.1 (108.0)

42.9%

Education

326.5 (112.2)

22.7%

28.5 (77.6)

2%

Work (paid/unpaid)

33.0 (52.4)

2.3%

70.9 (103.5)

4.9%

Personal care

61.3 (50.8)

4.3%

75.5 (80.6)

5.2%

Eating and drinking

57.8 (86.6)

4%

54.8 (75.3)

3.8%

Sedentary behaviour

149.1 (126.8)

10.4%

290.5 (187.4)

20.2%

Physical activity

60.5 (75.9)

4.2%

74.6 (98.3)

5.2%

Leisure

36.9 (60.2)

2.6%

58.2 (90.6)

4%

Socialising

118.2 (110.9)

8.2%

115.4 (139.7)

8%

Travel

67.9 (61.4)

4.7%

54.4 (81.2)

3.8%

Sequence Plots

Mean time plot displaying the average amount of time in each activity state

Show/hide the code
meantimeplot1 <- ggseqmtplot(seqs, weighted = TRUE) +
  scale_y_continuous(breaks = seq(0, 700, 50), limits = c(0, 700)) +
  labs(title = "School days", y = "Mean minutes per day") +
  scale_x_discrete(labels = plot_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    legend.position = "none",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 9, face = "bold"),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8)
  ) +
  common_theme

meantimeplot2 <- ggseqmtplot(seqn, weighted = TRUE) +
  scale_y_continuous(breaks = seq(0, 700, 50), limits = c(0, 700)) +
  labs(title = "Non-school days", y = "Mean minutes per day") +
  scale_x_discrete(labels = plot_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    legend.position = "none",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 9, face = "bold"),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8)
  ) +
  common_theme

meantimeplot <- wrap_elements(full = meantimeplot1) /
  wrap_elements(full = meantimeplot2)

meantimeplot
Figure 1: Mean time plots on school and non-school days

Modal state sequence plots displaying the sequence of the modal states with each mode proportional to its frequency at the given time

Show/hide the code
hrly <- as.character(seq(1, 1440, by = 120))
hrly_labs <- c(
  "04:00",
  "06:00",
  "08:00",
  "10:00",
  "12:00",
  "14:00",
  "16:00",
  "18:00",
  "20:00",
  "22:00",
  "00:00",
  "02:00"
)

modalstatesequences <- ggseqmsplot(seqs, weighted = TRUE) +
  labs(
    title = "School days",
    y = "Relative Frequency (Weighted)",
    x = "Time of Day"
  ) +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme

legend_plot <- modalstatesequences +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 9, face = "bold"),
    legend.spacing.x = unit(0.6, "cm"),
    legend.key.width = unit(1.2, "cm"),
    legend.box.margin = margin(0, 0, 0, 0),
    legend.margin = margin(0, 0, 0, 0)
  )

legend <- get_legend(legend_plot)
modalstatesequencen <- ggseqmsplot(seqn, weighted = TRUE) +
  labs(
    title = "Non-school days",
    y = "Relative Frequency (Weighted)",
    x = "Time of Day"
  ) +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme

modalstatesequences <- modalstatesequences + theme(legend.position = "none")
modalstatesequencen <- modalstatesequencen + theme(legend.position = "none")

modalstatesequencess <- wrap_elements(full = modalstatesequences) /
  wrap_elements(full = modalstatesequencen)
modalstatesequencess <- plot_grid(
  modalstatesequencess,
  legend,
  ncol = 1,
  rel_heights = c(1, 0.1)
)
modalstatesequencess
Figure 2: Modal state sequence plots on school and non-school days

State distribution plot representing the sequence of the state frequencies by time point (chronograms)

Show/hide the code
statedistributions <- ggseqdplot(seqs) +
  labs(title = "School days", y = "Relative Frequency", x = "Time of Day") +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme


statedistributionn <- ggseqdplot(seqn) +
  labs(title = "Non-school days", y = "Relative Frequency", x = "Time of Day") +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme


statedistributions <- statedistributions + theme(legend.position = "none")
statedistributionn <- statedistributionn + theme(legend.position = "none")

statedistributionplot <- wrap_elements(full = statedistributions) /
  wrap_elements(full = statedistributionn)
statedistributionplot <- plot_grid(
  statedistributionplot,
  legend,
  ncol = 1,
  rel_heights = c(1, 0.1)
)


statedistributionplot
Figure 3: State distribution plots on school and non-school days

Cluster Analysis

Hierarchical clustering using ward method on school days

Show/hide the code
set.seed(123)
# get weights in the right order 
sw <- data_day |> 
  filter(hicid %in% s_id) |> 
  mutate(.ord = match(hicid, as.numeric(rownames(s_data)))) |> 
  arrange(.ord) |> pull(hweights)
nw <- data_day |> 
  filter(hicid %in% n_id) |> 
  mutate(.ord = match(hicid, as.numeric(rownames(n_data)))) |> 
  arrange(.ord) |> pull(hweights)
stopifnot(length(sw) == nrow(s_data))
stopifnot(length(nw) == nrow(n_data))

om_times <- sequence_objects$om_times
# turn off scientfic notation
options(scipen = 99)
# ward hierarchical clustering; unweighted and weighted versions
clusterwards <- hclust(as.dist(om_times), method = "ward.D")
clusterward_wtds <- hclust(as.dist(om_times), method = "ward.D", members = sw)

# weighted cluster range diagnostics up to 15 clusters.
wardRanges <- as.clustrange(
  clusterwards,
  diss = om_times,
  weights = sw,
  ncluster = 15
)

plot(wardRanges, stat = c("ASWw", "HG", "PBC", "HC"))
Figure 4: Hierarchical ward clustering ranges on school days

Partitioning around medioids

Show/hide the code
# weighted k medoids range diagnostics.
pamwardRanges <- wcKMedRange(
  om_times,
  kvals = 2:15,
  weights = sw
)

plot(pamwardRanges, stat = c("ASWw", "HG", "PBC", "HC"))
Figure 5: Partitioning around medioids ward clustering ranges on school days

Final solution is four clusters on school days.

Show/hide the code
# final solution;
pamclust4s <- wcKMedoids(
  om_times,
  k = 4,
  weights = sw,
  initialclust = clusterward_wtds
)

pamclust4s$stats
        PBC          HG        HGSD         ASW        ASWw          CH 
 0.28169716  0.36985476  0.36985476  0.08017240  0.08307900 42.75257434 
         R2        CHsq        R2sq          HC 
 0.09207103 87.69424686  0.17219089  0.27781771 
Show/hide the code
# final cluster membership table for joining.
data_fins <- data.frame(
  hicid = as.numeric(rownames(s_data)),
  cluster = as.numeric(pamclust4s$clustering)
)


# per person minute totals; compute on the state columns only.
data_times <- s_data |>
  mutate(
    hicid = as.numeric(rownames(s_data)),
    cluster = as.numeric(pamclust4s$clustering),
    hweights = sw,
    sleepsum = rowSums(s_data == 1),
    edusum = rowSums(s_data == 2),
    worksum = rowSums(s_data == 3),
    caresum = rowSums(s_data == 4),
    eatsum = rowSums(s_data == 5),
    sedsum = rowSums(s_data == 6),
    actsum = rowSums(s_data == 7),
    leisum = rowSums(s_data == 8),
    socsum = rowSums(s_data == 9),
    trasum = rowSums(s_data == 10)
  )

Hierarchical ward clustering on non school days

Show/hide the code
# non school clustering
om_timen <- sequence_objects$om_timen
clusterwardn <- hclust(as.dist(om_timen), method = "ward.D")
clusterward_wtdn <- hclust(as.dist(om_timen), method = "ward.D", members = nw)

wardRangen <- as.clustrange(
  clusterwardn,
  diss = om_timen,
  weights = nw,
  ncluster = 15
)

plot(wardRangen, stat = c("ASWw", "HG", "PBC", "HC"))
Figure 6: Hierarchical ward clustering ranges on non school days

Partitioning around medioids

Show/hide the code
# weighted k medoids range diagnostics.
pamwardRangen <- wcKMedRange(
  om_timen,
  kvals = 2:15,
  weights = nw
)

plot(pamwardRangen, stat = c("ASWw", "HG", "PBC", "HC"))
Figure 7: Partitioning around medioids ward clustering ranges on non school days

Final solution is five clusters on non school days

Show/hide the code
pamclust5n <- wcKMedoids(
  om_timen,
  k = 5,
  weights = nw,
  initialclust = clusterward_wtdn
)
pamclust5n$stats
         PBC           HG         HGSD          ASW         ASWw           CH 
  0.33135440   0.44368368   0.44368367   0.08873401   0.09202345  52.94472048 
          R2         CHsq         R2sq           HC 
  0.13591826 107.54551011   0.24214636   0.26235154 
Show/hide the code
data_finn <- data.frame(
  hicid = as.numeric(rownames(n_data)),
  cluster = as.numeric(pamclust5n$clustering)
)

data_timen <- n_data |>
  mutate(
    hicid = as.numeric(rownames(n_data)),
    cluster = as.numeric(pamclust5n$clustering),
    hweights = nw,
    sleepsum = rowSums(n_data == 1),
    edusum = rowSums(n_data == 2),
    worksum = rowSums(n_data == 3),
    caresum = rowSums(n_data == 4),
    eatsum = rowSums(n_data == 5),
    sedsum = rowSums(n_data == 6),
    actsum = rowSums(n_data == 7),
    leisum = rowSums(n_data == 8),
    socsum = rowSums(n_data == 9),
    trasum = rowSums(n_data == 10)
  )

Cluster descriptives

to be continued