Sequence and Cluster Analysis

Sequence Analysis

Cluster Analysis

Alcohol Use

Depression

Sequence and cluster analysis of daily time use data on a nationally representative sample of Australian adolescents

Published

March 20, 2026

to continue moving across from old notes

Introduction

This analysis aims to identify distinct clusters of daily time use patterns and to examine how cluster membership is associated with depression and alcohol use, as well as their co-occurrence.

Using data from the Longitudinal Study of Australian Children (Wave 8, B cohort, aged 14-15), I employed sequence analysis of daily time use diary data to identify homogenous clusters of adolescents with common time use patterns.

Subsequently, I explored membership in these clusters and their associations with depression, alcohol use, and their co-occurrence to gain insight into how daily patterns may relate to the combined challenges of depression and alcohol related issues in adolescence (document to come)

All analyses are split by day type (school day vs non school day)

Descriptives of Sequences

Across both day types, sleep made up the largest proportion of daily time, ranging from 6 – 10.5 hours.

Show/hide the code

# school days
statemeans <- as_tibble(
  round(seqmeant(seqs, serr = TRUE), 1),
  rownames = "State"
) |>
  mutate(State = labels) |>
  mutate(
    Sample = "s",
    Percent = round(Mean / 1440 * 100, 1),
    ifelse(grepl("\\.", Percent), Percent, paste0(Percent, ".0"))
  ) |>
  as.data.frame() |>
  select(State, Mean, Percent, SD = Stdev) |>
  mutate(
    Percent = paste0(Percent, "%"),
    Mean = ifelse(grepl("\\.", Mean), Mean, paste0(Mean, ".0")),
    SD = ifelse(grepl("\\.", SD), SD, paste0(SD, ".0")),
    Mean = paste0(Mean, " (", SD, ")")
  ) |>
  select(-SD) |>
  rename(`Mean minutes (SD)` = Mean, `% of Day` = Percent)

# non school days
statemeann <- as_tibble(
  round(seqmeant(seqn, serr = TRUE), 1),
  rownames = "State"
) |>
  mutate(State = labels) |>
  mutate(
    Sample = "Non-s",
    Percent = round(Mean / 1440 * 100, 1),
    ifelse(grepl("\\.", Percent), Percent, paste0(Percent, ".0"))
  ) |>
  as.data.frame() |>
  select(State, Mean, Percent, SD = Stdev) |>
  mutate(
    Percent = paste0(Percent, "%"),
    Mean = ifelse(grepl("\\.", Mean), Mean, paste0(Mean, ".0")),
    SD = ifelse(grepl("\\.", SD), SD, paste0(SD, ".0")),
    Mean = paste0(Mean, " (", SD, ")")
  ) |>
  select(-SD) |>
  rename(`Mean minutes (SD)` = Mean, `% of Day` = Percent)


# create overall descriptives table
descriptives <- statemeans |> left_join(statemeann, by = "State")
descriptives <- descriptives |>
  rename(
    minss = `Mean minutes (SD).x`,
    percs = `% of Day.x`,
    minsn = `Mean minutes (SD).y`,
    percn = `% of Day.y`,
  )

descriptives_tbl <- descriptives |> flextable()

typology <- data.frame(
  col_keys = names(descriptives_tbl$body$dataset),
  head = c("", rep("School days", 2), rep("Non-school days", 2)),
  head2 = c(
    "Activity",
    rep(c("Mean minutes (SD)", "% of Day"), 2)
  )
)
descriptives_tbl <- descriptives_tbl |>
  set_header_df(mapping = typology, key = "col_keys") |>
  merge_at(i = 1, j = c(2:3), part = "header") |>
  merge_at(i = 1, j = c(4:5), part = "header") |>
  align(align = "center", j = c(2:5), part = "all") |>
  my_flextable() |>
  hline(i = c(1, 2), part = "header", border = fp_border(color = "black")) |>
  hline_top(border = fp_border(color = "black"), part = "all") |>
  hline_bottom(border = fp_border(color = "black"), part = "all")

descriptives_tbl

Table 1: Descriptives of sequences on school and non school days

	School days		Non-school days
Activity	Mean minutes (SD)	% of Day	Mean minutes (SD)	% of Day
Sleep	528.8 (65.3)	36.7%	617.1 (108.0)	42.9%
Education	326.5 (112.2)	22.7%	28.5 (77.6)	2%
Work (paid/unpaid)	33.0 (52.4)	2.3%	70.9 (103.5)	4.9%
Personal care	61.3 (50.8)	4.3%	75.5 (80.6)	5.2%
Eating and drinking	57.8 (86.6)	4%	54.8 (75.3)	3.8%
Sedentary behaviour	149.1 (126.8)	10.4%	290.5 (187.4)	20.2%
Physical activity	60.5 (75.9)	4.2%	74.6 (98.3)	5.2%
Leisure	36.9 (60.2)	2.6%	58.2 (90.6)	4%
Socialising	118.2 (110.9)	8.2%	115.4 (139.7)	8%
Travel	67.9 (61.4)	4.7%	54.4 (81.2)	3.8%

Sequence Plots

Mean time plot displaying the average amount of time in each activity state

Show/hide the code

meantimeplot1 <- ggseqmtplot(seqs, weighted = TRUE) +
  scale_y_continuous(breaks = seq(0, 700, 50), limits = c(0, 700)) +
  labs(title = "School days", y = "Mean minutes per day") +
  scale_x_discrete(labels = plot_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    legend.position = "none",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 9, face = "bold"),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8)
  ) +
  common_theme

meantimeplot2 <- ggseqmtplot(seqn, weighted = TRUE) +
  scale_y_continuous(breaks = seq(0, 700, 50), limits = c(0, 700)) +
  labs(title = "Non-school days", y = "Mean minutes per day") +
  scale_x_discrete(labels = plot_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    legend.position = "none",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 9, face = "bold"),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8)
  ) +
  common_theme

meantimeplot <- wrap_elements(full = meantimeplot1) /
  wrap_elements(full = meantimeplot2)

meantimeplot

Figure 1: Mean time plots on school and non-school days

Modal state sequence plots displaying the sequence of the modal states with each mode proportional to its frequency at the given time

Show/hide the code

hrly <- as.character(seq(1, 1440, by = 120))
hrly_labs <- c(
  "04:00",
  "06:00",
  "08:00",
  "10:00",
  "12:00",
  "14:00",
  "16:00",
  "18:00",
  "20:00",
  "22:00",
  "00:00",
  "02:00"
)

modalstatesequences <- ggseqmsplot(seqs, weighted = TRUE) +
  labs(
    title = "School days",
    y = "Relative Frequency (Weighted)",
    x = "Time of Day"
  ) +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme

legend_plot <- modalstatesequences +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 9, face = "bold"),
    legend.spacing.x = unit(0.6, "cm"),
    legend.key.width = unit(1.2, "cm"),
    legend.box.margin = margin(0, 0, 0, 0),
    legend.margin = margin(0, 0, 0, 0)
  )

legend <- get_legend(legend_plot)
modalstatesequencen <- ggseqmsplot(seqn, weighted = TRUE) +
  labs(
    title = "Non-school days",
    y = "Relative Frequency (Weighted)",
    x = "Time of Day"
  ) +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme

modalstatesequences <- modalstatesequences + theme(legend.position = "none")
modalstatesequencen <- modalstatesequencen + theme(legend.position = "none")

modalstatesequencess <- wrap_elements(full = modalstatesequences) /
  wrap_elements(full = modalstatesequencen)
modalstatesequencess <- plot_grid(
  modalstatesequencess,
  legend,
  ncol = 1,
  rel_heights = c(1, 0.1)
)
modalstatesequencess

State distribution plot representing the sequence of the state frequencies by time point (chronograms)

Show/hide the code

statedistributions <- ggseqdplot(seqs) +
  labs(title = "School days", y = "Relative Frequency", x = "Time of Day") +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme


statedistributionn <- ggseqdplot(seqn) +
  labs(title = "Non-school days", y = "Relative Frequency", x = "Time of Day") +
  scale_x_discrete(breaks = hrly, labels = hrly_labs) +
  labs_pubr() +
  theme_pubclean() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    #panel.border = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 9, colour = "black", face = "bold"),
    axis.title = element_text(size = 10, colour = "black", face = "bold")
  ) +
  common_theme


statedistributions <- statedistributions + theme(legend.position = "none")
statedistributionn <- statedistributionn + theme(legend.position = "none")

statedistributionplot <- wrap_elements(full = statedistributions) /
  wrap_elements(full = statedistributionn)
statedistributionplot <- plot_grid(
  statedistributionplot,
  legend,
  ncol = 1,
  rel_heights = c(1, 0.1)
)


statedistributionplot

Figure 3: State distribution plots on school and non-school days

Cluster Analysis

Hierarchical clustering using ward method on school days

Show/hide the code

set.seed(123)
# get weights in the right order 
sw <- data_day |> 
  filter(hicid %in% s_id) |> 
  mutate(.ord = match(hicid, as.numeric(rownames(s_data)))) |> 
  arrange(.ord) |> pull(hweights)
nw <- data_day |> 
  filter(hicid %in% n_id) |> 
  mutate(.ord = match(hicid, as.numeric(rownames(n_data)))) |> 
  arrange(.ord) |> pull(hweights)
stopifnot(length(sw) == nrow(s_data))
stopifnot(length(nw) == nrow(n_data))

om_times <- sequence_objects$om_times
# turn off scientfic notation
options(scipen = 99)
# ward hierarchical clustering; unweighted and weighted versions
clusterwards <- hclust(as.dist(om_times), method = "ward.D")
clusterward_wtds <- hclust(as.dist(om_times), method = "ward.D", members = sw)

# weighted cluster range diagnostics up to 15 clusters.
wardRanges <- as.clustrange(
  clusterwards,
  diss = om_times,
  weights = sw,
  ncluster = 15
)

plot(wardRanges, stat = c("ASWw", "HG", "PBC", "HC"))

Figure 4: Hierarchical ward clustering ranges on school days

Partitioning around medioids

Show/hide the code

# weighted k medoids range diagnostics.
pamwardRanges <- wcKMedRange(
  om_times,
  kvals = 2:15,
  weights = sw
)

plot(pamwardRanges, stat = c("ASWw", "HG", "PBC", "HC"))

Figure 5: Partitioning around medioids ward clustering ranges on school days

Final solution is four clusters on school days.

Show/hide the code

# final solution;
pamclust4s <- wcKMedoids(
  om_times,
  k = 4,
  weights = sw,
  initialclust = clusterward_wtds
)

pamclust4s$stats

        PBC          HG        HGSD         ASW        ASWw          CH 
 0.28169716  0.36985476  0.36985476  0.08017240  0.08307900 42.75257434 
         R2        CHsq        R2sq          HC 
 0.09207103 87.69424686  0.17219089  0.27781771

Show/hide the code

# final cluster membership table for joining.
data_fins <- data.frame(
  hicid = as.numeric(rownames(s_data)),
  cluster = as.numeric(pamclust4s$clustering)
)


# per person minute totals; compute on the state columns only.
data_times <- s_data |>
  mutate(
    hicid = as.numeric(rownames(s_data)),
    cluster = as.numeric(pamclust4s$clustering),
    hweights = sw,
    sleepsum = rowSums(s_data == 1),
    edusum = rowSums(s_data == 2),
    worksum = rowSums(s_data == 3),
    caresum = rowSums(s_data == 4),
    eatsum = rowSums(s_data == 5),
    sedsum = rowSums(s_data == 6),
    actsum = rowSums(s_data == 7),
    leisum = rowSums(s_data == 8),
    socsum = rowSums(s_data == 9),
    trasum = rowSums(s_data == 10)
  )

Hierarchical ward clustering on non school days

Show/hide the code

# non school clustering
om_timen <- sequence_objects$om_timen
clusterwardn <- hclust(as.dist(om_timen), method = "ward.D")
clusterward_wtdn <- hclust(as.dist(om_timen), method = "ward.D", members = nw)

wardRangen <- as.clustrange(
  clusterwardn,
  diss = om_timen,
  weights = nw,
  ncluster = 15
)

plot(wardRangen, stat = c("ASWw", "HG", "PBC", "HC"))

Figure 6: Hierarchical ward clustering ranges on non school days

Partitioning around medioids

Show/hide the code

# weighted k medoids range diagnostics.
pamwardRangen <- wcKMedRange(
  om_timen,
  kvals = 2:15,
  weights = nw
)

plot(pamwardRangen, stat = c("ASWw", "HG", "PBC", "HC"))

Figure 7: Partitioning around medioids ward clustering ranges on non school days

Final solution is five clusters on non school days

Show/hide the code

pamclust5n <- wcKMedoids(
  om_timen,
  k = 5,
  weights = nw,
  initialclust = clusterward_wtdn
)
pamclust5n$stats

         PBC           HG         HGSD          ASW         ASWw           CH 
  0.33135440   0.44368368   0.44368367   0.08873401   0.09202345  52.94472048 
          R2         CHsq         R2sq           HC 
  0.13591826 107.54551011   0.24214636   0.26235154

Show/hide the code

data_finn <- data.frame(
  hicid = as.numeric(rownames(n_data)),
  cluster = as.numeric(pamclust5n$clustering)
)

data_timen <- n_data |>
  mutate(
    hicid = as.numeric(rownames(n_data)),
    cluster = as.numeric(pamclust5n$clustering),
    hweights = nw,
    sleepsum = rowSums(n_data == 1),
    edusum = rowSums(n_data == 2),
    worksum = rowSums(n_data == 3),
    caresum = rowSums(n_data == 4),
    eatsum = rowSums(n_data == 5),
    sedsum = rowSums(n_data == 6),
    actsum = rowSums(n_data == 7),
    leisum = rowSums(n_data == 8),
    socsum = rowSums(n_data == 9),
    trasum = rowSums(n_data == 10)
  )

Cluster descriptives

to be continued