# function to run univariable models at once
run_uv_hurdle <- function(outcome, predictors,
data, exponentiate = T) {
# build both models seperately
model_tables <- map(predictors, ~ {
fmla <- as.formula(paste0(outcome, " ~ ", .x, " | ", .x))
model <- hurdle(fmla, data = data, dist = "negbin", zero.dist = "binomial")
# logit part
tbl_logit <- tbl_regression(
model,
component = "zero_inflated",
exponentiate = TRUE,
estimate_fun = function(x) formatC(x, digits = 2, format = "f"),
pvalue_fun = function(x) {
p <- style_pvalue(x, digits = 3)
gsub("(?<=<)0\\.|^0\\.", ".", p, perl = TRUE)
},
tidy_fun = broom.helpers::tidy_zeroinfl
) |>
modify_header(label = "Variable", estimate = "OR") |>
modify_table_body(~ .x |>
mutate(
label = ifelse(
row_type == "label" & variable %in% names(labs_vars),
labs_vars[variable],
label
),
ci = ifelse(
!is.na(estimate) & (conf.high - conf.low > 50),
">50",
ci
)
)
) |>
add_n()
# NB count part
tbl_count <- tbl_regression(
model,
component = "conditional",
exponentiate = T,
estimate_fun = function(x) formatC(x, digits = 2, format = "f"),
pvalue_fun = function(x) {
p <- style_pvalue(x, digits = 3)
# remove leading zero from both "0.03" and "<0.001"
gsub("(?<=<)0\\.|^0\\.", ".", p, perl = TRUE)
},
tidy_fun = broom.helpers::tidy_zeroinfl
) |>
modify_header(label = "Variable", estimate = "IRR") |>
modify_table_body(~ .x |>
mutate(
label = ifelse(
row_type == "label" & variable %in% names(labs_vars),
labs_vars[variable],
label
),
ci = ifelse(
!is.na(estimate) & (conf.high - conf.low > 50),
">50",
ci
)
))
list(tbl_logit = tbl_logit, tbl_count = tbl_count)
})
# separate and flatten count and zero tables
tbls_logit <- map(model_tables, "tbl_logit")
tbls_count <- map(model_tables, "tbl_count")
tbl_merge(
tbls = list(
tbl_stack(tbls_logit),
tbl_stack(tbls_count)
),
tab_spanner = c("Hurdle model (logit)", "Count model (NB)")
)
}
## apply to absenteeism
outcome <- "absenteeism"
predictors <- df_sub |> dplyr::select(-presenteeism, -absenteeism) |> names()
tbl_abs_uv <- suppressMessages(
run_uv_hurdle(outcome, predictors, data = df_sub, exponentiate = TRUE)
)
tbl_abs_uv <- tbl_abs_uv |>
modify_table_body(~ .x |>
mutate(
estimate_1 = number(estimate_1, accuracy = .01),
estimate_1 = if_else(is.na(estimate_1), "—", estimate_1),
estimate_2 = number(estimate_2, accuracy = .01),
estimate_2 = if_else(is.na(estimate_2), "—", estimate_2),
label = case_when(label == "0" ~ "No", label == "1" ~ "Yes", TRUE ~ label),
groupname_col_1 = NULL,
groupname_col_2 = NULL
)
) |>
as_flex_table() |>
delete_part("footer")
# change spanning headers
typology <- data.frame(
col_keys = tbl_abs_uv$col_keys,
head1 = c("", "", rep("Hurdle Model (logit)", 3), rep("Count Model (NB)", 3)),
head2 = c("Variable", "N", "OR", "95% CI", "p", "IRR", "95% CI", "p"),
stringsAsFactors = FALSE
)
# print and format
tbl_abs_uv <- tbl_abs_uv |>
set_header_df(mapping = typology, key = "col_keys") |>
merge_h(part = "header") |>
my_flextable(font_size = 10)
# fix NA
tbl_abs_uv <- flextable::compose(tbl_abs_uv, i = 2, j = c(3, 4, 6, 7), value = flextable::as_paragraph(" "))
tbl_abs_uv <- flextable::compose(tbl_abs_uv, i = 3, j = c(3, 4, 6, 7), value = flextable::as_paragraph("\u2014"))
tbl_abs_uv